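# RepoMap builds a compact, ranked "map" of a repo for an LLM prompt:
# def/ref tags are extracted per file with tree-sitter, ranked with
# personalized PageRank, and rendered into a token-budgeted tree.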

import colorsys
import math
import os
import random
import shutil
import sqlite3
import sys
import time
import warnings
from collections import Counter, defaultdict, namedtuple
from importlib import resources
from pathlib import Path

from diskcache import Cache
from grep_ast import TreeContext, filename_to_lang
from pygments.lexers import guess_lexer_for_filename
from pygments.token import Token
from tqdm import tqdm
from tree_sitter import Query

from .dump import dump
from .special import filter_important_files
from .waiting import Spinner

# tree_sitter is throwing a FutureWarning
warnings.simplefilter("ignore", category=FutureWarning)
from grep_ast.tsl import USING_TSL_PACK, get_language, get_parser  # noqa: E402

Tag = namedtuple("Tag", "rel_fname fname line name kind".split())


SQLITE_ERRORS = (sqlite3.OperationalError, sqlite3.DatabaseError, OSError)


CACHE_VERSION = 3
if USING_TSL_PACK:
    CACHE_VERSION = 4

UPDATING_REPO_MAP_MESSAGE = "Updating repo map"


class RepoMap:
    TAGS_CACHE_DIR = f".aider.tags.cache.v{CACHE_VERSION}"

    warned_files = set()

    def __init__(
        self,
        map_tokens=1024,
        root=None,
        main_model=None,
        io=None,
        repo_content_prefix=None,
        verbose=False,
        max_context_window=None,
        map_mul_no_files=8,
        refresh="auto",
    ):
        self.io = io
        self.verbose = verbose
        self.refresh = refresh

        if not root:
            root = os.getcwd()
        self.root = root

        self.load_tags_cache()
        self.cache_threshold = 0.95

        self.max_map_tokens = map_tokens
        self.map_mul_no_files = map_mul_no_files
        self.max_context_window = max_context_window

        self.repo_content_prefix = repo_content_prefix

        self.main_model = main_model

        self.tree_cache = {}
        self.tree_context_cache = {}
        self.map_cache = {}
        self.map_processing_time = 0
        self.last_map = None

        if self.verbose:
            self.io.tool_output(
                f"RepoMap initialized with map_mul_no_files: {self.map_mul_no_files}"
            )

    def token_count(self, text):
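        """Estimate the token count of text.

        Short texts (under 200 chars) are counted exactly with the model's
        tokenizer. Longer texts are estimated by tokenizing a sample of
        roughly 100 evenly spaced lines and scaling by character length:
        e.g. a 10,000-char text whose 1,000-char sample yields 250 tokens
        is estimated at 250 / 1000 * 10000 = 2500 tokens.
        """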

        len_text = len(text)
        if len_text < 200:
            return self.main_model.token_count(text)

        lines = text.splitlines(keepends=True)
        num_lines = len(lines)
        step = num_lines // 100 or 1
        lines = lines[::step]
        sample_text = "".join(lines)
        sample_tokens = self.main_model.token_count(sample_text)
        est_tokens = sample_tokens / len(sample_text) * len_text
        return est_tokens

    def get_repo_map(
        self,
        chat_files,
        other_files,
        mentioned_fnames=None,
        mentioned_idents=None,
        force_refresh=False,
    ):
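        """Return a formatted repo map string for the prompt, or None.

        Returns None when maps are disabled (max_map_tokens <= 0), when
        there are no other files to map, or when map generation fails.
        With no files in the chat, the token budget is expanded (up to
        map_mul_no_files times, capped by the context window) to give a
        bigger view of the whole repo.
        """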

        if self.max_map_tokens <= 0:
            return
        if not other_files:
            return
        if not mentioned_fnames:
            mentioned_fnames = set()
        if not mentioned_idents:
            mentioned_idents = set()

        max_map_tokens = self.max_map_tokens

        # With no files in the chat, give a bigger view of the entire repo
        padding = 4096
        if max_map_tokens and self.max_context_window:
            target = min(
                int(max_map_tokens * self.map_mul_no_files),
                self.max_context_window - padding,
            )
        else:
            target = 0
        if not chat_files and self.max_context_window and target > 0:
            max_map_tokens = target

        try:
            files_listing = self.get_ranked_tags_map(
                chat_files,
                other_files,
                max_map_tokens,
                mentioned_fnames,
                mentioned_idents,
                force_refresh,
            )
        except RecursionError:
            self.io.tool_error("Disabling repo map, git repo too large?")
            self.max_map_tokens = 0
            return

        if not files_listing:
            return

        if self.verbose:
            num_tokens = self.token_count(files_listing)
            self.io.tool_output(f"Repo-map: {num_tokens / 1024:.1f} k-tokens")

        if chat_files:
            other = "other "
        else:
            other = ""

        if self.repo_content_prefix:
            repo_content = self.repo_content_prefix.format(other=other)
        else:
            repo_content = ""

        repo_content += files_listing

        return repo_content

    def get_rel_fname(self, fname):
        try:
            return os.path.relpath(fname, self.root)
        except ValueError:
            # Issue #1288: ValueError: path is on mount 'C:', start on mount 'D:'
            # Just return the full fname.
            return fname

    def tags_cache_error(self, original_error=None):
        """Handle SQLite errors by trying to recreate cache, falling back to dict if needed"""

        if self.verbose and original_error:
            self.io.tool_warning(f"Tags cache error: {str(original_error)}")

        if isinstance(getattr(self, "TAGS_CACHE", None), dict):
            return

        path = Path(self.root) / self.TAGS_CACHE_DIR

        # Try to recreate the cache
        try:
            # Delete existing cache dir
            if path.exists():
                shutil.rmtree(path)

            # Try to create new cache
            new_cache = Cache(path)

            # Test that it works
            test_key = "test"
            new_cache[test_key] = "test"
            _ = new_cache[test_key]
            del new_cache[test_key]

            # If we got here, the new cache works
            self.TAGS_CACHE = new_cache
            return

        except SQLITE_ERRORS as e:
            # If anything goes wrong, warn and fall back to dict
            self.io.tool_warning(
                f"Unable to use tags cache at {path}, falling back to memory cache"
            )
            if self.verbose:
                self.io.tool_warning(f"Cache recreation error: {str(e)}")

        self.TAGS_CACHE = dict()

    def load_tags_cache(self):
        path = Path(self.root) / self.TAGS_CACHE_DIR
        try:
            self.TAGS_CACHE = Cache(path)
        except SQLITE_ERRORS as e:
            self.tags_cache_error(e)

    def save_tags_cache(self):
        pass

    def get_mtime(self, fname):
        try:
            return os.path.getmtime(fname)
        except FileNotFoundError:
            self.io.tool_warning(f"File not found error: {fname}")

    def get_tags(self, fname, rel_fname):
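        """Return the tags for a file, using the mtime-keyed cache when possible.

        On a cache hit (stored mtime matches the file's current mtime) the
        cached tags are returned; otherwise tags are re-extracted with
        get_tags_raw and the cache is updated. SQLite errors trigger
        tags_cache_error, which rebuilds the cache or falls back to a dict.
        """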

        # Check if the file is in the cache and if the modification time has not changed
        file_mtime = self.get_mtime(fname)
        if file_mtime is None:
            return []

        cache_key = fname
        try:
            val = self.TAGS_CACHE.get(cache_key)  # Issue #1308
        except SQLITE_ERRORS as e:
            self.tags_cache_error(e)
            val = self.TAGS_CACHE.get(cache_key)

        if val is not None and val.get("mtime") == file_mtime:
            try:
                return self.TAGS_CACHE[cache_key]["data"]
            except SQLITE_ERRORS as e:
                self.tags_cache_error(e)
                return self.TAGS_CACHE[cache_key]["data"]

        # miss!
        data = list(self.get_tags_raw(fname, rel_fname))

        # Update the cache
        try:
            self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
            self.save_tags_cache()
        except SQLITE_ERRORS as e:
            self.tags_cache_error(e)
            self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}

        return data

    def _run_captures(self, query: Query, node):
        # tree-sitter 0.23.2's python bindings had captures directly on the Query object
        # but 0.24.0 moved it to a separate QueryCursor class. Support both.
        if hasattr(query, "captures"):
            # Old API
            return query.captures(node)

        # New API
        from tree_sitter import QueryCursor

        cursor = QueryCursor(query)
        return cursor.captures(node)

    def get_tags_raw(self, fname, rel_fname):
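        """Extract def/ref tags from a single source file.

        Parses the file with tree-sitter, runs the language's tags query,
        and yields a Tag for each name.definition.* or name.reference.*
        capture. If the query produced defs but no refs (some tags files,
        e.g. cpp, only provide defs), pygments Name tokens are yielded as
        refs with line=-1 to backfill the reference graph.
        """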

        lang = filename_to_lang(fname)
        if not lang:
            return

        try:
            language = get_language(lang)
            parser = get_parser(lang)
        except Exception as err:
            print(f"Skipping file {fname}: {err}")
            return

        query_scm = get_scm_fname(lang)
        if not query_scm or not query_scm.exists():
            return
        query_scm = query_scm.read_text()

        code = self.io.read_text(fname)
        if not code:
            return
        tree = parser.parse(bytes(code, "utf-8"))

        # Run the tags queries
        captures = self._run_captures(Query(language, query_scm), tree.root_node)

        captures_by_tag = defaultdict(list)
        matches = []
        for tag, nodes in captures.items():
            for node in nodes:
                captures_by_tag[tag].append(node)
                matches.append((node, tag))

        if USING_TSL_PACK:
            all_nodes = [(node, tag) for tag, nodes in captures_by_tag.items() for node in nodes]
        else:
            all_nodes = matches

        saw = set()
        for node, tag in all_nodes:
            if tag.startswith("name.definition."):
                kind = "def"
            elif tag.startswith("name.reference."):
                kind = "ref"
            else:
                continue

            saw.add(kind)

            result = Tag(
                rel_fname=rel_fname,
                fname=fname,
                name=node.text.decode("utf-8"),
                kind=kind,
                line=node.start_point[0],
            )

            yield result

        if "ref" in saw:
            return
        if "def" not in saw:
            return

        # We saw defs, without any refs
        # Some tags files only provide defs (cpp, for example)
        # Use pygments to backfill refs

        try:
            lexer = guess_lexer_for_filename(fname, code)
        except Exception:  # On Windows, bad ref to time.clock which is deprecated?
            # self.io.tool_error(f"Error lexing {fname}")
            return

        tokens = list(lexer.get_tokens(code))
        tokens = [token[1] for token in tokens if token[0] in Token.Name]

        for token in tokens:
            yield Tag(
                rel_fname=rel_fname,
                fname=fname,
                name=token,
                kind="ref",
                line=-1,
            )

    def get_ranked_tags(
        self, chat_fnames, other_fnames, mentioned_fnames, mentioned_idents, progress=None
    ):
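        """Rank definition tags across the repo with personalized PageRank.

        Builds a MultiDiGraph whose nodes are relative file paths and whose
        edges point from each referencing file to each defining file of an
        identifier, weighted by heuristic importance multipliers and the
        square root of the reference count. Files in the chat, mentioned
        files, and files whose path components match mentioned identifiers
        get personalization boosts. Each node's rank is then distributed
        across its out edges to produce ranked (file, ident) definitions.
        """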

        import networkx as nx

        defines = defaultdict(set)
        references = defaultdict(list)
        definitions = defaultdict(set)

        personalization = dict()

        fnames = set(chat_fnames).union(set(other_fnames))
        chat_rel_fnames = set()

        fnames = sorted(fnames)

        # Default personalization for unspecified files is 1/num_nodes
        # https://networkx.org/documentation/stable/_modules/networkx/algorithms/link_analysis/pagerank_alg.html#pagerank
        personalize = 100 / len(fnames)

        try:
            cache_size = len(self.TAGS_CACHE)
        except SQLITE_ERRORS as e:
            self.tags_cache_error(e)
            cache_size = len(self.TAGS_CACHE)

        if len(fnames) - cache_size > 100:
            self.io.tool_output(
                "Initial repo scan can be slow in larger repos, but only happens once."
            )
            fnames = tqdm(fnames, desc="Scanning repo")
            showing_bar = True
        else:
            showing_bar = False

        for fname in fnames:
            if self.verbose:
                self.io.tool_output(f"Processing {fname}")
            if progress and not showing_bar:
                progress(f"{UPDATING_REPO_MAP_MESSAGE}: {fname}")

            try:
                file_ok = Path(fname).is_file()
            except OSError:
                file_ok = False

            if not file_ok:
                if fname not in self.warned_files:
                    self.io.tool_warning(f"Repo-map can't include {fname}")
                    self.io.tool_output(
                        "Has it been deleted from the file system but not from git?"
                    )
                    self.warned_files.add(fname)
                continue

            # dump(fname)
            rel_fname = self.get_rel_fname(fname)
            current_pers = 0.0  # Start with 0 personalization score

            if fname in chat_fnames:
                current_pers += personalize
                chat_rel_fnames.add(rel_fname)

            if rel_fname in mentioned_fnames:
                # Use max to avoid double counting if in chat_fnames and mentioned_fnames
                current_pers = max(current_pers, personalize)

            # Check path components against mentioned_idents
            path_obj = Path(rel_fname)
            path_components = set(path_obj.parts)
            basename_with_ext = path_obj.name
            basename_without_ext, _ = os.path.splitext(basename_with_ext)
            components_to_check = path_components.union({basename_with_ext, basename_without_ext})

            matched_idents = components_to_check.intersection(mentioned_idents)
            if matched_idents:
                # Add personalization *once* if any path component matches a mentioned ident
                current_pers += personalize

            if current_pers > 0:
                personalization[rel_fname] = current_pers  # Assign the final calculated value

            tags = list(self.get_tags(fname, rel_fname))
            if tags is None:
                continue

            for tag in tags:
                if tag.kind == "def":
                    defines[tag.name].add(rel_fname)
                    key = (rel_fname, tag.name)
                    definitions[key].add(tag)

                elif tag.kind == "ref":
                    references[tag.name].append(rel_fname)

        ##
        # dump(defines)
        # dump(references)
        # dump(personalization)

        if not references:
            references = dict((k, list(v)) for k, v in defines.items())

        idents = set(defines.keys()).intersection(set(references.keys()))

        G = nx.MultiDiGraph()

        # Add a small self-edge for every definition that has no references
        # Helps with tree-sitter 0.23.2 with ruby, where "def greet(name)"
        # isn't counted as a def AND a ref. tree-sitter 0.24.0 does.
        for ident in defines.keys():
            if ident in references:
                continue
            for definer in defines[ident]:
                G.add_edge(definer, definer, weight=0.1, ident=ident)

        for ident in idents:
            if progress:
                progress(f"{UPDATING_REPO_MAP_MESSAGE}: {ident}")

            definers = defines[ident]

            mul = 1.0

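            # Heuristic importance multipliers: boost identifiers the user
            # mentioned and long snake_case/kebab-case/camelCase names (likely
            # meaningful API names); damp private names and identifiers
            # defined in many files (likely too generic to be distinctive).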

            is_snake = ("_" in ident) and any(c.isalpha() for c in ident)
            is_kebab = ("-" in ident) and any(c.isalpha() for c in ident)
            is_camel = any(c.isupper() for c in ident) and any(c.islower() for c in ident)
            if ident in mentioned_idents:
                mul *= 10
            if (is_snake or is_kebab or is_camel) and len(ident) >= 8:
                mul *= 10
            if ident.startswith("_"):
                mul *= 0.1
            if len(defines[ident]) > 5:
                mul *= 0.1

            for referencer, num_refs in Counter(references[ident]).items():
                for definer in definers:
                    # dump(referencer, definer, num_refs, mul)
                    # if referencer == definer:
                    #     continue

                    use_mul = mul
                    if referencer in chat_rel_fnames:
                        use_mul *= 50

                    # scale down so high freq (low value) mentions don't dominate
                    num_refs = math.sqrt(num_refs)

                    G.add_edge(referencer, definer, weight=use_mul * num_refs, ident=ident)

        if personalization:
            pers_args = dict(personalization=personalization, dangling=personalization)
        else:
            pers_args = dict()

        try:
            ranked = nx.pagerank(G, weight="weight", **pers_args)
        except ZeroDivisionError:
            # Issue #1536
            try:
                ranked = nx.pagerank(G, weight="weight")
            except ZeroDivisionError:
                return []

        # distribute the rank from each source node, across all of its out edges
        ranked_definitions = defaultdict(float)
        for src in G.nodes:
            if progress:
                progress(f"{UPDATING_REPO_MAP_MESSAGE}: {src}")

            src_rank = ranked[src]
            total_weight = sum(data["weight"] for _src, _dst, data in G.out_edges(src, data=True))
            # dump(src, src_rank, total_weight)
            for _src, dst, data in G.out_edges(src, data=True):
                data["rank"] = src_rank * data["weight"] / total_weight
                ident = data["ident"]
                ranked_definitions[(dst, ident)] += data["rank"]

        ranked_tags = []
        ranked_definitions = sorted(
            ranked_definitions.items(), reverse=True, key=lambda x: (x[1], x[0])
        )

        # dump(ranked_definitions)

        for (fname, ident), rank in ranked_definitions:
            # print(f"{rank:.03f} {fname} {ident}")
            if fname in chat_rel_fnames:
                continue
            ranked_tags += list(definitions.get((fname, ident), []))

        rel_other_fnames_without_tags = set(self.get_rel_fname(fname) for fname in other_fnames)

        fnames_already_included = set(rt[0] for rt in ranked_tags)

        top_rank = sorted([(rank, node) for (node, rank) in ranked.items()], reverse=True)
        for rank, fname in top_rank:
            if fname in rel_other_fnames_without_tags:
                rel_other_fnames_without_tags.remove(fname)
            if fname not in fnames_already_included:
                ranked_tags.append((fname,))

        for fname in rel_other_fnames_without_tags:
            ranked_tags.append((fname,))

        return ranked_tags

    def get_ranked_tags_map(
        self,
        chat_fnames,
        other_fnames=None,
        max_map_tokens=None,
        mentioned_fnames=None,
        mentioned_idents=None,
        force_refresh=False,
    ):
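        """Return a cached or freshly generated repo map string.

        Behavior depends on self.refresh: "manual" reuses the last map,
        "always" regenerates, "files" reuses any cached result for the same
        file sets, and "auto" reuses the cache only when the previous map
        took more than a second to compute. force_refresh bypasses all of
        this and regenerates.
        """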

        # Create a cache key
        cache_key = [
            tuple(sorted(chat_fnames)) if chat_fnames else None,
            tuple(sorted(other_fnames)) if other_fnames else None,
            max_map_tokens,
        ]

        if self.refresh == "auto":
            cache_key += [
                tuple(sorted(mentioned_fnames)) if mentioned_fnames else None,
                tuple(sorted(mentioned_idents)) if mentioned_idents else None,
            ]
        cache_key = tuple(cache_key)

        use_cache = False
        if not force_refresh:
            if self.refresh == "manual" and self.last_map:
                return self.last_map

            if self.refresh == "always":
                use_cache = False
            elif self.refresh == "files":
                use_cache = True
            elif self.refresh == "auto":
                use_cache = self.map_processing_time > 1.0

            # Check if the result is in the cache
            if use_cache and cache_key in self.map_cache:
                return self.map_cache[cache_key]

        # If not in cache or force_refresh is True, generate the map
        start_time = time.time()
        result = self.get_ranked_tags_map_uncached(
            chat_fnames, other_fnames, max_map_tokens, mentioned_fnames, mentioned_idents
        )
        end_time = time.time()
        self.map_processing_time = end_time - start_time

        # Store the result in the cache
        self.map_cache[cache_key] = result
        self.last_map = result

        return result

    def get_ranked_tags_map_uncached(
        self,
        chat_fnames,
        other_fnames=None,
        max_map_tokens=None,
        mentioned_fnames=None,
        mentioned_idents=None,
    ):
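        """Generate a repo map string sized to the token budget.

        Ranks tags via get_ranked_tags, prepends important files
        (filter_important_files) that were not already ranked, then binary
        searches over how many top tags to render, keeping the largest tree
        that fits max_map_tokens and stopping early once a tree lands
        within 15% of the budget.
        """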

        if not other_fnames:
            other_fnames = list()
        if not max_map_tokens:
            max_map_tokens = self.max_map_tokens
        if not mentioned_fnames:
            mentioned_fnames = set()
        if not mentioned_idents:
            mentioned_idents = set()

        spin = Spinner(UPDATING_REPO_MAP_MESSAGE)

        ranked_tags = self.get_ranked_tags(
            chat_fnames,
            other_fnames,
            mentioned_fnames,
            mentioned_idents,
            progress=spin.step,
        )

        other_rel_fnames = sorted(set(self.get_rel_fname(fname) for fname in other_fnames))
        special_fnames = filter_important_files(other_rel_fnames)
        ranked_tags_fnames = set(tag[0] for tag in ranked_tags)
        special_fnames = [fn for fn in special_fnames if fn not in ranked_tags_fnames]
        special_fnames = [(fn,) for fn in special_fnames]

        ranked_tags = special_fnames + ranked_tags

        spin.step()

        num_tags = len(ranked_tags)
        lower_bound = 0
        upper_bound = num_tags
        best_tree = None
        best_tree_tokens = 0

        chat_rel_fnames = set(self.get_rel_fname(fname) for fname in chat_fnames)

        self.tree_cache = dict()

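        # Binary search over the number of ranked tags to include; the
        # initial guess assumes roughly 25 tokens per rendered tag.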

        middle = min(int(max_map_tokens // 25), num_tags)
        while lower_bound <= upper_bound:
            # dump(lower_bound, middle, upper_bound)

            if middle > 1500:
                show_tokens = f"{middle / 1000.0:.1f}K"
            else:
                show_tokens = str(middle)
            spin.step(f"{UPDATING_REPO_MAP_MESSAGE}: {show_tokens} tokens")

            tree = self.to_tree(ranked_tags[:middle], chat_rel_fnames)
            num_tokens = self.token_count(tree)

            pct_err = abs(num_tokens - max_map_tokens) / max_map_tokens
            ok_err = 0.15
            if (num_tokens <= max_map_tokens and num_tokens > best_tree_tokens) or pct_err < ok_err:
                best_tree = tree
                best_tree_tokens = num_tokens

                if pct_err < ok_err:
                    break

            if num_tokens < max_map_tokens:
                lower_bound = middle + 1
            else:
                upper_bound = middle - 1

            middle = int((lower_bound + upper_bound) // 2)

        spin.end()
        return best_tree

    tree_cache = dict()

    def render_tree(self, abs_fname, rel_fname, lois):
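        """Render the context tree for one file, expanded around its lines of interest.

        Results are memoized two ways: tree_cache keys on
        (rel_fname, lois, mtime) for the formatted output, and
        tree_context_cache reuses the parsed TreeContext per file until
        the file's mtime changes.
        """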

        mtime = self.get_mtime(abs_fname)
        key = (rel_fname, tuple(sorted(lois)), mtime)

        if key in self.tree_cache:
            return self.tree_cache[key]

        if (
            rel_fname not in self.tree_context_cache
            or self.tree_context_cache[rel_fname]["mtime"] != mtime
        ):
            code = self.io.read_text(abs_fname) or ""
            if not code.endswith("\n"):
                code += "\n"

            context = TreeContext(
                rel_fname,
                code,
                color=False,
                line_number=False,
                child_context=False,
                last_line=False,
                margin=0,
                mark_lois=False,
                loi_pad=0,
                # header_max=30,
                show_top_of_file_parent_scope=False,
            )
            self.tree_context_cache[rel_fname] = {"context": context, "mtime": mtime}

        context = self.tree_context_cache[rel_fname]["context"]
        context.lines_of_interest = set()
        context.add_lines_of_interest(lois)
        context.add_context()
        res = context.format()
        self.tree_cache[key] = res
        return res

    def to_tree(self, tags, chat_rel_fnames):
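        """Format ranked tags into the final repo map text.

        Tags are grouped by file; files with Tag entries are rendered via
        render_tree with their definition lines as lines of interest, while
        bare (fname,) tuples are listed by name only. Files already in the
        chat are skipped, and output lines are truncated to 100 characters.
        """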

        if not tags:
            return ""

        cur_fname = None
        cur_abs_fname = None
        lois = None
        output = ""

        # add a bogus tag at the end so we trip the this_fname != cur_fname...
        dummy_tag = (None,)
        for tag in sorted(tags) + [dummy_tag]:
            this_rel_fname = tag[0]
            if this_rel_fname in chat_rel_fnames:
                continue

            # ... here ... to output the final real entry in the list
            if this_rel_fname != cur_fname:
                if lois is not None:
                    output += "\n"
                    output += cur_fname + ":\n"
                    output += self.render_tree(cur_abs_fname, cur_fname, lois)
                    lois = None
                elif cur_fname:
                    output += "\n" + cur_fname + "\n"
                if type(tag) is Tag:
                    lois = []
                    cur_abs_fname = tag.fname
                cur_fname = this_rel_fname

            if lois is not None:
                lois.append(tag.line)

        # truncate long lines, in case we get minified js or something else crazy
        output = "\n".join([line[:100] for line in output.splitlines()]) + "\n"

        return output


def find_src_files(directory):
    if not os.path.isdir(directory):
        return [directory]

    src_files = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            src_files.append(os.path.join(root, file))
    return src_files


def get_random_color():
    hue = random.random()
    r, g, b = [int(x * 255) for x in colorsys.hsv_to_rgb(hue, 1, 0.75)]
    res = f"#{r:02x}{g:02x}{b:02x}"
    return res


def get_scm_fname(lang):
    # Load the tags queries
    if USING_TSL_PACK:
        subdir = "tree-sitter-language-pack"
        try:
            path = resources.files(__package__).joinpath(
                "queries",
                subdir,
                f"{lang}-tags.scm",
            )
            if path.exists():
                return path
        except KeyError:
            pass

    # Fall back to tree-sitter-languages
    subdir = "tree-sitter-languages"
    try:
        return resources.files(__package__).joinpath(
            "queries",
            subdir,
            f"{lang}-tags.scm",
        )
    except KeyError:
        return


def get_supported_languages_md():
    from grep_ast.parsers import PARSERS

    res = """
| Language | File extension | Repo map | Linter |
|:--------:|:--------------:|:--------:|:------:|
"""
    data = sorted((lang, ex) for ex, lang in PARSERS.items())

    for lang, ext in data:
        fn = get_scm_fname(lang)
        repo_map = "✓" if fn and Path(fn).exists() else ""
        linter_support = "✓"
        res += f"| {lang:20} | {ext:20} | {repo_map:^8} | {linter_support:^6} |\n"

    res += "\n"

    return res


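# Minimal CLI entry point, e.g.: python repomap.py <files-or-dirs>
# Directories are expanded into their files; everything is mapped as chat files.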

if __name__ == "__main__":
    fnames = sys.argv[1:]

    chat_fnames = []
    other_fnames = []
    for fname in fnames:
        if Path(fname).is_dir():
            chat_fnames += find_src_files(fname)
        else:
            chat_fnames.append(fname)

    rm = RepoMap(root=".")
    repo_map = rm.get_ranked_tags_map(chat_fnames, other_fnames)

    dump(len(repo_map))
    print(repo_map)