import os
from pathlib import Path

from grep_ast import TreeContext
from grep_ast.parsers import PARSERS
# from pygments.lexers import guess_lexer_for_filename
# from pygments.token import Token
from tree_sitter_languages import get_language, get_parser


class FileSummary:
    """
    Summarizes the content of a file using tree-sitter queries.
    Supported languages: C, C++, C#, elisp, elixir, go, java, javascript, ocaml, php, python, ql, ruby, rust, typescript.
    """
    def __init__(self, fname_full_path: str, project_base_path, parent_context=True, child_context=False, header_max=0):
        self.fname_full_path = fname_full_path
        self.project_base_path = project_base_path
        self.fname_rel = os.path.relpath(fname_full_path, project_base_path)
        # Packaged tree-sitter tag queries (../queries relative to this file's directory)
        self.main_queries_path = Path(__file__).parent.parent / 'queries'
        if not os.path.exists(fname_full_path):
            print(f"File {fname_full_path} does not exist")
        with open(fname_full_path, "r") as f:
            code = f.read()
        if not code.endswith("\n"):
            code += "\n"
        self.code = code
        self.parent_context = parent_context
        self.child_context = child_context
        self.header_max = header_max

    def summarize(self):
        query_results = self.get_query_results()
        summary_str = self.query_processing(query_results)
        return summary_str

    def render_tree_sitter(self, lines_of_interest: list):
        code = self.code
        fname_rel = self.fname_rel
        context = TreeContext(
            fname_rel,
            code,
            color=False,
            line_number=True,  # number the lines (1-indexed)
            parent_context=self.parent_context,
            child_context=self.child_context,
            last_line=False,
            margin=0,
            mark_lois=False,
            loi_pad=0,
            header_max=self.header_max,  # max number of lines to show in a function header
            show_top_of_file_parent_scope=False,
        )

        context.lines_of_interest = set()
        context.add_lines_of_interest(lines_of_interest)
        context.add_context()
        res = context.format()
        return res

    def query_processing(self, query_results: list):
        if not query_results:
            return ""

        output = ""
        def_lines = [q['line'] for q in query_results if q['kind'] == "def"]
        output += "\n"
        output += query_results[0]['fname'] + ":\n"
        output += self.render_tree_sitter(def_lines)
        return output

    def get_queries_scheme(self, lang) -> str:
        # Load the relevant queries
        try:
            path = os.path.join(self.main_queries_path, f"tree-sitter-{lang}-tags.scm")
            with open(path, "r") as f:
                return f.read()
        except (KeyError, FileNotFoundError):
            # A missing .scm file raises FileNotFoundError, not KeyError
            return ""

    def filename_to_lang(self, filename):
        file_extension = os.path.splitext(filename)[1]
        lang = PARSERS.get(file_extension)
        return lang

    def get_query_results(self):
        fname_rel = self.fname_rel
        code = self.code
        lang = self.filename_to_lang(fname_rel)
        if not lang:
            return

        try:
            language = get_language(lang)
            parser = get_parser(lang)
        except Exception as err:
            print(f"Skipping file {fname_rel}: {err}")
            return

        query_scheme_str = self.get_queries_scheme(lang)
        tree = parser.parse(bytes(code, "utf-8"))

        # Run the queries
        query = language.query(query_scheme_str)
        captures = list(query.captures(tree.root_node))

        # Parse the results into a list of "def" and "ref" tags
        visited_set = set()
        results = []
        for node, tag in captures:
            if tag.startswith("name.definition."):
                kind = "def"
            elif tag.startswith("name.reference."):
                kind = "ref"
            else:
                continue

            visited_set.add(kind)
            result = dict(
                fname=fname_rel,
                name=node.text.decode("utf-8"),
                kind=kind,
                line=node.start_point[0],
            )
            results.append(result)

        if "ref" in visited_set:
            return results
        if "def" not in visited_set:
            return results

        ## currently we are interested only in defs
        # # We saw defs, without any refs
        # # Some files only provide defs (cpp, for example)
        # # Use pygments to backfill refs
        # try:
        #     lexer = guess_lexer_for_filename(fname, code)
        # except Exception:
        #     return
        #
        # tokens = list(lexer.get_tokens(code))
        # tokens = [token[1] for token in tokens if token[0] in Token.Name]
        #
        # for token in tokens:
        #     result = dict(
        #         fname=fname,
        #         name=token,
        #         kind="ref",
        #         line=-1,
        #     )
        #     results.append(result)

        return results
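

# Example usage (a minimal sketch, not part of the original module). It assumes the
# tag-query files exist under ../queries relative to this file's directory, and that
# the target file lives inside the given project base path.
if __name__ == "__main__":
    import sys

    if len(sys.argv) == 3:
        file_summary = FileSummary(sys.argv[1], sys.argv[2])
        print(file_summary.summarize())
    else:
        print("Usage: python file_summary.py <file_full_path> <project_base_path>")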