diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 35e3111b..72a0624c 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -772,7 +772,8 @@ def try_fix_yaml(response_text: str, response_text_original="") -> dict: response_text_lines = response_text.split('\n') - keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:', 'label:'] + keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', + 'improved code:', 'label:', 'why:', 'suggestion_summary:'] keys_yaml = keys_yaml + keys_fix_yaml # first fallback - try to convert 'relevant line: ...' to relevant line: |-\n ...' @@ -847,12 +848,13 @@ def try_fix_yaml(response_text: str, if index_end == -1: index_end = len(response_text) response_text_copy = response_text[index_start:index_end].strip().strip('```yaml').strip('`').strip() - try: - data = yaml.safe_load(response_text_copy) - get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet") - return data - except: - pass + if response_text_copy: + try: + data = yaml.safe_load(response_text_copy) + get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet") + return data + except: + pass # fifth fallback - try to remove leading '+' (sometimes added by AI for 'existing code' and 'improved code') response_text_lines_copy = response_text_lines.copy() @@ -881,14 +883,18 @@ def try_fix_yaml(response_text: str, response_text_copy = copy.deepcopy(response_text) response_text_copy_lines = response_text_copy.split('\n') start_line = -1 + improve_sections = ['existing_code:', 'improved_code:', 'response:', 'why:'] + describe_sections = ['description:', 'title:', 'changes_diagram:', 'pr_files:', 'pr_ticket:'] for i, line in enumerate(response_text_copy_lines): - if 'existing_code:' in line or 'improved_code:' in line: + line_stripped = line.rstrip() + if any(key in line_stripped for key in (improve_sections+describe_sections)): start_line = i - elif line.endswith(': |') or line.endswith(': |-') or line.endswith(': |2') or line.endswith(':'): + elif line_stripped.endswith(': |') or line_stripped.endswith(': |-') or line_stripped.endswith(': |2') or any(line_stripped.endswith(key) for key in keys_yaml): start_line = -1 elif start_line != -1: response_text_copy_lines[i] = ' ' + line response_text_copy = '\n'.join(response_text_copy_lines) + response_text_copy = response_text_copy.replace(' |\n', ' |2\n') try: data = yaml.safe_load(response_text_copy) get_logger().info(f"Successfully parsed AI prediction after adding indent for sections of code blocks") @@ -896,6 +902,27 @@ def try_fix_yaml(response_text: str, except: pass + # eighth fallback - try to remove pipe chars at the root-level dicts + response_text_copy = copy.deepcopy(response_text) + response_text_copy = response_text_copy.lstrip('|\n') + try: + data = yaml.safe_load(response_text_copy) + get_logger().info(f"Successfully parsed AI prediction after removing pipe chars") + return data + except: + pass + + # ninth fallback - try to decode the response text with different encodings. GPT-5 can return text that is not utf-8 encoded. + encodings_to_try = ['latin-1', 'utf-16'] + for encoding in encodings_to_try: + try: + data = yaml.safe_load(response_text.encode(encoding).decode("utf-8")) + if data: + get_logger().info(f"Successfully parsed AI prediction after decoding with {encoding} encoding") + return data + except: + pass + # # sixth fallback - try to remove last lines # for i in range(1, len(response_text_lines)): # response_text_lines_tmp = '\n'.join(response_text_lines[:-i]) diff --git a/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml index a6763023..36b4d0dc 100644 --- a/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml @@ -63,10 +63,15 @@ Specific guidelines for generating code suggestions: - Don't suggest to add docstring, type hints, or comments, to remove unused imports, or to use more specific exception types. {%- else %} - Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list. -- Do not suggest to change packages version, add missing import statement, or declare undefined variable. +- DO NOT suggest the following: + - change packages version + - add missing import statement + - declare undefined variable, or remove unused variable + - use more specific exception types + - repeat changes already done in the PR code {%- endif %} +- Be aware that your input consists only of partial code segments (PR diff code), not the complete codebase. Therefore, avoid making suggestions that might duplicate existing functionality, and refrain from questioning code elements (such as variable declarations or import statements) that may be defined elsewhere in the codebase. - When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). For example: "verify that `user_id` is..." -- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or questioning code elements (like variables declarations or import statements) that may be defined elsewhere in the codebase. {%- if extra_instructions %} diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index ba51e1ec..2627401e 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -45,7 +45,7 @@ class FileDescription(BaseModel): class PRDescription(BaseModel): type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')") - description: str = Field(description="summarize the PR changes in up to four bullet points, each up to 8 words. For large PRs, add sub-bullets if needed. Order bullets by importance, with each bullet highlighting a key change group.") + description: str = Field(description="summarize the PR changes with 1-4 bullet points, each up to 8 words. For large PRs, add sub-bullets for each bullet if needed. Order bullets by importance, with each bullet highlighting a key change group.") title: str = Field(description="a concise and descriptive title that captures the PR's main theme") {%- if enable_pr_diagram %} changes_diagram: str = Field(description='a horizontal diagram that represents the main PR changes, in the format of a valid mermaid LR flowchart. The diagram should be concise and easy to read. Leave empty if no diagram is relevant. To create robust Mermaid diagrams, follow this two-step process: (1) Declare the nodes: nodeID["node description"]. (2) Then define the links: nodeID1 -- "link text" --> nodeID2. Node description must always be surrounded with double quotation marks') @@ -63,7 +63,8 @@ type: - ... - ... description: | - ... + - ... + - ... title: | ... {%- if enable_pr_diagram %} @@ -151,7 +152,8 @@ type: - Refactoring - ... description: | - ... + - ... + - ... title: | ... {%- if enable_pr_diagram %} diff --git a/tests/unittest/test_try_fix_yaml.py b/tests/unittest/test_try_fix_yaml.py index 98773c81..03823d7b 100644 --- a/tests/unittest/test_try_fix_yaml.py +++ b/tests/unittest/test_try_fix_yaml.py @@ -242,12 +242,5 @@ int sub(int a, int b) { return a - b; } ''' - expected_output = { - "code_suggestions": [ - { - "relevant_file": "a.c\n", - "existing_code": expected_code_block - } - ] - } + expected_output = {'code_suggestions': [{'relevant_file': 'a.c\n', 'existing_code': ' int sum(int a, int b) {\n return a + b;\n }\n\n int sub(int a, int b) {\n return a - b;\n }\n'}]} assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output