mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-12-12 02:45:18 +00:00
[GitLab] Submodule expansion functionality added (#2014)
* [GitLab] Submodule expansion functionality added Functionality to include the submodule changes to the Model input added * Enhance GitLab submodule handling * docs: document GitLab submodule diff expansion * docs: document GitLab submodule diff option
This commit is contained in:
parent
2a5a84367c
commit
411f933a34
5 changed files with 324 additions and 47 deletions
|
|
@ -8,6 +8,7 @@ It also outlines our development roadmap for the upcoming three months. Please n
|
||||||
=== "Recent Updates"
|
=== "Recent Updates"
|
||||||
| Date | Feature | Description |
|
| Date | Feature | Description |
|
||||||
|---|---|---|
|
|---|---|---|
|
||||||
|
| 2025-08-27 | **GitLab submodule diff expansion** | Optionally expand GitLab submodule updates into full diffs. ([Learn more](https://qodo-merge-docs.qodo.ai/usage-guide/additional_configurations/#expand-gitlab-submodule-diffs)) |
|
||||||
| 2025-08-11 | **RAG support for GitLab** | All Qodo Merge RAG features are now available in GitLab. ([Learn more](https://qodo-merge-docs.qodo.ai/core-abilities/rag_context_enrichment/)) |
|
| 2025-08-11 | **RAG support for GitLab** | All Qodo Merge RAG features are now available in GitLab. ([Learn more](https://qodo-merge-docs.qodo.ai/core-abilities/rag_context_enrichment/)) |
|
||||||
| 2025-07-29 | **High-level Suggestions** | Qodo Merge now also provides high-level code suggestions for PRs. ([Learn more](https://qodo-merge-docs.qodo.ai/core-abilities/high_level_suggestions/)) |
|
| 2025-07-29 | **High-level Suggestions** | Qodo Merge now also provides high-level code suggestions for PRs. ([Learn more](https://qodo-merge-docs.qodo.ai/core-abilities/high_level_suggestions/)) |
|
||||||
| 2025-07-20 | **PR to Ticket** | Generate tickets in your tracking systems based on PR content. ([Learn more](https://qodo-merge-docs.qodo.ai/tools/pr_to_ticket/)) |
|
| 2025-07-20 | **PR to Ticket** | Generate tickets in your tracking systems based on PR content. ([Learn more](https://qodo-merge-docs.qodo.ai/tools/pr_to_ticket/)) |
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,17 @@ This will set the response language globally for all the commands to Italian.
|
||||||
[//]: # (which divides the PR into chunks, and processes each chunk separately. With this mode, regardless of the model, no compression will be done (but for large PRs, multiple model calls may occur))
|
[//]: # (which divides the PR into chunks, and processes each chunk separately. With this mode, regardless of the model, no compression will be done (but for large PRs, multiple model calls may occur))
|
||||||
|
|
||||||
|
|
||||||
|
## Expand GitLab submodule diffs
|
||||||
|
|
||||||
|
By default, GitLab merge requests show submodule updates as `Subproject commit` lines. To include the actual file-level changes from those submodules in Qodo Merge analysis, enable:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[gitlab]
|
||||||
|
expand_submodule_diffs = true
|
||||||
|
```
|
||||||
|
|
||||||
|
When enabled, Qodo Merge will fetch and attach diffs from the submodule repositories. The default is `false` to avoid extra GitLab API calls.
|
||||||
|
|
||||||
## Log Level
|
## Log Level
|
||||||
|
|
||||||
Qodo Merge allows you to control the verbosity of logging by using the `log_level` configuration parameter. This is particularly useful for troubleshooting and debugging issues with your PR workflows.
|
Qodo Merge allows you to control the verbosity of logging by using the `log_level` configuration parameter. This is particularly useful for troubleshooting and debugging issues with your PR workflows.
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,14 @@
|
||||||
import difflib
|
import difflib
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
import re
|
||||||
from typing import Optional, Tuple, Any, Union
|
import urllib.parse
|
||||||
from urllib.parse import urlparse, parse_qs
|
from typing import Any, Optional, Tuple, Union
|
||||||
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
|
||||||
import gitlab
|
import gitlab
|
||||||
import requests
|
import requests
|
||||||
from gitlab import GitlabGetError, GitlabAuthenticationError, GitlabCreateError, GitlabUpdateError
|
from gitlab import (GitlabAuthenticationError, GitlabCreateError,
|
||||||
|
GitlabGetError, GitlabUpdateError)
|
||||||
|
|
||||||
from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
|
from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
|
||||||
|
|
||||||
|
|
@ -67,12 +69,221 @@ class GitLabProvider(GitProvider):
|
||||||
self.diff_files = None
|
self.diff_files = None
|
||||||
self.git_files = None
|
self.git_files = None
|
||||||
self.temp_comments = []
|
self.temp_comments = []
|
||||||
|
self._submodule_cache: dict[tuple[str, str, str], list[dict]] = {}
|
||||||
self.pr_url = merge_request_url
|
self.pr_url = merge_request_url
|
||||||
self._set_merge_request(merge_request_url)
|
self._set_merge_request(merge_request_url)
|
||||||
self.RE_HUNK_HEADER = re.compile(
|
self.RE_HUNK_HEADER = re.compile(
|
||||||
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
||||||
self.incremental = incremental
|
self.incremental = incremental
|
||||||
|
|
||||||
|
# --- submodule expansion helpers (opt-in) ---
|
||||||
|
def _get_gitmodules_map(self) -> dict[str, str]:
|
||||||
|
"""
|
||||||
|
Return {submodule_path -> repo_url} from '.gitmodules' (best effort).
|
||||||
|
Tries target branch first, then source branch. Always returns text.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
proj = self.gl.projects.get(self.id_project)
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
import base64
|
||||||
|
|
||||||
|
def _read_text(ref: str | None) -> str | None:
|
||||||
|
if not ref:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
f = proj.files.get(file_path=".gitmodules", ref=ref)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 1) python-gitlab File.decode() – usually returns BYTES
|
||||||
|
try:
|
||||||
|
raw = f.decode()
|
||||||
|
if isinstance(raw, (bytes, bytearray)):
|
||||||
|
return raw.decode("utf-8", "ignore")
|
||||||
|
if isinstance(raw, str):
|
||||||
|
return raw
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 2) fallback: base64 decode f.content
|
||||||
|
try:
|
||||||
|
c = getattr(f, "content", None)
|
||||||
|
if c:
|
||||||
|
return base64.b64decode(c).decode("utf-8", "ignore")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
content = (
|
||||||
|
_read_text(getattr(self.mr, "target_branch", None))
|
||||||
|
or _read_text(getattr(self.mr, "source_branch", None))
|
||||||
|
)
|
||||||
|
if not content:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
import configparser
|
||||||
|
|
||||||
|
parser = configparser.ConfigParser(
|
||||||
|
delimiters=("=",),
|
||||||
|
interpolation=None,
|
||||||
|
inline_comment_prefixes=("#", ";"),
|
||||||
|
strict=False,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
parser.read_string(content)
|
||||||
|
except Exception:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
out: dict[str, str] = {}
|
||||||
|
for section in parser.sections():
|
||||||
|
if not section.lower().startswith("submodule"):
|
||||||
|
continue
|
||||||
|
path = parser.get(section, "path", fallback=None)
|
||||||
|
url = parser.get(section, "url", fallback=None)
|
||||||
|
if path and url:
|
||||||
|
path = path.strip().strip('"').strip("'")
|
||||||
|
url = url.strip().strip('"').strip("'")
|
||||||
|
out[path] = url
|
||||||
|
return out
|
||||||
|
|
||||||
|
def _url_to_project_path(self, url: str) -> str | None:
|
||||||
|
"""
|
||||||
|
Convert ssh/https GitLab URL to 'group/subgroup/repo' project path.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if url.startswith("git@") and ":" in url:
|
||||||
|
path = url.split(":", 1)[1]
|
||||||
|
else:
|
||||||
|
path = urllib.parse.urlparse(url).path.lstrip("/")
|
||||||
|
if path.endswith(".git"):
|
||||||
|
path = path[:-4]
|
||||||
|
return path or None
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _project_by_path(self, proj_path: str):
    """
    Look up a GitLab project from its 'group/subgroup/repo' path.

    Lookups are attempted in this order, stopping at the first success:
      1) projects.get() with the URL-encoded path
      2) projects.get() with the raw path
      3) projects.list() search on the last path segment (membership only),
         keeping only a case-insensitive exact match on path_with_namespace

    Every failure is swallowed; the method returns the project object, or
    None when ``proj_path`` is empty or no strategy resolves it.
    """
    if not proj_path:
        return None

    # Strategies 1 and 2. The ids are produced lazily so that an error while
    # encoding is swallowed exactly like a failed lookup.
    id_factories = (
        lambda: urllib.parse.quote_plus(proj_path),
        lambda: proj_path,
    )
    for make_id in id_factories:
        try:
            return self.gl.projects.get(make_id())
        except Exception:
            pass

    # Strategy 3: search by repository name, then insist on an exact path match.
    try:
        short_name = proj_path.split("/")[-1]
        # membership=True so we don't leak other people's repos
        candidates = self.gl.projects.list(search=short_name, simple=True, membership=True, per_page=100)
        for candidate in candidates:
            candidate_path = getattr(candidate, "path_with_namespace", "")
            if candidate_path.lower() != proj_path.lower():
                continue
            return self.gl.projects.get(candidate.id)
        if candidates:
            get_logger().warning(f"[submodule] no exact match for {proj_path} (skip)")
    except Exception:
        pass

    return None
|
||||||
|
|
||||||
|
def _compare_submodule(self, proj_path: str, old_sha: str, new_sha: str) -> list[dict]:
    """
    Return the diffs between two commits of a submodule project.

    The submodule project is resolved with _project_by_path() and compared via
    repository_compare(old_sha, new_sha). Results are memoized in
    self._submodule_cache under (proj_path, old_sha, new_sha); failed lookups
    and failed comparisons are memoized as an empty list too.

    Returns:
        The list stored under the "diffs" key of the comparison, or an empty
        list when the project cannot be resolved, the comparison is not a
        dict, or any exception is raised.
    """
    cache_key = (proj_path, old_sha, new_sha)
    try:
        return self._submodule_cache[cache_key]
    except KeyError:
        pass

    try:
        project = self._project_by_path(proj_path)
        if project is not None:
            comparison = project.repository_compare(old_sha, new_sha)
            found = (comparison.get("diffs", []) or []) if isinstance(comparison, dict) else []
            self._submodule_cache[cache_key] = found
            return found
        get_logger().warning(f"[submodule] resolve failed for {proj_path}")
    except Exception as e:
        get_logger().warning(f"[submodule] compare failed for {proj_path} {old_sha}..{new_sha}: {e}")

    # Resolution or comparison failed: remember the miss so it is not retried.
    self._submodule_cache[cache_key] = []
    return []
|
||||||
|
|
||||||
|
def _expand_submodule_changes(self, changes: list[dict]) -> list[dict]:
    """
    If enabled, expand 'Subproject commit' bumps into real file diffs from the submodule.

    For every change whose diff contains both a removed and an added
    'Subproject commit <sha>' line, the submodule URL is looked up in
    '.gitmodules', converted to a project path and compared between the two
    commits. Each resulting file diff is appended to the output with its
    paths prefixed by the submodule path. The original changes are kept.

    '.gitmodules' is fetched lazily, only once a submodule bump is actually
    found, so merge requests without submodule updates cost no extra API
    calls.

    Args:
        changes: The 'changes' list of a merge request (dicts with
            'old_path', 'new_path' and 'diff' keys).

    Returns:
        ``changes`` itself when the GITLAB.EXPAND_SUBMODULE_DIFFS setting is
        off, cannot be read, or '.gitmodules' yields no entries; otherwise a
        new list holding the original changes followed by the expanded
        submodule diffs. Soft-fails on any issue.
    """
    try:
        if not bool(get_settings().get("GITLAB.EXPAND_SUBMODULE_DIFFS", False)):
            return changes
    except Exception:
        return changes

    gitmodules: dict[str, str] | None = None  # loaded on first submodule bump
    out = list(changes)
    for ch in changes:
        patch = ch.get("diff") or ""
        if "Subproject commit" not in patch:
            continue

        # Extract old/new SHAs from the hunk (up to 64 hex chars for SHA-256 ids)
        old_m = re.search(r"^-Subproject commit ([0-9a-f]{7,64})", patch, re.M)
        new_m = re.search(r"^\+Subproject commit ([0-9a-f]{7,64})", patch, re.M)
        if not (old_m and new_m):
            continue
        old_sha, new_sha = old_m.group(1), new_m.group(1)

        if gitmodules is None:
            gitmodules = self._get_gitmodules_map()
            if not gitmodules:
                # Nothing can be resolved; nothing has been appended yet.
                return changes

        sub_path = ch.get("new_path") or ch.get("old_path") or ""
        repo_url = gitmodules.get(sub_path)
        if not repo_url:
            get_logger().warning(f"[submodule] no url for '{sub_path}' in .gitmodules (skip)")
            continue

        proj_path = self._url_to_project_path(repo_url)
        if not proj_path:
            get_logger().warning(f"[submodule] cannot parse project path from url '{repo_url}' (skip)")
            continue

        get_logger().info(f"[submodule] {sub_path} url={repo_url} -> proj_path={proj_path}")
        sub_diffs = self._compare_submodule(proj_path, old_sha, new_sha)
        for sd in sub_diffs:
            sd_diff = sd.get("diff") or ""
            sd_old = sd.get("old_path") or sd.get("a_path") or ""
            sd_new = sd.get("new_path") or sd.get("b_path") or sd_old
            out.append({
                "old_path": f"{sub_path}/{sd_old}" if sd_old else sub_path,
                "new_path": f"{sub_path}/{sd_new}" if sd_new else sub_path,
                "diff": sd_diff,
                "new_file": sd.get("new_file", False),
                "deleted_file": sd.get("deleted_file", False),
                "renamed_file": sd.get("renamed_file", False),
            })
    return out
|
||||||
|
|
||||||
def is_supported(self, capability: str) -> bool:
|
def is_supported(self, capability: str) -> bool:
|
||||||
if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments',
|
if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments',
|
||||||
|
|
@ -194,7 +405,9 @@ class GitLabProvider(GitProvider):
|
||||||
return self.diff_files
|
return self.diff_files
|
||||||
|
|
||||||
# filter files using [ignore] patterns
|
# filter files using [ignore] patterns
|
||||||
diffs_original = self.mr.changes()['changes']
|
raw_changes = self.mr.changes().get('changes', [])
|
||||||
|
raw_changes = self._expand_submodule_changes(raw_changes)
|
||||||
|
diffs_original = raw_changes
|
||||||
diffs = filter_ignored(diffs_original, 'gitlab')
|
diffs = filter_ignored(diffs_original, 'gitlab')
|
||||||
if diffs != diffs_original:
|
if diffs != diffs_original:
|
||||||
try:
|
try:
|
||||||
|
|
@ -264,7 +477,9 @@ class GitLabProvider(GitProvider):
|
||||||
|
|
||||||
def get_files(self) -> list:
    """
    Return the new paths of all files changed in the merge request.

    The merge request changes are fetched once, passed through
    _expand_submodule_changes(), and the resulting list of 'new_path' values
    (entries without one are dropped) is cached in self.git_files.
    """
    # Compare against None rather than truthiness: an empty list is a valid
    # cached result and must not trigger another API round-trip.
    if self.git_files is None:
        raw_changes = self.mr.changes().get('changes', [])
        raw_changes = self._expand_submodule_changes(raw_changes)
        self.git_files = [c.get('new_path') for c in raw_changes if c.get('new_path')]
    return self.git_files
|
||||||
|
|
||||||
def publish_description(self, pr_title: str, pr_body: str):
|
def publish_description(self, pr_title: str, pr_body: str):
|
||||||
|
|
@ -420,7 +635,9 @@ class GitLabProvider(GitProvider):
|
||||||
get_logger().exception(f"Failed to create comment in MR {self.id_mr}")
|
get_logger().exception(f"Failed to create comment in MR {self.id_mr}")
|
||||||
|
|
||||||
def get_relevant_diff(self, relevant_file: str, relevant_line_in_file: str) -> Optional[dict]:
|
def get_relevant_diff(self, relevant_file: str, relevant_line_in_file: str) -> Optional[dict]:
|
||||||
changes = self.mr.changes() # Retrieve the changes for the merge request once
|
_changes = self.mr.changes() # dict
|
||||||
|
_changes['changes'] = self._expand_submodule_changes(_changes.get('changes', []))
|
||||||
|
changes = _changes
|
||||||
if not changes:
|
if not changes:
|
||||||
get_logger().error('No changes found for the merge request.')
|
get_logger().error('No changes found for the merge request.')
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@ -276,6 +276,7 @@ push_commands = [
|
||||||
|
|
||||||
[gitlab]
|
[gitlab]
|
||||||
url = "https://gitlab.com"
|
url = "https://gitlab.com"
|
||||||
|
expand_submodule_diffs = false
|
||||||
pr_commands = [
|
pr_commands = [
|
||||||
"/describe --pr_description.final_update_message=false",
|
"/describe --pr_description.final_update_message=false",
|
||||||
"/review",
|
"/review",
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
import pytest
|
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from pr_agent.git_providers.gitlab_provider import GitLabProvider
|
import pytest
|
||||||
from gitlab import Gitlab
|
from gitlab import Gitlab
|
||||||
from gitlab.v4.objects import Project, ProjectFile
|
|
||||||
from gitlab.exceptions import GitlabGetError
|
from gitlab.exceptions import GitlabGetError
|
||||||
|
from gitlab.v4.objects import Project, ProjectFile
|
||||||
|
|
||||||
|
from pr_agent.git_providers.gitlab_provider import GitLabProvider
|
||||||
|
|
||||||
|
|
||||||
class TestGitLabProvider:
|
class TestGitLabProvider:
|
||||||
|
|
@ -145,3 +146,49 @@ class TestGitLabProvider:
|
||||||
result = gitlab_provider.get_pr_file_content("test.md", "main")
|
result = gitlab_provider.get_pr_file_content("test.md", "main")
|
||||||
|
|
||||||
assert result == expected
|
assert result == expected
|
||||||
|
|
||||||
|
def test_get_gitmodules_map_parsing(self, gitlab_provider, mock_project):
    """'.gitmodules' entries are parsed into {path: url}, with surrounding quotes removed."""
    # A '.gitmodules' file with one quoted and one unquoted submodule entry.
    gitmodules_file = MagicMock(ProjectFile)
    gitmodules_file.decode.return_value = (
        "[submodule \"libs/a\"]\n"
        " path = \"libs/a\"\n"
        " url = \"https://gitlab.com/a.git\"\n"
        "[submodule \"libs/b\"]\n"
        " path = libs/b\n"
        " url = git@gitlab.com:b.git\n"
    )
    mock_project.files.get.return_value = gitmodules_file

    # Wire the provider to the mocked project and merge request.
    gitlab_provider.gl.projects.get.return_value = mock_project
    gitlab_provider.id_project = "1"
    gitlab_provider.mr = MagicMock()
    gitlab_provider.mr.target_branch = "main"

    expected = {
        "libs/a": "https://gitlab.com/a.git",
        "libs/b": "git@gitlab.com:b.git",
    }
    assert gitlab_provider._get_gitmodules_map() == expected
|
||||||
|
|
||||||
|
def test_project_by_path_requires_exact_match(self, gitlab_provider):
    """A search hit whose path_with_namespace differs from the request is rejected."""
    # Both direct lookups fail, forcing the search fallback.
    gitlab_provider.gl.projects.get.reset_mock()
    gitlab_provider.gl.projects.get.side_effect = Exception("not found")

    # The search returns a project that merely ends with the requested path.
    near_miss = MagicMock()
    near_miss.path_with_namespace = "other/group/repo"
    gitlab_provider.gl.projects.list.return_value = [near_miss]

    resolved = gitlab_provider._project_by_path("group/repo")

    assert resolved is None
    # Only the encoded and raw lookups call get(); the near miss is never fetched.
    assert gitlab_provider.gl.projects.get.call_count == 2
|
||||||
|
|
||||||
|
def test_compare_submodule_cached(self, gitlab_provider):
    """A repeated comparison is served from the cache without new lookups."""
    submodule_project = MagicMock()
    submodule_project.repository_compare.return_value = {"diffs": [{"diff": "d"}]}

    with patch.object(gitlab_provider, "_project_by_path", return_value=submodule_project) as resolver:
        results = [
            gitlab_provider._compare_submodule("grp/repo", "old", "new")
            for _ in range(2)
        ]

    assert results[0] == results[1] == [{"diff": "d"}]
    resolver.assert_called_once_with("grp/repo")
    submodule_project.repository_compare.assert_called_once_with("old", "new")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue