Coverage for src / infra / epic_scope.py: 16%
68 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
1"""Epic scope analysis for computing related commits from child issues."""
3from dataclasses import dataclass
4from pathlib import Path
6from src.core.protocols import CommandRunnerPort
@dataclass
class ScopedCommits:
    """Result of scoped commit analysis.

    Attributes:
        commit_shas: SHAs of the commits that belong to the scope.
        commit_range: Git range hint such as "abc123^..def456", or None
            when no range hint is available.
        commit_summary: Formatted commit list.
    """

    commit_shas: list[str]
    commit_range: str | None
    commit_summary: str
class EpicScopeAnalyzer:
    """Analyzes epic scope by computing related commits from child issues.

    Every git invocation is delegated to the injected runner's ``run_async``.
    """

    def __init__(self, repo_path: Path, runner: CommandRunnerPort):
        """Initialize EpicScopeAnalyzer.

        Args:
            repo_path: Path to the git repository.
            runner: CommandRunner instance for executing git commands.
        """
        # The git commands below are issued without an explicit working
        # directory, so the runner presumably already targets this repository
        # -- TODO confirm against CommandRunnerPort. The path is retained
        # rather than silently dropped.
        self._repo_path = repo_path
        self._runner = runner

    async def compute_scoped_commits(
        self, child_ids: set[str], blocker_ids: set[str] | None = None
    ) -> ScopedCommits:
        """Compute scoped commits with range and summary.

        Collects all non-merge commits whose message contains
        ``bd-<issue_id>:`` for each child issue and blocker issue
        (remediation issues), then derives a range hint and a formatted
        summary from that list.

        Args:
            child_ids: Set of child issue IDs.
            blocker_ids: Optional set of blocker issue IDs (e.g., remediation
                issues). Commits from these issues are also included in the
                scope to capture work done to address epic verification
                failures.

        Returns:
            ScopedCommits containing commit SHAs, range hint, and formatted
            summary.
        """
        commit_shas = await self._compute_commit_list(child_ids, blocker_ids)
        commit_range = await self._summarize_commit_range(commit_shas)
        commit_summary = await self._format_commit_summary(commit_shas)

        return ScopedCommits(
            commit_shas=commit_shas,
            commit_range=commit_range,
            commit_summary=commit_summary,
        )

    async def _compute_commit_list(
        self, child_ids: set[str], blocker_ids: set[str] | None = None
    ) -> list[str]:
        """Compute commit list from child and blocker issue commits.

        Args:
            child_ids: Set of child issue IDs.
            blocker_ids: Optional set of blocker issue IDs.

        Returns:
            List of unique commit SHAs in the order git reported them, or an
            empty list if there are no issue IDs, the git command fails, or
            nothing matches.
        """
        # Work on a copy so the caller's set is never mutated.
        all_issue_ids = set(child_ids)
        if blocker_ids:
            all_issue_ids.update(blocker_ids)

        if not all_issue_ids:
            return []

        # One batched `git log` with a --grep per issue (git ORs multiple
        # --grep patterns). IDs are sorted so the command line is
        # deterministic. --fixed-strings makes each ID a literal rather than
        # a regex; note that --grep matches anywhere in the commit message,
        # not only at its start. `--oneline` is intentionally absent: the
        # explicit --format=%H would override it anyway.
        cmd = [
            "git",
            "log",
            "--no-merges",
            "--fixed-strings",
            "--format=%H",
            *(f"--grep=bd-{issue_id}:" for issue_id in sorted(all_issue_ids)),
        ]

        result = await self._runner.run_async(cmd)
        if not result.ok:
            return []

        # Strip each line (drops stray CR / blank lines) and de-duplicate
        # while preserving order; a single commit may reference several
        # issues under the same epic.
        shas = (line.strip() for line in result.stdout.split("\n"))
        return list(dict.fromkeys(sha for sha in shas if sha))

    async def _summarize_commit_range(self, commits: list[str]) -> str | None:
        """Summarize commit range from a list of commit SHAs.

        Returns a git range hint covering all commits, or None if timestamps
        cannot be retrieved. The agent still receives the authoritative commit
        list even when this returns None.

        Note: For non-linear histories, the range may include unrelated
        commits. The authoritative commit list should be used for precise
        scoping.

        Args:
            commits: Commit SHAs to cover.

        Returns:
            The single SHA when only one distinct commit is involved,
            ``"<base>^..<tip>"`` when the oldest commit has a parent,
            ``"<base>..<tip>"`` when it is a root commit, or None when the
            list is empty, git fails, or a timestamp is missing.
        """
        if not commits:
            return None

        # Batch fetch the committer timestamp of every commit in one call.
        result = await self._runner.run_async(
            ["git", "show", "-s", "--format=%H %ct", "--no-walk", *commits]
        )
        if not result.ok:
            return None

        timestamps: list[tuple[int, str]] = []
        for line in result.stdout.split("\n"):
            parts = line.split()
            if len(parts) == 2 and parts[1].isdigit():
                sha, ts = parts
                timestamps.append((int(ts), sha))

        # Only provide a range hint if we have a timestamp for every commit.
        # The input order is whatever the caller supplied and is not assumed
        # to be chronological, so without complete timestamps the endpoints
        # cannot be chosen; rely on the authoritative commit list instead.
        if len(timestamps) < len(commits):
            return None

        # Stable sort on the timestamp only: ties keep git's output order.
        timestamps.sort(key=lambda item: item[0])
        base = timestamps[0][1]
        tip = timestamps[-1][1]
        if base == tip:
            return base

        # Check if base has a parent (not a root commit) before using base^.
        parent_check = await self._runner.run_async(
            ["git", "rev-parse", "--verify", f"{base}^", "--"]
        )
        if parent_check.ok:
            # base has a parent, use base^..tip for an inclusive range.
            return f"{base}^..{tip}"

        # base is a root commit; return valid range syntax only.
        # Note: base..tip excludes base, so the consumer should inspect base
        # separately using the authoritative commit list.
        return f"{base}..{tip}"

    async def _format_commit_summary(
        self, commits: list[str], max_commits: int = 50
    ) -> str:
        """Format commit list with SHA and subject for prompts/issues.

        Args:
            commits: List of commit SHAs to format.
            max_commits: Maximum number of commits to include (default 50).
                Prevents excessively large prompts/issue bodies. Values
                below zero are treated as zero.

        Returns:
            Formatted commit summary string: one ``- <sha> <subject>`` line
            per displayed commit (or ``- <sha>`` if git cannot supply
            subjects), followed by an omission note when the list was
            truncated. ``"No commits found."`` for an empty list.
        """
        if not commits:
            return "No commits found."

        # Clamp so a negative limit cannot turn into a from-the-end slice.
        limit = max(max_commits, 0)
        display_commits = commits[:limit]
        omitted = len(commits) - len(display_commits)

        lines: list[str] = []
        # Never call `git show` with an empty revision list: git would then
        # describe HEAD, injecting an unrelated commit into the summary.
        if display_commits:
            # Batch fetch commit subjects in a single git command.
            result = await self._runner.run_async(
                ["git", "show", "-s", "--format=%H %s", "--no-walk", *display_commits]
            )
            if result.ok and result.stdout.strip():
                lines = [
                    f"- {line.strip()}"
                    for line in result.stdout.strip().split("\n")
                    if line.strip()
                ]
            else:
                # Fallback: just list the SHAs.
                lines = [f"- {commit}" for commit in display_commits]

        if omitted:
            lines.append(f"\n[... {omitted} more commits omitted]")

        return "\n".join(lines)