Coverage for src/infra/epic_scope.py: 16%

68 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-01-04 04:43 +0000

1"""Epic scope analysis for computing related commits from child issues.""" 

2 

3from dataclasses import dataclass 

4from pathlib import Path 

5 

6from src.core.protocols import CommandRunnerPort 

7 

8 

9@dataclass 

10class ScopedCommits: 

11 """Result of scoped commit analysis.""" 

12 

13 commit_shas: list[str] 

14 commit_range: str | None # e.g., "abc123^..def456" 

15 commit_summary: str # Formatted commit list 

16 

17 

18class EpicScopeAnalyzer: 

19 """Analyzes epic scope by computing related commits from child issues.""" 

20 

21 def __init__(self, repo_path: Path, runner: CommandRunnerPort): 

22 """Initialize EpicScopeAnalyzer. 

23 

24 Args: 

25 repo_path: Path to the git repository. 

26 runner: CommandRunner instance for executing git commands. 

27 """ 

28 self._runner = runner 

29 

30 async def compute_scoped_commits( 

31 self, child_ids: set[str], blocker_ids: set[str] | None = None 

32 ) -> ScopedCommits: 

33 """Compute scoped commits with range and summary. 

34 

35 Collects all commits matching bd-<issue_id>: prefix for each child 

36 issue and blocker issue (remediation issues), skips merge commits, 

37 and returns a ScopedCommits result with commit list, range, and summary. 

38 

39 Args: 

40 child_ids: Set of child issue IDs. 

41 blocker_ids: Optional set of blocker issue IDs (e.g., remediation 

42 issues). Commits from these issues are also included in the 

43 scope to capture work done to address epic verification failures. 

44 

45 Returns: 

46 ScopedCommits containing commit SHAs, range hint, and formatted summary. 

47 """ 

48 commit_shas = await self._compute_commit_list(child_ids, blocker_ids) 

49 commit_range = await self._summarize_commit_range(commit_shas) 

50 commit_summary = await self._format_commit_summary(commit_shas) 

51 

52 return ScopedCommits( 

53 commit_shas=commit_shas, 

54 commit_range=commit_range, 

55 commit_summary=commit_summary, 

56 ) 

57 

58 async def _compute_commit_list( 

59 self, child_ids: set[str], blocker_ids: set[str] | None = None 

60 ) -> list[str]: 

61 """Compute commit list from child and blocker issue commits. 

62 

63 Args: 

64 child_ids: Set of child issue IDs. 

65 blocker_ids: Optional set of blocker issue IDs. 

66 

67 Returns: 

68 List of commit SHAs, or empty list if no commits found. 

69 """ 

70 # Combine child IDs and blocker IDs 

71 all_issue_ids = child_ids.copy() 

72 if blocker_ids: 

73 all_issue_ids.update(blocker_ids) 

74 

75 if not all_issue_ids: 

76 return [] 

77 

78 # Sort issue IDs for deterministic commit discovery order (Finding 4) 

79 sorted_issue_ids = sorted(all_issue_ids) 

80 

81 # Build batched git log command with multiple --grep patterns (Finding 2) 

82 # Use --fixed-strings to treat issue IDs as literals (Finding 1) 

83 cmd = [ 

84 "git", 

85 "log", 

86 "--oneline", 

87 "--no-merges", 

88 "--fixed-strings", # Treat patterns as literal strings, not regex 

89 "--format=%H", 

90 ] 

91 for issue_id in sorted_issue_ids: 

92 cmd.append(f"--grep=bd-{issue_id}:") 

93 

94 result = await self._runner.run_async(cmd) 

95 if not result.ok or not result.stdout.strip(): 

96 return [] 

97 

98 all_commits = result.stdout.strip().split("\n") 

99 

100 # Deduplicate commits while preserving order 

101 # A single commit may fix multiple child issues under the same epic 

102 unique_commits = list(dict.fromkeys(all_commits)) 

103 

104 return unique_commits 

105 

106 async def _summarize_commit_range(self, commits: list[str]) -> str | None: 

107 """Summarize commit range from a list of commit SHAs. 

108 

109 Returns a git range hint covering all commits, or None if timestamps 

110 cannot be retrieved. The agent still receives the authoritative commit 

111 list even when this returns None. 

112 

113 Note: For non-linear histories, the range may include unrelated commits. 

114 The authoritative commit list should be used for precise scoping. 

115 """ 

116 if not commits: 

117 return None 

118 

119 # Batch fetch timestamps for all commits in a single git command (Finding 2) 

120 result = await self._runner.run_async( 

121 ["git", "show", "-s", "--format=%H %ct", "--no-walk", *commits] 

122 ) 

123 if not result.ok: 

124 return None 

125 

126 timestamps: list[tuple[int, str]] = [] 

127 for line in result.stdout.strip().split("\n"): 

128 parts = line.strip().split() 

129 if len(parts) == 2 and parts[1].isdigit(): 

130 sha, ts = parts 

131 timestamps.append((int(ts), sha)) 

132 

133 # Only provide a range hint if we have timestamps for all commits. 

134 # The commits list is aggregated from multiple git log calls over an 

135 # unordered set of issue IDs, so we cannot assume any ordering without 

136 # timestamps. When timestamps are unavailable, return None and rely on 

137 # the authoritative commit list instead. 

138 if len(timestamps) < len(commits): 

139 return None 

140 timestamps.sort(key=lambda item: item[0]) 

141 base = timestamps[0][1] 

142 tip = timestamps[-1][1] 

143 if base == tip: 

144 return base 

145 # Check if base has a parent (not a root commit) before using base^ 

146 parent_check = await self._runner.run_async( 

147 ["git", "rev-parse", "--verify", f"{base}^", "--"] 

148 ) 

149 if parent_check.ok: 

150 # base has a parent, use base^..tip for inclusive range 

151 return f"{base}^..{tip}" 

152 else: 

153 # base is a root commit; return valid range syntax only. 

154 # The agent uses the authoritative commit list for precise scoping. 

155 # Note: base..tip excludes base, so agent should inspect base separately. 

156 return f"{base}..{tip}" 

157 

158 async def _format_commit_summary( 

159 self, commits: list[str], max_commits: int = 50 

160 ) -> str: 

161 """Format commit list with SHA and subject for prompts/issues. 

162 

163 Args: 

164 commits: List of commit SHAs to format. 

165 max_commits: Maximum number of commits to include (default 50). 

166 Prevents excessively large prompts/issue bodies. 

167 

168 Returns: 

169 Formatted commit summary string. 

170 """ 

171 if not commits: 

172 return "No commits found." 

173 

174 truncated = len(commits) > max_commits 

175 display_commits = commits[:max_commits] if truncated else commits 

176 

177 # Batch fetch commit summaries in a single git command (Finding 2) 

178 result = await self._runner.run_async( 

179 ["git", "show", "-s", "--format=%H %s", "--no-walk", *display_commits] 

180 ) 

181 

182 lines: list[str] = [] 

183 if result.ok and result.stdout.strip(): 

184 for line in result.stdout.strip().split("\n"): 

185 if line.strip(): 

186 lines.append(f"- {line.strip()}") 

187 else: 

188 # Fallback: just list the SHAs 

189 for commit in display_commits: 

190 lines.append(f"- {commit}") 

191 

192 if truncated: 

193 lines.append(f"\n[... {len(commits) - max_commits} more commits omitted]") 

194 

195 return "\n".join(lines)