Coverage for little_loops / issue_discovery / extraction.py: 0%

106 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-03-18 16:18 -0500

1"""Git history analysis and regression detection for issue discovery.""" 

2 

3from __future__ import annotations 

4 

5import re 

6import subprocess 

7from datetime import datetime 

8from pathlib import Path 

9from typing import TYPE_CHECKING 

10 

11from little_loops.issue_discovery.matching import ( 

12 MatchClassification, 

13 RegressionEvidence, 

14) 

15 

16if TYPE_CHECKING: 

17 from little_loops.config import BRConfig 

18 

19 

20# ============================================================================= 

21# Git History Analysis 

22# ============================================================================= 

23 

24 

25def _extract_fix_commit(content: str) -> str | None: 

26 """Extract fix commit SHA from issue Resolution section. 

27 

28 Args: 

29 content: Issue file content 

30 

31 Returns: 

32 Fix commit SHA if found, None otherwise 

33 """ 

34 # Look for "Fix Commit: <sha>" pattern in Resolution section 

35 match = re.search(r"\*\*Fix Commit\*\*:\s*([a-f0-9]{7,40})", content) 

36 if match: 

37 return match.group(1) 

38 return None 

39 

40 

41def _extract_files_changed(content: str) -> list[str]: 

42 """Extract files changed from issue Resolution section. 

43 

44 Args: 

45 content: Issue file content 

46 

47 Returns: 

48 List of file paths that were changed to fix the issue 

49 """ 

50 files: list[str] = [] 

51 

52 # Look for Files Changed section 

53 section_match = re.search( 

54 r"###\s*Files Changed\s*\n(.*?)(?=\n###|\n##|\Z)", 

55 content, 

56 re.DOTALL, 

57 ) 

58 if section_match: 

59 section = section_match.group(1) 

60 # Extract backtick-quoted paths: `path/to/file.py` 

61 for match in re.finditer(r"`([^`]+)`", section): 

62 path = match.group(1).strip() 

63 if path and not path.startswith("See "): # Skip placeholder text 

64 files.append(path) 

65 

66 return files 

67 

68 

69def _extract_completion_date(content: str) -> datetime | None: 

70 """Extract completion/closed date from issue Resolution section. 

71 

72 Args: 

73 content: Issue file content 

74 

75 Returns: 

76 Completion date if found, None otherwise 

77 """ 

78 # Look for "Completed: YYYY-MM-DD" or "Closed: YYYY-MM-DD" 

79 match = re.search(r"\*\*(?:Completed|Closed)\*\*:\s*(\d{4}-\d{2}-\d{2})", content) 

80 if match: 

81 try: 

82 return datetime.strptime(match.group(1), "%Y-%m-%d") 

83 except ValueError: 

84 return None 

85 return None 

86 

87 

def _commit_exists_in_history(commit_sha: str) -> bool:
    """Report whether git resolves the given SHA to a commit object.

    Runs ``git cat-file -t <sha>`` in the current working directory.

    Args:
        commit_sha: SHA (full or abbreviated) of the commit to look up.

    Returns:
        True only when git exits successfully and reports the object type
        ``commit``; False otherwise.
    """
    probe = subprocess.run(
        ["git", "cat-file", "-t", commit_sha],
        capture_output=True,
        text=True,
    )
    if probe.returncode != 0:
        return False
    return probe.stdout.strip() == "commit"

103 

104 

105def _get_files_modified_since_commit( 

106 since_commit: str, 

107 target_files: list[str], 

108) -> tuple[list[str], list[str]]: 

109 """Find which target files have been modified since a given commit. 

110 

111 Uses a single batched git log call instead of per-file subprocess calls. 

112 

113 Args: 

114 since_commit: SHA of the commit to check since 

115 target_files: List of file paths to check 

116 

117 Returns: 

118 Tuple of (modified_files, related_commits) where: 

119 - modified_files: Target files that were modified after the commit 

120 - related_commits: SHAs of commits that modified the target files 

121 """ 

122 if not target_files: 

123 return [], [] 

124 

125 # Single batched git log call with all file paths 

126 result = subprocess.run( 

127 ["git", "log", "--pretty=format:%H", "--name-only", f"{since_commit}..HEAD", "--"] 

128 + target_files, 

129 capture_output=True, 

130 text=True, 

131 ) 

132 

133 if result.returncode != 0 or not result.stdout.strip(): 

134 return [], [] 

135 

136 # Parse output: blocks separated by blank lines, each block is SHA followed by file names 

137 target_set = set(target_files) 

138 modified_set: set[str] = set() 

139 related_commits: set[str] = set() 

140 

141 for block in result.stdout.strip().split("\n\n"): 

142 lines = block.strip().split("\n") 

143 if not lines: 

144 continue 

145 commit_sha = lines[0] 

146 related_commits.add(commit_sha[:8]) 

147 for file_name in lines[1:]: 

148 file_name = file_name.strip() 

149 if file_name in target_set: 

150 modified_set.add(file_name) 

151 

152 # Preserve original order from target_files 

153 modified_files = [f for f in target_files if f in modified_set] 

154 return modified_files, list(related_commits) 

155 

156 

def detect_regression_or_duplicate(
    config: BRConfig,
    completed_issue_path: Path,
) -> tuple[MatchClassification, RegressionEvidence]:
    """Classify a completed issue that has resurfaced as a regression or an invalid fix.

    Decision sequence (first matching rule wins):
    - UNVERIFIED: the issue file cannot be read
    - UNVERIFIED: no fix commit is recorded in the file
    - INVALID_FIX: the fix commit is not present in git history
    - UNVERIFIED: no changed files are recorded in the file
    - REGRESSION: at least one fixed file was modified after the fix commit
    - INVALID_FIX: none of the fixed files were modified after the fix commit

    Args:
        config: Project configuration (not read by this function's body).
        completed_issue_path: Path to the completed issue file.

    Returns:
        Tuple of (classification, evidence). The evidence object is filled in
        progressively, so early exits return it only partially populated.
    """
    evidence = RegressionEvidence()

    try:
        issue_text = completed_issue_path.read_text(encoding="utf-8")
    except Exception:
        return MatchClassification.UNVERIFIED, evidence

    sha = _extract_fix_commit(issue_text)
    evidence.fix_commit_sha = sha
    if not sha:
        # Without a recorded fix commit there is nothing to compare against.
        return MatchClassification.UNVERIFIED, evidence

    if not _commit_exists_in_history(sha):
        evidence.fix_commit_exists = False
        return MatchClassification.INVALID_FIX, evidence

    fixed_paths = _extract_files_changed(issue_text)
    if not fixed_paths:
        # No recorded files means later changes cannot be attributed.
        return MatchClassification.UNVERIFIED, evidence

    touched, later_commits = _get_files_modified_since_commit(sha, fixed_paths)
    evidence.files_modified_since_fix = touched
    evidence.related_commits = later_commits

    completed_on = _extract_completion_date(issue_text)
    if completed_on:
        evidence.days_since_fix = (datetime.now() - completed_on).days

    # Later edits to the fixed files point to a regression; untouched files
    # mean the original fix never resolved the problem.
    verdict = MatchClassification.REGRESSION if touched else MatchClassification.INVALID_FIX
    return verdict, evidence

219 

220 

221# ============================================================================= 

222# Issue Reopening Section Builder 

223# ============================================================================= 

224 

225 

def _build_reopen_section(
    reason: str,
    new_context: str,
    source_command: str,
    classification: MatchClassification | None = None,
    regression_evidence: RegressionEvidence | None = None,
) -> str:
    """Render the Markdown block appended to an issue when it is reopened.

    Args:
        reason: Reason for reopening.
        new_context: New context/findings, placed under "New Findings".
        source_command: Command that triggered the reopen.
        classification: How the issue was classified; selects the heading and
            the classification bullet. Anything other than REGRESSION or
            INVALID_FIX produces a plain "Reopened" heading.
        regression_evidence: Evidence supporting the classification; rendered
            as an "Evidence" subsection when it yields at least one bullet.

    Returns:
        Markdown section string, stamped with today's local date.
    """
    # Start from the generic form and specialise for the two known classifications.
    section_header, classification_line = "## Reopened", ""
    if classification == MatchClassification.REGRESSION:
        section_header = "## Regression"
        classification_line = "- **Classification**: Regression (fix was broken by later changes)"
    elif classification == MatchClassification.INVALID_FIX:
        section_header = "## Reopened (Invalid Fix)"
        classification_line = (
            "- **Classification**: Invalid Fix (original fix never resolved the issue)"
        )

    bullets: list[str] = []
    if regression_evidence:
        ev = regression_evidence
        if ev.fix_commit_sha:
            bullets.append(f"- **Original Fix Commit**: {ev.fix_commit_sha}")
        if not ev.fix_commit_exists:
            bullets.append(
                "- **Fix Status**: Fix commit not found in history (possibly never merged)"
            )
        if ev.files_modified_since_fix:
            # Only the first five entries are shown.
            shown_files = ", ".join(f"`{f}`" for f in ev.files_modified_since_fix[:5])
            bullets.append(f"- **Files Modified Since Fix**: {shown_files}")
        if ev.related_commits:
            shown_commits = ", ".join(ev.related_commits[:5])
            bullets.append(f"- **Related Commits**: {shown_commits}")
        if ev.days_since_fix > 0:
            bullets.append(f"- **Days Since Fix**: {ev.days_since_fix}")

    evidence_section = ("\n### Evidence\n\n" + "\n".join(bullets)) if bullets else ""
    today = datetime.now().strftime("%Y-%m-%d")

    return f"""

---

{section_header}

- **Date**: {today}
- **By**: {source_command}
- **Reason**: {reason}
{classification_line}
{evidence_section}

### New Findings

{new_context}
"""