Coverage for little_loops / issue_discovery / extraction.py: 0%
106 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-18 16:18 -0500
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-18 16:18 -0500
1"""Git history analysis and regression detection for issue discovery."""
3from __future__ import annotations
5import re
6import subprocess
7from datetime import datetime
8from pathlib import Path
9from typing import TYPE_CHECKING
11from little_loops.issue_discovery.matching import (
12 MatchClassification,
13 RegressionEvidence,
14)
16if TYPE_CHECKING:
17 from little_loops.config import BRConfig
20# =============================================================================
21# Git History Analysis
22# =============================================================================
25def _extract_fix_commit(content: str) -> str | None:
26 """Extract fix commit SHA from issue Resolution section.
28 Args:
29 content: Issue file content
31 Returns:
32 Fix commit SHA if found, None otherwise
33 """
34 # Look for "Fix Commit: <sha>" pattern in Resolution section
35 match = re.search(r"\*\*Fix Commit\*\*:\s*([a-f0-9]{7,40})", content)
36 if match:
37 return match.group(1)
38 return None
41def _extract_files_changed(content: str) -> list[str]:
42 """Extract files changed from issue Resolution section.
44 Args:
45 content: Issue file content
47 Returns:
48 List of file paths that were changed to fix the issue
49 """
50 files: list[str] = []
52 # Look for Files Changed section
53 section_match = re.search(
54 r"###\s*Files Changed\s*\n(.*?)(?=\n###|\n##|\Z)",
55 content,
56 re.DOTALL,
57 )
58 if section_match:
59 section = section_match.group(1)
60 # Extract backtick-quoted paths: `path/to/file.py`
61 for match in re.finditer(r"`([^`]+)`", section):
62 path = match.group(1).strip()
63 if path and not path.startswith("See "): # Skip placeholder text
64 files.append(path)
66 return files
69def _extract_completion_date(content: str) -> datetime | None:
70 """Extract completion/closed date from issue Resolution section.
72 Args:
73 content: Issue file content
75 Returns:
76 Completion date if found, None otherwise
77 """
78 # Look for "Completed: YYYY-MM-DD" or "Closed: YYYY-MM-DD"
79 match = re.search(r"\*\*(?:Completed|Closed)\*\*:\s*(\d{4}-\d{2}-\d{2})", content)
80 if match:
81 try:
82 return datetime.strptime(match.group(1), "%Y-%m-%d")
83 except ValueError:
84 return None
85 return None
def _commit_exists_in_history(commit_sha: str) -> bool:
    """Check whether git resolves ``commit_sha`` to a commit object.

    Runs ``git cat-file -t <sha>`` in the process's current working directory
    and inspects the reported object type.

    NOTE(review): ``git cat-file -t`` looks the object up in the repository;
    it does not by itself show that the commit is reachable from HEAD.
    Confirm whether reachability is required by callers.

    Args:
        commit_sha: SHA of the commit to check

    Returns:
        True if git exits successfully and reports the object type "commit"
    """
    probe = subprocess.run(
        ["git", "cat-file", "-t", commit_sha],
        capture_output=True,
        text=True,
    )
    if probe.returncode != 0:
        # Unknown object, not a git repository, or another git failure
        return False
    object_type = probe.stdout.strip()
    return object_type == "commit"
105def _get_files_modified_since_commit(
106 since_commit: str,
107 target_files: list[str],
108) -> tuple[list[str], list[str]]:
109 """Find which target files have been modified since a given commit.
111 Uses a single batched git log call instead of per-file subprocess calls.
113 Args:
114 since_commit: SHA of the commit to check since
115 target_files: List of file paths to check
117 Returns:
118 Tuple of (modified_files, related_commits) where:
119 - modified_files: Target files that were modified after the commit
120 - related_commits: SHAs of commits that modified the target files
121 """
122 if not target_files:
123 return [], []
125 # Single batched git log call with all file paths
126 result = subprocess.run(
127 ["git", "log", "--pretty=format:%H", "--name-only", f"{since_commit}..HEAD", "--"]
128 + target_files,
129 capture_output=True,
130 text=True,
131 )
133 if result.returncode != 0 or not result.stdout.strip():
134 return [], []
136 # Parse output: blocks separated by blank lines, each block is SHA followed by file names
137 target_set = set(target_files)
138 modified_set: set[str] = set()
139 related_commits: set[str] = set()
141 for block in result.stdout.strip().split("\n\n"):
142 lines = block.strip().split("\n")
143 if not lines:
144 continue
145 commit_sha = lines[0]
146 related_commits.add(commit_sha[:8])
147 for file_name in lines[1:]:
148 file_name = file_name.strip()
149 if file_name in target_set:
150 modified_set.add(file_name)
152 # Preserve original order from target_files
153 modified_files = [f for f in target_files if f in modified_set]
154 return modified_files, list(related_commits)
def detect_regression_or_duplicate(
    config: BRConfig,
    completed_issue_path: Path,
) -> tuple[MatchClassification, RegressionEvidence]:
    """Classify a match against a completed issue as regression or invalid fix.

    Decision steps, in order:
    - UNVERIFIED: the issue file cannot be read
    - UNVERIFIED: no fix commit is recorded in the issue
    - INVALID_FIX: the fix commit is not found by git (never merged/deployed)
    - UNVERIFIED: no changed files are recorded in the issue
    - REGRESSION: recorded files were modified after the fix commit
      (fix worked but later changes broke it)
    - INVALID_FIX: recorded files were not modified after the fix commit
      (fix was applied but never worked)

    Args:
        config: Project configuration (not read by this function)
        completed_issue_path: Path to the completed issue file

    Returns:
        Tuple of (classification, evidence); the evidence object carries
        whatever was gathered before the classification was decided
    """
    evidence = RegressionEvidence()

    try:
        issue_text = completed_issue_path.read_text(encoding="utf-8")
    except Exception:
        return MatchClassification.UNVERIFIED, evidence

    # Record the fix commit (possibly None) before deciding anything
    sha = _extract_fix_commit(issue_text)
    evidence.fix_commit_sha = sha
    if not sha:
        # Nothing to anchor the analysis on
        return MatchClassification.UNVERIFIED, evidence

    if not _commit_exists_in_history(sha):
        evidence.fix_commit_exists = False
        return MatchClassification.INVALID_FIX, evidence

    fixed_paths = _extract_files_changed(issue_text)
    if not fixed_paths:
        # No files tracked, so later modifications cannot be checked
        return MatchClassification.UNVERIFIED, evidence

    # Which of the fixed files changed again after the fix commit?
    touched, commits = _get_files_modified_since_commit(sha, fixed_paths)
    evidence.files_modified_since_fix = touched
    evidence.related_commits = commits

    # Age of the fix, when the issue records a completion date
    completed_on = _extract_completion_date(issue_text)
    if completed_on:
        evidence.days_since_fix = (datetime.now() - completed_on).days

    # Later edits to the fixed files point to a regression; otherwise the
    # original fix is treated as never having worked.
    verdict = MatchClassification.REGRESSION if touched else MatchClassification.INVALID_FIX
    return verdict, evidence
221# =============================================================================
222# Issue Reopening Section Builder
223# =============================================================================
226def _build_reopen_section(
227 reason: str,
228 new_context: str,
229 source_command: str,
230 classification: MatchClassification | None = None,
231 regression_evidence: RegressionEvidence | None = None,
232) -> str:
233 """Build the reopened section for an issue.
235 Args:
236 reason: Reason for reopening
237 new_context: New context/findings
238 source_command: Command that triggered reopen
239 classification: How this issue was classified (regression, invalid_fix, etc.)
240 regression_evidence: Evidence supporting the classification
242 Returns:
243 Markdown section string
244 """
245 # Determine section header based on classification
246 if classification == MatchClassification.REGRESSION:
247 section_header = "## Regression"
248 classification_line = "- **Classification**: Regression (fix was broken by later changes)"
249 elif classification == MatchClassification.INVALID_FIX:
250 section_header = "## Reopened (Invalid Fix)"
251 classification_line = (
252 "- **Classification**: Invalid Fix (original fix never resolved the issue)"
253 )
254 else:
255 section_header = "## Reopened"
256 classification_line = ""
258 # Build evidence section if available
259 evidence_section = ""
260 if regression_evidence:
261 evidence_lines = []
262 if regression_evidence.fix_commit_sha:
263 evidence_lines.append(
264 f"- **Original Fix Commit**: {regression_evidence.fix_commit_sha}"
265 )
266 if not regression_evidence.fix_commit_exists:
267 evidence_lines.append(
268 "- **Fix Status**: Fix commit not found in history (possibly never merged)"
269 )
270 if regression_evidence.files_modified_since_fix:
271 files_list = ", ".join(
272 f"`{f}`" for f in regression_evidence.files_modified_since_fix[:5]
273 )
274 evidence_lines.append(f"- **Files Modified Since Fix**: {files_list}")
275 if regression_evidence.related_commits:
276 commits_list = ", ".join(regression_evidence.related_commits[:5])
277 evidence_lines.append(f"- **Related Commits**: {commits_list}")
278 if regression_evidence.days_since_fix > 0:
279 evidence_lines.append(f"- **Days Since Fix**: {regression_evidence.days_since_fix}")
281 if evidence_lines:
282 evidence_section = "\n### Evidence\n\n" + "\n".join(evidence_lines)
284 return f"""
286---
288{section_header}
290- **Date**: {datetime.now().strftime("%Y-%m-%d")}
291- **By**: {source_command}
292- **Reason**: {reason}
293{classification_line}
294{evidence_section}
296### New Findings
298{new_context}
299"""