Coverage for little_loops / issue_history / regressions.py: 0%
63 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-18 16:18 -0500
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-18 16:18 -0500
1"""Issue history regression clustering analysis."""
3from __future__ import annotations
5from pathlib import Path
7from little_loops.issue_history._utils import get_issue_content
8from little_loops.issue_history.models import (
9 CompletedIssue,
10 RegressionAnalysis,
11 RegressionCluster,
12)
13from little_loops.issue_history.parsing import _extract_paths_from_issue
def analyze_regression_clustering(
    issues: list[CompletedIssue],
    contents: dict[Path, str] | None = None,
    *,
    window_days: int = 7,
) -> RegressionAnalysis:
    """Detect files where bug fixes are frequently followed by new bugs.

    Two completed bugs are treated as a likely fix/regression pair when both
    heuristics hold:

    1. Temporal proximity: bug B was completed no more than ``window_days``
       whole days after bug A.
    2. File overlap: the paths extracted from both issues share at least
       one file.

    A pair is recorded under *every* file the two bugs share, so a single
    pair can contribute to several clusters.

    Args:
        issues: Completed issues of any type. Only those whose ``issue_type``
            is ``"BUG"`` and that have a truthy ``completed_date`` are used.
        contents: Pre-loaded issue file contents (path -> content), passed
            through to ``get_issue_content``.
        window_days: Maximum number of whole days between two bug completions
            for them to count as a pair. Defaults to 7.

    Returns:
        RegressionAnalysis holding the top 10 clusters (ordered by regression
        count descending, then by file path), the total number of detected
        pairs, and the primary files of the top 5 clusters. An empty
        RegressionAnalysis is returned when fewer than two bugs qualify or
        no pair is found.
    """
    # Keep dated bugs only, oldest completion first (sorted() is stable).
    bugs = sorted(
        (issue for issue in issues if issue.issue_type == "BUG" and issue.completed_date),
        key=lambda issue: issue.completed_date,  # type: ignore
    )

    if len(bugs) < 2:
        return RegressionAnalysis()

    # issue_id -> set of file paths mentioned by that bug (empty when the
    # issue content could not be obtained).
    bug_files: dict[str, set[str]] = {}
    for bug in bugs:
        content = get_issue_content(bug, contents)
        bug_files[bug.issue_id] = (
            set() if content is None else set(_extract_paths_from_issue(content))
        )

    # Per shared file: the (id_a, id_b, days_apart) pairs that overlap on it,
    # and every file touched by either bug of those pairs. Both are gathered
    # in the same pass that discovers the pairs, so no later rescan of all
    # pairs per file is needed.
    pairs_by_file: dict[str, list[tuple[str, str, int]]] = {}
    touched_by_file: dict[str, set[str]] = {}
    total_pairs = 0

    for index, bug_a in enumerate(bugs[:-1]):
        files_a = bug_files[bug_a.issue_id]
        if not files_a:
            continue

        for bug_b in bugs[index + 1 :]:
            days_apart = (bug_b.completed_date - bug_a.completed_date).days  # type: ignore
            if days_apart > window_days:
                break  # Bugs are sorted by date, so later ones are even further away

            files_b = bug_files[bug_b.issue_id]
            overlap = files_a & files_b
            if not overlap:
                continue

            total_pairs += 1
            touched = files_a | files_b
            for file_path in overlap:
                pairs_by_file.setdefault(file_path, []).append(
                    (bug_a.issue_id, bug_b.issue_id, days_apart)
                )
                touched_by_file.setdefault(file_path, set()).update(touched)

    if not total_pairs:
        return RegressionAnalysis()

    # Build one cluster per shared file.
    clusters: list[RegressionCluster] = []
    for file_path, pairs in pairs_by_file.items():
        pair_count = len(pairs)

        # Time pattern: a short average gap wins over the repeat count.
        avg_days = sum(days for _, _, days in pairs) / pair_count
        if avg_days < 3:
            time_pattern = "immediate"
        elif pair_count >= 3:
            time_pattern = "chronic"
        else:
            time_pattern = "delayed"

        # Severity scales with the number of pairs only.
        if pair_count >= 4:
            severity = "critical"
        elif pair_count >= 2:
            severity = "high"
        else:
            severity = "medium"

        clusters.append(
            RegressionCluster(
                primary_file=file_path,
                regression_count=pair_count,
                fix_bug_pairs=[(id_a, id_b) for id_a, id_b, _ in pairs],
                # Related files exclude the cluster's own primary file.
                related_files=sorted(touched_by_file[file_path] - {file_path}),
                time_pattern=time_pattern,
                severity=severity,
            )
        )

    # Most regressions first; the file path breaks ties deterministically.
    clusters.sort(key=lambda cluster: (-cluster.regression_count, cluster.primary_file))

    return RegressionAnalysis(
        clusters=clusters[:10],  # Top 10
        total_regression_chains=total_pairs,
        most_fragile_files=[cluster.primary_file for cluster in clusters[:5]],
    )