Coverage for little_loops / issue_history / regressions.py: 0%

63 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-03-18 16:18 -0500

1"""Issue history regression clustering analysis.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6 

7from little_loops.issue_history._utils import get_issue_content 

8from little_loops.issue_history.models import ( 

9 CompletedIssue, 

10 RegressionAnalysis, 

11 RegressionCluster, 

12) 

13from little_loops.issue_history.parsing import _extract_paths_from_issue 

14 

15 

def analyze_regression_clustering(
    issues: list[CompletedIssue],
    contents: dict[Path, str] | None = None,
) -> RegressionAnalysis:
    """Detect files where bug fixes are quickly followed by further bug fixes.

    Two completed bugs form a regression pair when both heuristics hold:

    1. Temporal proximity: the later bug was completed no more than 7 days
       after the earlier one.
    2. File overlap: the paths extracted from both issues share at least one
       file.

    Every file in a pair's overlap gets its own cluster entry, so a single
    pair can contribute to several clusters.

    Args:
        issues: Completed issues; only those whose ``issue_type`` is ``"BUG"``
            and whose ``completed_date`` is truthy are considered.
        contents: Pre-loaded issue file contents (path -> content), passed
            through to ``get_issue_content``.

    Returns:
        RegressionAnalysis with the top 10 clusters (ordered by regression
        count descending, then by primary file), the total number of
        regression pairs found, and up to 5 most fragile files. A default
        ``RegressionAnalysis()`` is returned when fewer than two bugs qualify
        or when no regression pair is found.
    """
    # Filter to dated bugs only; chronological order lets the pairing loop
    # stop early once the 7-day window is exceeded.
    bugs = [i for i in issues if i.issue_type == "BUG" and i.completed_date]
    bugs.sort(key=lambda i: i.completed_date)  # type: ignore

    if len(bugs) < 2:
        return RegressionAnalysis()

    # issue_id -> set of file paths mentioned in the issue (empty when the
    # issue content could not be obtained).
    bug_files: dict[str, set[str]] = {}
    for bug in bugs:
        content = get_issue_content(bug, contents)
        if content is None:
            bug_files[bug.issue_id] = set()
        else:
            bug_files[bug.issue_id] = set(_extract_paths_from_issue(content))

    # Find regression pairs (temporal proximity + file overlap). The day gap
    # is stored with the pair so it is computed only once.
    regression_pairs: list[tuple[CompletedIssue, CompletedIssue, set[str], int]] = []

    for i, bug_a in enumerate(bugs[:-1]):
        files_a = bug_files.get(bug_a.issue_id, set())
        if not files_a:
            continue

        for bug_b in bugs[i + 1 :]:
            days_apart = (bug_b.completed_date - bug_a.completed_date).days  # type: ignore
            if days_apart > 7:
                break  # Bugs are sorted, so every later bug is even further away

            # An empty file set for bug_b yields an empty overlap and is skipped.
            overlap = files_a & bug_files.get(bug_b.issue_id, set())
            if overlap:
                regression_pairs.append((bug_a, bug_b, overlap, days_apart))

    if not regression_pairs:
        return RegressionAnalysis()

    # Group pairs under every file they overlap on, and in the same pass
    # gather all files touched by the pairs of each group. Doing both here
    # avoids re-scanning every pair once per clustered file.
    file_regressions: dict[str, list[tuple[str, str, int]]] = {}  # file -> [(id_a, id_b, days)]
    file_related: dict[str, set[str]] = {}  # file -> files touched by its pairs

    for bug_a, bug_b, overlap, days in regression_pairs:
        touched = bug_files.get(bug_a.issue_id, set()) | bug_files.get(bug_b.issue_id, set())
        for file_path in overlap:
            file_regressions.setdefault(file_path, []).append(
                (bug_a.issue_id, bug_b.issue_id, days)
            )
            file_related.setdefault(file_path, set()).update(touched)

    # Build clusters
    clusters: list[RegressionCluster] = []

    for file_path, pairs in file_regressions.items():
        # Time pattern: a short average gap wins over the pair count.
        avg_days = sum(d for _, _, d in pairs) / len(pairs)
        if avg_days < 3:
            time_pattern = "immediate"
        elif len(pairs) >= 3:
            time_pattern = "chronic"
        else:
            time_pattern = "delayed"

        # Severity scales with the number of pairs on this file.
        if len(pairs) >= 4:
            severity = "critical"
        elif len(pairs) >= 2:
            severity = "high"
        else:
            severity = "medium"

        # Related files are everything the group's bugs touched, minus the
        # cluster's own primary file.
        related_files = file_related[file_path] - {file_path}

        clusters.append(
            RegressionCluster(
                primary_file=file_path,
                regression_count=len(pairs),
                fix_bug_pairs=[(a, b) for a, b, _ in pairs],
                related_files=sorted(related_files),
                time_pattern=time_pattern,
                severity=severity,
            )
        )

    # Sort by regression count descending; the file name breaks ties so the
    # order does not depend on set/dict iteration order.
    clusters.sort(key=lambda c: (-c.regression_count, c.primary_file))

    # Identify most fragile files
    most_fragile = [c.primary_file for c in clusters[:5]]

    return RegressionAnalysis(
        clusters=clusters[:10],  # Top 10
        total_regression_chains=len(regression_pairs),
        most_fragile_files=most_fragile,
    )