Coverage for little_loops / issue_history / coupling.py: 0%

69 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-03-18 16:18 -0500

1"""Issue history file coupling analysis.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6 

7from little_loops.issue_history._utils import get_issue_content 

8from little_loops.issue_history.models import ( 

9 CompletedIssue, 

10 CouplingAnalysis, 

11 CouplingPair, 

12) 

13from little_loops.issue_history.parsing import _extract_paths_from_issue 

14 

15 

def analyze_coupling(
    issues: list[CompletedIssue],
    contents: dict[Path, str] | None = None,
) -> CouplingAnalysis:
    """Find files that are repeatedly mentioned together across issues.

    The coupling strength of two files is the Jaccard similarity of the sets
    of issue IDs whose content references each file. A pair is reported only
    when the files share at least 2 issues and the strength is >= 0.3.

    Args:
        issues: Completed issues to scan.
        contents: Optional pre-loaded issue file contents (path -> content),
            forwarded to ``get_issue_content``.

    Returns:
        CouplingAnalysis with the 20 highest-ranked pairs, at most 10
        clusters, and at most 10 hotspots (files that take part in 3 or more
        reported pairs).
    """
    # Index: file path -> IDs of the issues whose content references it.
    issues_by_file: dict[str, set[str]] = {}
    for issue in issues:
        text = get_issue_content(issue, contents)
        if text is None:
            continue  # nothing to scan for this issue
        for file_path in _extract_paths_from_issue(text):
            issues_by_file.setdefault(file_path, set()).add(issue.issue_id)

    # Score every unordered pair of files exactly once.
    tracked = list(issues_by_file)
    candidates: list[CouplingPair] = []
    for position, file_a in enumerate(tracked):
        ids_a = issues_by_file[file_a]
        for file_b in tracked[position + 1 :]:
            ids_b = issues_by_file[file_b]
            shared = ids_a & ids_b
            if len(shared) < 2:
                continue  # require at least 2 co-occurrences

            # Jaccard similarity; the union cannot be empty here because
            # `shared` already holds at least two issue IDs.
            strength = len(shared) / len(ids_a | ids_b)
            if strength < 0.3:
                continue  # not significant enough to report

            candidates.append(
                CouplingPair(
                    file_a=file_a,
                    file_b=file_b,
                    co_occurrence_count=len(shared),
                    coupling_strength=strength,
                    issue_ids=sorted(shared),
                )
            )

    # Strongest first, then most co-occurrences. The sort is stable, so pairs
    # with equal keys keep their discovery order.
    ranked = sorted(
        candidates,
        key=lambda p: (p.coupling_strength, p.co_occurrence_count),
        reverse=True,
    )

    # Clusters are derived from the full ranked list, not only the top 20.
    clusters = _build_coupling_clusters(ranked)

    # Count, per file, how many reported pairs it takes part in.
    partner_counts: dict[str, int] = {}
    for pair in ranked:
        for name in (pair.file_a, pair.file_b):
            partner_counts[name] = partner_counts.get(name, 0) + 1

    # Hotspots: files appearing in 3+ pairs, most-connected first.
    hotspots = sorted(
        (name for name, total in partner_counts.items() if total >= 3),
        key=lambda name: partner_counts[name],
        reverse=True,
    )

    return CouplingAnalysis(
        pairs=ranked[:20],  # Top 20 pairs
        clusters=clusters[:10],  # Top 10 clusters
        hotspots=hotspots[:10],  # Top 10 hotspots
    )

94 

95 

96def _build_coupling_clusters(pairs: list[CouplingPair]) -> list[list[str]]: 

97 """Build clusters of coupled files using connected components. 

98 

99 Args: 

100 pairs: List of coupling pairs 

101 

102 Returns: 

103 List of file clusters (each cluster is a list of file paths) 

104 """ 

105 # Build adjacency for high-coupling pairs (strength >= 0.5) 

106 adjacency: dict[str, set[str]] = {} 

107 for pair in pairs: 

108 if pair.coupling_strength >= 0.5: 

109 if pair.file_a not in adjacency: 

110 adjacency[pair.file_a] = set() 

111 if pair.file_b not in adjacency: 

112 adjacency[pair.file_b] = set() 

113 adjacency[pair.file_a].add(pair.file_b) 

114 adjacency[pair.file_b].add(pair.file_a) 

115 

116 # Find connected components 

117 visited: set[str] = set() 

118 clusters: list[list[str]] = [] 

119 

120 for start in adjacency: 

121 if start in visited: 

122 continue 

123 # BFS to find component 

124 cluster: list[str] = [] 

125 queue = [start] 

126 while queue: 

127 node = queue.pop(0) 

128 if node in visited: 

129 continue 

130 visited.add(node) 

131 cluster.append(node) 

132 for neighbor in adjacency.get(node, set()): 

133 if neighbor not in visited: 

134 queue.append(neighbor) 

135 

136 if len(cluster) >= 2: # Only include clusters with 2+ files 

137 cluster.sort() 

138 clusters.append(cluster) 

139 

140 # Sort clusters by size descending 

141 clusters.sort(key=lambda c: -len(c)) 

142 return clusters