Coverage for little_loops / issue_history / coupling.py: 0%
69 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-18 16:18 -0500
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-18 16:18 -0500
1"""Issue history file coupling analysis."""
3from __future__ import annotations
5from pathlib import Path
7from little_loops.issue_history._utils import get_issue_content
8from little_loops.issue_history.models import (
9 CompletedIssue,
10 CouplingAnalysis,
11 CouplingPair,
12)
13from little_loops.issue_history.parsing import _extract_paths_from_issue
def analyze_coupling(
    issues: list[CompletedIssue],
    contents: dict[Path, str] | None = None,
) -> CouplingAnalysis:
    """Find files that tend to be referenced by the same issues.

    The coupling strength of two files is the Jaccard similarity of the sets
    of issue IDs that mention each file. A pair is reported only when the two
    files share at least 2 issues and the strength is at least 0.3.

    Args:
        issues: Completed issues to analyze.
        contents: Optional pre-loaded issue file contents (path -> content),
            forwarded to ``get_issue_content``.

    Returns:
        CouplingAnalysis holding the 20 strongest pairs, the first 10
        clusters, and the first 10 hotspots.
    """
    # Index: file path -> IDs of the issues whose content mentions it.
    issues_by_file: dict[str, set[str]] = {}
    for issue in issues:
        text = get_issue_content(issue, contents)
        if text is None:
            # No content was returned for this issue; nothing to index.
            continue
        for path in _extract_paths_from_issue(text):
            issues_by_file.setdefault(path, set()).add(issue.issue_id)

    # Score every unordered pair of files exactly once.
    names = list(issues_by_file)
    pairs: list[CouplingPair] = []
    for idx, left in enumerate(names):
        left_ids = issues_by_file[left]
        for right in names[idx + 1 :]:
            right_ids = issues_by_file[right]
            shared = left_ids & right_ids
            if len(shared) < 2:
                # Fewer than 2 co-occurrences is treated as noise.
                continue

            combined = left_ids | right_ids
            # Jaccard similarity: |A & B| / |A | B|.
            strength = len(shared) / len(combined) if combined else 0.0
            if strength < 0.3:
                # Below the significance threshold.
                continue

            pairs.append(
                CouplingPair(
                    file_a=left,
                    file_b=right,
                    co_occurrence_count=len(shared),
                    coupling_strength=strength,
                    issue_ids=sorted(shared),
                )
            )

    def _rank(pair: CouplingPair) -> tuple[float, int]:
        # Strongest coupling first; ties broken by more co-occurrences.
        return (-pair.coupling_strength, -pair.co_occurrence_count)

    pairs.sort(key=_rank)

    # Clusters are derived from the full ranked pair list, before truncation.
    clusters = _build_coupling_clusters(pairs)

    # Hotspots: files that appear in 3 or more reported pairs. Counts also
    # use the full pair list, not just the top 20 that are returned.
    partner_count: dict[str, int] = {}
    for pair in pairs:
        for name in (pair.file_a, pair.file_b):
            partner_count[name] = partner_count.get(name, 0) + 1

    hotspots = sorted(
        (name for name, total in partner_count.items() if total >= 3),
        key=lambda name: -partner_count[name],
    )

    return CouplingAnalysis(
        pairs=pairs[:20],
        clusters=clusters[:10],
        hotspots=hotspots[:10],
    )
def _build_coupling_clusters(pairs: list[CouplingPair]) -> list[list[str]]:
    """Group strongly coupled files into clusters via connected components.

    Only pairs whose coupling strength is at least 0.5 contribute an edge.
    Files linked directly or transitively through such edges end up in the
    same cluster.

    Args:
        pairs: Coupling pairs; only ``file_a``, ``file_b`` and
            ``coupling_strength`` are read from each.

    Returns:
        Clusters containing 2 or more files. Each cluster is sorted
        alphabetically and clusters are ordered largest first; clusters of
        equal size keep the order in which their first file appears in
        ``pairs``.
    """
    # Undirected adjacency restricted to high-coupling pairs.
    adjacency: dict[str, set[str]] = {}
    for pair in pairs:
        if pair.coupling_strength >= 0.5:
            adjacency.setdefault(pair.file_a, set()).add(pair.file_b)
            adjacency.setdefault(pair.file_b, set()).add(pair.file_a)

    visited: set[str] = set()
    clusters: list[list[str]] = []

    for start in adjacency:
        if start in visited:
            continue

        # Walk the component with a LIFO worklist: list.pop() is O(1),
        # whereas popping from the front of a list is O(n). Traversal order
        # is irrelevant because the component is sorted below.
        visited.add(start)
        component = [start]
        pending = [start]
        while pending:
            node = pending.pop()
            # Every node reached here was inserted as a key above, so direct
            # indexing is safe.
            for neighbor in adjacency[node]:
                if neighbor not in visited:
                    # Mark on push so a node is never queued twice.
                    visited.add(neighbor)
                    component.append(neighbor)
                    pending.append(neighbor)

        # A self-referencing pair yields a single-file component; drop it.
        if len(component) >= 2:
            component.sort()
            clusters.append(component)

    # Largest clusters first; the sort is stable, so ties keep discovery order.
    clusters.sort(key=len, reverse=True)
    return clusters