Coverage for little_loops / issue_discovery / matching.py: 0%
70 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-18 16:18 -0500
« prev ^ index » next coverage.py v7.12.0, created at 2026-03-18 16:18 -0500
1"""Issue matching types and text similarity helpers."""
3from __future__ import annotations
5import re
6from dataclasses import dataclass, field
7from enum import Enum
8from pathlib import Path
9from typing import TYPE_CHECKING
11# Promoted to text_utils.py as public functions; aliased here for backward compat
12from little_loops.text_utils import calculate_word_overlap as _calculate_word_overlap # noqa: F401
13from little_loops.text_utils import extract_words as _extract_words # noqa: F401
15if TYPE_CHECKING:
16 from little_loops.config import BRConfig
19# =============================================================================
20# Enums
21# =============================================================================
class MatchClassification(Enum):
    """Outcome of comparing a new finding against the known issues.

    The completed-issue variants (REGRESSION, INVALID_FIX, UNVERIFIED)
    separate the possible explanations for a finding that matches an issue
    which was already marked as completed.
    """

    # No existing issue matches the finding.
    NEW_ISSUE = "new_issue"
    # An active issue already exists for the finding.
    DUPLICATE = "duplicate"
    # Completed issue whose files were modified AFTER the fix (fix broke).
    REGRESSION = "regression"
    # Completed issue whose files were NOT modified after the fix (never worked).
    INVALID_FIX = "invalid_fix"
    # Completed issue with no fix commit tracked (cannot be determined).
    UNVERIFIED = "unverified"
38# =============================================================================
39# Data Classes
40# =============================================================================
@dataclass
class RegressionEvidence:
    """Facts gathered to tell a regression apart from an invalid fix.

    Every field has a default, so an instance can be created empty and
    filled in as evidence is collected.

    Attributes:
        fix_commit_sha: SHA of the commit that fixed the original issue,
            or None when no SHA is known
        fix_commit_exists: Whether the fix commit exists in current history
        files_modified_since_fix: Files from the fix that were modified
            after the fix
        days_since_fix: Number of days since the fix was applied
        related_commits: Commits that modified the relevant files after
            the fix
    """

    fix_commit_sha: str | None = None
    fix_commit_exists: bool = True
    # default_factory gives each instance its own list rather than a shared one.
    files_modified_since_fix: list[str] = field(default_factory=list)
    days_since_fix: int = 0
    related_commits: list[str] = field(default_factory=list)
@dataclass
class FindingMatch:
    """Outcome of matching one finding against the existing issue files.

    The ``should_*`` properties translate ``match_score`` (and, for
    completed issues, ``classification``) into the action to take.

    Attributes:
        issue_path: Path to matched issue file, or None if no match
        match_type: Type of match ("exact", "similar", "content", "none")
        match_score: Confidence score from 0.0 to 1.0
        is_completed: Whether the matched issue is in completed/
        matched_terms: Terms that matched (for debugging)
        classification: How to classify this match (regression, duplicate, etc.)
        regression_evidence: Evidence supporting regression classification
    """

    issue_path: Path | None
    match_type: str
    match_score: float
    is_completed: bool = False
    matched_terms: list[str] = field(default_factory=list)
    classification: MatchClassification = MatchClassification.NEW_ISSUE
    regression_evidence: RegressionEvidence | None = None

    @property
    def should_skip(self) -> bool:
        """Whether the score is high enough (>= 0.8) to skip the finding as a duplicate."""
        return self.match_score >= 0.8

    @property
    def should_update(self) -> bool:
        """Whether the score falls in [0.5, 0.8), meaning the existing issue should be updated."""
        score = self.match_score
        return 0.5 <= score < 0.8

    @property
    def should_create(self) -> bool:
        """Whether the score is below 0.5, meaning a new issue should be created."""
        return self.match_score < 0.5

    @property
    def should_reopen(self) -> bool:
        """Whether a completed issue matched with a score of at least 0.5 and should be reopened."""
        return self.is_completed and self.match_score >= 0.5

    @property
    def should_reopen_as_regression(self) -> bool:
        """Whether the issue should be reopened as a regression.

        A regression means the fix was applied but later code changes broke it.
        """
        # Same completed/score guard as should_reopen, plus the classification.
        return self.should_reopen and self.classification == MatchClassification.REGRESSION

    @property
    def should_reopen_as_invalid_fix(self) -> bool:
        """Whether the issue should be reopened because its fix was invalid.

        An invalid fix means the original fix never actually resolved the issue.
        """
        return self.should_reopen and self.classification == MatchClassification.INVALID_FIX

    @property
    def is_unverified(self) -> bool:
        """Whether the regression status cannot be determined.

        Unverified means the completed issue has no fix commit tracked, so it
        is impossible to tell a regression from an invalid fix.
        """
        return self.should_reopen and self.classification == MatchClassification.UNVERIFIED
142# =============================================================================
143# Text Matching Helpers
144# =============================================================================
147def _normalize_text(text: str) -> str:
148 """Normalize text for comparison.
150 Args:
151 text: Input text
153 Returns:
154 Lowercase text with normalized whitespace
155 """
156 return re.sub(r"\s+", " ", text.lower().strip())
159def _extract_line_numbers(text: str) -> set[int]:
160 """Extract line numbers from text.
162 Args:
163 text: Input text
165 Returns:
166 Set of line numbers found
167 """
168 numbers: set[int] = set()
169 # Match line number patterns
170 patterns = [
171 r"\*\*Line(?:\(s\))?\*\*:\s*(\d+)(?:-(\d+))?", # **Line(s)**: 42-45
172 r":(\d+)(?:-(\d+))?", # :42-45 (in paths)
173 r"line\s+(\d+)", # line 42
174 ]
175 for pattern in patterns:
176 for match in re.finditer(pattern, text, re.IGNORECASE):
177 numbers.add(int(match.group(1)))
178 if match.lastindex and match.lastindex >= 2 and match.group(2):
179 numbers.add(int(match.group(2)))
180 return numbers
183def _matches_issue_type(
184 finding_type: str,
185 issue_path: Path,
186 config: BRConfig,
187 is_completed: bool,
188) -> bool:
189 """Check if finding type matches issue path using configured categories.
191 Args:
192 finding_type: The type of finding (e.g., 'BUG', 'ENH', 'FEAT')
193 issue_path: Path to the issue file
194 config: Configuration with category definitions
195 is_completed: Whether the issue is in the completed directory
197 Returns:
198 True if the finding type matches the issue path's category
199 """
200 if is_completed:
201 return True
203 path_str = str(issue_path)
204 for category in config.issues.categories.values():
205 if finding_type == category.prefix and f"/{category.dir}/" in path_str:
206 return True
207 return False