Coverage for little_loops/git_operations.py: 21%

186 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-03-18 16:18 -0500

1"""Git operations for little-loops issue management. 

2 

3Provides git status checking, verification of work done, file filtering 

4for excluded directories, and .gitignore pattern suggestions. 

5""" 

6 

7from __future__ import annotations 

8 

9import fnmatch 

10import subprocess 

11from dataclasses import dataclass, field 

12from pathlib import Path 

13 

14from little_loops.logger import Logger 

15from little_loops.work_verification import ( # noqa: F401 

16 EXCLUDED_DIRECTORIES, 

17 filter_excluded_files, 

18 verify_work_was_done, 

19) 

20 

# Catalogue of well-known .gitignore entries.
# Each tuple is (pattern, category, description, priority).
# A smaller priority number wins when several patterns match the same file.
COMMON_GITIGNORE_PATTERNS: list[tuple[str, str, str, int]] = [
    # --- Coverage reports ---
    ("coverage.json", "coverage", "Coverage report JSON", 1),
    ("*.coverage", "coverage", "Coverage data files", 1),
    (".coverage*", "coverage", "Coverage data files", 1),
    (".nyc_output/", "coverage", "NYC coverage output", 2),
    # --- Environment files (security sensitive) ---
    (".env", "environment", "Environment variables", 1),
    (".env.*", "environment", "Environment-specific configs", 1),
    (".env.local", "environment", "Local environment overrides", 1),
    (".env.*.local", "environment", "Local environment overrides", 2),
    # --- Log files ---
    ("*.log", "logs", "Application log files", 2),
    ("logs/", "logs", "Log directory", 2),
    # --- Python ---
    ("__pycache__/", "python", "Python bytecode cache", 2),
    ("*.pyc", "python", "Python compiled files", 2),
    ("*.pyo", "python", "Python optimized files", 2),
    (".pytest_cache/", "python", "Pytest cache", 2),
    (".mypy_cache/", "python", "MyPy type cache", 2),
    ("*.egg-info/", "python", "Python package metadata", 3),
    # --- Node.js ---
    ("node_modules/", "nodejs", "Node.js dependencies", 2),
    ("package-lock.json", "nodejs", "NPM lock file", 3),
    ("yarn.lock", "nodejs", "Yarn lock file", 3),
    ("*.tgz", "nodejs", "NPM package tarballs", 3),
    # --- Build artifacts ---
    ("dist/", "build", "Distribution directory", 2),
    ("build/", "build", "Build directory", 2),
    ("*.egg", "python", "Python egg distribution", 3),
    # --- Operating-system files ---
    (".DS_Store", "os", "macOS directory metadata", 3),
    (".DS_Store?", "os", "macOS directory metadata (variant)", 3),
    ("._*", "os", "macOS resource forks", 3),
    ("Thumbs.db", "os", "Windows thumbnail cache", 3),
    ("ehthumbs.db", "os", "Windows thumbnail cache (variant)", 3),
    ("Desktop.ini", "os", "Windows desktop settings", 3),
    # --- Editors / IDEs ---
    (".idea/", "editor", "JetBrains IDE config", 3),
    (".vscode/", "editor", "VS Code config", 3),
    ("*.swp", "editor", "Vim swap files", 3),
    ("*.swo", "editor", "Vim swap files", 3),
    ("*~", "editor", "Backup files", 3),
    (".project", "editor", "Eclipse project", 3),
    (".settings/", "editor", "Eclipse settings", 3),
    # --- Temporary files ---
    ("*.tmp", "temp", "Temporary files", 2),
    ("tmp/", "temp", "Temp directory", 2),
    ("temp/", "temp", "Temp directory", 2),
    # --- State files ---
    ("*-state.json", "state", "State tracking files", 2),
    (".state.json", "state", "State tracking files", 2),
    # --- Runtime and cache ---
    (".cache/", "cache", "Cache directory", 2),
    (".parcel-cache/", "cache", "Parcel bundler cache", 3),
    # --- Databases ---
    ("*.db", "database", "Database files", 3),
    ("*.sqlite", "database", "SQLite databases", 3),
    ("*.sqlite3", "database", "SQLite databases", 3),
]

84 

85 

@dataclass
class GitignorePattern:
    """A candidate .gitignore entry together with descriptive metadata.

    Attributes:
        pattern: The .gitignore pattern text (e.g., "*.log", ".env").
        category: Grouping label (e.g., "coverage", "environment", "logs").
        description: Human-readable explanation of what the pattern covers.
        files_matched: Untracked files that matched this pattern.
        priority: Suggestion priority (1=highest, 5=lowest).
    """

    pattern: str
    category: str
    description: str
    files_matched: list[str] = field(default_factory=list)
    priority: int = 3

    def __post_init__(self) -> None:
        """Trim surrounding whitespace and reject a pattern that ends up empty."""
        self.pattern = self.pattern.strip()
        if self.pattern == "":
            raise ValueError("Pattern cannot be empty")

    @property
    def is_wildcard(self) -> bool:
        """Whether the pattern uses a ``*`` or ``?`` glob character."""
        return any(glob_char in self.pattern for glob_char in "*?")

    @property
    def is_directory(self) -> bool:
        """Whether the pattern is written as a directory (trailing slash)."""
        return self.pattern[-1:] == "/"

119 

120 

@dataclass
class GitignoreSuggestion:
    """Bundle of suggested .gitignore patterns plus helpers for presenting them.

    Attributes:
        patterns: Suggested patterns.
        existing_gitignore: Path to the .gitignore file, or None.
        already_ignored: Files already covered by the existing .gitignore.
        total_files: Number of untracked files that were examined.
    """

    patterns: list[GitignorePattern] = field(default_factory=list)
    existing_gitignore: Path | None = None
    already_ignored: list[str] = field(default_factory=list)
    total_files: int = 0

    @property
    def has_suggestions(self) -> bool:
        """Whether at least one pattern is being suggested."""
        return bool(self.patterns)

    @property
    def files_to_ignore(self) -> list[str]:
        """Sorted, de-duplicated union of the files matched by every pattern."""
        return sorted({name for entry in self.patterns for name in entry.files_matched})

    @property
    def summary(self) -> str:
        """One-line human-readable description of the suggestions."""
        if self.has_suggestions:
            file_total = len(self.files_to_ignore)
            pattern_total = len(self.patterns)
            return f"Found {file_total} file(s) matching {pattern_total} .gitignore pattern(s)."
        return "No .gitignore suggestions needed."

159 

160 

def check_git_status(logger: Logger) -> bool:
    """Report whether the working tree or the index has uncommitted changes.

    Runs ``git diff --quiet`` and then ``git diff --cached --quiet`` in the
    current working directory; a non-zero exit status from either one is
    treated as "changes present".

    Args:
        logger: Logger that receives a warning describing what was found.

    Returns:
        True if uncommitted changes were detected, or if the check itself
        raised an exception; False when both commands exit with status 0.
    """
    probes = (
        (["git", "diff", "--quiet"], "Uncommitted changes detected in working directory"),
        (["git", "diff", "--cached", "--quiet"], "Uncommitted staged changes detected"),
    )
    try:
        for command, warning_text in probes:
            completed = subprocess.run(
                command,
                capture_output=True,
                text=True,
            )
            if completed.returncode != 0:
                logger.warning(warning_text)
                return True
    except Exception as exc:
        # Any failure to run git is reported and treated like a dirty tree.
        logger.warning(f"Could not check git status: {exc}")
        return True
    return False

193 

194 

def get_untracked_files(repo_root: Path | str = ".") -> list[str]:
    """Get the list of untracked files reported by ``git status``.

    The command is run with ``--porcelain -z`` so that entries are
    NUL-terminated and paths are emitted verbatim. In the default porcelain
    format git wraps paths containing special characters in double quotes and
    backslash-escapes them; merely stripping the quotes leaves the escapes in
    the result. The ``-z`` form needs no unquoting and also preserves leading
    or trailing spaces in file names.

    Args:
        repo_root: Path to repository root. Defaults to current directory.

    Returns:
        Sorted list of untracked paths as printed by git. An empty list is
        returned when git cannot be run, exits with an error, or the
        directory cannot be used as a working directory.
    """
    repo_root = Path(repo_root).resolve()

    try:
        result = subprocess.run(
            ["git", "status", "--porcelain", "-z"],
            cwd=repo_root,
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git executable (FileNotFoundError) as well
        # as an unusable cwd (missing, not a directory, permission denied).
        return []

    untracked: list[str] = []
    entries = iter(result.stdout.split("\0"))
    for entry in entries:
        # Each entry is "XY <path>": a two-character status, a space, the path.
        if len(entry) < 4:
            continue
        status, path = entry[:2], entry[3:]
        if status == "??":
            untracked.append(path)
        elif "R" in status or "C" in status:
            # With -z a rename/copy entry is followed by a separate entry
            # holding the source path with no status prefix; consume it so it
            # is not misread as a status line.
            next(entries, None)

    return sorted(untracked)

234 

235 

236def _read_existing_gitignore(repo_root: Path) -> list[str]: 

237 """Read and parse existing .gitignore patterns. 

238 

239 Args: 

240 repo_root: Path to repository root. 

241 

242 Returns: 

243 List of existing patterns (stripped of comments and whitespace). 

244 Returns empty list if .gitignore doesn't exist. 

245 """ 

246 gitignore_path = repo_root / ".gitignore" 

247 

248 if not gitignore_path.exists(): 

249 return [] 

250 

251 patterns: list[str] = [] 

252 try: 

253 content = gitignore_path.read_text(encoding="utf-8") 

254 for line in content.split("\n"): 

255 line = line.strip() 

256 # Skip empty lines and comments 

257 if line and not line.startswith("#"): 

258 patterns.append(line) 

259 except (OSError, UnicodeDecodeError): 

260 # If we can't read it, assume empty 

261 return [] 

262 

263 return patterns 

264 

265 

266def _file_matches_pattern(file_path: str, pattern: str) -> bool: 

267 """Check if a file path matches a gitignore pattern. 

268 

269 Implements gitignore-style matching semantics: 

270 - If pattern doesn't contain '/', it matches basename in any directory 

271 - If pattern contains '/', it matches relative to repo root 

272 - If pattern ends with '/', it matches a directory 

273 - Leading '/' anchors to repo root 

274 - Negation patterns (starting with !) match the same as their base pattern 

275 

276 Args: 

277 file_path: File path relative to repo root 

278 pattern: Gitignore pattern (may start with ! for negation) 

279 

280 Returns: 

281 True if file matches the base pattern (regardless of negation) 

282 """ 

283 # Normalize paths 

284 file_path = file_path.replace("\\", "/") 

285 pattern = pattern.replace("\\", "/") 

286 

287 # Strip negation prefix for matching logic 

288 # The negation is handled by _is_already_ignored() 

289 if pattern.startswith("!"): 

290 pattern = pattern[1:] 

291 

292 # Handle directory patterns 

293 if pattern.endswith("/"): 

294 # Match if file is inside this directory 

295 dir_pattern = pattern.rstrip("/") 

296 return file_path == dir_pattern or file_path.startswith(dir_pattern + "/") 

297 

298 # Handle patterns without path separator (match basename anywhere) 

299 if "/" not in pattern: 

300 basename = Path(file_path).name 

301 # Also check if pattern has wildcards 

302 if "*" in pattern or "?" in pattern: 

303 return fnmatch.fnmatch(basename, pattern) 

304 return basename == pattern 

305 

306 # Handle patterns with path separator (match from root or subdirectory) 

307 if pattern.startswith("/"): 

308 # Anchored to root: must match from start 

309 return fnmatch.fnmatch(file_path, pattern[1:]) 

310 else: 

311 # Not anchored: can match at any level 

312 # Check if it matches the full path 

313 if fnmatch.fnmatch(file_path, pattern): 

314 return True 

315 # Check if it matches any parent path 

316 parts = file_path.split("/") 

317 for i in range(len(parts)): 

318 subpath = "/".join(parts[i:]) 

319 if fnmatch.fnmatch(subpath, pattern): 

320 return True 

321 return False 

322 

323 

324def _is_already_ignored( 

325 file_path: str, 

326 existing_patterns: list[str], 

327) -> bool: 

328 """Check if a file is already covered by existing .gitignore patterns. 

329 

330 Processes patterns in order, with negation patterns (starting with !) 

331 overriding previous matches. This follows gitignore semantics where 

332 later patterns can negate earlier ones. 

333 

334 Args: 

335 file_path: File path to check 

336 existing_patterns: List of patterns from .gitignore 

337 

338 Returns: 

339 True if file is already ignored (final result after all patterns) 

340 """ 

341 # Process patterns in order - later patterns override earlier ones 

342 is_ignored = False 

343 

344 for pattern in existing_patterns: 

345 if _file_matches_pattern(file_path, pattern): 

346 # If pattern starts with !, it's a negation 

347 if pattern.startswith("!"): 

348 is_ignored = False 

349 else: 

350 is_ignored = True 

351 

352 return is_ignored 

353 

354 

def suggest_gitignore_patterns(
    untracked_files: list[str] | None = None,
    repo_root: Path | str = ".",
    logger: Logger | None = None,
) -> GitignoreSuggestion:
    """Analyze untracked files and suggest .gitignore patterns.

    Each untracked file that is not already covered by the repository's
    .gitignore is compared against ``COMMON_GITIGNORE_PATTERNS``; the first
    matching pattern in priority order (ties keep table order) is recorded as
    a suggestion for that file.

    Args:
        untracked_files: Optional list of untracked files. If None, will
            detect via git status.
        repo_root: Path to repository root. Defaults to current directory.
        logger: Optional logger for debug output.

    Returns:
        GitignoreSuggestion with suggested patterns (sorted by priority,
        category, pattern), the files that were already ignored, and the
        number of files examined. An empty GitignoreSuggestion is returned
        when there are no untracked files.
    """
    repo_root = Path(repo_root).resolve()

    # Get untracked files if not provided
    if untracked_files is None:
        untracked_files = get_untracked_files(repo_root)

    if not untracked_files:
        return GitignoreSuggestion()

    # Read existing .gitignore
    existing_patterns = _read_existing_gitignore(repo_root)
    gitignore_path = repo_root / ".gitignore"

    # Build the candidates and order them by priority ONCE. The ordering does
    # not depend on the file being examined, so it is hoisted out of the loop.
    # sorted() is stable, so equal priorities keep their table order.
    candidates = sorted(
        (
            GitignorePattern(
                pattern=pattern_str,
                category=category,
                description=description,
                priority=priority,
            )
            for pattern_str, category, description, priority in COMMON_GITIGNORE_PATTERNS
        ),
        key=lambda p: p.priority,
    )

    already_ignored: list[str] = []
    suggestions: dict[str, GitignorePattern] = {}

    for file_path in untracked_files:
        # Files the existing .gitignore already covers need no suggestion.
        if _is_already_ignored(file_path, existing_patterns):
            already_ignored.append(file_path)
            continue

        # First (highest-priority) candidate that matches this file, if any.
        best = next(
            (c for c in candidates if _file_matches_pattern(file_path, c.pattern)),
            None,
        )
        if best is None:
            # Log unmatched files for debugging
            if logger:
                logger.debug(f"No pattern match for: {file_path}")
            continue

        # Deduplicate by pattern text and record the file against it once.
        chosen = suggestions.setdefault(best.pattern, best)
        if file_path not in chosen.files_matched:
            chosen.files_matched.append(file_path)

    # Convert to sorted list (by priority, then category, then pattern)
    suggested_patterns = sorted(
        suggestions.values(),
        key=lambda p: (p.priority, p.category, p.pattern),
    )

    return GitignoreSuggestion(
        patterns=suggested_patterns,
        existing_gitignore=gitignore_path if gitignore_path.exists() else None,
        already_ignored=already_ignored,
        total_files=len(untracked_files),
    )

439 

440 

def add_patterns_to_gitignore(
    patterns: list[str],
    repo_root: Path | str = ".",
    logger: Logger | None = None,
    backup: bool = True,
) -> bool:
    """Add patterns to .gitignore file.

    Patterns are stripped of surrounding whitespace before being compared and
    written, because the existing entries are read back stripped. Empty
    patterns, patterns already present in the file, and repeats within
    ``patterns`` itself are skipped; the remaining ones are appended in the
    order given, one per line.

    Args:
        patterns: List of patterns to add (will skip duplicates)
        repo_root: Path to repository root
        logger: Optional logger for output
        backup: If True, create .gitignore.backup before modifying

    Returns:
        True if patterns were added successfully (or nothing needed adding),
        False if the file could not be read, backed up, or written.
    """
    repo_root = Path(repo_root).resolve()
    gitignore_path = repo_root / ".gitignore"

    # Start from what the file already contains, then track what we queue so
    # a pattern repeated in the input is only written once.
    seen = set(_read_existing_gitignore(repo_root))
    new_patterns: list[str] = []
    for raw_pattern in patterns:
        candidate = raw_pattern.strip()
        if candidate and candidate not in seen:
            seen.add(candidate)
            new_patterns.append(candidate)

    if not new_patterns:
        if logger:
            logger.info("All patterns already exist in .gitignore")
        return True

    try:
        if gitignore_path.exists():
            # Create backup if requested
            if backup:
                import shutil

                backup_path = repo_root / ".gitignore.backup"
                if logger:
                    logger.debug(f"Creating backup: {backup_path}")
                shutil.copy2(gitignore_path, backup_path)

            content = gitignore_path.read_text(encoding="utf-8")
            # Ensure trailing newline so the first new pattern starts a line.
            if content and not content.endswith("\n"):
                content += "\n"
        else:
            content = ""

        content += "".join(f"{pattern}\n" for pattern in new_patterns)
        gitignore_path.write_text(content, encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        if logger:
            logger.error(f"Failed to update .gitignore: {e}")
        return False

    if logger:
        logger.success(f"Added {len(new_patterns)} pattern(s) to .gitignore")
        for pattern in new_patterns:
            logger.info(f"  + {pattern}")

    return True