Coverage for src / domain / validation / code_pattern_matcher.py: 15%
52 statements
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2026-01-04 04:43 +0000
1"""Code pattern matcher for file path filtering.
3This module provides utilities to match file paths against glob patterns
4for validation gating. It supports:
5- `*` matches any non-slash characters; `?` matches exactly one non-slash character
6- `**` matches anything including `/` (zero or more directory levels)
7- Filename-only patterns (no `/`) match against basename
8- Path patterns (contain `/`) match against full relative path
9"""
11from __future__ import annotations
13import logging
14import os
15import re
17logger = logging.getLogger(__name__)
def glob_to_regex(pattern: str) -> re.Pattern[str]:
    """Convert a glob pattern to a compiled, fully anchored regex.

    Supported syntax:
    - `*` matches any run of characters except `/`
    - `?` matches exactly one character except `/`
    - `**/` matches zero or more complete directory segments
    - `**` not followed by `/` matches anything, including `/`
    - a backslash escapes the following character, which is then matched
      literally; a lone trailing backslash matches a literal backslash

    Every other character (including `[`, `]`, `{`, `}`) is matched
    literally. If the generated regex nevertheless fails to compile, the
    whole pattern is treated as a literal string and a warning is logged.

    Args:
        pattern: Glob pattern to convert.

    Returns:
        Compiled regex pattern anchored with `^` and `$`.
    """
    regex_parts: list[str] = []
    i = 0
    n = len(pattern)

    while i < n:
        char = pattern[i]

        if char == "*" and i + 1 < n and pattern[i + 1] == "*":
            i += 2
            if i < n and pattern[i] == "/":
                # "**/" is optional as a whole: either no directory at all,
                # or any prefix that ends with a slash.
                regex_parts.append("(?:.*/)?")
                i += 1
            else:
                # "**" at the end or not followed by "/" crosses separators.
                regex_parts.append(".*")
        elif char == "*":
            # A single star stays within one path segment.
            regex_parts.append("[^/]*")
            i += 1
        elif char == "?":
            # Exactly one character, never a separator.
            regex_parts.append("[^/]")
            i += 1
        elif char in ".^$+{}|()[]":
            # Regex metacharacters that are plain text in a glob.
            regex_parts.append("\\" + char)
            i += 1
        elif char == "\\":
            if i + 1 < n:
                # The escaped character must match itself. Blindly prefixing
                # a backslash would turn e.g. "\d" into the regex digit class
                # and "\1" into a backreference, so let re.escape decide
                # whether a backslash is actually needed.
                regex_parts.append(re.escape(pattern[i + 1]))
                i += 2
            else:
                # Nothing left to escape: match a literal backslash.
                regex_parts.append("\\\\")
                i += 1
        else:
            regex_parts.append(char)
            i += 1

    regex_str = "^" + "".join(regex_parts) + "$"
    try:
        return re.compile(regex_str)
    except re.error as e:
        # Invalid pattern - treat as literal string
        logger.warning("Invalid glob pattern '%s', treating as literal: %s", pattern, e)
        return re.compile("^" + re.escape(pattern) + "$")
def matches_pattern(path: str, pattern: str) -> bool:
    """Check if a path matches a glob pattern.

    Backslashes in both `path` and `pattern` are converted to `/` before
    matching, so Windows-style separators are accepted on either side.

    Matching rules:
    - Filename-only patterns (no `/`): match against os.path.basename(path)
    - Path patterns (contain `/`): match against the full path with leading
      slashes removed. Leading slashes on the pattern are removed as well,
      so `/src/*.py` behaves like `src/*.py` and `/Makefile` matches only a
      top-level `Makefile`.

    Args:
        path: File path to check.
        pattern: Glob pattern to match against.

    Returns:
        True if path matches pattern, False otherwise.
    """
    # Normalize path separators
    path = path.replace("\\", "/")
    pattern = pattern.replace("\\", "/")

    if "/" in pattern:
        # Path pattern - match against the full path, made relative.
        target = path.lstrip("/")
        # The target no longer has a leading slash, so a pattern that kept
        # one could never match; strip it to keep both sides comparable.
        pattern = pattern.lstrip("/")
    else:
        # Filename-only pattern - match against basename
        target = os.path.basename(path)

    regex = glob_to_regex(pattern)
    return regex.match(target) is not None
def filter_matching_files(files: list[str], patterns: list[str]) -> list[str]:
    """Return the files accepted by at least one glob pattern.

    Input order is preserved and the result is always a new list.

    Args:
        files: Candidate file paths.
        patterns: Glob patterns to test each path against. When empty, no
            filtering is applied and every file is returned.

    Returns:
        The files for which some pattern matches.
    """
    selected: list[str] = []

    if patterns:
        for candidate in files:
            for glob in patterns:
                if matches_pattern(candidate, glob):
                    # One hit is enough; skip the remaining patterns.
                    selected.append(candidate)
                    break
    else:
        # Nothing to filter on: hand back a copy of everything.
        selected.extend(files)

    return selected