Coverage for src / domain / validation / code_pattern_matcher.py: 15%

52 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-01-04 04:43 +0000

1"""Code pattern matcher for file path filtering. 

2 

3This module provides utilities to match file paths against glob patterns 

4for validation gating. It supports: 

5- `*` matches any non-slash characters 

6- `**` matches anything including `/` (zero or more directory levels) 

7- Filename-only patterns (no `/`) match against basename 

8- Path patterns (contain `/`) match against full relative path 

9""" 

10 

11from __future__ import annotations 

12 

13import logging 

14import os 

15import re 

16 

17logger = logging.getLogger(__name__) 

18 

19 

def glob_to_regex(pattern: str) -> re.Pattern[str]:
    """Convert a glob pattern to a compiled regex anchored at both ends.

    Supported glob syntax:
    - `*` matches any run of characters except `/`
    - `?` matches exactly one character except `/`
    - `**/` matches zero or more complete directory segments
    - `**` anywhere else matches anything, including `/`
    - a backslash makes the following character literal (so an escaped
      `*` or `?` matches itself); a trailing backslash matches a literal
      backslash

    Every other character, including regex metacharacters such as `.`,
    `[` or `(`, is matched literally.

    If the generated expression fails to compile, a warning is logged and
    the whole pattern is matched as a literal string instead.

    Args:
        pattern: Glob pattern to convert.

    Returns:
        Compiled regex pattern of the form `^...$`.
    """
    # Characters that carry meaning in a regex and must be escaped to be
    # matched literally (`*`, `?` and backslash are handled separately).
    regex_specials = ".^$+{}|()[]"

    regex_parts: list[str] = []
    i = 0
    n = len(pattern)

    while i < n:
        char = pattern[i]

        if char == "*" and i + 1 < n and pattern[i + 1] == "*":
            i += 2
            if i < n and pattern[i] == "/":
                # `**/`: either nothing (zero segments) or anything that
                # ends with a slash (one or more segments).
                regex_parts.append("(?:.*/)?")
                i += 1
            else:
                # `**` at the end or not followed by `/`: match anything.
                regex_parts.append(".*")
        elif char == "*":
            regex_parts.append("[^/]*")
            i += 1
        elif char == "?":
            regex_parts.append("[^/]")
            i += 1
        elif char in regex_specials:
            regex_parts.append("\\" + char)
            i += 1
        elif char == "\\":
            if i + 1 < n:
                # re.escape keeps the escaped character literal. Emitting
                # a raw backslash plus the character would turn sequences
                # such as backslash-d into regex classes (or, for digits,
                # invalid group references).
                regex_parts.append(re.escape(pattern[i + 1]))
                i += 2
            else:
                # Trailing backslash with nothing to escape: literal backslash.
                regex_parts.append("\\\\")
                i += 1
        else:
            regex_parts.append(char)
            i += 1

    regex_str = "^" + "".join(regex_parts) + "$"
    try:
        return re.compile(regex_str)
    except re.error as e:
        # Safety net: fall back to matching the pattern text verbatim.
        logger.warning("Invalid glob pattern '%s', treating as literal: %s", pattern, e)
        return re.compile("^" + re.escape(pattern) + "$")

86 

87 

def matches_pattern(path: str, pattern: str) -> bool:
    """Report whether a file path satisfies a glob pattern.

    Backslashes in both arguments are first converted to `/`, so the
    pattern cannot use backslash escapes through this function.

    The pattern then decides what part of the path is tested:
    - a pattern containing `/` is compared with the whole path, after
      leading slashes have been stripped from the path (the pattern
      itself is left as given)
    - a pattern without `/` is compared with the path's basename only

    Args:
        path: File path to check.
        pattern: Glob pattern to match against.

    Returns:
        True if path matches pattern, False otherwise.
    """
    normalized_path = path.replace("\\", "/")
    normalized_pattern = pattern.replace("\\", "/")

    # Path-style patterns see the full relative path; bare filename
    # patterns only ever see the final path component.
    candidate = (
        normalized_path.lstrip("/")
        if "/" in normalized_pattern
        else os.path.basename(normalized_path)
    )

    return glob_to_regex(normalized_pattern).match(candidate) is not None

117 

118 

def filter_matching_files(files: list[str], patterns: list[str]) -> list[str]:
    """Keep the files that satisfy at least one glob pattern.

    Args:
        files: List of file paths to filter.
        patterns: List of glob patterns. An empty list lets every file
            through.

    Returns:
        New list with the matching files, in their original order.
    """
    # No patterns means no filtering: hand back a copy of the input.
    if not patterns:
        return list(files)

    selected: list[str] = []
    for candidate in files:
        for glob in patterns:
            if matches_pattern(candidate, glob):
                selected.append(candidate)
                # One hit is enough; skip the remaining patterns.
                break
    return selected