Coverage for src/pylint_sort_functions/utils/categorization.py: 100%

87 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-08-12 16:06 +0200

1"""Method categorization system for flexible sorting organization. 

2 

3This module provides the categorization framework that allows methods to be 

4organized into multiple categories (properties, test methods, etc.) instead 

5of just the binary public/private distinction. 

6""" 

7 

8import fnmatch 

9from dataclasses import dataclass, field 

10 

11from astroid import nodes # type: ignore[import-untyped] 

12 

13from .ast_analysis import is_private_function 

14from .decorators import decorator_matches_pattern, get_decorator_strings 

15 

16 

@dataclass
class MethodCategory:
    """Describe a single category that methods can be sorted into.

    A category bundles its matching criteria (method-name globs and decorator
    patterns) with the metadata used for conflict resolution and for the
    section header that introduces the category.

    :param name: Unique identifier for this category (e.g., 'properties')
    :type name: str
    :param patterns: Glob patterns matched against method names
        (e.g., ['test_*']); empty when omitted
    :type patterns: list[str]
    :param decorators: Decorator patterns (e.g., ['@property']); empty when
        omitted
    :type decorators: list[str]
    :param priority: Priority for conflict resolution, higher values win
    :type priority: int
    :param section_header: Comment header text for this category
    :type section_header: str
    """

    name: str
    # default_factory hands every instance its own fresh list, so no two
    # categories ever share a mutable default.
    patterns: list[str] = field(default_factory=list)
    decorators: list[str] = field(default_factory=list)
    priority: int = 0
    section_header: str = ""

41 

42 

@dataclass
class CategoryConfig:
    """Hold the complete configuration of the method categorization system.

    Combines the list of categories with the fallback category name and the
    switches that control whether and how categorization is applied.

    :param categories: List of method categories in sorting order; when empty,
        the default public/private pair is installed after initialization
    :type categories: list[MethodCategory]
    :param default_category: Category name for methods that do not match patterns
    :type default_category: str
    :param enable_categories: Enable multi-category system (false = backward
        compatibility)
    :type enable_categories: bool
    :param category_sorting: How to sort within categories ('alphabetical' or
        'declaration')
    :type category_sorting: str
    """

    categories: list[MethodCategory] = field(default_factory=list)
    default_category: str = "public_methods"
    enable_categories: bool = False  # Disabled by default for backward compatibility
    category_sorting: str = "alphabetical"  # "declaration" preserves order instead

    def __post_init__(self) -> None:
        """Install the default binary categories when none were supplied."""
        if self.categories:
            return
        self.categories = self._get_default_categories()

    def _get_default_categories(self) -> list[MethodCategory]:
        """Build the backward-compatible public/private category pair.

        :returns: List of default method categories (public, private)
        :rtype: list[MethodCategory]
        """
        # Catch-all for everything that is not claimed by the private category.
        public = MethodCategory(
            name="public_methods",
            patterns=["*"],
            section_header="# Public methods",
        )
        # Underscore-prefixed names; priority 1 ranks it above the public
        # category, whose priority stays at the default of 0.
        private = MethodCategory(
            name="private_methods",
            patterns=["_*"],
            priority=1,
            section_header="# Private methods",
        )
        return [public, private]

91 

92 

def categorize_method(  # pylint: disable=function-should-be-private
    func: nodes.FunctionDef, config: CategoryConfig | None = None
) -> str:
    """Return the name of the category a method belongs to.

    With ``enable_categories`` switched off (the default) the result is the
    legacy binary answer: 'private_methods' when ``is_private_function``
    says so, 'public_methods' otherwise. With categories enabled, every
    configured category is scored against the method and the best-scoring
    one wins; ties on the match score are broken by the category's own
    ``priority`` field, and remaining ties by declaration order.

    :param func: Function definition node to categorize
    :type func: nodes.FunctionDef
    :param config: Category configuration, a default one is created if None
    :type config: CategoryConfig | None
    :returns: Category name for the method (e.g., 'properties', 'test_methods'),
        or ``config.default_category`` when nothing matches
    :rtype: str
    """
    active_config = CategoryConfig() if config is None else config

    # Legacy mode: fall back to the original public/private distinction.
    if not active_config.enable_categories:
        if is_private_function(func):
            return "private_methods"
        return "public_methods"

    # Keep only the categories that actually match, paired with their score.
    scored: list[tuple[MethodCategory, int]] = [
        (category, score)
        for category in active_config.categories
        if (score := _get_category_match_priority(func, category)) > 0
    ]

    if not scored:
        return active_config.default_category

    # max() returns the first maximal entry, so among equally ranked
    # categories the one declared first in the configuration wins.
    winner, _ = max(scored, key=lambda entry: (entry[1], entry[0].priority))
    return winner.name

134 

135 

def find_method_section_boundaries(  # pylint: disable=function-should-be-private
    lines: list[str], config: CategoryConfig
) -> dict[int, str]:
    """Map line numbers to their section categories based on headers.

    Walks the source top to bottom and remembers the category of the most
    recent section header seen. Every line from a header onward (the header
    line included) is assigned to that category until the next header
    appears. Lines before the first header are left out of the mapping.

    Headers are detected line by line, so a header that occurs several times
    (for example once per class in the same file) starts a new section at
    each occurrence rather than only at its last one.

    :param lines: Source code lines to analyze
    :type lines: list[str]
    :param config: Category configuration with section headers
    :type config: CategoryConfig
    :returns: Dict mapping 0-based line indexes to category names
    :rtype: dict[int, str]
    """
    boundaries: dict[int, str] = {}
    current_section: str | None = None

    for line_num, line in enumerate(lines):
        stripped_line = line.strip()

        # Only comment lines can be section headers.
        if stripped_line.startswith("#"):
            for category in config.categories:
                if category.section_header and _is_header_match(
                    stripped_line, category.section_header, config
                ):
                    current_section = category.name
                    break  # Each line can only match one category

        # Compare against None explicitly so that a section is never dropped
        # merely because its category name is falsy.
        if current_section is not None:
            boundaries[line_num] = current_section

    return boundaries

169 

170 

def get_expected_section_for_method(  # pylint: disable=function-should-be-private
    method: nodes.FunctionDef, config: CategoryConfig
) -> str:
    """Return the section a method is expected to live in.

    The expected section is simply the category that ``categorize_method``
    assigns to the method under the given configuration.

    :param method: Method node to analyze
    :type method: nodes.FunctionDef
    :param config: Category configuration
    :type config: CategoryConfig
    :returns: Expected category/section name for this method
    :rtype: str
    """
    expected_section = categorize_method(method, config)
    return expected_section

187 

188 

def is_method_in_correct_section(  # pylint: disable=function-should-be-private
    method: nodes.FunctionDef,
    method_line: int,
    lines: list[str],
    config: CategoryConfig,
) -> bool:
    """Tell whether a method sits under the section header of its category.

    The method's expected section (from its categorization) is compared with
    the section that the surrounding headers assign to ``method_line``. A
    line that belongs to no section at all never counts as correct.

    :param method: Method node to validate
    :type method: nodes.FunctionDef
    :param method_line: Line number where method is defined (0-based)
    :type method_line: int
    :param lines: Source code lines
    :type lines: list[str]
    :param config: Category configuration
    :type config: CategoryConfig
    :returns: True if method is in correct section, False otherwise
    :rtype: bool
    """
    wanted_section = get_expected_section_for_method(method, config)

    # .get() yields None for lines outside every section, which can never
    # equal a category name and therefore reports "not in correct section".
    found_section = find_method_section_boundaries(lines, config).get(method_line)

    return found_section == wanted_section

221 

222 

def parse_section_headers(  # pylint: disable=function-should-be-private
    lines: list[str], config: CategoryConfig
) -> dict[str, tuple[int, str]]:
    """Locate the section header comment of each configured category.

    Every comment line is compared against the section headers of the
    configured categories, in configuration order; the first category whose
    header matches claims the line. Because the result is keyed by category
    name, a header that appears more than once is reported at its last
    occurrence.

    :param lines: Source code lines to scan for headers
    :type lines: list[str]
    :param config: Category configuration with header patterns
    :type config: CategoryConfig
    :returns: Dict mapping category names to (line_number, header_text) tuples,
        with 0-based line numbers and whitespace-stripped header text
    :rtype: dict[str, tuple[int, str]]
    """
    found: dict[str, tuple[int, str]] = {}

    for index, raw_line in enumerate(lines):
        text = raw_line.strip()

        # Section headers are comments; anything else is ignored.
        if text.startswith("#"):
            # First category (in configuration order) whose header matches.
            owner = next(
                (
                    candidate
                    for candidate in config.categories
                    if candidate.section_header
                    and _is_header_match(text, candidate.section_header, config)
                ),
                None,
            )
            if owner is not None:
                found[owner.name] = (index, text)

    return found

257 

258 

def _get_category_match_priority(
    func: nodes.FunctionDef, category: MethodCategory
) -> int:
    """Calculate match priority for a function against a category.

    Returns 0 if no match, positive integer if match (higher = better match).
    Priority calculation:
    - Decorator match: 100 (highest priority - most specific)
    - Name pattern match: 50 (medium priority)
    - Catch-all pattern (*): 1 (lowest priority - fallback)

    The result is the best score over all of the category's criteria and does
    not depend on the order in which patterns are listed: a catch-all "*"
    placed before a specific pattern does not hide the specific match.

    :param func: Function definition node to check
    :type func: nodes.FunctionDef
    :param category: Category to test against
    :type category: MethodCategory
    :returns: Match priority (0 = no match, >0 = match strength)
    :rtype: int
    """
    priority = 0

    # Check decorator patterns (highest priority)
    if category.decorators:
        function_decorators = get_decorator_strings(func)
        if any(
            decorator_matches_pattern(func_decorator, decorator_pattern)
            for decorator_pattern in category.decorators
            for func_decorator in function_decorators
        ):
            priority = 100

    # Check name patterns (medium priority)
    for pattern in category.patterns:
        if not _method_name_matches_pattern(func.name, pattern):
            continue
        if pattern != "*":
            # Specific patterns get medium priority; no name pattern can
            # score higher, so the search can stop here.
            priority = max(priority, 50)
            break
        # Catch-all pattern gets lowest priority; keep looking in case a
        # more specific pattern later in the list also matches.
        priority = max(priority, 1)

    return priority

301 

302 

303def _is_header_match( 

304 comment_line: str, 

305 header_pattern: str, 

306 config: CategoryConfig, # pylint: disable=unused-argument 

307) -> bool: 

308 """Check if a comment line matches a section header pattern. 

309 

310 Supports flexible matching including case-insensitive comparison and 

311 substring matching for section headers. 

312 

313 :param comment_line: Comment line to check (already stripped) 

314 :type comment_line: str 

315 :param header_pattern: Expected header text pattern 

316 :type header_pattern: str 

317 :param config: Category configuration (for future case sensitivity options) 

318 :type config: CategoryConfig 

319 :returns: True if comment matches header pattern 

320 :rtype: bool 

321 """ 

322 # For now, use case-insensitive exact match 

323 # Future enhancement: Add case sensitivity options to CategoryConfig 

324 return comment_line.lower() == header_pattern.lower() 

325 

326 

327def _method_name_matches_pattern(method_name: str, pattern: str) -> bool: 

328 """Check if a method name matches a glob pattern. 

329 

330 Supports standard glob patterns: 

331 - * matches any sequence of characters 

332 - ? matches any single character 

333 - [seq] matches any character in seq 

334 - [!seq] matches any character not in seq 

335 

336 :param method_name: Method name to check 

337 :type method_name: str 

338 :param pattern: Glob pattern to match against 

339 :type pattern: str 

340 :returns: True if method name matches pattern 

341 :rtype: bool 

342 """ 

343 return fnmatch.fnmatch(method_name, pattern)