Coverage for src/pylint_sort_functions/utils/categorization.py: 100%
87 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-08-12 16:06 +0200
« prev ^ index » next coverage.py v7.10.1, created at 2025-08-12 16:06 +0200
1"""Method categorization system for flexible sorting organization.
3This module provides the categorization framework that allows methods to be
4organized into multiple categories (properties, test methods, etc.) instead
5of just the binary public/private distinction.
6"""
8import fnmatch
9from dataclasses import dataclass, field
11from astroid import nodes # type: ignore[import-untyped]
13from .ast_analysis import is_private_function
14from .decorators import decorator_matches_pattern, get_decorator_strings
@dataclass
class MethodCategory:
    """A single named bucket that methods can be sorted into.

    A category is described by the glob patterns and decorator patterns that
    select its members, a priority used to break ties between categories, and
    the comment header that marks its section in source code.

    :param name: Unique identifier for this category (e.g., 'properties')
    :type name: str
    :param patterns: Glob patterns matched against method names
        (e.g., ['test_*']); defaults to an empty list
    :type patterns: list[str]
    :param decorators: Decorator patterns that select members
        (e.g., ['@property']); defaults to an empty list
    :type decorators: list[str]
    :param priority: Tie-breaker between categories, higher values win
    :type priority: int
    :param section_header: Comment header text for this category; an empty
        string means the category has no header
    :type section_header: str
    """

    name: str
    # Each instance gets its own fresh list (never a shared mutable default).
    patterns: list[str] = field(default_factory=list)
    decorators: list[str] = field(default_factory=list)
    priority: int = 0
    section_header: str = ""
@dataclass
class CategoryConfig:
    """Top-level settings for the method categorization system.

    Bundles the ordered list of categories with the fallback category name
    and the switches that control how categorization behaves.

    :param categories: Method categories in sorting order; when empty, the
        default public/private pair is installed after initialization
    :type categories: list[MethodCategory]
    :param default_category: Category name used for methods matching no category
    :type default_category: str
    :param enable_categories: Enable the multi-category system (False keeps
        the backward-compatible binary behavior)
    :type enable_categories: bool
    :param category_sorting: How to sort within categories ('alphabetical' or
        'declaration')
    :type category_sorting: str
    """

    categories: list[MethodCategory] = field(default_factory=list)
    default_category: str = "public_methods"
    # Disabled by default so existing users keep the binary behavior.
    enable_categories: bool = False
    # "alphabetical", or "declaration" to preserve source order.
    category_sorting: str = "alphabetical"

    def __post_init__(self) -> None:
        """Fall back to the default binary categories when none were given."""
        if self.categories:
            return
        self.categories = self._get_default_categories()

    def _get_default_categories(self) -> list[MethodCategory]:
        """Build the backward-compatible public/private category pair.

        :returns: List of default method categories (public, private)
        :rtype: list[MethodCategory]
        """
        # Catch-all bucket for everything that is not private.
        public = MethodCategory(
            name="public_methods",
            patterns=["*"],
            section_header="# Public methods",
        )
        # Underscore-prefixed names; the priority lifts it above the catch-all.
        private = MethodCategory(
            name="private_methods",
            patterns=["_*"],
            priority=1,
            section_header="# Private methods",
        )
        return [public, private]
def categorize_method(  # pylint: disable=function-should-be-private
    func: nodes.FunctionDef, config: CategoryConfig | None = None
) -> str:
    """Pick the category name that a method belongs to.

    With categories disabled (the default configuration) the result is the
    backward-compatible binary answer: 'private_methods' when
    is_private_function() says so, otherwise 'public_methods'.

    With categories enabled, every configured category is scored against the
    method. The category with the strongest match wins; equal match strengths
    are resolved by the category's own ``priority`` field, and remaining ties
    go to the category listed first in the configuration. If nothing matches,
    the configured default category name is returned.

    :param func: Function definition node to categorize
    :type func: nodes.FunctionDef
    :param config: Category configuration, a default one is created if None
    :type config: CategoryConfig | None
    :returns: Category name for the method (e.g., 'properties', 'test_methods')
    :rtype: str
    """
    active_config = CategoryConfig() if config is None else config

    # Legacy path: plain public/private split based on the naming convention.
    if not active_config.enable_categories:
        if is_private_function(func):
            return "private_methods"
        return "public_methods"

    # Keep only the categories that actually match (strength > 0).
    scored = [
        (strength, category)
        for category in active_config.categories
        if (strength := _get_category_match_priority(func, category)) > 0
    ]

    if not scored:
        return active_config.default_category

    # max() yields the first maximal entry, so configuration order breaks
    # any tie left after comparing (match strength, category priority).
    _, winner = max(scored, key=lambda entry: (entry[0], entry[1].priority))
    return winner.name
def find_method_section_boundaries(  # pylint: disable=function-should-be-private
    lines: list[str], config: CategoryConfig
) -> dict[int, str]:
    """Map line numbers to their section categories based on headers.

    Walks the source top to bottom. Every comment line that matches a
    category's section header opens that category's section, and each line
    from the header onwards (the header line included) is assigned to it
    until the next header is found. Lines before the first header are left
    out of the mapping.

    The scan is done here rather than through parse_section_headers() because
    that function records a single line per category. When the same header
    occurs several times (for example once per class in a module), every
    occurrence must start a section, not only one of them.

    :param lines: Source code lines to analyze
    :type lines: list[str]
    :param config: Category configuration with section headers
    :type config: CategoryConfig
    :returns: Dict mapping 0-based line numbers to category names
    :rtype: dict[int, str]
    """
    # Categories without header text can never open a section.
    header_categories = [
        category for category in config.categories if category.section_header
    ]

    boundaries: dict[int, str] = {}
    current_section: str | None = None

    for line_num, line in enumerate(lines):
        stripped_line = line.strip()

        # Only comment lines can be section headers.
        if stripped_line.startswith("#"):
            for category in header_categories:
                if _is_header_match(stripped_line, category.section_header, config):
                    current_section = category.name
                    break  # Each line can only match one category

        # Compare against None so that any category name, even an empty
        # one, is recorded once its header has been seen.
        if current_section is not None:
            boundaries[line_num] = current_section

    return boundaries
def get_expected_section_for_method(  # pylint: disable=function-should-be-private
    method: nodes.FunctionDef, config: CategoryConfig
) -> str:
    """Return the name of the section a method is expected to live in.

    The expected section is simply the category that categorize_method()
    assigns to the method under the given configuration.

    :param method: Method node to analyze
    :type method: nodes.FunctionDef
    :param config: Category configuration
    :type config: CategoryConfig
    :returns: Expected category/section name for this method
    :rtype: str
    """
    expected_section = categorize_method(method, config)
    return expected_section
def is_method_in_correct_section(  # pylint: disable=function-should-be-private
    method: nodes.FunctionDef,
    method_line: int,
    lines: list[str],
    config: CategoryConfig,
) -> bool:
    """Tell whether a method sits under the section header of its category.

    The section the method should be in comes from its categorization; the
    section it is actually in comes from the header-based line mapping. A
    method on a line that belongs to no section is never considered correct.

    :param method: Method node to validate
    :type method: nodes.FunctionDef
    :param method_line: Line number where method is defined (0-based)
    :type method_line: int
    :param lines: Source code lines
    :type lines: list[str]
    :param config: Category configuration
    :type config: CategoryConfig
    :returns: True if method is in correct section, False otherwise
    :rtype: bool
    """
    wanted = get_expected_section_for_method(method, config)
    section_by_line = find_method_section_boundaries(lines, config)

    # .get() yields None for lines outside every section, which cannot
    # equal a category name.
    return section_by_line.get(method_line) == wanted
def parse_section_headers(  # pylint: disable=function-should-be-private
    lines: list[str], config: CategoryConfig
) -> dict[str, tuple[int, str]]:
    """Locate the section header comment of each configured category.

    Every comment line is compared against the section headers of the
    configured categories, in configuration order; the first category whose
    header matches claims the line. Because the result holds one entry per
    category, a header that appears more than once is reported at its last
    occurrence.

    :param lines: Source code lines to scan for headers
    :type lines: list[str]
    :param config: Category configuration with header patterns
    :type config: CategoryConfig
    :returns: Dict mapping category names to (line_number, header_text) tuples,
        with 0-based line numbers and whitespace-stripped header text
    :rtype: dict[str, tuple[int, str]]
    """
    # Categories with an empty header can never be matched.
    with_header = [
        category for category in config.categories if category.section_header
    ]

    found: dict[str, tuple[int, str]] = {}
    for index, raw_line in enumerate(lines):
        text = raw_line.strip()
        if text.startswith("#"):
            # First matching category owns this line.
            owner = next(
                (
                    category
                    for category in with_header
                    if _is_header_match(text, category.section_header, config)
                ),
                None,
            )
            if owner is not None:
                found[owner.name] = (index, text)

    return found
def _get_category_match_priority(
    func: nodes.FunctionDef, category: MethodCategory
) -> int:
    """Calculate match priority for a function against a category.

    Returns 0 if no match, positive integer if match (higher = better match).
    Priority calculation:
    - Decorator match: 100 (highest priority - most specific)
    - Name pattern match: 50 (medium priority)
    - Catch-all pattern (*): 1 (lowest priority - fallback)

    The result is the strongest match found across all of the category's
    decorators and patterns, independent of the order in which the patterns
    are listed (a '*' listed before a specific pattern does not hide it).

    :param func: Function definition node to check
    :type func: nodes.FunctionDef
    :param category: Category to test against
    :type category: MethodCategory
    :returns: Match priority (0 = no match, >0 = match strength)
    :rtype: int
    """
    # A decorator match is the strongest possible result, so it can be
    # returned right away without looking at name patterns.
    if category.decorators:
        function_decorators = get_decorator_strings(func)
        if any(
            decorator_matches_pattern(func_decorator, decorator_pattern)
            for decorator_pattern in category.decorators
            for func_decorator in function_decorators
        ):
            return 100

    priority = 0
    for pattern in category.patterns or ():
        if not _method_name_matches_pattern(func.name, pattern):
            continue
        if pattern != "*":
            # A specific pattern is the best any name match can achieve.
            return 50
        # Catch-all matched; keep looking in case a specific pattern follows.
        priority = 1

    return priority
def _is_header_match(
    comment_line: str,
    header_pattern: str,
    config: CategoryConfig,  # pylint: disable=unused-argument
) -> bool:
    """Check if a comment line is the section header of a category.

    The comparison is an exact, case-insensitive match of the whole comment
    line against the header text; partial (substring) matches do not count.

    :param comment_line: Comment line to check (already stripped)
    :type comment_line: str
    :param header_pattern: Expected header text
    :type header_pattern: str
    :param config: Category configuration; currently unused, kept for future
        case sensitivity options
    :type config: CategoryConfig
    :returns: True if the comment equals the header text, ignoring case
    :rtype: bool
    """
    # Future enhancement: Add case sensitivity options to CategoryConfig
    normalized_comment = comment_line.lower()
    normalized_header = header_pattern.lower()
    return normalized_comment == normalized_header
def _method_name_matches_pattern(method_name: str, pattern: str) -> bool:
    """Check if a method name matches a glob pattern.

    Supports standard glob patterns:
    - * matches any sequence of characters
    - ? matches any single character
    - [seq] matches any character in seq
    - [!seq] matches any character not in seq

    Matching is case-sensitive on every platform, in line with Python
    identifiers being case-sensitive.

    :param method_name: Method name to check
    :type method_name: str
    :param pattern: Glob pattern to match against
    :type pattern: str
    :returns: True if method name matches pattern
    :rtype: bool
    """
    # fnmatchcase() is used instead of fnmatch(): the latter normalizes case
    # through os.path.normcase(), so it ignores case on Windows and results
    # would differ between operating systems.
    return fnmatch.fnmatchcase(method_name, pattern)