Coverage for .tox/py312/lib/python3.12/site-packages/pylint_sort_functions/utils.py: 100%
199 statements
« prev ^ index » next coverage.py v7.10.2, created at 2025-08-07 04:45 +0200
« prev ^ index » next coverage.py v7.10.2, created at 2025-08-07 04:45 +0200
"""Utility functions for AST analysis and sorting logic.

This module provides the core analysis functions for the pylint-sort-functions plugin.
It includes functions for:

1. Function/method sorting validation
2. Public/private function separation validation
3. Function privacy detection (identifying functions that should be private)
4. Framework-aware sorting with decorator exclusions

For detailed information about the sorting algorithm and rules, see the documentation
at docs/sorting.rst which explains the complete sorting methodology, special method
handling, privacy detection, and configuration options.

Function Privacy Detection:
The plugin uses import analysis to identify functions that should be private by
scanning actual usage patterns across the project:
- Analyzes cross-module imports and function calls in all Python files
- Identifies functions that are only used within their own module
- Skips common public API patterns (main, run, setup, etc.)
- Provides accurate detection based on real usage patterns
"""
24import ast
25import os
26import re
27from functools import lru_cache
28from pathlib import Path
30from astroid import nodes # type: ignore[import-untyped]
32# Public functions
def are_functions_properly_separated(functions: list[nodes.FunctionDef]) -> bool:
    """Tell whether every public function precedes every private function.

    Only the relative order of the two visibility groups is verified. Section
    comment headers such as "# Public functions" or "# Private functions" are
    not inspected here.

    :param functions: Function definition nodes in source order
    :type functions: list[nodes.FunctionDef]
    :returns: True if no public function appears after a private function
    :rtype: bool
    """
    # Zero or one function can never be out of order.
    if len(functions) <= 1:
        return True

    privacy_flags = [is_private_function(candidate) for candidate in functions]
    if True not in privacy_flags:
        return True

    # From the first private function onwards, everything must be private.
    first_private = privacy_flags.index(True)
    return all(privacy_flags[first_private:])
def are_functions_sorted(functions: list[nodes.FunctionDef]) -> bool:
    """Tell whether functions are in alphabetical order per visibility group.

    The functions are split into a public group and a private group, and each
    group's names must already be in ascending order:

    - Public functions (dunder methods such as __init__ count as public)
    - Private functions (single underscore prefix)

    Because dunder names begin with "__", they naturally sort ahead of ordinary
    public names (e.g., __init__ comes before add_item).

    :param functions: List of function definition nodes
    :type functions: list[nodes.FunctionDef]
    :returns: True if both groups are alphabetically ordered
    :rtype: bool
    """
    if len(functions) <= 1:
        return True

    # Public group is checked first, then the private group.
    for group in _get_function_groups(functions):
        names_in_source_order = [member.name for member in group]
        if sorted(names_in_source_order) != names_in_source_order:
            return False

    return True
def are_functions_sorted_with_exclusions(
    functions: list[nodes.FunctionDef], ignore_decorators: list[str] | None = None
) -> bool:
    """Check alphabetical ordering while skipping decorator-excluded functions.

    Framework-aware variant of are_functions_sorted: any function carrying a
    decorator that matches one of the ignore patterns is removed before the
    ordering check is applied to the remaining functions.

    :param functions: List of function definition nodes
    :type functions: list[nodes.FunctionDef]
    :param ignore_decorators: List of decorator patterns to ignore
    :type ignore_decorators: list[str] | None
    :returns: True if the non-excluded functions are properly sorted
    :rtype: bool
    """
    patterns = ignore_decorators or []

    # Only functions without an excluded decorator take part in the check.
    remaining = [
        candidate
        for candidate in functions
        if not function_has_excluded_decorator(candidate, patterns)
    ]

    return are_functions_sorted(remaining)
def are_methods_sorted(methods: list[nodes.FunctionDef]) -> bool:
    """Tell whether methods are in alphabetical order per visibility group.

    Methods are held to exactly the same ordering rules as module-level
    functions, so the check is delegated to are_functions_sorted.

    :param methods: List of method definition nodes
    :type methods: list[nodes.FunctionDef]
    :returns: True if methods are properly sorted
    :rtype: bool
    """
    methods_in_order = are_functions_sorted(methods)
    return methods_in_order
def are_methods_sorted_with_exclusions(
    methods: list[nodes.FunctionDef], ignore_decorators: list[str] | None = None
) -> bool:
    """Check method ordering while skipping decorator-excluded methods.

    Methods are held to exactly the same rules as module-level functions, so
    the check is delegated to are_functions_sorted_with_exclusions.

    :param methods: List of method definition nodes
    :type methods: list[nodes.FunctionDef]
    :param ignore_decorators: List of decorator patterns to ignore
    :type ignore_decorators: list[str] | None
    :returns: True if the non-excluded methods are properly sorted
    :rtype: bool
    """
    methods_in_order = are_functions_sorted_with_exclusions(
        methods, ignore_decorators
    )
    return methods_in_order
def function_has_excluded_decorator(
    func: nodes.FunctionDef, ignore_decorators: list[str] | None
) -> bool:
    """Tell whether a function's decorators exempt it from sorting.

    Some decorators create dependencies that make alphabetical sorting
    inappropriate; for example, Click commands or Flask routes may need a
    specific order for proper framework behavior.

    :param func: Function definition node to check
    :type func: nodes.FunctionDef
    :param ignore_decorators: List of decorator patterns to match against
    :type ignore_decorators: list[str] | None
    :returns: True if any decorator on the function matches any pattern
    :rtype: bool
    """
    # Nothing can match without patterns or without decorators.
    if not ignore_decorators or not func.decorators:
        return False

    return any(
        _decorator_matches_pattern(decorator_text, pattern)
        for decorator_text in _get_decorator_strings(func)
        for pattern in ignore_decorators
    )
def get_functions_from_node(node: nodes.Module) -> list[nodes.FunctionDef]:
    """Collect the function definitions found directly in a module body.

    Only direct children of the module are considered; functions nested inside
    other statements are not returned.

    :param node: Module AST node
    :type node: nodes.Module
    :returns: Function definition nodes in source order
    :rtype: list[nodes.FunctionDef]
    """
    return [
        statement
        for statement in node.body
        if isinstance(statement, nodes.FunctionDef)
    ]
def get_methods_from_class(node: nodes.ClassDef) -> list[nodes.FunctionDef]:
    """Collect the method definitions found directly in a class body.

    Only direct children of the class are considered.

    :param node: Class definition node
    :type node: nodes.ClassDef
    :returns: Method definition nodes in source order
    :rtype: list[nodes.FunctionDef]
    """
    return [
        member for member in node.body if isinstance(member, nodes.FunctionDef)
    ]
def is_private_function(func: nodes.FunctionDef) -> bool:
    """Tell whether a function is private by naming convention.

    A leading underscore marks a function as private. Dunder methods such as
    __init__ are excluded: they are special methods with specific meanings in
    Python rather than private helpers.

    :param func: Function definition node
    :type func: nodes.FunctionDef
    :returns: True if the name starts with an underscore and is not a dunder
    :rtype: bool
    """
    if not func.name.startswith("_"):
        return False
    return not _is_dunder_method(func)
def should_function_be_private(
    func: nodes.FunctionDef,
    module_path: Path,
    project_root: Path,
    public_patterns: set[str] | None = None,
) -> bool:
    """Decide via import analysis whether a public function should be private.

    The project is scanned for real usage; a function that no other module
    imports or accesses is reported as a candidate for a leading underscore.

    Detection Logic:
    1. Skip if already private (starts with underscore)
    2. Skip special methods (__init__, __str__, etc.)
    3. Skip configurable public API patterns (main, run, setup, etc.)
    4. Check if function is imported/used by other modules
    5. If not used externally, suggest making it private

    :param func: Function definition node to analyze
    :type func: nodes.FunctionDef
    :param module_path: Path to the module file
    :type module_path: Path
    :param project_root: Root directory of the project
    :type project_root: Path
    :param public_patterns: Set of function names to always treat as public.
        If None, uses default patterns (main, run, execute, etc.)
    :type public_patterns: set[str] | None
    :returns: True if the function should be marked as private
    :rtype: bool
    """
    # Already-private functions and dunder methods are never reported.
    if is_private_function(func) or _is_dunder_method(func):
        return False

    # Entry points and framework callbacks are invoked by external systems
    # (e.g., main() by the runtime, setup/teardown by test frameworks), so
    # they never show up in import analysis and must stay public.
    always_public = (
        public_patterns
        if public_patterns is not None
        else {"main", "run", "execute", "start", "stop", "setup", "teardown"}
    )
    if func.name in always_public:
        return False

    # A function nobody else imports or accesses should probably be private.
    return not _is_function_used_externally(func.name, module_path, project_root)
294# Private functions
@lru_cache(maxsize=1)
def _build_cross_module_usage_graph(project_root: Path) -> dict[str, set[str]]:
    """Map each used function name to the modules that import or access it.

    WARNING: This is an expensive operation that scans the entire project.
    The result is memoized with lru_cache(maxsize=1), so repeated calls with
    the same project root reuse the previous scan.

    :param project_root: Root directory of the project
    :type project_root: Path
    :returns: Dictionary mapping function names to set of importing modules
    :rtype: dict[str, set[str]]
    """
    graph: dict[str, set[str]] = {}

    for source_file in _find_python_files(project_root):
        try:
            # Dotted module name relative to the root
            # (e.g., "package/module.py" -> "package.module").
            relative = source_file.relative_to(project_root)
            dotted_name = str(relative.with_suffix("")).replace(os.sep, ".")

            # Skip __init__ files (they re-export for API organization, which
            # is not actual usage) and test files (tests access internals and
            # do not indicate public API).
            if dotted_name.endswith("__init__") or _is_unittest_file(dotted_name):
                continue

            # The modification time is part of the parse cache key.
            try:
                modified_at = source_file.stat().st_mtime
            except OSError:  # pragma: no cover
                # Without an mtime the file cannot be cached; skip it.
                continue

            _, imported_names, dotted_accesses = _extract_imports_from_file(
                source_file, modified_at
            )

            # Direct imports first (from utils import calculate_total), then
            # attribute accesses (utils.calculate_total(items)).
            for usage_pairs in (imported_names, dotted_accesses):
                for _, used_name in usage_pairs:
                    graph.setdefault(used_name, set()).add(dotted_name)

        except (ValueError, OSError):
            # Skip files that can't be processed.
            continue

    return graph
def _decorator_matches_pattern(decorator_str: str, pattern: str) -> bool:
    """Tell whether a decorator string satisfies an ignore pattern.

    Exact matches and simple wildcard patterns are supported, which lets users
    exempt functions with specific decorators from sorting requirements when
    those decorators create ordering dependencies.

    Examples:
    - "@app.route" matches both @app.route and @app.route("/path")
    - "@*.command" matches @main.command(), @cli.command(), etc.

    :param decorator_str: Decorator string to check (e.g., "@main.command()")
    :type decorator_str: str
    :param pattern: Pattern to match against (e.g., "@main.command", "@*.command")
    :type pattern: str
    :returns: True if decorator matches the pattern
    :rtype: bool
    """
    # A pattern may be written with or without the leading "@".
    normalized = pattern if pattern.startswith("@") else f"@{pattern}"

    # Compare again with trailing parentheses dropped, so that
    # @main.command() is treated like @main.command.
    bare_decorator = decorator_str.rstrip("()")
    bare_pattern = normalized.rstrip("()")
    if decorator_str == normalized or bare_decorator == bare_pattern:
        return True

    if "*" not in bare_pattern:
        return False

    # Each "*" stands for exactly one dot-free name segment; every other
    # character of the pattern is matched literally.
    literal_pieces = (re.escape(piece) for piece in bare_pattern.split("*"))
    wildcard_regex = "^" + "[^.]+".join(literal_pieces) + "$"
    return re.match(wildcard_regex, bare_decorator) is not None
def _decorator_node_to_string(decorator: nodes.NodeNG) -> str:
    """Render a decorator AST node as dotted text.

    Plain names, attribute chains, and calls on either are supported. Call
    arguments are not rendered: a call is shown as the callee followed by
    "()". Any other node shape yields an empty string so that callers can
    skip it.

    :param decorator: Decorator AST node
    :type decorator: nodes.NodeNG
    :returns: String representation of the decorator (without @ prefix)
    :rtype: str
    """
    if isinstance(decorator, nodes.Call):
        # Function call decorator: @decorator() or @obj.method(args)
        callee = _decorator_node_to_string(decorator.func)
        return f"{callee}()" if callee else ""

    if isinstance(decorator, nodes.Attribute):
        owner = decorator.expr
        if isinstance(owner, nodes.Name):
            # Attribute decorator: @obj.method
            return f"{owner.name}.{decorator.attrname}"
        # Nested attributes: @obj.nested.method
        prefix = _decorator_node_to_string(owner)
        return f"{prefix}.{decorator.attrname}" if prefix else ""

    if isinstance(decorator, nodes.Name):
        # Simple decorator: @decorator_name
        return str(decorator.name)

    # Complex decorators are reported as empty so they get skipped.
    return ""
def _extract_attribute_accesses(
    tree: ast.AST,
    imported_modules: dict[str, str],
    attribute_accesses: set[tuple[str, str]],
) -> None:
    """Record ``alias.attribute`` accesses whose alias is a known import.

    Helper for _extract_imports_from_file. Only attribute nodes whose value is
    a bare name are considered, and only when that name is a key of
    ``imported_modules``. Results are added to ``attribute_accesses`` in place.

    :param tree: Parsed AST tree
    :type tree: ast.AST
    :param imported_modules: Map of aliases to actual module names
    :type imported_modules: dict[str, str]
    :param attribute_accesses: Set to populate with (module, attribute) tuples
    :type attribute_accesses: set[tuple[str, str]]
    """
    attribute_accesses.update(
        (imported_modules[candidate.value.id], candidate.attr)
        for candidate in ast.walk(tree)
        if isinstance(candidate, ast.Attribute)
        and isinstance(candidate.value, ast.Name)
        and candidate.value.id in imported_modules
    )
@lru_cache(maxsize=128)
def _extract_imports_from_file(
    file_path: Path,
    file_mtime: float,  # pylint: disable=unused-argument
) -> tuple[set[str], set[tuple[str, str]], set[tuple[str, str]]]:
    """Parse a Python file and summarize what it imports and accesses.

    Results are memoized with lru_cache so the same file is not re-parsed
    during an analysis run. ``file_mtime`` is not read by the body; it only
    takes part in the cache key, so a changed modification time produces a
    fresh parse.

    A file that is missing, is not valid UTF-8, or does not parse yields three
    empty sets.

    :param file_path: Path to the Python file to analyze
    :type file_path: Path
    :param file_mtime: File modification time (used for cache invalidation)
    :type file_mtime: float
    :returns: Tuple of:
        module_imports: Set of module names from direct imports
        function_imports: Set of (module, function) tuples from direct imports
        attribute_accesses: Set of (module, attribute) tuples from dot notation
    :rtype: tuple[set[str], set[tuple[str, str]], set[tuple[str, str]]]
    """
    try:
        with open(file_path, encoding="utf-8") as handle:
            tree = ast.parse(handle.read())
    except (SyntaxError, UnicodeDecodeError, FileNotFoundError):
        # Unreadable or unparsable files contribute nothing.
        return set(), set(), set()

    modules_seen: set[str] = set()
    names_imported: set[tuple[str, str]] = set()
    # Local name -> module it refers to, used for attribute access detection.
    alias_to_module: dict[str, str] = {}

    # First pass: import statements.
    for statement in ast.walk(tree):
        if isinstance(statement, ast.Import):
            # import module [as alias]
            for entry in statement.names:
                modules_seen.add(entry.name)
                alias_to_module[entry.asname or entry.name] = entry.name
        elif isinstance(statement, ast.ImportFrom) and statement.module:
            # from module import name [as alias]
            modules_seen.add(statement.module)
            for entry in statement.names:
                names_imported.add((statement.module, entry.name))
                alias_to_module[entry.asname or entry.name] = statement.module

    # Second pass: attribute accesses (module.function calls).
    dotted_accesses: set[tuple[str, str]] = set()
    _extract_attribute_accesses(tree, alias_to_module, dotted_accesses)

    return modules_seen, names_imported, dotted_accesses
def _find_python_files(root_path: Path) -> list[Path]:
    """Find all Python files in a project directory.

    Recursively searches for files with a .py extension while skipping common
    directories that should not be analyzed (build artifacts, virtual
    environments, caches, etc.).

    Only directories *below* ``root_path`` are tested against the skip rules,
    so a project that happens to live under a directory called e.g. ``build``
    or ``env`` is still scanned. Packaging metadata directories are recognized
    by their ``.egg-info`` suffix (e.g. ``mypkg.egg-info``).

    TODO: Make skip_dirs list configurable for project-specific needs.

    :param root_path: Root directory to search for Python files
    :type root_path: Path
    :returns: List of paths to Python files
    :rtype: list[Path]
    """
    # Directory names that are skipped on exact match.
    skip_dirs = {
        "__pycache__",
        ".git",
        ".tox",
        ".pytest_cache",
        ".mypy_cache",
        "venv",
        ".venv",
        "env",
        ".env",
        "build",
        "dist",
        "node_modules",
    }
    # Directory name suffixes that are skipped (covers "<name>.egg-info").
    skip_suffixes = (".egg-info",)

    python_files = []
    for item in root_path.rglob("*.py"):
        # Inspect only the directories between root_path and the file itself;
        # ancestors of root_path must not influence the decision.
        parent_dirs = item.relative_to(root_path).parts[:-1]
        if any(
            part in skip_dirs or part.endswith(skip_suffixes)
            for part in parent_dirs
        ):
            continue

        python_files.append(item)

    return python_files
def _get_decorator_strings(func: nodes.FunctionDef) -> list[str]:
    """List the decorators of a function as "@"-prefixed strings.

    Decorators that cannot be rendered (they come back as an empty string from
    _decorator_node_to_string) are left out.

    :param func: Function definition node
    :type func: nodes.FunctionDef
    :returns: List of decorator strings (e.g., ["@main.command()", "@app.route()"])
    :rtype: list[str]
    """
    if not func.decorators:
        return []

    rendered = (
        _decorator_node_to_string(decorator_node)
        for decorator_node in func.decorators.nodes
    )
    return [f"@{text}" for text in rendered if text]
def _get_function_groups(
    functions: list[nodes.FunctionDef],
) -> tuple[list[nodes.FunctionDef], list[nodes.FunctionDef]]:
    """Partition functions into a public group and a private group.

    The relative source order is preserved inside each group.

    :param functions: List of function definitions
    :type functions: list[nodes.FunctionDef]
    :returns: Tuple of (public_functions, private_functions)
    :rtype: tuple[list[nodes.FunctionDef], list[nodes.FunctionDef]]
    """
    public_group: list[nodes.FunctionDef] = []
    private_group: list[nodes.FunctionDef] = []

    for candidate in functions:
        if is_private_function(candidate):
            private_group.append(candidate)
        else:
            public_group.append(candidate)

    return public_group, private_group
def _is_dunder_method(func: nodes.FunctionDef) -> bool:
    """Tell whether a function is a dunder/magic method.

    A dunder name both starts and ends with two underscores, like __init__,
    __str__, or __call__.

    :param func: Function definition node
    :type func: nodes.FunctionDef
    :returns: True if function is a dunder method
    :rtype: bool
    """
    method_name: str = func.name  # Explicitly typed to satisfy mypy
    if not method_name.startswith("__"):
        return False
    return method_name.endswith("__")
def _is_function_used_externally(
    func_name: str, module_path: Path, project_root: Path
) -> bool:
    """Tell whether any module other than the defining one uses a function.

    This is the core of privacy detection: a function that is only used inside
    its own module is a candidate for being marked as private.

    WARNING: This relies on the cross-module usage graph, which is expensive
    to build for large projects. The graph is cached via @lru_cache to
    mitigate repeated scanning.

    :param func_name: Name of the function to check
    :type func_name: str
    :param module_path: Path to the module containing the function
    :type module_path: Path
    :param project_root: Root directory of the project
    :type project_root: Path
    :returns: True if function is used by other modules, False if only used internally
    :rtype: bool
    """
    importers = _build_cross_module_usage_graph(project_root).get(func_name)
    if importers is None:
        # Nobody imports or accesses this name at all.
        return False

    # Dotted name of the module that defines the function.
    try:
        own_relative = module_path.relative_to(project_root)
    except ValueError:
        # The module lies outside the project root, so its name cannot be
        # determined; err on the side of treating the function as used.
        return True
    own_module = str(own_relative.with_suffix("")).replace(os.sep, ".")

    return any(importer != own_module for importer in importers)
def _is_unittest_file(module_name: str) -> bool:
    """Check if a dotted module name indicates a unit test file.

    The name is split on "." and each component is examined on its own, so
    that modules which merely contain the letters "test" (``latest``,
    ``contest_results``, ...) are not mistaken for tests. A component marks
    the module as a test when, compared case-insensitively, it:

    - is ``test``, ``tests`` or ``conftest`` (test directories / pytest config)
    - starts with ``test_`` (``test_utils``)
    - ends with ``_test`` or ``_tests`` (``utils_test``, ``unit_tests``)

    TODO: Consider checking file content for test frameworks.

    :param module_name: The module name to check
    :type module_name: str
    :returns: True if module appears to be a test file
    :rtype: bool
    """
    test_component_names = {"test", "tests", "conftest"}

    for component in module_name.lower().split("."):
        if (
            component in test_component_names
            or component.startswith("test_")
            or component.endswith(("_test", "_tests"))
        ):
            return True

    return False