Coverage for .tox/py312/lib/python3.12/site-packages/pylint_sort_functions/utils.py: 100%

199 statements  

« prev     ^ index     » next       coverage.py v7.10.2, created at 2025-08-07 04:45 +0200

1"""Utility functions for AST analysis and sorting logic. 

2 

3This module provides the core analysis functions for the pylint-sort-functions plugin. 

4It includes functions for: 

5 

61. Function/method sorting validation 

72. Public/private function separation validation 

83. Function privacy detection (identifying functions that should be private) 

94. Framework-aware sorting with decorator exclusions 

10 

11For detailed information about the sorting algorithm and rules, see the documentation 

12at docs/sorting.rst which explains the complete sorting methodology, special method 

13handling, privacy detection, and configuration options. 

14 

15Function Privacy Detection: 

16The plugin uses import analysis to identify functions that should be private by 

17scanning actual usage patterns across the project: 

18- Analyzes cross-module imports and function calls in all Python files 

19- Identifies functions that are only used within their own module 

20- Skips common public API patterns (main, run, setup, etc.) 

21- Provides accurate detection based on real usage patterns 

22""" 

23 

24import ast 

25import os 

26import re 

27from functools import lru_cache 

28from pathlib import Path 

29 

30from astroid import nodes # type: ignore[import-untyped] 

31 

32# Public functions 

33 

34 

def are_functions_properly_separated(functions: list[nodes.FunctionDef]) -> bool:
    """Report whether every public function precedes every private function.

    Only the relative ordering is inspected. Section header comments such as
    "# Public functions" or "# Private functions" are not looked at here.

    :param functions: Function definition nodes, in source order
    :type functions: list[nodes.FunctionDef]
    :returns: False as soon as a public function follows a private one, else True
    :rtype: bool
    """
    # Zero or one function can never be out of order.
    if len(functions) <= 1:
        return True

    private_encountered = False
    for candidate in functions:
        if is_private_function(candidate):
            private_encountered = True
            continue
        if private_encountered:
            # A public function placed after a private one breaks separation.
            return False

    return True

62 

63 

def are_functions_sorted(functions: list[nodes.FunctionDef]) -> bool:
    """Check alphabetical ordering separately for public and private functions.

    The functions are split into a public group (which includes dunder methods
    such as __init__) and a private group (single underscore prefix). Each
    group's names must already equal their ``sorted()`` order; the two groups
    are not compared with each other.

    :param functions: List of function definition nodes
    :type functions: list[nodes.FunctionDef]
    :returns: True if both groups are in sorted order
    :rtype: bool
    """
    # Nothing to compare with fewer than two functions.
    if len(functions) <= 1:
        return True

    # The helper yields (public, private); both must be sorted on their own.
    for group in _get_function_groups(functions):
        names_in_source_order = [member.name for member in group]
        if names_in_source_order != sorted(names_in_source_order):
            return False

    return True

95 

96 

def are_functions_sorted_with_exclusions(
    functions: list[nodes.FunctionDef], ignore_decorators: list[str] | None = None
) -> bool:
    """Check sort order after dropping functions with ignored decorators.

    Functions whose decorators match one of the ignore patterns are removed
    first; the remaining functions are then validated with
    are_functions_sorted.

    :param functions: List of function definition nodes
    :type functions: list[nodes.FunctionDef]
    :param ignore_decorators: List of decorator patterns to ignore
    :type ignore_decorators: list[str] | None
    :returns: True if the non-excluded functions are properly sorted
    :rtype: bool
    """
    patterns = [] if ignore_decorators is None else ignore_decorators

    remaining = []
    for candidate in functions:
        if function_has_excluded_decorator(candidate, patterns):
            # Decorator-dependent functions do not take part in the check.
            continue
        remaining.append(candidate)

    return are_functions_sorted(remaining)

124 

125 

def are_methods_sorted(methods: list[nodes.FunctionDef]) -> bool:
    """Check whether class methods are alphabetically sorted per visibility group.

    Delegates to are_functions_sorted, so methods are held to the same rules
    as module-level functions.

    :param methods: List of method definition nodes
    :type methods: list[nodes.FunctionDef]
    :returns: True if methods are properly sorted
    :rtype: bool
    """
    methods_in_order = are_functions_sorted(methods)
    return methods_in_order

136 

137 

def are_methods_sorted_with_exclusions(
    methods: list[nodes.FunctionDef], ignore_decorators: list[str] | None = None
) -> bool:
    """Check method sort order, skipping methods with ignored decorators.

    Delegates to are_functions_sorted_with_exclusions, so methods are held to
    the same rules as module-level functions.

    :param methods: List of method definition nodes
    :type methods: list[nodes.FunctionDef]
    :param ignore_decorators: List of decorator patterns to ignore
    :type ignore_decorators: list[str] | None
    :returns: True if the non-excluded methods are properly sorted
    :rtype: bool
    """
    methods_in_order = are_functions_sorted_with_exclusions(
        methods, ignore_decorators
    )
    return methods_in_order

152 

153 

def function_has_excluded_decorator(
    func: nodes.FunctionDef, ignore_decorators: list[str] | None
) -> bool:
    """Tell whether any decorator on a function matches an ignore pattern.

    Intended for decorators that impose their own ordering (for example Click
    commands or Flask routes), where alphabetical sorting is not appropriate.

    :param func: Function definition node to check
    :type func: nodes.FunctionDef
    :param ignore_decorators: List of decorator patterns to match against
    :type ignore_decorators: list[str] | None
    :returns: True if at least one decorator matches at least one pattern
    :rtype: bool
    """
    if ignore_decorators and func.decorators:
        # Every (decorator, pattern) pair is tried; the first match wins.
        return any(
            _decorator_matches_pattern(decorator_text, pattern)
            for decorator_text in _get_decorator_strings(func)
            for pattern in ignore_decorators
        )

    # No patterns configured, or the function is undecorated.
    return False

183 

184 

def get_functions_from_node(node: nodes.Module) -> list[nodes.FunctionDef]:
    """Collect the function definitions found directly in a module body.

    Only direct children of the module are considered; the body is not
    searched recursively.

    :param node: Module AST node
    :type node: nodes.Module
    :returns: Function definition nodes in body order
    :rtype: list[nodes.FunctionDef]
    """
    return [
        statement
        for statement in node.body
        if isinstance(statement, nodes.FunctionDef)
    ]

198 

199 

def get_methods_from_class(node: nodes.ClassDef) -> list[nodes.FunctionDef]:
    """Collect the method definitions found directly in a class body.

    Only direct children of the class are considered; the body is not
    searched recursively.

    :param node: Class definition node
    :type node: nodes.ClassDef
    :returns: Method definition nodes in body order
    :rtype: list[nodes.FunctionDef]
    """
    return [
        statement
        for statement in node.body
        if isinstance(statement, nodes.FunctionDef)
    ]

213 

214 

def is_private_function(func: nodes.FunctionDef) -> bool:
    """Decide whether a function counts as private by naming convention.

    A leading underscore marks a function as private, except for dunder
    methods such as __init__, which are special methods and are treated as
    public.

    :param func: Function definition node
    :type func: nodes.FunctionDef
    :returns: True if the name starts with an underscore and is not a dunder
    :rtype: bool
    """
    if not func.name.startswith("_"):
        return False

    # Leading underscore: private unless it is a __dunder__ name.
    return not _is_dunder_method(func)

228 

229 

def should_function_be_private(
    func: nodes.FunctionDef,
    module_path: Path,
    project_root: Path,
    public_patterns: set[str] | None = None,
) -> bool:
    """Suggest whether a public function ought to be made private.

    The decision is based on import analysis across the project:

    1. Functions that are already private are never reported.
    2. Dunder methods (__init__, __str__, etc.) are never reported.
    3. Names listed in ``public_patterns`` are never reported.
    4. Otherwise the function is reported when no other module imports or
       accesses its name.

    :param func: Function definition node to analyze
    :type func: nodes.FunctionDef
    :param module_path: Path to the module file
    :type module_path: Path
    :param project_root: Root directory of the project
    :type project_root: Path
    :param public_patterns: Set of function names to always treat as public.
        If None, a built-in set is used (main, run, execute, start, stop,
        setup, teardown)
    :type public_patterns: set[str] | None
    :returns: True if the function should be marked as private
    :rtype: bool
    """
    # Already-private functions and special methods are out of scope.
    if is_private_function(func) or _is_dunder_method(func):
        return False

    # Entry points and framework callbacks are invoked by external systems
    # (the Python runtime, test frameworks, ...) and therefore never show up
    # in import analysis; treat them as public regardless of usage.
    if public_patterns is None:
        always_public = {
            "main",
            "run",
            "execute",
            "start",
            "stop",
            "setup",
            "teardown",
        }
    else:
        always_public = public_patterns

    if func.name in always_public:
        return False

    # No usage from another module means it is a candidate for privacy.
    return not _is_function_used_externally(func.name, module_path, project_root)

292 

293 

294# Private functions 

295 

296 

@lru_cache(maxsize=1)
def _build_cross_module_usage_graph(project_root: Path) -> dict[str, set[str]]:
    """Map each imported or accessed name to the modules that use it.

    Every Python file under the project root is scanned, and each name that is
    imported directly or reached through ``module.name`` attribute access is
    recorded together with the dotted name of the module doing so.

    WARNING: This scans the entire project and is expensive. The result is
    memoized with ``lru_cache(maxsize=1)``, so only the most recently used
    ``project_root`` is kept.

    :param project_root: Root directory of the project
    :type project_root: Path
    :returns: Dictionary mapping function names to set of importing modules
    :rtype: dict[str, set[str]]
    """
    graph: dict[str, set[str]] = {}

    for source_file in _find_python_files(project_root):
        try:
            # Path relative to the root, turned into a dotted module name
            # (e.g. "package/module.py" -> "package.module").
            without_suffix = source_file.relative_to(project_root).with_suffix("")
            importer = str(without_suffix).replace(os.sep, ".")

            # __init__ files re-export names for API organization (not actual
            # usage), and test files reach into internals (not an indication
            # of public API); neither counts as a user.
            if importer.endswith("__init__") or _is_unittest_file(importer):
                continue

            # The modification time is part of the parse cache key.
            try:
                mtime = source_file.stat().st_mtime
            except OSError:  # pragma: no cover
                # Without an mtime the file is skipped.
                continue

            _, direct_imports, dotted_accesses = _extract_imports_from_file(
                source_file, mtime
            )

            # Direct imports first (from utils import calculate_total), then
            # attribute accesses (utils.calculate_total(items)).
            for pairs in (direct_imports, dotted_accesses):
                for _, used_name in pairs:
                    graph.setdefault(used_name, set()).add(importer)

        except (ValueError, OSError):
            # Skip files that can't be processed.
            continue

    return graph

356 

357 

358def _decorator_matches_pattern(decorator_str: str, pattern: str) -> bool: 

359 """Check if a decorator string matches an ignore pattern. 

360 

361 Supports exact matches and simple wildcard patterns. This allows users to 

362 exclude functions with specific decorators from sorting requirements when 

363 the decorators create ordering dependencies. 

364 

365 Examples: 

366 - "@app.route" matches both @app.route and @app.route("/path") 

367 - "@*.command" matches @main.command(), @cli.command(), etc. 

368 

369 :param decorator_str: Decorator string to check (e.g., "@main.command()") 

370 :type decorator_str: str 

371 :param pattern: Pattern to match against (e.g., "@main.command", "@*.command") 

372 :type pattern: str 

373 :returns: True if decorator matches the pattern 

374 :rtype: bool 

375 """ 

376 # Normalize patterns by ensuring they start with @ 

377 if not pattern.startswith("@"): 

378 pattern = f"@{pattern}" 

379 

380 # Exact match 

381 if decorator_str == pattern: 

382 return True 

383 

384 # Remove parentheses for pattern matching (treat @main.command() as @main.command) 

385 decorator_base = decorator_str.rstrip("()") 

386 pattern_base = pattern.rstrip("()") 

387 

388 if decorator_base == pattern_base: 

389 return True 

390 

391 # Simple wildcard support: @*.command matches @main.command, @app.command, etc. 

392 if "*" in pattern_base: 

393 # Convert simple wildcard pattern to regex 

394 # First escape the pattern, then replace escaped wildcards with regex 

395 regex_pattern = re.escape(pattern_base) 

396 regex_pattern = regex_pattern.replace(r"\*", r"[^.]+") 

397 regex_pattern = f"^{regex_pattern}$" 

398 if re.match(regex_pattern, decorator_base): 

399 return True 

400 

401 return False 

402 

403 

def _decorator_node_to_string(decorator: nodes.NodeNG) -> str:
    """Render a decorator AST node as text, without the leading "@".

    Plain names, dotted attribute chains and calls are supported. Call
    arguments are not rendered: a call is always written as ``name()``. Any
    other node shape yields an empty string so the caller can skip it.

    :param decorator: Decorator AST node
    :type decorator: nodes.NodeNG
    :returns: String representation of the decorator (without @ prefix)
    :rtype: str
    """
    if isinstance(decorator, nodes.Call):
        # @decorator() or @obj.method(args): render the callee, then "()".
        callee_text = _decorator_node_to_string(decorator.func)
        return f"{callee_text}()" if callee_text else ""

    if isinstance(decorator, nodes.Attribute):
        owner = decorator.expr
        if isinstance(owner, nodes.Name):
            # @obj.method
            return f"{owner.name}.{decorator.attrname}"
        # @obj.nested.method: render the owner chain recursively.
        owner_text = _decorator_node_to_string(owner)
        return f"{owner_text}.{decorator.attrname}" if owner_text else ""

    if isinstance(decorator, nodes.Name):
        # @decorator_name
        return str(decorator.name)

    # Complex decorator expressions are not representable; signal "skip".
    return ""

433 

434 

435def _extract_attribute_accesses( 

436 tree: ast.AST, 

437 imported_modules: dict[str, str], 

438 attribute_accesses: set[tuple[str, str]], 

439) -> None: 

440 """Extract attribute access patterns from AST for import analysis. 

441 

442 Helper function for _extract_imports_from_file to reduce complexity. 

443 

444 :param tree: Parsed AST tree 

445 :type tree: ast.AST 

446 :param imported_modules: Map of aliases to actual module names 

447 :type imported_modules: dict[str, str] 

448 :param attribute_accesses: Set to populate with (module, attribute) tuples 

449 :type attribute_accesses: set[tuple[str, str]] 

450 """ 

451 for node in ast.walk(tree): 

452 if isinstance(node, ast.Attribute): 

453 # Handle: module.function_name or alias.function_name 

454 if isinstance(node.value, ast.Name): 

455 module_alias = node.value.id 

456 if module_alias in imported_modules: 

457 actual_module = imported_modules[module_alias] 

458 attribute_accesses.add((actual_module, node.attr)) 

459 

460 

@lru_cache(maxsize=128)
def _extract_imports_from_file(
    file_path: Path,
    file_mtime: float,  # pylint: disable=unused-argument
) -> tuple[set[str], set[tuple[str, str]], set[tuple[str, str]]]:
    """Extract import information from a Python file.

    Results are memoized with ``lru_cache(maxsize=128)``. ``file_mtime`` is not
    read by the body; it exists only so that it becomes part of the cache key
    and a changed file is parsed again.

    Files that cannot be read or parsed (OS-level errors, invalid UTF-8,
    syntax errors, source rejected by the parser) produce three empty sets
    instead of an exception.

    NOTE: the returned sets are the cached objects themselves; callers should
    treat them as read-only.

    :param file_path: Path to the Python file to analyze
    :type file_path: Path
    :param file_mtime: File modification time (used for cache invalidation)
    :type file_mtime: float
    :returns: Tuple of:
        module_imports: Set of module names from direct imports
        function_imports: Set of (module, function) tuples from direct imports
        attribute_accesses: Set of (module, attribute) tuples from dot notation
    :rtype: tuple[set[str], set[tuple[str, str]], set[tuple[str, str]]]
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        tree = ast.parse(content)
    except (SyntaxError, UnicodeDecodeError, ValueError, OSError):
        # Unreadable or unparsable file: report "no imports" rather than fail.
        return set(), set(), set()

    module_imports: set[str] = set()
    function_imports: set[tuple[str, str]] = set()
    attribute_accesses: set[tuple[str, str]] = set()

    # Maps every locally bound import name to the module it refers to, so the
    # second pass can resolve ``alias.function`` accesses.
    imported_modules: dict[str, str] = {}

    # First pass: extract direct imports
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # Handle: import module [as alias]
            for alias in node.names:
                module_imports.add(alias.name)
                imported_modules[alias.asname or alias.name] = alias.name

        elif isinstance(node, ast.ImportFrom):
            if node.module:
                # Handle: from module import function [as alias]
                module_imports.add(node.module)
                for alias in node.names:
                    function_imports.add((node.module, alias.name))
                    # The imported name may itself be a module; track it so
                    # attribute access through it is detected.
                    imported_modules[alias.asname or alias.name] = node.module
            else:
                # Handle: from . import name [as alias]
                # There is no module part, so each imported name is recorded
                # as a module in its own right.
                for alias in node.names:
                    if alias.name == "*":
                        continue
                    module_imports.add(alias.name)
                    imported_modules[alias.asname or alias.name] = alias.name

    # Second pass: find attribute accesses (module.function calls)
    _extract_attribute_accesses(tree, imported_modules, attribute_accesses)

    return module_imports, function_imports, attribute_accesses

527 

528 

529def _find_python_files(root_path: Path) -> list[Path]: 

530 """Find all Python files in a project directory. 

531 

532 Recursively searches for files with .py extension while skipping common 

533 directories that should not be analyzed (build artifacts, virtual environments, 

534 caches, etc.). 

535 

536 TODO: Make skip_dirs list configurable for project-specific needs. 

537 

538 :param root_path: Root directory to search for Python files 

539 :type root_path: Path 

540 :returns: List of paths to Python files 

541 :rtype: list[Path] 

542 """ 

543 python_files = [] 

544 

545 # Directories to skip 

546 skip_dirs = { 

547 "__pycache__", 

548 ".git", 

549 ".tox", 

550 ".pytest_cache", 

551 ".mypy_cache", 

552 "venv", 

553 ".venv", 

554 "env", 

555 ".env", 

556 "build", 

557 "dist", 

558 "*.egg-info", 

559 "node_modules", 

560 } 

561 

562 for item in root_path.rglob("*.py"): 

563 # Skip if any parent directory should be skipped 

564 if any(skip_dir in item.parts for skip_dir in skip_dirs): 

565 continue 

566 

567 python_files.append(item) 

568 

569 return python_files 

570 

571 

def _get_decorator_strings(func: nodes.FunctionDef) -> list[str]:
    """Render every representable decorator of a function as "@..." text.

    Decorators that cannot be rendered (the converter returns an empty string
    for them) are left out of the result.

    :param func: Function definition node
    :type func: nodes.FunctionDef
    :returns: List of decorator strings (e.g., ["@main.command()", "@app.route()"])
    :rtype: list[str]
    """
    if not func.decorators:
        return []

    rendered = (
        _decorator_node_to_string(decorator_node)
        for decorator_node in func.decorators.nodes
    )
    # Empty strings mark decorators too complex to represent; drop them.
    return [f"@{text}" for text in rendered if text]

590 

591 

def _get_function_groups(
    functions: list[nodes.FunctionDef],
) -> tuple[list[nodes.FunctionDef], list[nodes.FunctionDef]]:
    """Partition functions into a public list and a private list.

    The relative order of the functions is preserved inside each list.

    :param functions: List of function definitions
    :type functions: list[nodes.FunctionDef]
    :returns: Tuple of (public_functions, private_functions)
    :rtype: tuple[list[nodes.FunctionDef], list[nodes.FunctionDef]]
    """
    public_group: list[nodes.FunctionDef] = []
    private_group: list[nodes.FunctionDef] = []

    for candidate in functions:
        if is_private_function(candidate):
            private_group.append(candidate)
        else:
            public_group.append(candidate)

    return public_group, private_group

605 

606 

607def _is_dunder_method(func: nodes.FunctionDef) -> bool: 

608 """Check if a function is a dunder/magic method. 

609 

610 Dunder methods are special methods that start and end with double underscores, 

611 like __init__, __str__, __call__, etc. 

612 

613 :param func: Function definition node 

614 :type func: nodes.FunctionDef 

615 :returns: True if function is a dunder method 

616 :rtype: bool 

617 """ 

618 name: str = func.name # Explicitly typed to satisfy mypy 

619 return name.startswith("__") and name.endswith("__") 

620 

621 

def _is_function_used_externally(
    func_name: str, module_path: Path, project_root: Path
) -> bool:
    """Tell whether any module other than the defining one uses a function name.

    This is the core of privacy detection: a function that no other module
    imports or accesses is a candidate for being made private.

    WARNING: This relies on the cross-module usage graph, which is expensive
    to build for large projects. The graph builder is cached via @lru_cache
    to mitigate repeated scanning.

    :param func_name: Name of the function to check
    :type func_name: str
    :param module_path: Path to the module containing the function
    :type module_path: Path
    :param project_root: Root directory of the project
    :type project_root: Path
    :returns: True if function is used by other modules, False if only used internally
    :rtype: bool
    """
    importers = _build_cross_module_usage_graph(project_root).get(func_name)
    if importers is None:
        # The name is not imported or accessed anywhere in the project.
        return False

    # Dotted module name of the file that defines the function.
    try:
        without_suffix = module_path.relative_to(project_root).with_suffix("")
    except ValueError:
        # module_path is outside project_root, so the module name cannot be
        # determined; err on the side of "used externally".
        return True
    own_module = str(without_suffix).replace(os.sep, ".")

    # Usage from the defining module itself does not count.
    return any(importer != own_module for importer in importers)

661 

662 

663def _is_unittest_file(module_name: str) -> bool: 

664 """Check if a module name indicates a unit test file. 

665 

666 TODO: Improve detection beyond simple string matching: 

667 - Check for specific test directory patterns (tests/, test/) 

668 - Look for common test file patterns (test_*.py, *_test.py) 

669 - Consider checking file content for test frameworks 

670 

671 :param module_name: The module name to check 

672 :type module_name: str 

673 :returns: True if module appears to be a test file 

674 :rtype: bool 

675 """ 

676 return "test" in module_name.lower()