Coverage for src/pylint_sort_functions/utils/privacy.py: 100%
96 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-08-12 16:06 +0200
« prev ^ index » next coverage.py v7.10.1, created at 2025-08-12 16:06 +0200
1"""Privacy analysis for detecting functions that should be private or public.
3This module provides functionality to analyze function usage patterns across
4a project to detect functions that should be marked as private (only used
5internally) or public (used by other modules).
6"""
8import ast
9import os
10from functools import lru_cache
11from pathlib import Path
12from typing import Any
14from astroid import nodes # type: ignore[import-untyped]
16from .ast_analysis import is_dunder_method, is_private_function
17from .file_patterns import find_python_files, is_unittest_file
def should_function_be_private(
    func: nodes.FunctionDef,
    module_path: Path,
    project_root: Path,
    public_patterns: set[str] | None = None,
    privacy_config: dict[str, Any] | None = None,
) -> bool:
    """Decide whether a currently public function ought to be made private.

    The decision is driven by project-wide import analysis: a function that no
    other module imports or accesses is reported as a candidate for a leading
    underscore.

    Checks, in order:
        1. Functions that are already private are never reported.
        2. Dunder methods (``__init__``, ``__str__``, ...) are never reported.
        3. Names listed in ``public_patterns`` are never reported.
        4. Otherwise the function is reported when no other module uses it.

    :param func: Function definition node to analyze
    :type func: nodes.FunctionDef
    :param module_path: Path to the file that defines ``func``
    :type module_path: Path
    :param project_root: Root directory of the project being scanned
    :type project_root: Path
    :param public_patterns: Function names that are always treated as public.
        When None, a built-in list is used: main, run, execute, start, stop,
        setup, teardown.
    :type public_patterns: set[str] | None
    :param privacy_config: Optional configuration passed through unchanged to
        the cross-module usage analysis
    :type privacy_config: dict[str, Any] | None
    :returns: True if the function should be marked as private
    :rtype: bool
    """
    # Names that are already private, and dunder methods, are out of scope.
    if is_private_function(func) or is_dunder_method(func):
        return False

    # Entry points and framework callbacks are invoked by external systems
    # (the Python runtime, test frameworks, ...) and therefore never appear
    # in import analysis; treat them as public regardless of observed usage.
    exempt_names = (
        {"main", "run", "execute", "start", "stop", "setup", "teardown"}
        if public_patterns is None
        else public_patterns
    )
    if func.name in exempt_names:
        return False

    # No usage outside the defining module means the function is a candidate.
    return not _is_function_used_externally(
        func.name, module_path, project_root, privacy_config
    )
def should_function_be_public(
    func: nodes.FunctionDef,
    module_path: Path,
    project_root: Path,
    privacy_config: dict[str, Any] | None = None,
) -> bool:
    """Decide whether a currently private function ought to be made public.

    This is the mirror image of ``should_function_be_private``: a function
    whose name marks it as private but which other modules import or access
    is reported as a candidate for losing its leading underscore.

    Checks, in order:
        1. Functions that are not private are never reported.
        2. Dunder methods are never reported.
        3. Otherwise the function is reported when another module uses it.

    :param func: Function definition node to analyze
    :type func: nodes.FunctionDef
    :param module_path: Path to the file that defines ``func``
    :type module_path: Path
    :param project_root: Root directory of the project being scanned
    :type project_root: Path
    :param privacy_config: Optional configuration passed through unchanged to
        the cross-module usage analysis
    :type privacy_config: dict[str, Any] | None
    :returns: True if the function should be made public
    :rtype: bool
    """
    # Nothing to suggest for a function that is already public.
    if not is_private_function(func):
        return False

    # Defensive guard: with the current helpers a dunder method is not
    # expected to get past the privacy check above, hence the coverage pragma.
    if is_dunder_method(func):  # pragma: no cover
        return False  # pragma: no cover

    # A private name that other modules reach for is effectively public API.
    return _is_function_used_externally(
        func.name, module_path, project_root, privacy_config
    )
def _build_cross_module_usage_graph(
    project_root: Path, privacy_config: dict[str, Any] | None = None
) -> dict[str, set[str]]:
    """Map every imported or accessed name to the modules that use it.

    Every Python file found under ``project_root`` is scanned. Each name that
    a file imports with ``from ... import name`` or reaches through
    ``module.name`` is recorded against that file's dotted module name. The
    module the name was imported from is not part of the key: the graph is
    keyed by bare name only.

    WARNING: This walks the whole project on every call. Only the per-file
    parsing step (``_extract_imports_from_file``) is memoized; the graph itself
    is rebuilt each time.

    :param project_root: Root directory of the project
    :type project_root: Path
    :param privacy_config: Optional configuration forwarded to
        ``is_unittest_file`` when deciding which files to ignore
    :type privacy_config: dict[str, Any] | None
    :returns: Dictionary mapping names to the set of modules that use them
    :rtype: dict[str, set[str]]
    """
    graph: dict[str, set[str]] = {}

    for source_file in find_python_files(project_root):
        try:
            # Dotted module name relative to the project root,
            # e.g. "package/module.py" -> "package.module".
            dotted_module = str(
                source_file.relative_to(project_root).with_suffix("")
            ).replace(os.sep, ".")

            # __init__ files only re-export names, and tests poke at internals
            # on purpose; neither is evidence of real public usage.
            if dotted_module.endswith("__init__") or is_unittest_file(
                dotted_module, privacy_config
            ):
                continue

            # The modification time is part of the parse-cache key so that an
            # edited file is parsed again.
            mtime = source_file.stat().st_mtime
            _, imported_names, dotted_accesses = _extract_imports_from_file(
                source_file, mtime
            )

            # Direct imports (from utils import calculate_total) and attribute
            # accesses (utils.calculate_total(...)) are recorded the same way.
            for _, used_name in imported_names | dotted_accesses:
                graph.setdefault(used_name, set()).add(dotted_module)

        except (ValueError, OSError):
            # Files outside the root, unreadable files, or files whose mtime
            # cannot be read are skipped.
            continue

    return graph
195def _extract_attribute_accesses(
196 tree: ast.AST,
197 imported_modules: dict[str, str],
198 attribute_accesses: set[tuple[str, str]],
199) -> None:
200 """Extract attribute access patterns from AST for import analysis.
202 Helper function for _extract_imports_from_file to reduce complexity.
204 :param tree: Parsed AST tree
205 :type tree: ast.AST
206 :param imported_modules: Map of aliases to actual module names
207 :type imported_modules: dict[str, str]
208 :param attribute_accesses: Set to populate with (module, attribute) tuples
209 :type attribute_accesses: set[tuple[str, str]]
210 """
211 for node in ast.walk(tree):
212 if isinstance(node, ast.Attribute):
213 # Handle: module.function_name or alias.function_name
214 if isinstance(node.value, ast.Name):
215 module_alias = node.value.id
216 if module_alias in imported_modules:
217 actual_module = imported_modules[module_alias]
218 attribute_accesses.add((actual_module, node.attr))
# The cache must be able to hold every file of a project at once: the usage
# graph re-scans all files in the same order for each function it checks, and
# an LRU cache smaller than that scan evicts each entry before it is reused.
@lru_cache(maxsize=2048)
def _extract_imports_from_file(
    file_path: Path,
    file_mtime: float,  # pylint: disable=unused-argument
) -> tuple[set[str], set[tuple[str, str]], set[tuple[str, str]]]:
    """Extract import information from a Python file.

    Results are memoized per ``(file_path, file_mtime)`` so a file is parsed
    once while it stays unchanged; a new modification time produces a new
    cache key and therefore a fresh parse. The returned sets are the cached
    objects themselves, so callers must treat them as read-only.

    Bare relative imports (``from . import name``) have no module name in the
    AST. They are recorded with a module made of the leading dots (``"."``,
    ``".."``, ...) so that the imported name, and later ``name.attr``
    accesses, still count as usage. Such dot-only modules are not added to
    ``module_imports``.

    :param file_path: Path to the Python file to analyze
    :type file_path: Path
    :param file_mtime: File modification time (used only as part of the cache
        key for invalidation)
    :type file_mtime: float
    :returns: Tuple of:
        module_imports: Set of module names from direct imports
        function_imports: Set of (module, function) tuples from direct imports
        attribute_accesses: Set of (module, attribute) tuples from dot notation
        All three are empty when the file is missing, is not valid UTF-8, or
        has a syntax error.
    :rtype: tuple[set[str], set[tuple[str, str]], set[tuple[str, str]]]
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        tree = ast.parse(content)
    except (SyntaxError, UnicodeDecodeError, FileNotFoundError):
        # If file cannot be read or parsed, return empty sets
        return set(), set(), set()

    module_imports: set[str] = set()
    function_imports: set[tuple[str, str]] = set()
    attribute_accesses: set[tuple[str, str]] = set()

    # Local name (or alias) -> module it refers to, for attribute detection
    imported_modules: dict[str, str] = {}

    # First pass: extract direct imports
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # Handle: import module [as alias]
            for alias in node.names:
                module_imports.add(alias.name)
                imported_modules[alias.asname or alias.name] = alias.name

        elif isinstance(node, ast.ImportFrom):
            # Handle: from module import function [as alias]
            if node.module:
                source_module = node.module
                module_imports.add(source_module)  # Add the module itself
            else:
                # Handle: from . import name  (module is None, level >= 1)
                source_module = "." * node.level

            for alias in node.names:
                function_imports.add((source_module, alias.name))
                # Also track the alias for attribute access detection
                imported_modules[alias.asname or alias.name] = source_module

    # Second pass: find attribute accesses (module.function calls). This
    # needs the complete alias map, so it cannot be merged into the first.
    _extract_attribute_accesses(tree, imported_modules, attribute_accesses)

    return module_imports, function_imports, attribute_accesses
def _is_function_used_externally(
    func_name: str,
    module_path: Path,
    project_root: Path,
    privacy_config: dict[str, Any] | None = None,
) -> bool:
    """Tell whether any module other than the defining one uses a function.

    This is the core test behind both privacy checks: a name that only its own
    module touches is a candidate for being private, while a name that other
    modules import or access is effectively public.

    WARNING: Every call rebuilds the cross-module usage graph, which scans the
    whole project and can be expensive for large code bases. Only the per-file
    parsing performed while building the graph is memoized.

    :param func_name: Name of the function to check
    :type func_name: str
    :param module_path: Path to the module containing the function
    :type module_path: Path
    :param project_root: Root directory of the project
    :type project_root: Path
    :param privacy_config: Optional configuration forwarded to the graph
        builder
    :type privacy_config: dict[str, Any] | None
    :returns: True if function is used by other modules, False if only used
        internally
    :rtype: bool
    """
    importers = _build_cross_module_usage_graph(project_root, privacy_config).get(
        func_name
    )

    # Nobody imports or accesses this name at all.
    if importers is None:
        return False

    # Dotted name of the defining module, in the same form the graph uses.
    try:
        own_module = str(
            module_path.relative_to(project_root).with_suffix("")
        ).replace(os.sep, ".")
    except ValueError:
        # The module name cannot be determined; err on the side of treating
        # the function as externally used.
        return True

    # Usage from the defining module itself does not count as external.
    return any(user != own_module for user in importers)