Coverage for src/pylint_sort_functions/utils/privacy.py: 100%

96 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-08-12 16:06 +0200

1"""Privacy analysis for detecting functions that should be private or public. 

2 

3This module provides functionality to analyze function usage patterns across 

4a project to detect functions that should be marked as private (only used 

5internally) or public (used by other modules). 

6""" 

7 

8import ast 

9import os 

10from functools import lru_cache 

11from pathlib import Path 

12from typing import Any 

13 

14from astroid import nodes # type: ignore[import-untyped] 

15 

16from .ast_analysis import is_dunder_method, is_private_function 

17from .file_patterns import find_python_files, is_unittest_file 

18 

19 

def should_function_be_private(
    func: nodes.FunctionDef,
    module_path: Path,
    project_root: Path,
    public_patterns: set[str] | None = None,
    privacy_config: dict[str, Any] | None = None,
) -> bool:
    """Decide whether a public function is a candidate for being made private.

    A function is reported only when every exemption below fails and no
    other module in the project is found to use its name.

    Exemptions (each one yields ``False``):

    1. The function is already private according to ``is_private_function``.
    2. The function is a dunder method according to ``is_dunder_method``.
    3. The function name is listed in ``public_patterns``.

    :param func: Function definition node to analyze
    :type func: nodes.FunctionDef
    :param module_path: Path to the module file that defines ``func``
    :type module_path: Path
    :param project_root: Root directory of the project
    :type project_root: Path
    :param public_patterns: Function names that are always treated as public.
        If None, the built-in entry-point names are used
        (main, run, execute, start, stop, setup, teardown).
    :type public_patterns: set[str] | None
    :param privacy_config: Optional configuration, forwarded unchanged to the
        cross-module usage check
    :type privacy_config: dict[str, Any] | None
    :returns: True if the function should be marked as private
    :rtype: bool
    """
    # Already-private functions and dunder methods are never reported.
    if is_private_function(func) or is_dunder_method(func):
        return False

    # Entry points and framework callbacks are invoked by external systems
    # (the Python runtime, test frameworks, ...), so import analysis cannot
    # see their callers; treat them as public unconditionally.
    always_public = (
        {
            "main",
            "run",
            "execute",
            "start",
            "stop",
            "setup",
            "teardown",
        }
        if public_patterns is None
        else public_patterns
    )
    if func.name in always_public:
        return False

    # No usage from any other module means the function can be private.
    return not _is_function_used_externally(
        func.name, module_path, project_root, privacy_config
    )

83 

84 

def should_function_be_public(
    func: nodes.FunctionDef,
    module_path: Path,
    project_root: Path,
    privacy_config: dict[str, Any] | None = None,
) -> bool:
    """Decide whether a private function should be made public.

    A function that is currently private but whose name is used by another
    module of the project is reported as a candidate for becoming public.

    Decision steps:

    1. Functions that are not private are never reported.
    2. Dunder methods are never reported.
    3. Otherwise the result is whether another module uses the function.

    :param func: Function definition node to analyze
    :type func: nodes.FunctionDef
    :param module_path: Path to the module file that defines ``func``
    :type module_path: Path
    :param project_root: Root directory of the project
    :type project_root: Path
    :param privacy_config: Optional configuration, forwarded unchanged to the
        cross-module usage check
    :type privacy_config: dict[str, Any] | None
    :returns: True if the function should be made public
    :rtype: bool
    """
    # The dunder test is purely defensive: per the original author's note,
    # is_private_function is not expected to classify dunder methods as
    # private, so the second operand should never be the deciding one.
    if not is_private_function(func) or is_dunder_method(func):
        return False

    # A private function that other modules rely on should be public.
    return _is_function_used_externally(
        func.name, module_path, project_root, privacy_config
    )

129 

130 

def _build_cross_module_usage_graph(
    project_root: Path, privacy_config: dict[str, Any] | None = None
) -> dict[str, set[str]]:
    """Map every imported or dot-accessed name to the modules that use it.

    Each Python file found under ``project_root`` is scanned. Names are keyed
    by their bare identifier only (the module they were imported from is
    discarded), so identically named functions from different modules share
    a single entry.

    WARNING: This scans the entire project on every call. Only the per-file
    import extraction is memoized; the graph itself is rebuilt each time.

    :param project_root: Root directory of the project
    :type project_root: Path
    :param privacy_config: Optional configuration passed to
        ``is_unittest_file`` when deciding which files to skip
    :type privacy_config: dict[str, Any] | None
    :returns: Dictionary mapping function names to the set of using modules
    :rtype: dict[str, set[str]]
    """
    graph: dict[str, set[str]] = {}

    for source_file in find_python_files(project_root):
        try:
            # Dotted module name relative to the project root,
            # e.g. "<project_root>/pkg/mod.py" -> "pkg.mod".
            relative = source_file.relative_to(project_root)
            dotted = str(relative.with_suffix("")).replace(os.sep, ".")

            # __init__ files re-export names for API organization, and test
            # files reach into internals; neither counts as real usage.
            if dotted.endswith("__init__") or is_unittest_file(
                dotted, privacy_config
            ):
                continue

            # The modification time is part of the cache key of the
            # extraction helper. A failing stat() raises OSError, which the
            # handler below turns into "skip this file".
            _, direct_imports, dotted_accesses = _extract_imports_from_file(
                source_file, source_file.stat().st_mtime
            )

            # Direct imports (from utils import calculate_total) first, then
            # attribute accesses (utils.calculate_total(items)).
            for pairs in (direct_imports, dotted_accesses):
                for _, used_name in pairs:
                    graph.setdefault(used_name, set()).add(dotted)

        except (ValueError, OSError):
            # Skip files that cannot be processed
            continue

    return graph

193 

194 

195def _extract_attribute_accesses( 

196 tree: ast.AST, 

197 imported_modules: dict[str, str], 

198 attribute_accesses: set[tuple[str, str]], 

199) -> None: 

200 """Extract attribute access patterns from AST for import analysis. 

201 

202 Helper function for _extract_imports_from_file to reduce complexity. 

203 

204 :param tree: Parsed AST tree 

205 :type tree: ast.AST 

206 :param imported_modules: Map of aliases to actual module names 

207 :type imported_modules: dict[str, str] 

208 :param attribute_accesses: Set to populate with (module, attribute) tuples 

209 :type attribute_accesses: set[tuple[str, str]] 

210 """ 

211 for node in ast.walk(tree): 

212 if isinstance(node, ast.Attribute): 

213 # Handle: module.function_name or alias.function_name 

214 if isinstance(node.value, ast.Name): 

215 module_alias = node.value.id 

216 if module_alias in imported_modules: 

217 actual_module = imported_modules[module_alias] 

218 attribute_accesses.add((actual_module, node.attr)) 

219 

220 

@lru_cache(maxsize=128)
def _extract_imports_from_file(
    file_path: Path,
    file_mtime: float,  # pylint: disable=unused-argument
) -> tuple[set[str], set[tuple[str, str]], set[tuple[str, str]]]:
    """Collect the imports and module attribute accesses of one Python file.

    Results are memoized with ``lru_cache``. ``file_mtime`` is not read by
    the body; it only takes part in the cache key so that a changed
    modification time produces a fresh parse instead of a stale cache hit.

    A file that does not exist, is not valid UTF-8, or does not parse yields
    three empty sets.

    :param file_path: Path to the Python file to analyze
    :type file_path: Path
    :param file_mtime: File modification time (used for cache invalidation)
    :type file_mtime: float
    :returns: Tuple of:
        module_imports: Set of module names from direct imports
        function_imports: Set of (module, function) tuples from direct imports
        attribute_accesses: Set of (module, attribute) tuples from dot notation
    :rtype: tuple[set[str], set[tuple[str, str]], set[tuple[str, str]]]
    """
    # Only reading and parsing can raise the handled exceptions.
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            parsed = ast.parse(handle.read())
    except (SyntaxError, UnicodeDecodeError, FileNotFoundError):
        # If file cannot be parsed, return empty sets
        return set(), set(), set()

    modules: set[str] = set()
    imported_names: set[tuple[str, str]] = set()
    # Local binding (alias or plain name) -> module it refers to; consumed
    # by the attribute-access pass below.
    alias_to_module: dict[str, str] = {}

    # First pass: import statements.
    for node in ast.walk(parsed):
        if isinstance(node, ast.Import):
            # import module [as alias]
            for item in node.names:
                modules.add(item.name)
                alias_to_module[item.asname or item.name] = item.name
        elif isinstance(node, ast.ImportFrom) and node.module:
            # from module import name [as alias]; statements without a
            # module part (e.g. "from . import x") are not recorded.
            modules.add(node.module)
            for item in node.names:
                imported_names.add((node.module, item.name))
                alias_to_module[item.asname or item.name] = node.module

    # Second pass: module.function style accesses.
    dotted_uses: set[tuple[str, str]] = set()
    _extract_attribute_accesses(parsed, alias_to_module, dotted_uses)

    return modules, imported_names, dotted_uses

287 

288 

def _is_function_used_externally(
    func_name: str,
    module_path: Path,
    project_root: Path,
    privacy_config: dict[str, Any] | None = None,
) -> bool:
    """Report whether any module other than the defining one uses a function.

    This is the core check for privacy detection: a function whose name is
    only ever used inside its own module is a candidate for being private.

    WARNING: Every call rebuilds the cross-module usage graph by scanning
    the whole project, which can be expensive for large projects.

    :param func_name: Name of the function to check
    :type func_name: str
    :param module_path: Path to the module containing the function
    :type module_path: Path
    :param project_root: Root directory of the project
    :type project_root: Path
    :param privacy_config: Optional configuration, forwarded unchanged to the
        usage graph builder
    :type privacy_config: dict[str, Any] | None
    :returns: True if function is used by other modules, False if only used
        internally. Also True when ``module_path`` does not lie under
        ``project_root`` and the name appears in the usage graph.
    :rtype: bool
    """
    graph = _build_cross_module_usage_graph(project_root, privacy_config)

    users = graph.get(func_name)
    if users is None:
        # Nobody imports or dot-accesses this name at all.
        return False

    # Dotted name of the defining module, in the same form as the graph's
    # values, so it can be excluded from the users.
    try:
        own_relative = module_path.relative_to(project_root)
        own_module = str(own_relative.with_suffix("")).replace(os.sep, ".")
    except ValueError:
        # If we cannot determine the module name, assume it is used externally
        return True

    return any(user != own_module for user in users)