Coverage for call_graph / parsers / python.py: 90%

124 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-08 15:04 -0800

1"""Python call graph parser.""" 

2 

3from __future__ import annotations 

4 

5from tree_sitter import Node 

6 

7from call_graph.parsers.base import CallGraphParser 

8from models import FunctionDefinition, CallEdge 

9from patterns.common import create_query, run_captures 

10from parsers.python import PY_LANGUAGE 

11 

12 

class PythonCallParser(CallGraphParser):
    """Extract function definitions and call edges from a Python syntax tree.

    Definitions and call sites are located with queries compiled for
    ``PY_LANGUAGE``. Scoping questions (which function owns a parameter,
    which class encloses a function, which function encloses a call) are
    answered by walking node ancestry rather than by comparing line numbers.
    """

    def extract_function_definitions(self, root, source, filepath):
        """Return a ``FunctionDefinition`` for every ``def`` found under *root*.

        Args:
            root: Node the definition query is run against.
            source: Source text of the file. Not used here; accepted so the
                signature stays compatible with existing callers.
            filepath: Location of the file; stored as ``str(filepath)``.

        Returns:
            A list with one ``FunctionDefinition`` per captured function
            name, in capture order. ``line`` is the 1-based line of the
            name identifier. ``def_type`` is ``"method"`` (with
            ``class_name`` set) when the nearest enclosing scope is a class,
            otherwise ``"function"`` with ``class_name=None``. ``params``
            holds only parameters that appear as a bare identifier directly
            under the ``parameters`` node, because that is all the query
            captures.
        """
        query_str = """(function_definition name: (identifier) @func_name)
            (function_definition parameters: (parameters (identifier) @param))"""
        captures = run_captures(create_query(PY_LANGUAGE, query_str), root)

        ordered_keys = []    # (name, line) per definition, in capture order
        params_by_def = {}   # (name, line) -> parameter names
        class_by_def = {}    # (name, line) -> enclosing class_definition node

        for node, capture_name in captures:
            if capture_name == "func_name":
                key = self._definition_key(node)
                ordered_keys.append(key)
                # setdefault: keep parameters that were captured before the
                # name of their own function, whatever order captures use.
                params_by_def.setdefault(key, [])
                class_by_def[key] = self._enclosing_class(node.parent)
            elif capture_name == "param":
                # The query shape fixes the ancestry of a captured param:
                # identifier -> parameters -> function_definition.
                parameters_node = node.parent
                owner = (
                    parameters_node.parent
                    if parameters_node is not None
                    else None
                )
                name_node = (
                    owner.child_by_field_name("name")
                    if owner is not None
                    else None
                )
                if name_node is None:
                    continue
                owner_key = self._definition_key(name_node)
                params_by_def.setdefault(owner_key, []).append(
                    self._node_text(node)
                )

        defs = []
        for key in ordered_keys:
            name, name_line = key
            class_node = class_by_def.get(key)
            if class_node is None:
                def_type, class_name = "function", None
            else:
                def_type = "method"
                class_name = self._node_text(
                    class_node.child_by_field_name("name")
                )
            defs.append(
                FunctionDefinition(
                    name=name,
                    filepath=str(filepath),
                    line=name_line,
                    def_type=def_type,
                    class_name=class_name,
                    params=params_by_def.get(key, []),
                )
            )
        return defs

    def extract_calls(self, root, source, filepath, all_functions):
        """Return a ``CallEdge`` for every call made inside a known function.

        Args:
            root: Node the ``(call)`` query is run against.
            source: Source text of the file; split into lines and handed to
                ``_get_caller_info``.
            filepath: Location of the file being analysed.
            all_functions: Iterable of definitions exposing ``name`` and
                ``filepath``; used to resolve callers and to decide whether
                a callee name is known.

        Returns:
            A list of ``CallEdge`` objects in capture order. Calls for which
            no calling function can be resolved are skipped. ``call_type``
            is ``"method"`` when a receiver was found, ``"external"`` when
            the callee name matches no known function, and ``"local"``
            otherwise. ``callee_file`` and ``callee_line`` are always
            ``None``.
        """
        # Materialise once so a one-shot iterable can feed both lookups.
        all_functions = list(all_functions)
        source_lines = source.split("\n")
        all_func_map = {(f.name, f.filepath): f for f in all_functions}
        # Built once instead of rebuilding a list of names per call site.
        known_names = {f.name for f in all_functions}

        captures = run_captures(create_query(PY_LANGUAGE, """(call) @call"""), root)

        calls = []
        for node, _ in captures:
            caller_info = self._get_caller_info(
                node, source_lines, all_func_map, filepath
            )
            if not caller_info:
                continue

            callee_name, receiver, _is_method = self._get_callee_info(node)

            if receiver:
                call_type = "method"
            elif callee_name not in known_names:
                call_type = "external"
            else:
                call_type = "local"

            calls.append(
                CallEdge(
                    caller_file=caller_info["filepath"],
                    caller_function=caller_info["name"],
                    caller_line=caller_info["line"],
                    callee_file=None,
                    callee_function=callee_name,
                    callee_line=None,
                    call_site_line=node.start_point[0] + 1,
                    call_type=call_type,
                    receiver_object=receiver,
                )
            )
        return calls

    def _get_caller_info(self, call_node, source_lines, all_func_map, filepath):
        """Identify the function whose body contains *call_node*.

        Walks outward from the call and returns the nearest enclosing
        ``function_definition`` that is registered in *all_func_map* under
        ``(name, str(filepath))``. A call with no such ancestor (for example
        one at module level) has no caller.

        Args:
            call_node: The call node being attributed.
            source_lines: Not used; accepted for signature compatibility.
            all_func_map: Mapping of ``(name, filepath)`` to definition.
            filepath: Location of the file being analysed.

        Returns:
            A dict with ``"name"``, ``"filepath"`` and ``"line"`` (1-based
            line of the enclosing function's name), or ``None``.
        """
        filepath_str = str(filepath)
        ancestor = call_node.parent
        while ancestor is not None:
            if ancestor.type == "function_definition":
                name_node = ancestor.child_by_field_name("name")
                func_def = all_func_map.get(
                    (self._node_text(name_node), filepath_str)
                )
                if func_def is not None:
                    return {
                        "name": func_def.name,
                        "filepath": func_def.filepath,
                        # Taken from the tree, not from func_def: the map
                        # keeps one entry per (name, file), so same-named
                        # functions in a file would otherwise share a line.
                        "line": name_node.start_point[0] + 1,
                    }
            ancestor = ancestor.parent
        return None

    def _get_func_end_line(self, start_line, source_lines):
        """Estimate the last line of a function by scanning for the next def.

        Line-based heuristic; ``_get_caller_info`` no longer relies on it.
        Its behaviour is unchanged.

        Args:
            start_line: 1-based line of the ``def``; equivalently the
                0-based index of the line that follows it.
            source_lines: The file's lines.

        Returns:
            The 0-based index of the first later line that
            ``_is_top_level_def`` accepts, which equals the 1-based number
            of the line before it, or ``len(source_lines)`` if none.
        """
        for index in range(start_line, len(source_lines)):
            if self._is_top_level_def(source_lines[index]):
                return index
        return len(source_lines)

    def _is_top_level_def(self, line):
        """Return True when *line*, ignoring indentation, opens a def or class.

        Despite the name, indentation is not considered: the line is
        left-stripped before the check, so nested definitions match too.
        """
        return line.lstrip().startswith(("def ", "class "))

    def _get_callee_info(self, call_node):
        """Describe what *call_node* calls.

        Returns:
            A ``(callee_name, receiver, is_method)`` tuple, decided by the
            first direct child that is an ``identifier``, ``attribute`` or
            ``member`` node. ``("<unknown>", None, False)`` when no child
            matches.
        """
        for child in call_node.children:
            if child.type == "identifier":
                return self._node_text(child), None, False
            if child.type == "attribute":
                return self._get_attribute_callee(child)
            # NOTE(review): no other code here produces "member" nodes;
            # confirm the Python grammar emits them before relying on this.
            if child.type == "member" and child.parent.type == "call":
                return self._get_member_callee(child)
        return "<unknown>", None, False

    def _get_member_callee(self, member_node):
        """Return the full text of *member_node* as a receiver-less callee."""
        return member_node.text.decode(), None, False

    def _get_attribute_callee(self, attr_node):
        """Split an attribute access into callee name and dotted receiver.

        Only ``identifier`` and nested ``attribute`` nodes contribute name
        parts. When the object of the access is any other expression, the
        receiver is lost and the call is reported as a plain, receiver-less
        call.

        Returns:
            ``(last_part, "a.b.c", True)`` when at least two name parts were
            found, ``(part, None, False)`` for a single part, and
            ``("<unknown>", None, False)`` when there are none.
        """
        parts = list(self._dotted_parts(attr_node))
        if not parts:
            return "<unknown>", None, False
        if len(parts) == 1:
            return parts[0], None, False
        return parts[-1], ".".join(parts[:-1]), True

    def _dotted_parts(self, node):
        """Yield identifier texts of a dotted access, left to right."""
        if node.type == "identifier":
            yield node.text.decode()
        elif node.type == "attribute":
            for child in node.children:
                if child.type != ".":
                    yield from self._dotted_parts(child)

    def _definition_key(self, name_node):
        """Return ``(name, 1-based line)`` for a function-name identifier."""
        return self._node_text(name_node), name_node.start_point[0] + 1

    @staticmethod
    def _node_text(node):
        """Return the decoded text of *node*, or ``""`` when it has none."""
        if node is None or not node.text:
            return ""
        return node.text.decode()

    @staticmethod
    def _enclosing_class(func_node):
        """Return the class that directly scopes *func_node*, if any.

        Walks outward from the function definition. Reaching another
        ``function_definition`` first means the function is nested inside a
        function and is not a method, so ``None`` is returned.
        """
        ancestor = func_node.parent if func_node is not None else None
        while ancestor is not None:
            if ancestor.type == "class_definition":
                return ancestor
            if ancestor.type == "function_definition":
                return None
            ancestor = ancestor.parent
        return None

187 return parts[0], None, False