Coverage for src / infra / git_utils.py: 25%

40 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2026-01-04 04:43 +0000

1"""Git utility functions for mala. 

2 

3Provides helpers for getting git repository information. 

4""" 

5 

6import logging 

7import re 

8from pathlib import Path 

9 

10from src.infra.tools.command_runner import CommandRunner, run_command_async 

11 

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

# Default timeout for git commands (seconds).
# Used as the default value of the `timeout` parameter of the helpers below.
DEFAULT_GIT_TIMEOUT = 5.0

16 

17 

async def get_git_commit_async(cwd: Path, timeout: float = DEFAULT_GIT_TIMEOUT) -> str:
    """Return the short hash of the current commit - async version.

    Runs ``git rev-parse --short HEAD`` through ``run_command_async``.

    Args:
        cwd: Directory in which the git command is executed.
        timeout: Forwarded to the command runner as ``timeout_seconds``.

    Returns:
        The command's stdout with surrounding whitespace stripped when the
        result is ok, otherwise an empty string.
    """
    rev_parse_cmd = ["git", "rev-parse", "--short", "HEAD"]
    outcome = await run_command_async(
        rev_parse_cmd,
        cwd=cwd,
        timeout_seconds=timeout,
    )
    # Any non-ok result collapses to "" so callers never see an exception here.
    return outcome.stdout.strip() if outcome.ok else ""

28 

29 

async def get_git_branch_async(cwd: Path, timeout: float = DEFAULT_GIT_TIMEOUT) -> str:
    """Return the name of the current branch - async version.

    Runs ``git rev-parse --abbrev-ref HEAD`` through ``run_command_async``.

    Args:
        cwd: Directory in which the git command is executed.
        timeout: Forwarded to the command runner as ``timeout_seconds``.

    Returns:
        The command's stdout with surrounding whitespace stripped when the
        result is ok, otherwise an empty string.
    """
    abbrev_ref_cmd = ["git", "rev-parse", "--abbrev-ref", "HEAD"]
    outcome = await run_command_async(
        abbrev_ref_cmd,
        cwd=cwd,
        timeout_seconds=timeout,
    )
    # Any non-ok result collapses to "" so callers never see an exception here.
    return outcome.stdout.strip() if outcome.ok else ""

40 

41 

async def get_baseline_for_issue(
    repo_path: Path, issue_id: str, timeout: float = DEFAULT_GIT_TIMEOUT
) -> str | None:
    """Get the baseline commit for an issue from git history.

    Finds the first commit with "bd-{issue_id}:" prefix and returns its parent.
    This allows accurate cumulative diff calculation across resumed sessions.

    Args:
        repo_path: Path to the git repository.
        issue_id: The issue ID (e.g., "mala-123"). It is matched literally.
        timeout: Timeout in seconds for git operations.

    Returns:
        The commit hash of the parent of the first issue commit, or None if:
        - No commits exist for this issue (fresh issue)
        - The first commit is the root commit (no parent)
        - Git commands fail or timeout
        - The parent lookup produces no output
    """
    runner = CommandRunner(cwd=repo_path, timeout_seconds=timeout)

    # Escape regex metacharacters in issue_id to avoid matching wrong issues
    # (e.g., "mala-g3h.1" should not match "mala-g3hX1").
    #
    # re.escape() emits Python-style escapes such as "\+", "\?", "\|", "\(".
    # git's default *basic* regex mode can treat those sequences as operators
    # or group delimiters rather than literals, so --extended-regexp is
    # requested explicitly to keep a backslash-escaped character literal.
    escaped_issue_id = re.escape(issue_id)

    # --reverse yields chronological order (oldest first).
    log_result = await runner.run_async(
        [
            "git",
            "log",
            "--oneline",
            "--reverse",
            "--extended-regexp",
            f"--grep=^bd-{escaped_issue_id}:",
        ],
    )
    if not log_result.ok:
        return None  # git failed or timed out

    matching_commits = log_result.stdout.strip()
    if not matching_commits:
        return None  # No commits for this issue

    # First line is the oldest match; its first word is the commit hash.
    first_commit = matching_commits.splitlines()[0].split()[0]

    # "<commit>^" names the first parent; rev-parse fails for a root commit.
    parent_result = await runner.run_async(
        ["git", "rev-parse", f"{first_commit}^"],
    )
    if not parent_result.ok:
        return None  # Root commit (no parent) or git failure

    baseline = parent_result.stdout.strip()
    if not baseline:
        # Never hand an empty string back as if it were a commit hash.
        return None

    logger.debug("Baseline resolved: issue_id=%s commit=%s", issue_id, baseline)
    return baseline

96 

97 

async def get_issue_commits_async(
    repo_path: Path,
    issue_id: str,
    *,
    since_timestamp: int | None = None,
    timeout: float = DEFAULT_GIT_TIMEOUT,
) -> list[str]:
    """Get commit SHAs for an issue, optionally filtered by timestamp.

    Finds commits with "bd-{issue_id}:" prefix, ordered oldest -> newest.

    Args:
        repo_path: Path to the git repository.
        issue_id: The issue ID (e.g., "mala-123"). It is matched literally.
        since_timestamp: Optional Unix timestamp (seconds). If provided and
            greater than zero, it is passed to git as ``--since=@<timestamp>``
            so only commits after this time are returned. ``None`` or a
            non-positive value disables the filter.
        timeout: Timeout in seconds for git operations.

    Returns:
        List of commit SHAs (full length). Empty if none found or git fails.
    """
    runner = CommandRunner(cwd=repo_path, timeout_seconds=timeout)

    # re.escape() emits Python-style escapes such as "\+", "\?", "\|", "\(".
    # git's default *basic* regex mode can treat those sequences as operators
    # or group delimiters rather than literals, so --extended-regexp is
    # requested explicitly to keep a backslash-escaped character literal.
    escaped_issue_id = re.escape(issue_id)

    cmd = [
        "git",
        "log",
        "--format=%H",  # one full-length SHA per line
        "--reverse",  # oldest first
        "--extended-regexp",
        f"--grep=^bd-{escaped_issue_id}:",
    ]
    if since_timestamp is not None and since_timestamp > 0:
        cmd.append(f"--since=@{since_timestamp}")

    log_result = await runner.run_async(cmd)
    if not log_result.ok:
        return []

    # Drop blank lines and surrounding whitespace from the output.
    return [sha for line in log_result.stdout.splitlines() if (sha := line.strip())]