Coverage for src / invariant / hashing.py: 100.00%

42 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 10:21 +0100

1"""Recursive hashing utilities for manifests and cacheable objects.""" 

2 

3import hashlib 

4from collections.abc import Mapping, Sequence 

5from decimal import Decimal 

6from typing import Any 

7 

8from invariant.protocol import ICacheable 

9 

10 

def _tagged_sha256(tag: bytes, payload: bytes = b""):
    """Return a SHA-256 hasher seeded with a type tag and an optional payload.

    The tag is terminated by a NUL byte. Tags never contain NUL, so the first
    NUL always marks where the tag ends and the payload begins; two values of
    different kinds therefore never feed identical bytes to the hash.
    """
    hasher = hashlib.sha256(tag + b"\x00")
    hasher.update(payload)
    return hasher


def hash_value(value: Any) -> str:
    """Recursively hash a value to produce a deterministic SHA-256 hash.

    Every digest computed here is seeded with a tag naming the kind of value
    being hashed, so values of different kinds cannot share a digest: the
    integer ``1``, the string ``"1"`` and ``Decimal("1")`` hash differently,
    as do ``None`` and ``"None"``, ``True`` and ``"True"``, and an empty list
    and an empty mapping.

    Supports:
    - None: Tag-only digest
    - ICacheable objects: Uses their get_stable_hash() method, returned as-is
    - str: UTF-8 bytes
    - bool: Hashed separately from int (bool is a subclass of int)
    - int: Decimal text representation
    - Decimal: ``str(value)``; representations are kept distinct, so
      ``Decimal("1.0")`` and ``Decimal("1")`` produce different digests
    - dict/Mapping: Sorts items by key, hashes each key and value recursively
    - list/Sequence: Recursively hashes each element in order

    NOTE: because of the type tags, digests differ from a plain SHA-256 of the
    value's text, so digests produced by an untagged scheme will not match.

    Args:
        value: The value to hash. Can be any of the supported types.

    Returns:
        A hexadecimal SHA-256 hash string (64 characters), or whatever
        ``get_stable_hash()`` returns for ICacheable objects.

    Raises:
        TypeError: If value type is not supported, or if the keys of a
            mapping cannot be ordered against each other.
    """
    if value is None:
        return _tagged_sha256(b"none").hexdigest()

    if isinstance(value, ICacheable):
        # Cacheable objects own their digest; it is passed through untouched.
        return value.get_stable_hash()

    if isinstance(value, str):
        return _tagged_sha256(b"str", value.encode("utf-8")).hexdigest()

    if isinstance(value, bool):
        # Must precede the int check: bool is an int subclass and would
        # otherwise be hashed through str(value), i.e. as "True"/"False".
        return _tagged_sha256(b"bool", b"1" if value else b"0").hexdigest()

    if isinstance(value, int):
        return _tagged_sha256(b"int", str(value).encode("utf-8")).hexdigest()

    if isinstance(value, Decimal):
        return _tagged_sha256(b"decimal", str(value).encode("utf-8")).hexdigest()

    if isinstance(value, Mapping):
        hasher = _tagged_sha256(b"dict")
        # Sort by key only, so insertion order never influences the digest.
        for key, val in sorted(value.items(), key=lambda item: item[0]):
            hasher.update(hash_value(key).encode("utf-8"))
            hasher.update(hash_value(val).encode("utf-8"))
        return hasher.hexdigest()

    if isinstance(value, Sequence):
        # str is also a Sequence but has already been handled above.
        hasher = _tagged_sha256(b"list")
        for item in value:
            hasher.update(hash_value(item).encode("utf-8"))
        return hasher.hexdigest()

    raise TypeError(
        f"Unsupported type for hashing: {type(value).__name__}. "
        "Value must be ICacheable, dict, list, str, int, Decimal, or None."
    )

73 

74 

def hash_manifest(manifest: dict[str, Any]) -> str:
    """Compute the Digest (cache key) for a manifest dictionary.

    The manifest's entries are ordered by key, every key and every value is
    hashed with ``hash_value``, and the resulting hex digests are fed, key
    before value and entry after entry, into a single SHA-256 hash.

    Args:
        manifest: Mapping of input names to values. Keys and values must be
            of a type ``hash_value`` accepts.

    Returns:
        A hexadecimal SHA-256 hash string (64 characters).
    """
    # Order entries by key so insertion order never affects the digest.
    entries = list(manifest.items())
    entries.sort(key=lambda entry: entry[0])

    # Each entry contributes its key digest immediately followed by its
    # value digest; hashing the joined text equals updating piece by piece.
    digest_parts = [hash_value(part) for entry in entries for part in entry]
    return hashlib.sha256("".join(digest_parts).encode("utf-8")).hexdigest()

103 

104 return hasher.hexdigest()