Skip to main content

gruel_cache/
fingerprint.rs

//! BLAKE3-based fingerprinting helpers.
//!
//! [`Hasher`] is a thin wrapper over `blake3::Hasher` that exposes the
//! `update`/`finalize` flow the rest of the compiler uses to build cache
//! keys. [`CacheKey`] is the finalized 32-byte hash; its hex encoding is
//! used for on-disk filenames.
7
8use std::fmt;
9
10/// Compute a BLAKE3 hash of a single byte slice.
11///
12/// Convenience for callers who already have the full input in hand.
13pub fn blake3_bytes(bytes: &[u8]) -> CacheKey {
14    let mut h = Hasher::new();
15    h.update(bytes);
16    h.finalize()
17}
18
19/// Incremental BLAKE3 hasher.
20///
21/// Wraps `blake3::Hasher` so callers don't need to depend on the `blake3`
22/// crate directly. Use [`Hasher::update`] in any order to mix in inputs;
23/// the resulting [`CacheKey`] is deterministic for a given sequence of
24/// updates.
25#[derive(Default)]
26pub struct Hasher {
27    inner: blake3::Hasher,
28}
29
30impl Hasher {
31    pub fn new() -> Self {
32        Self::default()
33    }
34
35    pub fn update(&mut self, bytes: &[u8]) -> &mut Self {
36        self.inner.update(bytes);
37        self
38    }
39
40    /// Mix in a `u32` in little-endian. Useful for embedding numeric
41    /// discriminants (target arch, opt level, schema version, ...) into a
42    /// key without committing to a string encoding.
43    pub fn update_u32(&mut self, value: u32) -> &mut Self {
44        self.inner.update(&value.to_le_bytes());
45        self
46    }
47
48    /// Mix in a `u64` in little-endian.
49    pub fn update_u64(&mut self, value: u64) -> &mut Self {
50        self.inner.update(&value.to_le_bytes());
51        self
52    }
53
54    /// Mix in a length-prefixed byte string. Use this instead of bare
55    /// `update` when concatenating multiple variable-length fields, so that
56    /// `("ab", "c")` and `("a", "bc")` don't collide.
57    pub fn update_str(&mut self, s: &str) -> &mut Self {
58        self.update_u64(s.len() as u64);
59        self.inner.update(s.as_bytes());
60        self
61    }
62
63    pub fn finalize(self) -> CacheKey {
64        let hash = self.inner.finalize();
65        CacheKey {
66            bytes: *hash.as_bytes(),
67        }
68    }
69}
70
/// A 32-byte BLAKE3 hash used as a cache identifier.
///
/// The hex form ([`CacheKey::hex`]) is the on-disk filename; the byte
/// form is what gets mixed into compound keys.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct CacheKey {
    bytes: [u8; 32],
}

impl CacheKey {
    /// Construct a key directly from a 32-byte hash. Mostly useful for
    /// tests and for re-hydrating keys read from disk; production code
    /// should produce keys via [`Hasher::finalize`].
    pub const fn from_bytes(bytes: [u8; 32]) -> Self {
        Self { bytes }
    }

    /// Borrow the raw 32 hash bytes.
    pub const fn as_bytes(&self) -> &[u8; 32] {
        &self.bytes
    }

    /// 64-character lowercase hex encoding, used as the on-disk filename.
    ///
    /// Each byte becomes two digits, high nibble first. The string is built
    /// with a single allocation of exactly 64 bytes.
    pub fn hex(&self) -> String {
        // Nibble -> lowercase ASCII hex digit.
        const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

        let mut s = String::with_capacity(2 * self.bytes.len());
        for &byte in &self.bytes {
            // Both indices are in 0..16, so the lookups cannot go out of bounds.
            s.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            s.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
        }
        s
    }
}

impl fmt::Debug for CacheKey {
    /// Formats as `CacheKey(xxxxxxxx…)`, showing only the first 8 hex
    /// characters to keep logs readable.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("CacheKey(")?;
        // The first 4 bytes are exactly the first 8 hex characters; writing
        // them directly avoids building the full 64-character string.
        for byte in &self.bytes[..4] {
            write!(f, "{:02x}", byte)?;
        }
        f.write_str("…)")
    }
}

impl fmt::Display for CacheKey {
    /// Formats as the full 64-character lowercase hex string, identical to
    /// [`CacheKey::hex`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.hex())
    }
}
115
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_input_has_stable_hash() {
        let k = blake3_bytes(b"");
        // BLAKE3("") is a well-known constant.
        assert_eq!(
            k.hex(),
            "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262"
        );
    }

    #[test]
    fn update_str_is_length_prefixed() {
        // Without length-prefixing, ("ab", "c") and ("a", "bc") collide.
        // With it, they must not.
        let mut h1 = Hasher::new();
        h1.update_str("ab").update_str("c");
        let mut h2 = Hasher::new();
        h2.update_str("a").update_str("bc");
        assert_ne!(h1.finalize(), h2.finalize());
    }

    #[test]
    fn raw_update_is_not_length_prefixed() {
        // Sanity check that update (raw) does NOT length-prefix, so it
        // collides on concatenation. This is intentional — callers use
        // raw update (or the fixed-width update_u32/update_u64) for
        // fixed-size fields and update_str for variable-size fields.
        let mut h1 = Hasher::new();
        h1.update(b"abc");
        let mut h2 = Hasher::new();
        h2.update(b"a").update(b"bc");
        assert_eq!(h1.finalize(), h2.finalize());
    }

    #[test]
    fn numeric_updates_are_little_endian() {
        // update_u32/update_u64 must hash exactly the little-endian bytes
        // of the value, with no extra framing.
        let mut h1 = Hasher::new();
        h1.update_u32(1).update_u64(2);
        let mut h2 = Hasher::new();
        h2.update(&[1, 0, 0, 0]).update(&[2, 0, 0, 0, 0, 0, 0, 0]);
        assert_eq!(h1.finalize(), h2.finalize());
    }

    #[test]
    fn cache_key_hex_is_64_chars() {
        let k = blake3_bytes(b"some content");
        let hex = k.hex();
        assert_eq!(hex.len(), 64);
        // The filename contract is *lowercase* hex, so reject A-F as well.
        assert!(hex
            .bytes()
            .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f')));
    }

    #[test]
    fn from_bytes_round_trips() {
        let raw = [0xabu8; 32];
        let k = CacheKey::from_bytes(raw);
        assert_eq!(k.as_bytes(), &raw);
        assert_eq!(k.hex(), "ab".repeat(32));
    }

    #[test]
    fn display_matches_hex() {
        let k = blake3_bytes(b"x");
        assert_eq!(k.to_string(), k.hex());
    }

    #[test]
    fn debug_truncates() {
        let k = blake3_bytes(b"x");
        let dbg = format!("{:?}", k);
        // Pin the exact shape: only the first 8 hex characters appear,
        // followed by an ellipsis.
        let expected = format!("CacheKey({}…)", &k.hex()[..8]);
        assert_eq!(dbg, expected);
    }
}