Skip to main content

gruel_cache/
compiler_fp.rs

1//! Hash of the running compiler binary, memoized across invocations.
2//!
3//! See ADR-0074 ("Compiler fingerprint") for the rationale. The short
4//! version: `CARGO_PKG_VERSION` is too coarse (doesn't change across local
5//! `cargo build` cycles), so we hash the binary's bytes instead. Hashing 30+
6//! MB on every invocation would itself be a regression, so we memoize the
7//! result keyed by `(path, mtime, size)` of the binary.
8
9use std::fs;
10use std::io;
11use std::path::{Path, PathBuf};
12
13use tracing::{debug, warn};
14
15use crate::fingerprint::{CacheKey, Hasher};
16
17/// Compute or retrieve the cached hash of the compiler binary at
18/// `binary_path`. Memoization lives under `memo_dir`; callers typically
19/// pass `~/.cache/gruel/binary-hash`.
20///
21/// On any I/O error reading the memo, falls back to recomputing. The cache
22/// is an optimization; correctness comes from the binary hash itself, not
23/// from the memo.
24pub fn compiler_fingerprint(binary_path: &Path, memo_dir: &Path) -> io::Result<CacheKey> {
25    let meta = fs::metadata(binary_path)?;
26    let size = meta.len();
27    let mtime_nanos = mtime_nanos(&meta);
28
29    let memo_filename = format!("{}-{}-{}.hash", path_slug(binary_path), mtime_nanos, size);
30    let memo_path = memo_dir.join(&memo_filename);
31
32    if let Some(cached) = read_cached_hash(&memo_path) {
33        debug!(
34            binary = %binary_path.display(),
35            "compiler_fp: memo hit"
36        );
37        return Ok(cached);
38    }
39
40    debug!(
41        binary = %binary_path.display(),
42        size = size,
43        "compiler_fp: hashing binary"
44    );
45
46    let bytes = fs::read(binary_path)?;
47    let mut hasher = Hasher::new();
48    hasher.update(&bytes);
49    let key = hasher.finalize();
50
51    if let Err(e) = write_cached_hash(&memo_path, &key) {
52        // Don't fail the build over a memo write error — log and continue.
53        warn!(
54            error = %e,
55            memo_path = %memo_path.display(),
56            "compiler_fp: failed to write memo, continuing"
57        );
58    }
59
60    Ok(key)
61}
62
/// Locate the executable that is currently running.
///
/// This is a thin wrapper over [`std::env::current_exe`]: its result, path
/// or error, is passed through unchanged, and no alternative lookup is
/// attempted. Intended for callers that have not been handed an explicit
/// binary path (the common case from the `gruel` CLI).
///
/// # Errors
///
/// Returns whatever error [`std::env::current_exe`] reports.
pub fn current_binary_path() -> io::Result<PathBuf> {
    let exe = std::env::current_exe()?;
    Ok(exe)
}
69
/// Convert a path into a short, filename-safe slug that distinguishes
/// different paths. Used so the memo filename is unique per binary location
/// without needing a directory tree.
///
/// The slug has two parts joined by `-`:
///
/// * a readable tail: the last (at most) 64 characters of the path with
///   every character that is not an ASCII letter or digit replaced by `_`;
/// * a 16-digit hex FNV-1a (64-bit) digest of the full (lossily decoded)
///   path.
///
/// The readable part alone is ambiguous (`/a/b` and `/a_b` both read as
/// `_a_b`), so the digest is what keeps distinct paths apart. Capping the
/// readable part bounds the slug at 81 bytes, so a deep install path cannot
/// push the memo filename past common filename-length limits.
fn path_slug(path: &Path) -> String {
    // Cap on the human-readable portion of the slug, in bytes.
    const MAX_READABLE: usize = 64;
    // Standard 64-bit FNV-1a parameters.
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    let lossy = path.to_string_lossy();

    // Every char maps to exactly one ASCII char, so `readable` is pure ASCII
    // and can be sliced at any byte offset.
    let readable: String = lossy
        .chars()
        .map(|c| if c.is_ascii_alphanumeric() { c } else { '_' })
        .collect();
    // Keep the end of the path: it carries the binary's own name.
    let tail = &readable[readable.len().saturating_sub(MAX_READABLE)..];

    // Hand-rolled FNV-1a rather than `DefaultHasher`, whose algorithm is not
    // guaranteed to stay the same across Rust releases.
    let digest = lossy.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    format!("{}-{:016x}", tail, digest)
}
78
/// Modification time of `meta` as whole nanoseconds since the Unix epoch.
///
/// Unix variant: combines the seconds and nanoseconds fields reported by
/// `MetadataExt`. A negative component is treated as zero.
#[cfg(unix)]
fn mtime_nanos(meta: &fs::Metadata) -> u128 {
    use std::os::unix::fs::MetadataExt;

    const NANOS_PER_SEC: u128 = 1_000_000_000;

    // Clamp at zero first, then widen losslessly.
    let whole_secs = u128::from(meta.mtime().max(0).unsigned_abs());
    let sub_nanos = u128::from(meta.mtime_nsec().max(0).unsigned_abs());

    whole_secs * NANOS_PER_SEC + sub_nanos
}

/// Modification time of `meta` as whole nanoseconds since the Unix epoch.
///
/// Portable variant: yields `0` when the modification time is unavailable
/// or lies before the epoch.
#[cfg(not(unix))]
fn mtime_nanos(meta: &fs::Metadata) -> u128 {
    use std::time::UNIX_EPOCH;

    match meta.modified() {
        Ok(stamp) => match stamp.duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_nanos(),
            Err(_) => 0,
        },
        Err(_) => 0,
    }
}
96
97fn read_cached_hash(path: &Path) -> Option<CacheKey> {
98    let bytes = fs::read(path).ok()?;
99    if bytes.len() != 32 {
100        return None;
101    }
102    let mut arr = [0u8; 32];
103    arr.copy_from_slice(&bytes);
104    Some(CacheKey::from_bytes(arr))
105}
106
107fn write_cached_hash(path: &Path, key: &CacheKey) -> io::Result<()> {
108    if let Some(parent) = path.parent() {
109        fs::create_dir_all(parent)?;
110    }
111    // Atomic write: tmp + rename.
112    let tmp = path.with_extension("hash.tmp");
113    fs::write(&tmp, key.as_bytes())?;
114    fs::rename(&tmp, path)?;
115    Ok(())
116}
117
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Two calls on an unchanged binary return the same key, and the first
    /// call leaves exactly one memo file behind.
    #[test]
    fn fingerprint_is_stable_across_calls() {
        let bin_dir = TempDir::new().unwrap();
        let memo_dir = TempDir::new().unwrap();
        let bin = bin_dir.path().join("fakebin");
        fs::write(&bin, b"hello world").unwrap();

        let a = compiler_fingerprint(&bin, memo_dir.path()).unwrap();
        let b = compiler_fingerprint(&bin, memo_dir.path()).unwrap();
        assert_eq!(a, b, "memo should yield same hash on second call");

        // Without this check the test would pass even if the memo were never
        // written, since recomputing yields the same key.
        let memo_files = fs::read_dir(memo_dir.path()).unwrap().count();
        assert_eq!(memo_files, 1, "first call should leave exactly one memo file");
    }

    /// Rewriting the binary with different contents yields a different key.
    #[test]
    fn fingerprint_changes_when_binary_changes() {
        let bin_dir = TempDir::new().unwrap();
        let memo_dir = TempDir::new().unwrap();
        let bin = bin_dir.path().join("fakebin");

        fs::write(&bin, b"version 1").unwrap();
        let v1 = compiler_fingerprint(&bin, memo_dir.path()).unwrap();

        // The two payloads have different lengths, so the memo key changes
        // through its size component even on filesystems whose mtime
        // granularity is too coarse to tell the two writes apart. No sleep
        // is needed.
        fs::write(&bin, b"version 2 different bytes").unwrap();
        let v2 = compiler_fingerprint(&bin, memo_dir.path()).unwrap();

        assert_ne!(v1, v2);
    }

    /// A binary path that does not exist is reported as an error.
    #[test]
    fn missing_binary_is_an_error() {
        let memo_dir = TempDir::new().unwrap();
        let result =
            compiler_fingerprint(Path::new("/nonexistent/path/to/binary"), memo_dir.path());
        assert!(result.is_err());
    }
}