Skip to main content

gruel_cache/
store.rs

//! Content-addressed on-disk cache store.
//!
//! Layout (rooted at the cache directory passed to [`CacheStore::open`]):
//!
//! ```text
//! <root>/
//! ├── version              # u32 schema version, decimal text
//! ├── parse/
//! │   └── <hash>.bin
//! ├── air/
//! │   └── <hash>.bin
//! ├── llvm-ir/
//! │   └── <hash>.bc
//! └── tmp/                 # staging for atomic writes
//! ```
//!
//! On [`CacheStore::open`], if the persisted version doesn't match
//! [`crate::CACHE_SCHEMA_VERSION`], the entire cache directory is wiped
//! and recreated. Apart from an explicit [`CacheStore::clean`], this is
//! the only path by which the store deletes data.
//!
//! Concurrency: writes are staged in `tmp/` under a name built from the
//! process id, a per-process counter, and the key hash, and are then
//! renamed into place. Multiple `gruel` processes sharing a cache
//! directory can read and write safely; the worst case is duplicated
//! work (two processes computing the same hash and racing on rename).
25
26use std::fs;
27use std::io::{self, Read, Write};
28use std::path::{Path, PathBuf};
29
30use tracing::{info, warn};
31
32use crate::CACHE_SCHEMA_VERSION;
33use crate::fingerprint::CacheKey;
34
/// Selects the subdirectory (and file extension) a cache entry is stored
/// under. There is one variant per pipeline stage that persists results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CacheKind {
    /// Stored as `parse/<hash>.bin`.
    Parse,
    /// Stored as `air/<hash>.bin`.
    Air,
    /// Stored as `llvm-ir/<hash>.bc`.
    LlvmIr,
}

impl CacheKind {
    /// Name of this kind's subdirectory, relative to the cache root.
    fn dir_name(self) -> &'static str {
        match self {
            Self::LlvmIr => "llvm-ir",
            Self::Air => "air",
            Self::Parse => "parse",
        }
    }

    /// File extension (without the leading dot) used for entries of this
    /// kind.
    fn extension(self) -> &'static str {
        match self {
            Self::LlvmIr => "bc",
            Self::Parse | Self::Air => "bin",
        }
    }

    /// Every kind, in declaration order, for iteration in stats / GC code.
    pub fn all() -> [CacheKind; 3] {
        [Self::Parse, Self::Air, Self::LlvmIr]
    }
}
65
/// Totals for a single [`CacheKind`], as reported by `gruel cache stats`.
#[derive(Clone, Debug, Default)]
pub struct CacheStats {
    /// Number of regular files found in the kind's directory.
    pub entries: usize,
    /// Combined length of those files, in bytes.
    pub bytes: u64,
}
72
/// Handle to an open cache directory.
///
/// The handle owns nothing but the root path, so cloning it only copies
/// a `PathBuf`. Obtain one with [`CacheStore::open`], which also checks
/// the schema version and creates the directory layout.
#[derive(Clone, Debug)]
pub struct CacheStore {
    /// Directory that contains `version`, `tmp/` and the per-kind
    /// subdirectories.
    root: PathBuf,
}
80
81impl CacheStore {
82    /// Open (or create) a cache rooted at `root`. If a `version` file
83    /// exists with a value other than [`CACHE_SCHEMA_VERSION`], the
84    /// entire directory is wiped before the store returns.
85    pub fn open(root: impl Into<PathBuf>) -> io::Result<Self> {
86        let root = root.into();
87        let store = Self { root };
88        store.ensure_layout()?;
89        Ok(store)
90    }
91
92    pub fn root(&self) -> &Path {
93        &self.root
94    }
95
96    /// Look up a blob. Returns `Ok(None)` if the entry is absent;
97    /// `Err(_)` only on real I/O failures.
98    pub fn get(&self, kind: CacheKind, key: &CacheKey) -> io::Result<Option<Vec<u8>>> {
99        let path = self.entry_path(kind, key);
100        match fs::File::open(&path) {
101            Ok(mut f) => {
102                let mut buf = Vec::new();
103                f.read_to_end(&mut buf)?;
104                Ok(Some(buf))
105            }
106            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(None),
107            Err(e) => Err(e),
108        }
109    }
110
111    /// Insert (or overwrite) a blob. Atomic: writes to `tmp/<random>`
112    /// and renames into place. Concurrent inserts of the same key are
113    /// safe; whichever rename wins is the visible result.
114    pub fn put(&self, kind: CacheKind, key: &CacheKey, data: &[u8]) -> io::Result<()> {
115        let final_path = self.entry_path(kind, key);
116        let tmp_path = self.tmp_path(key, kind.extension());
117        {
118            let mut f = fs::File::create(&tmp_path)?;
119            f.write_all(data)?;
120            f.sync_all()?;
121        }
122        // On Unix, rename atomically replaces the destination.
123        fs::rename(&tmp_path, &final_path)?;
124        Ok(())
125    }
126
127    /// Wipe every cache file. Equivalent to `gruel cache clean`.
128    pub fn clean(&self) -> io::Result<()> {
129        if !self.root.exists() {
130            return Ok(());
131        }
132        fs::remove_dir_all(&self.root)?;
133        self.ensure_layout()?;
134        info!(root = %self.root.display(), "cache cleaned");
135        Ok(())
136    }
137
138    /// Walk the cache directory and accumulate per-kind stats. O(N) in
139    /// the number of entries; called only by `gruel cache stats` and
140    /// during tests, never on the hot path.
141    pub fn stats(&self) -> io::Result<[(CacheKind, CacheStats); 3]> {
142        let mut out = [
143            (CacheKind::Parse, CacheStats::default()),
144            (CacheKind::Air, CacheStats::default()),
145            (CacheKind::LlvmIr, CacheStats::default()),
146        ];
147        for (kind, stats) in &mut out {
148            let dir = self.kind_dir(*kind);
149            if !dir.exists() {
150                continue;
151            }
152            for entry in fs::read_dir(&dir)? {
153                let entry = entry?;
154                let meta = entry.metadata()?;
155                if meta.is_file() {
156                    stats.entries += 1;
157                    stats.bytes += meta.len();
158                }
159            }
160        }
161        Ok(out)
162    }
163
164    fn entry_path(&self, kind: CacheKind, key: &CacheKey) -> PathBuf {
165        self.kind_dir(kind)
166            .join(format!("{}.{}", key.hex(), kind.extension()))
167    }
168
169    fn kind_dir(&self, kind: CacheKind) -> PathBuf {
170        self.root.join(kind.dir_name())
171    }
172
173    fn tmp_path(&self, key: &CacheKey, ext: &str) -> PathBuf {
174        // Use the key + a process-local counter so concurrent puts of
175        // different keys don't fight over the same tmp filename.
176        use std::sync::atomic::{AtomicU64, Ordering};
177        static COUNTER: AtomicU64 = AtomicU64::new(0);
178        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
179        self.root.join("tmp").join(format!(
180            "{}-{}-{}.{}.tmp",
181            std::process::id(),
182            n,
183            key.hex(),
184            ext
185        ))
186    }
187
188    fn ensure_layout(&self) -> io::Result<()> {
189        // Check version file; wipe if mismatched.
190        let version_path = self.root.join("version");
191        if self.root.exists() && version_path.exists() {
192            match fs::read_to_string(&version_path) {
193                Ok(s) => {
194                    let stored: Option<u32> = s.trim().parse().ok();
195                    if stored != Some(CACHE_SCHEMA_VERSION) {
196                        warn!(
197                            stored = ?stored,
198                            current = CACHE_SCHEMA_VERSION,
199                            "cache schema version mismatch; wiping"
200                        );
201                        // Wipe and recreate.
202                        fs::remove_dir_all(&self.root)?;
203                    }
204                }
205                Err(_) => {
206                    // Unreadable version file → treat as mismatch.
207                    fs::remove_dir_all(&self.root)?;
208                }
209            }
210        }
211
212        // Ensure all subdirs exist.
213        for kind in CacheKind::all() {
214            fs::create_dir_all(self.root.join(kind.dir_name()))?;
215        }
216        fs::create_dir_all(self.root.join("tmp"))?;
217
218        // Persist current version.
219        if !version_path.exists() {
220            fs::write(&version_path, CACHE_SCHEMA_VERSION.to_string())?;
221        }
222
223        Ok(())
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::fingerprint::blake3_bytes;
    use tempfile::TempDir;

    /// Open a store inside a fresh temporary directory. The `TempDir`
    /// guard is handed back so the directory lives as long as the test
    /// keeps it.
    fn make_store() -> (TempDir, CacheStore) {
        let guard = TempDir::new().unwrap();
        let cache_root = guard.path().join("cache");
        let store = CacheStore::open(cache_root).unwrap();
        (guard, store)
    }

    #[test]
    fn missing_entry_returns_none() {
        let (_guard, store) = make_store();
        let absent = blake3_bytes(b"never inserted");
        let found = store.get(CacheKind::Parse, &absent).unwrap();
        assert!(found.is_none());
    }

    #[test]
    fn put_then_get_round_trips() {
        let (_guard, store) = make_store();
        let key = blake3_bytes(b"hello");
        let payload: &[u8] = b"some serialized blob";
        store.put(CacheKind::Air, &key, payload).unwrap();

        let fetched = store.get(CacheKind::Air, &key).unwrap();
        assert_eq!(fetched.as_deref(), Some(payload));
    }

    #[test]
    fn put_is_atomic_no_partial_files() {
        // A successful put must publish the entry and leave nothing
        // behind in the staging directory.
        let (_guard, store) = make_store();
        let key = blake3_bytes(b"k");
        store.put(CacheKind::Parse, &key, b"data").unwrap();

        let leftovers = fs::read_dir(store.root().join("tmp"))
            .unwrap()
            .collect::<Vec<_>>();
        assert!(
            leftovers.is_empty(),
            "tmp/ should be empty after successful put, found: {:?}",
            leftovers
        );
    }

    #[test]
    fn clean_wipes_everything_then_layout_returns() {
        let (_guard, store) = make_store();
        let key = blake3_bytes(b"k");
        store.put(CacheKind::Air, &key, b"x").unwrap();
        assert!(store.get(CacheKind::Air, &key).unwrap().is_some());

        store.clean().unwrap();
        assert!(store.get(CacheKind::Air, &key).unwrap().is_none());

        // The store must remain usable: every kind directory and the
        // version file are back in place.
        let root = store.root();
        for kind in CacheKind::all() {
            assert!(root.join(kind.dir_name()).is_dir());
        }
        assert!(root.join("version").is_file());
    }

    #[test]
    fn version_mismatch_wipes_cache() {
        let guard = TempDir::new().unwrap();
        let root = guard.path().join("cache");
        let key = blake3_bytes(b"k");

        // First session: create the cache and store one entry.
        {
            let first = CacheStore::open(&root).unwrap();
            first.put(CacheKind::Parse, &key, b"x").unwrap();
        }

        // Pretend a different schema version wrote this cache.
        let version_file = root.join("version");
        fs::write(&version_file, "999").unwrap();

        // Second session: the mismatch must wipe and recreate.
        let reopened = CacheStore::open(&root).unwrap();
        assert!(reopened.get(CacheKind::Parse, &key).unwrap().is_none());
        assert_eq!(
            fs::read_to_string(&version_file).unwrap().trim(),
            CACHE_SCHEMA_VERSION.to_string()
        );
    }

    #[test]
    fn stats_reports_entry_counts_and_bytes() {
        let (_guard, store) = make_store();
        let first = blake3_bytes(b"one");
        let second = blake3_bytes(b"two");
        store.put(CacheKind::Parse, &first, b"abcde").unwrap();
        store.put(CacheKind::Parse, &second, b"xy").unwrap();
        store.put(CacheKind::Air, &first, b"123").unwrap();

        // Results come back in Parse, Air, LlvmIr order.
        let [(_, parse), (_, air), (_, llvm)] = store.stats().unwrap();
        assert_eq!(parse.entries, 2);
        assert_eq!(parse.bytes, 7);
        assert_eq!(air.entries, 1);
        assert_eq!(air.bytes, 3);
        assert_eq!(llvm.entries, 0);
    }

    #[test]
    fn put_overwrite_replaces_existing() {
        let (_guard, store) = make_store();
        let key = blake3_bytes(b"k");
        for payload in [&b"old"[..], &b"new"[..]] {
            store.put(CacheKind::Air, &key, payload).unwrap();
        }

        let current = store.get(CacheKind::Air, &key).unwrap();
        assert_eq!(current.as_deref(), Some(&b"new"[..]));
    }
}