Skip to main content

gruel_cache/
wire.rs

//! Wire format for cached parse and RIR outputs (ADR-0074 Phase 2).
//!
//! The two top-level types — [`CachedParseOutput`] and [`CachedRirOutput`] —
//! bundle the serialized IR with a per-file interner snapshot. On cache hit,
//! the snapshot is re-interned into the build's shared `ThreadedRodeo` and
//! the IR's `Spur` values are remapped from the cached numbering to the
//! build's numbering.
//!
//! Per ADR-0074: "Spurs are file-local; on load they get re-interned into
//! the build-wide interner."
//!
//! The actual remapping (walking AST/RIR to substitute `Spur` values) is
//! implemented in `gruel-cache/src/remap.rs` and lives behind the same
//! preview feature as the rest of the cache. This module only handles the
//! envelope: serialize, deserialize, snapshot/restore the interner.
16
17use lasso::{Key, Spur, ThreadedRodeo};
18use serde::{Deserialize, Serialize};
19
20use gruel_parser::ast::Ast;
21use gruel_rir::Rir;
22
23/// A per-file interner snapshot — `strings[i]` is the string value of the
24/// `Spur` whose raw key equals `i`. Indexed by `Key::into_usize`, which for
25/// lasso's default `Spur` is `NonZeroU32` minus one (i.e. the first
26/// interned string has key 0, the second has key 1, etc.).
27#[derive(Debug, Clone, Default, Serialize, Deserialize)]
28pub struct InternerSnapshot {
29    pub strings: Vec<String>,
30}
31
32impl InternerSnapshot {
33    /// Build a snapshot from a `ThreadedRodeo`, capturing every interned
34    /// string in `Spur::into_usize` order.
35    ///
36    /// Used at cache *write* time, after a file has finished parsing into
37    /// its own per-file interner.
38    pub fn capture(interner: &ThreadedRodeo) -> Self {
39        // ThreadedRodeo doesn't give us strings in Spur order directly; we
40        // collect (Spur, &str) pairs and sort by Spur index.
41        let mut pairs: Vec<(usize, String)> = interner
42            .iter()
43            .map(|(spur, s)| (spur.into_usize(), s.to_string()))
44            .collect();
45        pairs.sort_by_key(|(idx, _)| *idx);
46
47        // Sanity check: the indices should form a contiguous range starting
48        // at 0. If not, the cache assumes Spur ordering it can't honour.
49        for (expected, (actual, _)) in pairs.iter().enumerate() {
50            debug_assert_eq!(
51                expected, *actual,
52                "ThreadedRodeo Spurs not contiguous starting at 0; \
53                 cache assumes lasso's standard packing"
54            );
55        }
56
57        Self {
58            strings: pairs.into_iter().map(|(_, s)| s).collect(),
59        }
60    }
61
62    /// Re-intern every string into `target`, returning a remap table where
63    /// `remap[i]` is the new `Spur` for the cached string at index `i`.
64    ///
65    /// Used at cache *read* time, after deserializing a `CachedParseOutput`
66    /// or `CachedRirOutput`, before the AST/RIR's `Spur` values can be
67    /// trusted against the build's shared interner.
68    pub fn restore_into(&self, target: &ThreadedRodeo) -> Vec<Spur> {
69        self.strings
70            .iter()
71            .map(|s| target.get_or_intern(s))
72            .collect()
73    }
74}
75
76/// Envelope around a parsed file's AST + interner snapshot, ready for
77/// bincode serialization to the parse cache.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct CachedParseOutput {
80    pub interner: InternerSnapshot,
81    pub ast: Ast,
82}
83
84impl CachedParseOutput {
85    /// Serialize to the bincode wire format used by `CacheStore::put`.
86    pub fn encode(&self) -> Result<Vec<u8>, bincode::error::EncodeError> {
87        bincode::serde::encode_to_vec(self, bincode::config::standard())
88    }
89
90    /// Deserialize from the bincode wire format. Pairs with
91    /// [`CachedParseOutput::encode`].
92    pub fn decode(bytes: &[u8]) -> Result<Self, bincode::error::DecodeError> {
93        let (out, _read) = bincode::serde::decode_from_slice(bytes, bincode::config::standard())?;
94        Ok(out)
95    }
96}
97
98/// Envelope around a per-file RIR + interner snapshot, ready for bincode
99/// serialization to the RIR cache.
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct CachedRirOutput {
102    pub interner: InternerSnapshot,
103    pub rir: Rir,
104}
105
106impl CachedRirOutput {
107    pub fn encode(&self) -> Result<Vec<u8>, bincode::error::EncodeError> {
108        bincode::serde::encode_to_vec(self, bincode::config::standard())
109    }
110
111    pub fn decode(bytes: &[u8]) -> Result<Self, bincode::error::DecodeError> {
112        let (out, _read) = bincode::serde::decode_from_slice(bytes, bincode::config::standard())?;
113        Ok(out)
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// Each captured string sits at the index of the `Spur` it was given.
    #[test]
    fn snapshot_captures_strings_in_spur_order() {
        let rodeo = ThreadedRodeo::new();
        let alpha = rodeo.get_or_intern("alpha");
        let beta = rodeo.get_or_intern("beta");
        let gamma = rodeo.get_or_intern("gamma");

        let captured = InternerSnapshot::capture(&rodeo);

        assert_eq!(captured.strings.len(), 3);
        assert_eq!(captured.strings[alpha.into_usize()], "alpha");
        assert_eq!(captured.strings[beta.into_usize()], "beta");
        assert_eq!(captured.strings[gamma.into_usize()], "gamma");
    }

    /// A snapshot survives a bincode encode/decode cycle unchanged.
    #[test]
    fn snapshot_round_trips_through_bincode() {
        let rodeo = ThreadedRodeo::new();
        rodeo.get_or_intern("hello");
        rodeo.get_or_intern("world");
        let original = InternerSnapshot::capture(&rodeo);

        let bytes =
            bincode::serde::encode_to_vec(&original, bincode::config::standard()).unwrap();
        let (restored, _): (InternerSnapshot, _) =
            bincode::serde::decode_from_slice(&bytes, bincode::config::standard()).unwrap();

        assert_eq!(restored.strings, original.strings);
    }

    /// Restoring into a non-empty interner reuses existing entries and adds
    /// the missing ones.
    #[test]
    fn restore_reinterns_strings_into_target() {
        // The source interner holds three strings.
        let source = ThreadedRodeo::new();
        let src_x = source.get_or_intern("x");
        let src_y = source.get_or_intern("y");
        let src_z = source.get_or_intern("z");
        let captured = InternerSnapshot::capture(&source);

        // The target interner already knows "y" before the restore runs.
        let target = ThreadedRodeo::new();
        let existing_y = target.get_or_intern("y");

        let table = captured.restore_into(&target);

        // The cached "y" must land on the Spur the target already had.
        assert_eq!(table[src_y.into_usize()], existing_y);
        // For "x" and "z" only the string behind the remapped Spur is
        // checked. The Spur values themselves are left unasserted: they can
        // happen to equal the source's depending on interning order, and
        // only the string mapping is load-bearing.
        assert_eq!(target.resolve(&table[src_x.into_usize()]), "x");
        assert_eq!(target.resolve(&table[src_z.into_usize()]), "z");
    }

    /// An envelope with no items and no strings round-trips cleanly.
    #[test]
    fn empty_ast_round_trips() {
        let envelope = CachedParseOutput {
            interner: InternerSnapshot::default(),
            ast: Ast {
                module_doc: None,
                items: Vec::new(),
            },
        };

        let encoded = envelope.encode().unwrap();
        let restored = CachedParseOutput::decode(&encoded).unwrap();

        assert!(restored.ast.items.is_empty());
        assert!(restored.interner.strings.is_empty());
    }
}
187}