Skip to main content

gruel_compiler/
parse_cache.rs

1//! Parse-cache integration (ADR-0074 Phase 2b).
2//!
3//! Wraps the per-file lex+parse loop with a content-addressed cache. On
4//! cache hit, the AST is deserialized and its file-local Spurs are
5//! re-interned into the build's shared `ThreadedRodeo`. On miss, parsing
//! runs normally and the resulting AST and per-file interner snapshot are
//! written back to the cache.
8//!
9//! ## Design
10//!
11//! Each file is parsed into its own `ThreadedRodeo` (rather than the
12//! historical shared interner). After parse, the per-file interner is
13//! snapshotted, the AST is cached, and the snapshot is then replayed
14//! into the build's shared interner — producing a remap table that the
15//! `RemapSpurs` walker uses to substitute the AST's Spurs into the
16//! build's numbering.
17//!
18//! This per-file architecture is what makes the cache key independent
19//! of compilation order: the snapshot for `foo.gruel` only contains
20//! strings the parser of `foo.gruel` interned, regardless of which
21//! other files were parsed in the same build.
22
23use lasso::ThreadedRodeo;
24use tracing::{debug, info, info_span, warn};
25
26use gruel_cache::{
27    CacheKey, CacheKind, CacheStore, CachedParseOutput, Hasher, InternerSnapshot, RemapSpurs,
28    blake3_bytes,
29};
30use gruel_lexer::Lexer;
31use gruel_parser::Parser;
32use gruel_util::{CompileErrors, MultiErrorResult, PreviewFeatures};
33
34use crate::{ParsedFile, ParsedProgram, SourceFile};
35#[cfg(test)]
36use gruel_util::FileId;
37
/// Hit/miss counts for one parse-pipeline invocation.
///
/// Surfaced to `--time-passes` so users can see whether the cache is doing
/// work. `Default` yields all-zero counters, and the type is comparable so
/// callers and tests can check a whole stats value at once.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ParseCacheStats {
    /// Number of files whose cached parse output was used.
    pub hits: usize,
    /// Number of files that had to be lexed and parsed.
    pub misses: usize,
}

impl ParseCacheStats {
    /// Total number of files accounted for, i.e. `hits + misses`.
    #[must_use]
    pub fn total(&self) -> usize {
        self.hits + self.misses
    }
}
51
52/// Compute the parse-cache key for one source file.
53///
54/// `build_fp` mixes in the compiler binary hash, target, opt level, and
55/// preview-feature set; `file_fp` is the BLAKE3 of the source bytes. The
56/// resulting key is stable as long as both stay constant.
57pub fn parse_key(build_fp: &CacheKey, source_bytes: &[u8]) -> CacheKey {
58    let file_fp = blake3_bytes(source_bytes);
59    let mut h = Hasher::new();
60    h.update(build_fp.as_bytes());
61    h.update(file_fp.as_bytes());
62    h.finalize()
63}
64
65/// Run the parse pipeline with cache lookup/store enabled.
66///
67/// Behavior:
68/// - For each `SourceFile`, compute `parse_key` and probe the cache.
69/// - On hit: deserialize `CachedParseOutput`, re-intern its snapshot
70///   into the build's shared `ThreadedRodeo`, and walk the AST to
71///   substitute Spurs via the remap. Skip lex+parse for that file.
72/// - On miss: lex+parse into a fresh per-file interner, snapshot it,
73///   store the cached output, then merge into the build interner the
74///   same way as a hit (ensuring the merge path is exercised on every
75///   build, not just hits).
76///
77/// Returns the parsed program plus per-stage cache stats.
78pub fn parse_all_files_cached(
79    sources: &[SourceFile<'_>],
80    preview_features: &PreviewFeatures,
81    cache: &CacheStore,
82    build_fp: &CacheKey,
83) -> MultiErrorResult<(ParsedProgram, ParseCacheStats)> {
84    let build_interner = ThreadedRodeo::new();
85    let (files, stats) =
86        parse_files_into(&build_interner, sources, preview_features, cache, build_fp)?;
87    Ok((
88        ParsedProgram {
89            files,
90            interner: build_interner,
91        },
92        stats,
93    ))
94}
95
96/// Like [`parse_all_files_cached`], but appends parsed files into a
97/// caller-supplied build interner. Used by `CompilationUnit::parse` to
98/// share one `ThreadedRodeo` between the synthetic prelude (parsed
99/// uncached, the existing path) and user files (parsed cached, this
100/// path).
101pub fn parse_files_into(
102    build_interner: &ThreadedRodeo,
103    sources: &[SourceFile<'_>],
104    preview_features: &PreviewFeatures,
105    cache: &CacheStore,
106    build_fp: &CacheKey,
107) -> MultiErrorResult<(Vec<ParsedFile>, ParseCacheStats)> {
108    let _span = info_span!("parse_cached", file_count = sources.len()).entered();
109
110    let mut stats = ParseCacheStats::default();
111    let mut parsed_files = Vec::with_capacity(sources.len());
112
113    for source in sources {
114        let key = parse_key(build_fp, source.source.as_bytes());
115
116        // Try the cache first.
117        let (mut ast, file_interner_snap) = match cache.get(CacheKind::Parse, &key) {
118            Ok(Some(bytes)) => match CachedParseOutput::decode(&bytes) {
119                Ok(cached) => {
120                    debug!(path = %source.path, "parse-cache hit");
121                    stats.hits += 1;
122                    (cached.ast, cached.interner)
123                }
124                Err(e) => {
125                    // Correctness fallback: cache miss on any deserialize error.
126                    warn!(
127                        path = %source.path,
128                        error = %e,
129                        "parse-cache deserialize failed; recomputing"
130                    );
131                    stats.misses += 1;
132                    parse_uncached(source, preview_features)?
133                }
134            },
135            Ok(None) => {
136                debug!(path = %source.path, "parse-cache miss");
137                stats.misses += 1;
138                let (ast, snap) = parse_uncached(source, preview_features)?;
139                // Best-effort store; cache write failure is not a build error.
140                let cached = CachedParseOutput {
141                    interner: snap.clone(),
142                    ast: ast.clone(),
143                };
144                match cached.encode() {
145                    Ok(bytes) => {
146                        if let Err(e) = cache.put(CacheKind::Parse, &key, &bytes) {
147                            warn!(error = %e, "parse-cache write failed");
148                        }
149                    }
150                    Err(e) => warn!(error = %e, "parse-cache encode failed"),
151                }
152                (ast, snap)
153            }
154            Err(e) => {
155                warn!(error = %e, "parse-cache read failed; recomputing");
156                stats.misses += 1;
157                parse_uncached(source, preview_features)?
158            }
159        };
160
161        // Merge per-file interner snapshot into the build interner; remap
162        // the AST's Spurs from cached numbering to build numbering. The
163        // path is identical for hits and misses, so any latent bug in
164        // remap shows up on cold builds too.
165        let remap = file_interner_snap.restore_into(build_interner);
166        ast.remap_spurs(&remap);
167
168        parsed_files.push(ParsedFile {
169            path: source.path.to_string(),
170            file_id: source.file_id,
171            ast,
172            // Per-file interner field is API-compat only; the real
173            // interner is the build-shared one in ParsedProgram.
174            interner: ThreadedRodeo::new(),
175        });
176    }
177
178    info!(
179        hits = stats.hits,
180        misses = stats.misses,
181        files = sources.len(),
182        "parse cache pass complete"
183    );
184
185    Ok((parsed_files, stats))
186}
187
188/// Lex + parse one file into its own fresh `ThreadedRodeo`, returning
189/// the AST and a snapshot of the per-file interner.
190///
191/// Mirrors what `parse_all_files_with_preview` does for one file, but
192/// uses a per-file interner so the cache snapshot is independent of
193/// other files in the same build.
194fn parse_uncached(
195    source: &SourceFile<'_>,
196    preview_features: &PreviewFeatures,
197) -> MultiErrorResult<(gruel_parser::Ast, InternerSnapshot)> {
198    let file_interner = ThreadedRodeo::new();
199
200    let lexer = Lexer::with_interner_and_file_id(source.source, file_interner, source.file_id);
201    let (tokens, file_interner) = lexer.tokenize().map_err(CompileErrors::from)?;
202
203    let parser = Parser::new(tokens, file_interner)
204        .with_preview_features(preview_features.clone())
205        .with_source(source.source);
206    let (ast, file_interner) = parser.parse()?;
207
208    let snapshot = InternerSnapshot::capture(&file_interner);
209    Ok((ast, snapshot))
210}
211
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Fixed stand-in for the real build fingerprint.
    fn fake_build_fp() -> CacheKey {
        blake3_bytes(b"fake-compiler-fingerprint")
    }

    /// Open an empty cache store inside a fresh temporary directory.
    ///
    /// The `TempDir` guard is returned alongside the store so the directory
    /// stays alive for the duration of the test.
    fn fresh_cache() -> (TempDir, CacheStore) {
        let tmp = TempDir::new().unwrap();
        let cache = CacheStore::open(tmp.path().join("cache")).unwrap();
        (tmp, cache)
    }

    /// A first (cold) run must miss and a second (warm) run must hit, and
    /// both must yield the same number of top-level items.
    #[test]
    fn cold_then_warm_run_produce_identical_asts() {
        let (_tmp, cache) = fresh_cache();
        let build_fp = fake_build_fp();
        let features = PreviewFeatures::default();

        let sources = vec![SourceFile::new(
            "main.gruel",
            "fn main() -> i32 { 42 }",
            FileId::new(1),
        )];

        // Cold run: nothing is cached yet, so this must be a miss.
        let (cold_program, cold_stats) =
            parse_all_files_cached(&sources, &features, &cache, &build_fp)
                .expect("cold parse should succeed");
        assert_eq!(cold_stats.hits, 0);
        assert_eq!(cold_stats.misses, 1);
        assert_eq!(cold_program.files.len(), 1);

        // Warm run: the entry written above must be served from the cache.
        let (warm_program, warm_stats) =
            parse_all_files_cached(&sources, &features, &cache, &build_fp)
                .expect("warm parse should succeed");
        assert_eq!(warm_stats.hits, 1);
        assert_eq!(warm_stats.misses, 0);

        let cold_items = cold_program.files[0].ast.items.len();
        let warm_items = warm_program.files[0].ast.items.len();
        assert_eq!(cold_items, warm_items);
    }

    /// ADR-0088 regression test: every Spur in a re-loaded AST must be
    /// remapped into the build interner.
    ///
    /// The original bug: `RemapSpurs for MethodSig` did not walk the
    /// `directives` field, so directive-arg Spurs kept their cached
    /// numbering. That only showed when the build interner already held
    /// *other* strings (e.g. the prelude was loaded first): the un-remapped
    /// Spur then resolved to whichever string occupied that slot.
    ///
    /// Reproduction: pre-warm a build interner with strings the cached file
    /// does not contain, load the cached AST into it, and check that the
    /// directive's arg resolves to the *source* spelling rather than to
    /// whatever sits at the colliding index.
    #[test]
    fn cached_ast_remaps_directives_into_warmed_build_interner() {
        use gruel_parser::ast::{DirectiveArg, Item};

        let (_tmp, cache) = fresh_cache();
        let build_fp = fake_build_fp();
        let features = PreviewFeatures::default();

        // The file's only directive arg is the unique token
        // "totally_unique_marker_name". If RemapSpurs skips any field, the
        // warm reload resolves the arg to whatever the stale Spur points at
        // in the pre-warmed interner, which is almost certainly not this
        // string.
        let src = r#"
interface Bad {
    @mark(totally_unique_marker_name) fn foo(self) -> i32;
}
fn main() -> i32 { 0 }
"#;
        let sources = vec![SourceFile::new("bad.gruel", src, FileId::new(1))];

        // Cold parse: its only job here is to populate the cache.
        let _ = parse_all_files_cached(&sources, &features, &cache, &build_fp)
            .expect("cold parse should succeed");

        // Warm path: seed a fresh build interner with unrelated strings
        // (standing in for a prelude parsed first), then load the cached
        // AST into it through `parse_files_into`.
        let build_interner = ThreadedRodeo::new();
        let padding = [
            "prelude_padding_0",
            "prelude_padding_1",
            "prelude_padding_2",
            "prelude_padding_3",
            "prelude_padding_4",
            "prelude_padding_5",
            "prelude_padding_6",
            "prelude_padding_7",
            "prelude_padding_8",
            "prelude_padding_9",
        ];
        for entry in padding {
            build_interner.get_or_intern(entry);
        }

        let (warm_files, stats) =
            parse_files_into(&build_interner, &sources, &features, &cache, &build_fp)
                .expect("warm parse should succeed");
        assert_eq!(stats.hits, 1);

        // Drill down: Interface -> first MethodSig -> first directive ->
        // first arg. Once RemapSpurs has walked the cached MethodSig's
        // directives, the arg must resolve to the source spelling in the
        // warmed build interner.
        let iface = match &warm_files[0].ast.items[0] {
            Item::Interface(i) => i,
            other => panic!("expected Interface, got {:?}", other),
        };
        let sig = &iface.methods[0];
        assert_eq!(sig.directives.len(), 1, "expected one @mark directive");

        let directive = &sig.directives[0];
        assert_eq!(build_interner.resolve(&directive.name.name), "mark");
        assert_eq!(directive.args.len(), 1, "expected one directive arg");

        match &directive.args[0] {
            DirectiveArg::Ident(ident) => {
                assert_eq!(
                    build_interner.resolve(&ident.name),
                    "totally_unique_marker_name",
                    "cached directive arg leaked unremapped Spur into the warmed build interner"
                );
            }
            other => panic!("expected Ident arg, got {:?}", other),
        }
    }

    /// Changing one file's contents must invalidate only that file's entry.
    #[test]
    fn editing_source_invalidates_only_changed_file() {
        let (_tmp, cache) = fresh_cache();
        let build_fp = fake_build_fp();
        let features = PreviewFeatures::default();

        // First build: two files, both unseen, so both miss.
        let a = SourceFile::new("a.gruel", "fn a() -> i32 { 1 }", FileId::new(1));
        let b = SourceFile::new("b.gruel", "fn b() -> i32 { 2 }", FileId::new(2));
        let first_build = [a, b.clone()];

        let (_, cold_stats) =
            parse_all_files_cached(&first_build, &features, &cache, &build_fp).unwrap();
        assert_eq!(cold_stats.misses, 2);

        // Second build: `a` has new contents, `b` is untouched.
        let a2 = SourceFile::new("a.gruel", "fn a() -> i32 { 99 }", FileId::new(1));
        let second_build = [a2, b];

        let (_, warm_stats) =
            parse_all_files_cached(&second_build, &features, &cache, &build_fp).unwrap();
        // `a` missed, `b` hit.
        assert_eq!(warm_stats.hits, 1);
        assert_eq!(warm_stats.misses, 1);
    }
}