Skip to main content

gruel_cache/
signature.rs

1//! Per-file signature fingerprinting (ADR-0074 Phase 3).
2//!
3//! `sig_fp(file)` is a BLAKE3 hash of the file's `pub` interface — the
4//! exact set of names, types, and signatures importers can see. Two
5//! files whose `pub` items match byte-for-byte after canonical encoding
6//! produce the same `sig_fp`; changing a body inside a pub function
7//! does NOT change `sig_fp`, so editing a private helper doesn't
8//! invalidate downstream files' AIR cache.
9//!
10//! ## Encoding stability matters
11//!
12//! Once `sig_fp` is in use by the AIR cache (Phase 4), any change to
13//! the canonical encoding silently invalidates every cached AIR entry
14//! across every workspace that uses this compiler. The encoding is
15//! locked by golden tests: bumping the encoding requires bumping
16//! [`SIG_FP_VERSION`] AND the `CACHE_SCHEMA_VERSION` so old caches get
17//! wiped on first build with the new compiler.
18//!
19//! ## What's hashed
20//!
21//! For each public item (`pub fn`, `pub struct`, `pub enum`,
22//! `pub interface`, `pub const`, public methods on pub types), the
23//! encoder emits:
24//!
25//! - A discriminant byte identifying the item kind.
26//! - A length-prefixed name.
27//! - Length-prefixed canonical text forms of types (for fn params /
28//!   return / struct fields / enum variants / const types). Spurs are
29//!   resolved to their string content via the supplied interner so
30//!   the hash is stable across builds with different Spur numberings.
31//! - Recursive method signatures for inline-method-bearing types.
32//!
33//! Items are encoded in a deterministic order: lexicographic by name,
34//! and within composite items (struct fields, enum variants, methods),
35//! by declaration order — declaration order is part of the
36//! observable interface (e.g. for tuple struct positional fields).
37//!
38//! Function bodies, struct field initializers, and method bodies are
39//! NOT hashed. Only signatures.
40//!
41//! ## What's NOT hashed
42//!
43//! - Item docstrings and comments (pre-stripped by the parser).
44//! - Span positions (would make the hash sensitive to whitespace).
45//! - Directives whose effect is purely on body code generation.
46//!   We DO hash directives that affect signature semantics (e.g.
47//!   visibility-affecting directives).
48//! - Private items. Editing a private function never changes
49//!   `sig_fp`, regardless of whether it's called by pub items
50//!   transitively (sema sees those calls; the cache invariant is on
51//!   the source-level `pub` boundary).
52
53use lasso::ThreadedRodeo;
54
55use gruel_parser::ast::{
56    AnonStructField, Ast, ConstDecl, DeriveDecl, EnumDecl, EnumVariant, EnumVariantField,
57    EnumVariantKind, FieldDecl, Function, Ident, InterfaceDecl, Item, Method, MethodSig, Param,
58    SelfParam, SelfReceiverKind, StructDecl, TypeExpr, Visibility,
59};
60
61use crate::fingerprint::{CacheKey, Hasher};
62
/// Bumped any time the canonical encoding changes. Mixed into every
/// `sig_fp` so old caches (which used a different encoding) are
/// invalidated even if the file content matches.
///
/// Per the module docs, a bump here must be paired with a
/// `CACHE_SCHEMA_VERSION` bump. Because this value is the first thing
/// hashed, the golden hash in this file's tests changes with it.
pub const SIG_FP_VERSION: u32 = 2;
67
68/// Compute `sig_fp` for a single file's AST.
69///
70/// `interner` resolves the Spurs in `ast` to their string content so
71/// the hash is stable across different Spur numberings (e.g. after
72/// re-interning into a different build's interner).
73pub fn compute_sig_fp(ast: &Ast, interner: &ThreadedRodeo) -> CacheKey {
74    let mut h = Hasher::new();
75    h.update_u32(SIG_FP_VERSION);
76
77    // Collect public items in lexicographic-by-name order. The order
78    // within each item kind doesn't strictly matter for correctness
79    // (any deterministic order works), but lex order makes diffs
80    // legible if the encoding ever needs to be debugged.
81    let mut pub_items: Vec<(String, &Item)> = Vec::new();
82    for item in &ast.items {
83        match item {
84            Item::Function(f) if is_pub(&f.visibility) => {
85                pub_items.push((interner.resolve(&f.name.name).to_string(), item));
86            }
87            Item::Struct(s) if is_pub(&s.visibility) => {
88                pub_items.push((interner.resolve(&s.name.name).to_string(), item));
89            }
90            Item::Enum(e) if is_pub(&e.visibility) => {
91                pub_items.push((interner.resolve(&e.name.name).to_string(), item));
92            }
93            Item::Interface(i) => {
94                // ADR-0056: interface visibility is currently always private
95                // (see `pub visibility` doc comment in InterfaceDecl). Module
96                // system makes interfaces visible across files via re-export
97                // through `pub const`. We still include them in the
98                // signature hash because adding/changing an interface
99                // affects how derives and conformance witnesses resolve.
100                let _ = is_pub(&i.visibility);
101                pub_items.push((interner.resolve(&i.name.name).to_string(), item));
102            }
103            Item::Const(c) if is_pub(&c.visibility) => {
104                pub_items.push((interner.resolve(&c.name.name).to_string(), item));
105            }
106            Item::Derive(d) => {
107                // Derives apply to their host type's signature. Always
108                // include them, keyed by their decl name.
109                pub_items.push((interner.resolve(&d.name.name).to_string(), item));
110            }
111            // Private items and Error nodes are not part of the public
112            // interface. Skip them.
113            _ => {}
114        }
115    }
116    pub_items.sort_by(|a, b| a.0.cmp(&b.0));
117
118    h.update_u64(pub_items.len() as u64);
119    for (_name, item) in pub_items {
120        encode_item(&mut h, item, interner);
121    }
122
123    h.finalize()
124}
125
126fn is_pub(v: &Visibility) -> bool {
127    matches!(v, Visibility::Public)
128}
129
// Discriminant bytes written first by each top-level item encoder
// (`encode_function`, `encode_struct`, ...). The numeric values are fed
// straight into the hash, so renumbering one changes the fingerprint
// of every file that contains that item kind.
const TAG_FN: u8 = 1;
const TAG_STRUCT: u8 = 2;
const TAG_ENUM: u8 = 3;
const TAG_INTERFACE: u8 = 4;
const TAG_DERIVE: u8 = 5;
const TAG_CONST: u8 = 6;
const TAG_LINK_EXTERN: u8 = 7;
137
138fn encode_item(h: &mut Hasher, item: &Item, interner: &ThreadedRodeo) {
139    match item {
140        Item::Function(f) => encode_function(h, f, interner),
141        Item::Struct(s) => encode_struct(h, s, interner),
142        Item::Enum(e) => encode_enum(h, e, interner),
143        Item::Interface(i) => encode_interface(h, i, interner),
144        Item::Derive(d) => encode_derive(h, d, interner),
145        Item::Const(c) => encode_const(h, c, interner),
146        Item::LinkExtern(b) => encode_link_extern(h, b, interner),
147        Item::Error(_) => {}
148    }
149}
150
151fn encode_link_extern(
152    h: &mut Hasher,
153    block: &gruel_parser::ast::LinkExternBlock,
154    interner: &ThreadedRodeo,
155) {
156    // ADR-0088 follow-up: destructure for exhaustiveness — adding a
157    // field to LinkExternBlock fails to compile here until it's
158    // explicitly handled (or `_`-bound).
159    let gruel_parser::ast::LinkExternBlock {
160        doc: _,
161        library,
162        items,
163        link_mode: _,
164        span: _,
165    } = block;
166    h.update(&[TAG_LINK_EXTERN]);
167    h.update(interner.resolve(&library.value).as_bytes());
168    h.update(&[0]);
169    for item in items {
170        let gruel_parser::ast::ExternFn {
171            doc: _,
172            directives: _,
173            name,
174            params,
175            return_type,
176            span: _,
177        } = item;
178        encode_ident(h, name, interner);
179        encode_params(h, params, interner);
180        encode_return_type(h, return_type.as_ref(), interner);
181    }
182}
183
184fn encode_function(h: &mut Hasher, f: &Function, interner: &ThreadedRodeo) {
185    let Function {
186        doc: _,
187        directives: _,
188        visibility: _,
189        is_unchecked,
190        name,
191        params,
192        return_type,
193        body: _,
194        span: _,
195    } = f;
196    h.update(&[TAG_FN]);
197    encode_ident(h, name, interner);
198    h.update(&[u8::from(*is_unchecked)]);
199    encode_params(h, params, interner);
200    encode_return_type(h, return_type.as_ref(), interner);
201}
202
203fn encode_struct(h: &mut Hasher, s: &StructDecl, interner: &ThreadedRodeo) {
204    let StructDecl {
205        doc: _,
206        directives: _,
207        visibility: _,
208        posture: _,
209        name,
210        fields,
211        methods,
212        span: _,
213    } = s;
214    h.update(&[TAG_STRUCT]);
215    encode_ident(h, name, interner);
216
217    // Public fields contribute to the signature. Private fields don't
218    // (ADR-0073: a private field is invisible to importers, so a
219    // change to it doesn't change the public interface).
220    let pub_fields: Vec<&FieldDecl> = fields.iter().filter(|f| is_pub(&f.visibility)).collect();
221    h.update_u64(pub_fields.len() as u64);
222    for field in pub_fields {
223        encode_field(h, field, interner);
224    }
225
226    // Public methods contribute their signatures (NOT bodies).
227    let pub_methods: Vec<&Method> = methods.iter().filter(|m| is_pub(&m.visibility)).collect();
228    h.update_u64(pub_methods.len() as u64);
229    for method in pub_methods {
230        encode_method_sig_from_method(h, method, interner);
231    }
232}
233
234fn encode_enum(h: &mut Hasher, e: &EnumDecl, interner: &ThreadedRodeo) {
235    let EnumDecl {
236        doc: _,
237        directives: _,
238        visibility: _,
239        posture: _,
240        name,
241        variants,
242        methods,
243        span: _,
244    } = e;
245    h.update(&[TAG_ENUM]);
246    encode_ident(h, name, interner);
247
248    h.update_u64(variants.len() as u64);
249    for variant in variants {
250        encode_variant(h, variant, interner);
251    }
252
253    let pub_methods: Vec<&Method> = methods.iter().filter(|m| is_pub(&m.visibility)).collect();
254    h.update_u64(pub_methods.len() as u64);
255    for method in pub_methods {
256        encode_method_sig_from_method(h, method, interner);
257    }
258}
259
260fn encode_interface(h: &mut Hasher, i: &InterfaceDecl, interner: &ThreadedRodeo) {
261    let InterfaceDecl {
262        doc: _,
263        directives: _,
264        visibility: _,
265        name,
266        methods,
267        span: _,
268    } = i;
269    h.update(&[TAG_INTERFACE]);
270    encode_ident(h, name, interner);
271    h.update_u64(methods.len() as u64);
272    for method_sig in methods {
273        encode_method_sig(h, method_sig, interner);
274    }
275}
276
277fn encode_derive(h: &mut Hasher, d: &DeriveDecl, interner: &ThreadedRodeo) {
278    let DeriveDecl {
279        doc: _,
280        name,
281        methods,
282        span: _,
283    } = d;
284    h.update(&[TAG_DERIVE]);
285    encode_ident(h, name, interner);
286    h.update_u64(methods.len() as u64);
287    for method in methods {
288        encode_method_sig_from_method(h, method, interner);
289    }
290}
291
292fn encode_const(h: &mut Hasher, c: &ConstDecl, interner: &ThreadedRodeo) {
293    let ConstDecl {
294        doc: _,
295        directives: _,
296        visibility: _,
297        name,
298        ty,
299        init: _,
300        span: _,
301    } = c;
302    h.update(&[TAG_CONST]);
303    encode_ident(h, name, interner);
304    encode_type_opt(h, ty.as_ref(), interner);
305    // Const initializer expressions are part of the public-interface
306    // because they're inlined at use sites (ADR-0026 module re-exports
307    // are the main case). Encode the canonical text form via the same
308    // interner-resolved approach we use for type expressions.
309    //
310    // For now: skip the init expression — full const-init encoding is
311    // its own follow-up (it requires Expr-level canonical encoding,
312    // which is much larger surface than TypeExpr). The Phase 3 ADR
313    // says "stable canonical encoding of pub items with bodies
314    // stripped"; pub const init *is* the body-equivalent for consts,
315    // and we follow the same convention. Changing a const init
316    // shouldn't typically change the file's pub interface (the type is
317    // what importers see in declarations; the value flows through
318    // sema's const evaluator). If/when that turns out to be wrong,
319    // bump SIG_FP_VERSION and add init encoding.
320}
321
322fn encode_ident(h: &mut Hasher, ident: &Ident, interner: &ThreadedRodeo) {
323    h.update_str(interner.resolve(&ident.name));
324}
325
326fn encode_params(h: &mut Hasher, params: &[Param], interner: &ThreadedRodeo) {
327    h.update_u64(params.len() as u64);
328    for p in params {
329        h.update(&[u8::from(p.is_comptime)]);
330        h.update(&[encode_param_mode(&p.mode)]);
331        encode_ident(h, &p.name, interner);
332        encode_type(h, &p.ty, interner);
333    }
334}
335
336fn encode_param_mode(m: &gruel_parser::ast::ParamMode) -> u8 {
337    use gruel_parser::ast::ParamMode;
338    match m {
339        ParamMode::Normal => 0,
340        ParamMode::Comptime => 3,
341    }
342}
343
344fn encode_return_type(h: &mut Hasher, ret: Option<&TypeExpr>, interner: &ThreadedRodeo) {
345    encode_type_opt(h, ret, interner);
346}
347
348fn encode_type_opt(h: &mut Hasher, ty: Option<&TypeExpr>, interner: &ThreadedRodeo) {
349    match ty {
350        None => {
351            h.update(&[0]);
352        }
353        Some(t) => {
354            h.update(&[1]);
355            encode_type(h, t, interner);
356        }
357    }
358}
359
360fn encode_type(h: &mut Hasher, ty: &TypeExpr, interner: &ThreadedRodeo) {
361    match ty {
362        TypeExpr::Named(ident) => {
363            h.update(&[1]);
364            encode_ident(h, ident, interner);
365        }
366        TypeExpr::Unit(_) => {
367            h.update(&[2]);
368        }
369        TypeExpr::Never(_) => {
370            h.update(&[3]);
371        }
372        TypeExpr::Array {
373            element, length, ..
374        } => {
375            h.update(&[4]);
376            encode_type(h, element, interner);
377            h.update_u64(*length);
378        }
379        TypeExpr::AnonymousStruct {
380            fields, methods, ..
381        } => {
382            h.update(&[5]);
383            h.update_u64(fields.len() as u64);
384            for f in fields {
385                encode_anon_field(h, f, interner);
386            }
387            h.update_u64(methods.len() as u64);
388            for m in methods {
389                encode_method_sig_from_method(h, m, interner);
390            }
391        }
392        TypeExpr::AnonymousEnum {
393            variants, methods, ..
394        } => {
395            h.update(&[6]);
396            h.update_u64(variants.len() as u64);
397            for v in variants {
398                encode_variant(h, v, interner);
399            }
400            h.update_u64(methods.len() as u64);
401            for m in methods {
402                encode_method_sig_from_method(h, m, interner);
403            }
404        }
405        TypeExpr::AnonymousInterface { methods, .. } => {
406            h.update(&[7]);
407            h.update_u64(methods.len() as u64);
408            for m in methods {
409                encode_method_sig(h, m, interner);
410            }
411        }
412        TypeExpr::TypeCall { callee, args, .. } => {
413            h.update(&[8]);
414            encode_ident(h, callee, interner);
415            h.update_u64(args.len() as u64);
416            for a in args {
417                encode_type(h, a, interner);
418            }
419        }
420        TypeExpr::Tuple { elems, .. } => {
421            h.update(&[9]);
422            h.update_u64(elems.len() as u64);
423            for e in elems {
424                encode_type(h, e, interner);
425            }
426        }
427    }
428}
429
430fn encode_field(h: &mut Hasher, field: &FieldDecl, interner: &ThreadedRodeo) {
431    let FieldDecl {
432        doc: _,
433        visibility: _,
434        name,
435        ty,
436        span: _,
437    } = field;
438    encode_ident(h, name, interner);
439    encode_type(h, ty, interner);
440}
441
442fn encode_anon_field(h: &mut Hasher, field: &AnonStructField, interner: &ThreadedRodeo) {
443    let AnonStructField {
444        doc: _,
445        name,
446        ty,
447        span: _,
448    } = field;
449    encode_ident(h, name, interner);
450    encode_type(h, ty, interner);
451}
452
453fn encode_variant(h: &mut Hasher, v: &EnumVariant, interner: &ThreadedRodeo) {
454    let EnumVariant {
455        doc: _,
456        name,
457        kind,
458        span: _,
459    } = v;
460    encode_ident(h, name, interner);
461    match kind {
462        EnumVariantKind::Unit => {
463            h.update(&[0]);
464        }
465        EnumVariantKind::Tuple(types) => {
466            h.update(&[1]);
467            h.update_u64(types.len() as u64);
468            for t in types {
469                encode_type(h, t, interner);
470            }
471        }
472        EnumVariantKind::Struct(fields) => {
473            h.update(&[2]);
474            h.update_u64(fields.len() as u64);
475            for f in fields {
476                encode_variant_field(h, f, interner);
477            }
478        }
479    }
480}
481
482fn encode_variant_field(h: &mut Hasher, f: &EnumVariantField, interner: &ThreadedRodeo) {
483    let EnumVariantField {
484        doc: _,
485        visibility: _,
486        name,
487        ty,
488        span: _,
489    } = f;
490    encode_ident(h, name, interner);
491    encode_type(h, ty, interner);
492}
493
494fn encode_method_sig(h: &mut Hasher, m: &MethodSig, interner: &ThreadedRodeo) {
495    let MethodSig {
496        doc: _,
497        directives: _,
498        is_unchecked,
499        name,
500        receiver,
501        params,
502        return_type,
503        span: _,
504    } = m;
505    encode_ident(h, name, interner);
506    encode_self_param(h, receiver);
507    encode_params(h, params, interner);
508    encode_return_type(h, return_type.as_ref(), interner);
509    // ADR-0088: `@mark(unchecked)` on an interface method signature
510    // is part of the conformance signature — flip the bit and any
511    // implementor stops conforming. Hash it explicitly so changing
512    // it on a pub interface invalidates downstream AIR caches.
513    h.update(&[*is_unchecked as u8]);
514}
515
516fn encode_method_sig_from_method(h: &mut Hasher, m: &Method, interner: &ThreadedRodeo) {
517    let Method {
518        doc: _,
519        directives: _,
520        visibility: _,
521        is_unchecked,
522        name,
523        receiver,
524        params,
525        return_type,
526        body: _,
527        span: _,
528    } = m;
529    encode_ident(h, name, interner);
530    h.update(&[match receiver {
531        None => 0,
532        Some(_) => 1,
533    }]);
534    if let Some(r) = receiver {
535        encode_self_param(h, r);
536    }
537    encode_params(h, params, interner);
538    encode_return_type(h, return_type.as_ref(), interner);
539    // ADR-0088: mirror `encode_method_sig` so the unchecked-ness of a
540    // pub struct/enum method is part of the signature fingerprint.
541    h.update(&[*is_unchecked as u8]);
542}
543
544fn encode_self_param(h: &mut Hasher, p: &SelfParam) {
545    let SelfParam { kind, span: _ } = p;
546    let tag = match kind {
547        SelfReceiverKind::ByValue => 0,
548        SelfReceiverKind::MutRef => 1,
549        SelfReceiverKind::Ref => 2,
550    };
551    h.update(&[tag]);
552}
553
#[cfg(test)]
mod tests {
    use super::*;
    use gruel_lexer::Lexer;
    use gruel_parser::Parser;
    use gruel_util::FileId;

    /// Lex and parse `source`, returning the AST together with the
    /// interner that owns its Spurs. Panics on lex or parse failure.
    fn parse(source: &str) -> (Ast, ThreadedRodeo) {
        let lexer = Lexer::with_interner_and_file_id(source, ThreadedRodeo::new(), FileId::new(1));
        let (tokens, interner) = lexer.tokenize().unwrap();
        Parser::new(tokens, interner).parse().unwrap()
    }

    /// Parse `source` and fingerprint its public interface.
    fn sig_fp(source: &str) -> CacheKey {
        let (ast, interner) = parse(source);
        compute_sig_fp(&ast, &interner)
    }

    #[test]
    fn empty_program_has_stable_hash() {
        // Pins the encoding of an empty program, i.e. just the
        // leading `[version_le, count_le=0]` framing. A change to
        // SIG_FP_VERSION or to that framing should change this hash.
        let fingerprint = sig_fp("");
        // Golden hex — bumping this requires bumping SIG_FP_VERSION.
        assert_eq!(
            fingerprint.hex(),
            "0e8506853418e7be8ee126da8f4b8289e925f0013189221330ab27c85cac23f7",
        );
    }

    #[test]
    fn private_function_does_not_affect_sig_fp() {
        let without_helper = sig_fp("pub fn add(a: i32, b: i32) -> i32 { a + b }");
        let with_helper =
            sig_fp("pub fn add(a: i32, b: i32) -> i32 { a + b } fn helper() -> i32 { 0 }");
        assert_eq!(without_helper, with_helper);
    }

    #[test]
    fn editing_pub_function_body_does_not_affect_sig_fp() {
        let before = sig_fp("pub fn answer() -> i32 { 42 }");
        let after = sig_fp("pub fn answer() -> i32 { 41 + 1 }");
        assert_eq!(before, after);
    }

    #[test]
    fn editing_pub_function_signature_changes_sig_fp() {
        let returns_i32 = sig_fp("pub fn answer() -> i32 { 42 }");
        let returns_i64 = sig_fp("pub fn answer() -> i64 { 42 }");
        assert_ne!(returns_i32, returns_i64);
    }

    #[test]
    fn renaming_pub_function_changes_sig_fp() {
        let named_foo = sig_fp("pub fn foo() -> i32 { 0 }");
        let named_bar = sig_fp("pub fn bar() -> i32 { 0 }");
        assert_ne!(named_foo, named_bar);
    }

    #[test]
    fn declaration_order_does_not_affect_sig_fp() {
        let a_then_b = sig_fp("pub fn a() -> i32 { 0 } pub fn b() -> i32 { 0 }");
        let b_then_a = sig_fp("pub fn b() -> i32 { 0 } pub fn a() -> i32 { 0 }");
        assert_eq!(a_then_b, b_then_a);
    }

    #[test]
    fn pub_struct_field_change_changes_sig_fp() {
        let y_is_i32 = sig_fp("pub struct Point { pub x: i32, pub y: i32 }");
        let y_is_i64 = sig_fp("pub struct Point { pub x: i32, pub y: i64 }");
        assert_ne!(y_is_i32, y_is_i64);
    }

    #[test]
    fn private_struct_field_does_not_affect_sig_fp() {
        let public_only = sig_fp("pub struct Point { pub x: i32, pub y: i32 }");
        let with_hidden = sig_fp("pub struct Point { pub x: i32, pub y: i32, hidden: i32 }");
        assert_eq!(public_only, with_hidden);
    }

    #[test]
    fn parameter_count_change_changes_sig_fp() {
        let one_param = sig_fp("pub fn f(a: i32) -> i32 { 0 }");
        let two_params = sig_fp("pub fn f(a: i32, b: i32) -> i32 { 0 }");
        assert_ne!(one_param, two_params);
    }
}