// gruel_air/sema/mod.rs

//! Semantic analysis - RIR to AIR conversion.
//!
//! Sema performs type checking and converts untyped RIR to typed AIR.
//! This is analogous to Zig's Sema phase.
//!
//! # Module Organization
//!
//! This module is split into several submodules for maintainability:
//!
//! - [`context`] - Analysis context and helper types (LocalVar, AnalysisContext, etc.)
//! - [`declarations`] - Declaration gathering (register_type_names, resolve_declarations)
//! - [`builtins`] - Built-in type injection (String, etc.)
//! - [`typeck`] - Type resolution and checking helpers
//! - [`analysis`] - Function analysis and type inference coordination
//! - [`airgen`] - RIR instruction to AIR instruction lowering
//! - [`info`] - Function, method, and constant info types
//! - [`gather`] - Declaration gathering output
//! - [`output`] - Semantic analysis output types
//! - [`inference_ctx`] - Pre-computed type information for inference
//! - [`visibility`] - Module visibility checking
//! - [`imports`] - Import resolution and const evaluation
//! - [`anon_structs`] - Anonymous struct structural equality
//! - [`anon_enums`] - Anonymous enum structural equality
//! - [`sema_ctx_builder`] - SemaContext builder for parallel analysis
//! - [`file_paths`] - File path management for multi-file compilation
//!
//! The main entry points are:
//! - [`Sema::new`] - Create a new semantic analyzer
//! - [`Sema::analyze_all`] - Perform full semantic analysis
//! - [`Sema::analyze_all_bodies`] - Analyze function bodies after declarations

32mod airgen;
33mod analysis;
34mod analyze_ops;
35mod anon_enums;
36mod anon_interfaces;
37mod anon_structs;
38mod builtins;
39mod comptime;
40mod conformance;
41mod context;
42mod declarations;
43mod file_paths;
44mod gather;
45mod imports;
46mod inference_ctx;
47mod info;
48mod intrinsics;
49mod known_symbols;
50mod lang_items;
51pub mod module_path;
52mod output;
53mod pointer_ops;
54mod sema_ctx_builder;
55mod typeck;
56mod usefulness;
57mod vec_methods;
58mod visibility;
59
60// Public re-exports
61pub use context::ConstValue;
62pub use gather::GatherOutput;
63pub use inference_ctx::InferenceContext;
64pub use info::{AnonMethodSig, ConstInfo, DeriveBinding, DeriveInfo, FunctionInfo, MethodInfo};
65pub use known_symbols::KnownSymbols;
66pub use lang_items::LangItems;
67pub use output::{AnalyzedFunction, InterfaceVtables, SemaOutput};
68
69use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
70
71use gruel_rir::Rir;
72use gruel_util::FileId;
73use gruel_util::{CompileErrors, MultiErrorResult, PreviewFeatures};
74use lasso::{Spur, ThreadedRodeo};
75
76use crate::intern_pool::TypeInternPool;
77use crate::param_arena::ParamArena;
78use crate::types::{EnumId, InterfaceDef, InterfaceId, StructId, Type};
79
80use context::ComptimeHeapItem;
81
82/// Semantic analyzer that converts RIR to AIR.
83pub struct Sema<'a> {
84    pub(crate) rir: &'a Rir,
85    pub(crate) interner: &'a ThreadedRodeo,
86    /// Function table: maps function name symbols to their info
87    pub(crate) functions: HashMap<Spur, FunctionInfo>,
88    /// Struct table: maps struct name symbols to their StructId
89    pub(crate) structs: HashMap<Spur, StructId>,
90    /// Enum table: maps enum name symbols to their EnumId
91    pub(crate) enums: HashMap<Spur, EnumId>,
92    /// Interface table: maps interface name symbols to InterfaceId (ADR-0056).
93    pub(crate) interfaces: HashMap<Spur, InterfaceId>,
94    /// Definitions for each interface. Indexed by InterfaceId.0.
95    pub(crate) interface_defs: Vec<InterfaceDef>,
96    /// Interface bounds on comptime type parameters (ADR-0056 Phase 3).
97    ///
98    /// Keyed by `(owner, param_name)` where `owner` is either a function
99    /// name (top-level functions) or a `"StructName.method"` interned spur
100    /// (methods). Looked up at specialization time to drive `check_conforms`.
101    pub(crate) comptime_interface_bounds: HashMap<(Spur, Spur), InterfaceId>,
102    /// (StructId, InterfaceId) pairs that need a vtable emitted (ADR-0056
103    /// Phase 4d). Populated by sema when a runtime coercion is detected;
104    /// consumed by codegen to emit one `@__vtable__C__I` global per pair.
105    /// The value is the conformance witness — the conforming type's method
106    /// keys in interface declaration order, ready for codegen to look up the
107    /// LLVM function.
108    pub(crate) interface_vtables_needed: InterfaceVtables,
109    /// Method table: maps (struct_id, method_name) to method info
110    pub(crate) methods: HashMap<(StructId, Spur), MethodInfo>,
111    /// Enum method table: maps (enum_id, method_name) to method info
112    pub(crate) enum_methods: HashMap<(EnumId, Spur), MethodInfo>,
113    /// Derive table: maps a derive name to its method-template info
114    /// (ADR-0058). Populated during declaration gathering; consumed by
115    /// `@derive(...)` expansion.
116    pub(crate) derives: HashMap<Spur, DeriveInfo>,
117    /// Pending `@derive(D)` bindings on named struct/enum declarations
118    /// (ADR-0058). Populated during directive resolution; consumed by the
119    /// derive-expansion sub-phase.
120    pub(crate) derive_bindings: Vec<DeriveBinding>,
121    /// Errors raised during anonymous-host derive expansion (ADR-0058).
122    /// The comptime interpreter returns `Option<...>` so it cannot
123    /// propagate these via `?`; we buffer them here and surface after
124    /// analysis so users still see actionable diagnostics for an
125    /// `@derive(...)` error on an anonymous struct/enum.
126    pub(crate) pending_anon_derive_errors: Vec<gruel_util::CompileError>,
127    /// Validation errors raised while evaluating an anonymous struct/enum
128    /// type literal at comptime (empty body, duplicate method names).
129    /// Buffered for the same `Option<...>` reason as `pending_anon_derive_errors`.
130    /// `evaluate_type_ctor_body` drains the entries it caused so the call site
131    /// surfaces the specific error instead of a generic "comptime evaluation
132    /// failed"; any leftover entries are surfaced by `analyze_all` at the end.
133    pub(crate) pending_anon_eval_errors: Vec<gruel_util::CompileError>,
134    /// Constant table: maps const name symbol to const info
135    pub(crate) constants: HashMap<Spur, ConstInfo>,
136    /// Enabled preview features
137    pub(crate) preview_features: PreviewFeatures,
138    /// StructId of the synthetic String type.
139    pub(crate) builtin_string_id: Option<StructId>,
140    /// EnumId of the synthetic Arch enum (for @target_arch intrinsic).
141    pub(crate) builtin_arch_id: Option<EnumId>,
142    /// EnumId of the synthetic Os enum (for @target_os intrinsic).
143    pub(crate) builtin_os_id: Option<EnumId>,
144    /// EnumId of the synthetic TypeKind enum (for @type_info intrinsic).
145    pub(crate) builtin_typekind_id: Option<EnumId>,
146    /// EnumId of the synthetic Ownership enum (for @ownership intrinsic).
147    pub(crate) builtin_ownership_id: Option<EnumId>,
148    /// EnumId of the prelude `ThreadSafety` enum (ADR-0084), used by
149    /// the `@thread_safety` intrinsic to materialize a value of the
150    /// classification ladder.
151    pub(crate) builtin_thread_safety_id: Option<EnumId>,
152    /// EnumId of the prelude `Ordering` enum (ADR-0078 Phase 4: target of
153    /// `Ord::cmp`; analyzed at every `<`/`<=`/`>`/`>=` desugaring on a
154    /// type that conforms to `Ord`).
155    pub(crate) builtin_ordering_id: Option<EnumId>,
156    /// ADR-0079: lang-item registry. Populated from `@lang("…")`
157    /// directives on prelude declarations; the compiler keys
158    /// drop/copy/clone/handle/Eq/Ord/Ordering behaviors off these IDs
159    /// instead of the historical name-string match.
160    pub(crate) lang_items: LangItems,
161    /// Pre-interned known symbols for fast comparison.
162    pub(crate) known: KnownSymbols,
163    /// Type intern pool for unified type representation (ADR-0024 Phase 1).
164    pub(crate) type_pool: TypeInternPool,
165    /// Module registry for tracking imported modules (Phase 1 modules).
166    pub(crate) module_registry: crate::sema_context::ModuleRegistry,
167    /// Maps FileId to source file paths (for module resolution).
168    pub(crate) file_paths: HashMap<FileId, String>,
169    /// Arena storage for function/method parameter data.
170    pub(crate) param_arena: ParamArena,
171    /// Inline destructor bodies keyed by struct id (ADR-0053).
172    ///
173    /// Populated when a struct body contains `fn __drop(self)`. The analysis pass
174    /// looks these up to run `analyze_destructor_function` against the method
175    /// body.
176    pub(crate) inline_struct_drops: HashMap<StructId, (gruel_rir::InstRef, gruel_util::Span)>,
177    /// Inline destructor bodies keyed by enum id (ADR-0053 phase 3b).
178    /// Same contract as `inline_struct_drops` but for enums.
179    pub(crate) inline_enum_drops: HashMap<EnumId, (gruel_rir::InstRef, gruel_util::Span)>,
180    /// Method signatures for anonymous structs, used for structural equality comparison.
181    pub(crate) anon_struct_method_sigs: HashMap<StructId, Vec<AnonMethodSig>>,
182    /// Captured comptime values for anonymous structs.
183    /// When an anonymous struct with methods is created inside a comptime function,
184    /// the comptime parameter values (e.g., N=42 in FixedBuffer(comptime N: i32)) are
185    /// stored here, keyed by StructId. These values become part of type identity:
186    /// FixedBuffer(42) and FixedBuffer(100) are different types.
187    pub(crate) anon_struct_captured_values: HashMap<StructId, HashMap<Spur, ConstValue>>,
188    /// ADR-0082: captured comptime *type* substitutions for anonymous
189    /// structs created from a parameterized comptime function (e.g.
190    /// `pub fn Vec(comptime T: type) -> type { struct { ... } }`). Stores
191    /// `T → I32` per `Vec(I32)` instance. Looked up when analyzing method
192    /// bodies so type names that reference the outer fn's comptime params
193    /// resolve at body-analysis time. Parallels `anon_struct_captured_values`
194    /// for type substitutions instead of value substitutions.
195    pub(crate) anon_struct_type_subst: HashMap<StructId, HashMap<Spur, Type>>,
196    /// Method signatures for anonymous enums, used for structural equality comparison.
197    pub(crate) anon_enum_method_sigs: HashMap<EnumId, Vec<AnonMethodSig>>,
198    /// Captured comptime values for anonymous enums (same semantics as anonymous structs).
199    pub(crate) anon_enum_captured_values: HashMap<EnumId, HashMap<Spur, ConstValue>>,
200    /// ADR-0082: captured comptime *type* substitutions for anonymous
201    /// enums (parallel to `anon_struct_type_subst`).
202    pub(crate) anon_enum_type_subst: HashMap<EnumId, HashMap<Spur, Type>>,
203    /// ADR-0082: registry of `StructId`s produced by instantiating the
204    /// `@lang("vec")` function for some element type `T`. Maps the
205    /// instance struct's `StructId` to the element type. Populated when
206    /// `Vec(T)` is evaluated in type position; consulted by the
207    /// `as_vec_instance` helper used by indexing, slice borrow, drop
208    /// synthesis, and method dispatch.
209    pub(crate) vec_instance_registry: HashMap<StructId, Type>,
210    /// ADR-0082: the prelude `@lang(...)` function whose body is
211    /// currently being evaluated by the comptime interpreter. Set
212    /// transiently by callers of `try_evaluate_const_with_subst` /
213    /// `evaluate_type_ctor_body` so the anon-struct evaluation path
214    /// can detect "this struct is a Vec instance" and populate
215    /// `vec_instance_registry`. `None` outside such evaluations.
216    pub(crate) comptime_ctor_fn: Option<Spur>,
217    /// Loop iteration counter for the current comptime block evaluation.
218    /// Reset to 0 at the start of each `evaluate_comptime_block` call.
219    /// Incremented once per loop iteration; triggers an error when it exceeds
220    /// `COMPTIME_MAX_STEPS` to prevent infinite loops at compile time.
221    pub(crate) comptime_steps_used: u64,
222    /// Pending return value for the comptime interpreter.
223    /// Set by `Ret` instructions inside comptime function bodies; consumed
224    /// immediately by the enclosing `Call` handler in `evaluate_comptime_inst`.
225    pub(crate) comptime_return_value: Option<ConstValue>,
226    /// Current call stack depth in the comptime interpreter.
227    /// Incremented on each comptime `Call`, decremented on return.
228    /// Triggers an error if it exceeds `COMPTIME_CALL_DEPTH_LIMIT`.
229    pub(crate) comptime_call_depth: u32,
230    /// Comptime heap: stores composite values (structs, arrays) created during
231    /// comptime evaluation. `ConstValue::Struct(idx)` and `ConstValue::Array(idx)`
232    /// index into this vec. Cleared at the start of each `evaluate_comptime_block`.
233    pub(crate) comptime_heap: Vec<ComptimeHeapItem>,
234    /// Type overrides for the comptime interpreter during generic function calls.
235    /// When a comptime generic call is executing, type parameters are stored here
236    /// so that enum/struct resolution can find them. Checked before `ctx.comptime_type_vars`.
237    pub(crate) comptime_type_overrides: HashMap<Spur, Type>,
238    /// Buffer for `@dbg` output collected during comptime evaluation.
239    /// Each entry is one formatted line (without trailing newline), matching
240    /// the format of the runtime `__gruel_dbg_*` functions.
241    pub(crate) comptime_dbg_output: Vec<String>,
242    /// Pending warnings for comptime `@dbg` calls. Each entry is (message, span).
243    pub(crate) comptime_log_output: Vec<(String, gruel_util::Span)>,
244    /// When true, comptime `@dbg` does not print to stderr on-the-fly. The output
245    /// is still appended to `comptime_dbg_output` and a warning is still emitted.
246    /// Set by the `--capture-comptime-dbg` CLI flag (used by the fuzzer).
247    pub(crate) suppress_comptime_dbg_print: bool,
248    /// ADR-0076: the in-scope `Self` type, set whenever we resolve types
249    /// inside a struct/enum body (its methods or its inline destructor),
250    /// inside a `derive` splice into a host type, or inside the body of a
251    /// method that was synthesized for a comptime-built anonymous type.
252    /// Consumed by `resolve_type` / `resolve_type_for_comptime_with_subst`
253    /// to substitute the literal symbol `Self` at any depth in a type
254    /// expression. `None` means `Self` is not in scope; using it is an
255    /// error.
256    pub(crate) current_self: Option<Type>,
257    /// The compilation target. Read by `@target_arch()` / `@target_os()`
258    /// so conditional code reflects the *compile* target, not the host.
259    /// Defaults to the host target; the driver overrides via
260    /// [`Sema::set_target`] when a different `--target` is requested.
261    pub(crate) target: gruel_target::Target,
262    /// ADR-0083: names of struct/enum declarations that carry
263    /// `@mark(affine)`. Affine is a Copy *suppressor*: a type whose
264    /// members would otherwise infer Copy is forced to remain Affine.
265    /// Tracked here as a side set because `StructDef.posture =
266    /// Posture::Affine` is the same shape "no declaration" produces.
267    pub(crate) mark_affine_decls: HashSet<Spur>,
268
269    /// ADR-0084: names of struct/enum declarations that carry one of
270    /// the thread-safety override markers (`@mark(unsend)` /
271    /// `@mark(checked_send)` / `@mark(checked_sync)`). Tracked as a
272    /// side map so `validate_consistency` can apply the override after
273    /// computing the structural minimum, mirroring the
274    /// `mark_affine_decls` carve-out for `@mark(affine)`.
275    pub(crate) mark_thread_safety_decls: rustc_hash::FxHashMap<Spur, gruel_builtins::ThreadSafety>,
276}
277
278impl<'a> Sema<'a> {
279    /// Create a new semantic analyzer.
280    pub fn new(
281        rir: &'a Rir,
282        interner: &'a ThreadedRodeo,
283        preview_features: PreviewFeatures,
284    ) -> Self {
285        let type_pool = TypeInternPool::new();
286        Self {
287            rir,
288            interner,
289            functions: HashMap::default(),
290            structs: HashMap::default(),
291            enums: HashMap::default(),
292            interfaces: HashMap::default(),
293            interface_defs: Vec::new(),
294            comptime_interface_bounds: HashMap::default(),
295            interface_vtables_needed: HashMap::default(),
296            methods: HashMap::default(),
297            enum_methods: HashMap::default(),
298            derives: HashMap::default(),
299            derive_bindings: Vec::new(),
300            pending_anon_derive_errors: Vec::new(),
301            pending_anon_eval_errors: Vec::new(),
302            constants: HashMap::default(),
303            preview_features,
304            builtin_string_id: None,
305            builtin_arch_id: None,
306            builtin_os_id: None,
307            builtin_typekind_id: None,
308            builtin_ownership_id: None,
309            builtin_thread_safety_id: None,
310            builtin_ordering_id: None,
311            lang_items: LangItems::default(),
312            known: KnownSymbols::new(interner),
313            type_pool,
314            module_registry: crate::sema_context::ModuleRegistry::new(),
315            file_paths: HashMap::default(),
316            param_arena: ParamArena::new(),
317            inline_struct_drops: HashMap::default(),
318            inline_enum_drops: HashMap::default(),
319            anon_struct_method_sigs: HashMap::default(),
320            anon_struct_captured_values: HashMap::default(),
321            anon_struct_type_subst: HashMap::default(),
322            anon_enum_method_sigs: HashMap::default(),
323            anon_enum_captured_values: HashMap::default(),
324            anon_enum_type_subst: HashMap::default(),
325            vec_instance_registry: HashMap::default(),
326            comptime_ctor_fn: None,
327            comptime_steps_used: 0,
328            comptime_return_value: None,
329            comptime_call_depth: 0,
330            comptime_heap: Vec::new(),
331            comptime_type_overrides: HashMap::default(),
332            comptime_dbg_output: Vec::new(),
333            comptime_log_output: Vec::new(),
334            suppress_comptime_dbg_print: false,
335            current_self: None,
336            target: gruel_target::Target::host(),
337            mark_affine_decls: HashSet::default(),
338            mark_thread_safety_decls: rustc_hash::FxHashMap::default(),
339        }
340    }
341
342    /// Override the compile target read by `@target_arch()` and
343    /// `@target_os()`. Defaults to [`gruel_target::Target::host()`].
344    pub fn set_target(&mut self, target: gruel_target::Target) {
345        self.target = target;
346    }
347
348    /// Configure whether comptime `@dbg` prints to stderr on-the-fly.
349    /// When suppressed, output is still buffered into `comptime_dbg_output`
350    /// and warnings are still emitted.
351    pub fn set_suppress_comptime_dbg_print(&mut self, suppress: bool) {
352        self.suppress_comptime_dbg_print = suppress;
353    }
354
355    /// Perform semantic analysis on the RIR.
356    ///
357    /// This is the main entry point for semantic analysis. It returns analyzed
358    /// functions, struct definitions, enum definitions, and any warnings.
359    pub fn analyze_all(mut self) -> MultiErrorResult<SemaOutput> {
360        // Phase 0: Inject built-in types (String, etc.) before user code
361        self.inject_builtin_types();
362
363        // Phase 1: Register type names
364        // Phase 2: Resolve all declarations (this also validates interface
365        // declarations between struct/enum field resolution and function
366        // gathering — ADR-0056).
367        self.register_type_names().map_err(CompileErrors::from)?;
368        self.resolve_declarations().map_err(CompileErrors::from)?;
369
370        // ADR-0078 Phase 3: cache EnumIds for the prelude-resident builtin
371        // enums (Arch, Os, TypeKind, Ownership) now that the prelude has
372        // been resolved. Intrinsics that produce values of these types
373        // read `builtin_arch_id` etc. directly.
374        self.cache_builtin_enum_ids();
375
376        // Phase 2.5: Evaluate const initializers (e.g., const x = @import(...))
377        self.evaluate_const_initializers()
378            .map_err(CompileErrors::from)?;
379
380        // Delegate to the analysis module for function body analysis
381        analysis::analyze_all_function_bodies(self)
382    }
383
384    /// Analyze all function bodies, assuming declarations are already collected.
385    pub fn analyze_all_bodies(self) -> MultiErrorResult<SemaOutput> {
386        analysis::analyze_all_function_bodies(self)
387    }
388}
389
// Unit-test modules, compiled only for test builds.
#[cfg(test)]
mod consistency_tests;
#[cfg(test)]
mod tests;