gruel_air/sema/mod.rs
//! Semantic analysis - RIR to AIR conversion.
//!
//! Sema performs type checking and converts untyped RIR to typed AIR.
//! This is analogous to Zig's Sema phase.
//!
//! # Module Organization
//!
//! This module is split into several submodules for maintainability.
//! The list below describes the main ones; it is not exhaustive.
//!
//! - [`context`] - Analysis context and helper types (LocalVar, AnalysisContext, etc.)
//! - [`declarations`] - Declaration gathering (register_type_names, resolve_declarations)
//! - [`builtins`] - Built-in type injection (String, etc.)
//! - [`typeck`] - Type resolution and checking helpers
//! - [`analysis`] - Function analysis and type inference coordination
//! - [`airgen`] - RIR instruction to AIR instruction lowering
//! - [`info`] - Function, method, and constant info types
//! - [`gather`] - Declaration gathering output
//! - [`output`] - Semantic analysis output types
//! - [`inference_ctx`] - Pre-computed type information for inference
//! - [`visibility`] - Module visibility checking
//! - [`imports`] - Import resolution and const evaluation
//! - [`anon_structs`] - Anonymous struct structural equality
//! - [`anon_enums`] - Anonymous enum structural equality
//! - [`sema_ctx_builder`] - SemaContext builder for parallel analysis
//! - [`file_paths`] - File path management for multi-file compilation
//!
//! Submodules declared in this file but not described above:
//! `analyze_ops`, `anon_interfaces`, `comptime`, `conformance`, `intrinsics`,
//! `known_symbols`, `lang_items`, `module_path`, `pointer_ops`, `usefulness`,
//! and `vec_methods`.
//!
//! The main entry points are:
//! - [`Sema::new`] - Create a new semantic analyzer
//! - [`Sema::analyze_all`] - Perform full semantic analysis
//! - [`Sema::analyze_all_bodies`] - Analyze function bodies after declarations
31
32mod airgen;
33mod analysis;
34mod analyze_ops;
35mod anon_enums;
36mod anon_interfaces;
37mod anon_structs;
38mod builtins;
39mod comptime;
40mod conformance;
41mod context;
42mod declarations;
43mod file_paths;
44mod gather;
45mod imports;
46mod inference_ctx;
47mod info;
48mod intrinsics;
49mod known_symbols;
50mod lang_items;
51pub mod module_path;
52mod output;
53mod pointer_ops;
54mod sema_ctx_builder;
55mod typeck;
56mod usefulness;
57mod vec_methods;
58mod visibility;
59
60// Public re-exports
61pub use context::ConstValue;
62pub use gather::GatherOutput;
63pub use inference_ctx::InferenceContext;
64pub use info::{AnonMethodSig, ConstInfo, DeriveBinding, DeriveInfo, FunctionInfo, MethodInfo};
65pub use known_symbols::KnownSymbols;
66pub use lang_items::LangItems;
67pub use output::{AnalyzedFunction, InterfaceVtables, SemaOutput};
68
69use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
70
71use gruel_rir::Rir;
72use gruel_util::FileId;
73use gruel_util::{CompileErrors, MultiErrorResult, PreviewFeatures};
74use lasso::{Spur, ThreadedRodeo};
75
76use crate::intern_pool::TypeInternPool;
77use crate::param_arena::ParamArena;
78use crate::types::{EnumId, InterfaceDef, InterfaceId, StructId, Type};
79
80use context::ComptimeHeapItem;
81
82/// Semantic analyzer that converts RIR to AIR.
83pub struct Sema<'a> {
84 pub(crate) rir: &'a Rir,
85 pub(crate) interner: &'a ThreadedRodeo,
86 /// Function table: maps function name symbols to their info
87 pub(crate) functions: HashMap<Spur, FunctionInfo>,
88 /// Struct table: maps struct name symbols to their StructId
89 pub(crate) structs: HashMap<Spur, StructId>,
90 /// Enum table: maps enum name symbols to their EnumId
91 pub(crate) enums: HashMap<Spur, EnumId>,
92 /// Interface table: maps interface name symbols to InterfaceId (ADR-0056).
93 pub(crate) interfaces: HashMap<Spur, InterfaceId>,
94 /// Definitions for each interface. Indexed by InterfaceId.0.
95 pub(crate) interface_defs: Vec<InterfaceDef>,
96 /// Interface bounds on comptime type parameters (ADR-0056 Phase 3).
97 ///
98 /// Keyed by `(owner, param_name)` where `owner` is either a function
99 /// name (top-level functions) or a `"StructName.method"` interned spur
100 /// (methods). Looked up at specialization time to drive `check_conforms`.
101 pub(crate) comptime_interface_bounds: HashMap<(Spur, Spur), InterfaceId>,
102 /// (StructId, InterfaceId) pairs that need a vtable emitted (ADR-0056
103 /// Phase 4d). Populated by sema when a runtime coercion is detected;
104 /// consumed by codegen to emit one `@__vtable__C__I` global per pair.
105 /// The value is the conformance witness — the conforming type's method
106 /// keys in interface declaration order, ready for codegen to look up the
107 /// LLVM function.
108 pub(crate) interface_vtables_needed: InterfaceVtables,
109 /// Method table: maps (struct_id, method_name) to method info
110 pub(crate) methods: HashMap<(StructId, Spur), MethodInfo>,
111 /// Enum method table: maps (enum_id, method_name) to method info
112 pub(crate) enum_methods: HashMap<(EnumId, Spur), MethodInfo>,
113 /// Derive table: maps a derive name to its method-template info
114 /// (ADR-0058). Populated during declaration gathering; consumed by
115 /// `@derive(...)` expansion.
116 pub(crate) derives: HashMap<Spur, DeriveInfo>,
117 /// Pending `@derive(D)` bindings on named struct/enum declarations
118 /// (ADR-0058). Populated during directive resolution; consumed by the
119 /// derive-expansion sub-phase.
120 pub(crate) derive_bindings: Vec<DeriveBinding>,
121 /// Errors raised during anonymous-host derive expansion (ADR-0058).
122 /// The comptime interpreter returns `Option<...>` so it cannot
123 /// propagate these via `?`; we buffer them here and surface after
124 /// analysis so users still see actionable diagnostics for an
125 /// `@derive(...)` error on an anonymous struct/enum.
126 pub(crate) pending_anon_derive_errors: Vec<gruel_util::CompileError>,
127 /// Validation errors raised while evaluating an anonymous struct/enum
128 /// type literal at comptime (empty body, duplicate method names).
129 /// Buffered for the same `Option<...>` reason as `pending_anon_derive_errors`.
130 /// `evaluate_type_ctor_body` drains the entries it caused so the call site
131 /// surfaces the specific error instead of a generic "comptime evaluation
132 /// failed"; any leftover entries are surfaced by `analyze_all` at the end.
133 pub(crate) pending_anon_eval_errors: Vec<gruel_util::CompileError>,
134 /// Constant table: maps const name symbol to const info
135 pub(crate) constants: HashMap<Spur, ConstInfo>,
136 /// Enabled preview features
137 pub(crate) preview_features: PreviewFeatures,
138 /// StructId of the synthetic String type.
139 pub(crate) builtin_string_id: Option<StructId>,
140 /// EnumId of the synthetic Arch enum (for @target_arch intrinsic).
141 pub(crate) builtin_arch_id: Option<EnumId>,
142 /// EnumId of the synthetic Os enum (for @target_os intrinsic).
143 pub(crate) builtin_os_id: Option<EnumId>,
144 /// EnumId of the synthetic TypeKind enum (for @type_info intrinsic).
145 pub(crate) builtin_typekind_id: Option<EnumId>,
146 /// EnumId of the synthetic Ownership enum (for @ownership intrinsic).
147 pub(crate) builtin_ownership_id: Option<EnumId>,
148 /// EnumId of the prelude `ThreadSafety` enum (ADR-0084), used by
149 /// the `@thread_safety` intrinsic to materialize a value of the
150 /// classification ladder.
151 pub(crate) builtin_thread_safety_id: Option<EnumId>,
152 /// EnumId of the prelude `Ordering` enum (ADR-0078 Phase 4: target of
153 /// `Ord::cmp`; analyzed at every `<`/`<=`/`>`/`>=` desugaring on a
154 /// type that conforms to `Ord`).
155 pub(crate) builtin_ordering_id: Option<EnumId>,
156 /// ADR-0079: lang-item registry. Populated from `@lang("…")`
157 /// directives on prelude declarations; the compiler keys
158 /// drop/copy/clone/handle/Eq/Ord/Ordering behaviors off these IDs
159 /// instead of the historical name-string match.
160 pub(crate) lang_items: LangItems,
161 /// Pre-interned known symbols for fast comparison.
162 pub(crate) known: KnownSymbols,
163 /// Type intern pool for unified type representation (ADR-0024 Phase 1).
164 pub(crate) type_pool: TypeInternPool,
165 /// Module registry for tracking imported modules (Phase 1 modules).
166 pub(crate) module_registry: crate::sema_context::ModuleRegistry,
167 /// Maps FileId to source file paths (for module resolution).
168 pub(crate) file_paths: HashMap<FileId, String>,
169 /// Arena storage for function/method parameter data.
170 pub(crate) param_arena: ParamArena,
171 /// Inline destructor bodies keyed by struct id (ADR-0053).
172 ///
173 /// Populated when a struct body contains `fn __drop(self)`. The analysis pass
174 /// looks these up to run `analyze_destructor_function` against the method
175 /// body.
176 pub(crate) inline_struct_drops: HashMap<StructId, (gruel_rir::InstRef, gruel_util::Span)>,
177 /// Inline destructor bodies keyed by enum id (ADR-0053 phase 3b).
178 /// Same contract as `inline_struct_drops` but for enums.
179 pub(crate) inline_enum_drops: HashMap<EnumId, (gruel_rir::InstRef, gruel_util::Span)>,
180 /// Method signatures for anonymous structs, used for structural equality comparison.
181 pub(crate) anon_struct_method_sigs: HashMap<StructId, Vec<AnonMethodSig>>,
182 /// Captured comptime values for anonymous structs.
183 /// When an anonymous struct with methods is created inside a comptime function,
184 /// the comptime parameter values (e.g., N=42 in FixedBuffer(comptime N: i32)) are
185 /// stored here, keyed by StructId. These values become part of type identity:
186 /// FixedBuffer(42) and FixedBuffer(100) are different types.
187 pub(crate) anon_struct_captured_values: HashMap<StructId, HashMap<Spur, ConstValue>>,
188 /// ADR-0082: captured comptime *type* substitutions for anonymous
189 /// structs created from a parameterized comptime function (e.g.
190 /// `pub fn Vec(comptime T: type) -> type { struct { ... } }`). Stores
191 /// `T → I32` per `Vec(I32)` instance. Looked up when analyzing method
192 /// bodies so type names that reference the outer fn's comptime params
193 /// resolve at body-analysis time. Parallels `anon_struct_captured_values`
194 /// for type substitutions instead of value substitutions.
195 pub(crate) anon_struct_type_subst: HashMap<StructId, HashMap<Spur, Type>>,
196 /// Method signatures for anonymous enums, used for structural equality comparison.
197 pub(crate) anon_enum_method_sigs: HashMap<EnumId, Vec<AnonMethodSig>>,
198 /// Captured comptime values for anonymous enums (same semantics as anonymous structs).
199 pub(crate) anon_enum_captured_values: HashMap<EnumId, HashMap<Spur, ConstValue>>,
200 /// ADR-0082: captured comptime *type* substitutions for anonymous
201 /// enums (parallel to `anon_struct_type_subst`).
202 pub(crate) anon_enum_type_subst: HashMap<EnumId, HashMap<Spur, Type>>,
203 /// ADR-0082: registry of `StructId`s produced by instantiating the
204 /// `@lang("vec")` function for some element type `T`. Maps the
205 /// instance struct's `StructId` to the element type. Populated when
206 /// `Vec(T)` is evaluated in type position; consulted by the
207 /// `as_vec_instance` helper used by indexing, slice borrow, drop
208 /// synthesis, and method dispatch.
209 pub(crate) vec_instance_registry: HashMap<StructId, Type>,
210 /// ADR-0082: the prelude `@lang(...)` function whose body is
211 /// currently being evaluated by the comptime interpreter. Set
212 /// transiently by callers of `try_evaluate_const_with_subst` /
213 /// `evaluate_type_ctor_body` so the anon-struct evaluation path
214 /// can detect "this struct is a Vec instance" and populate
215 /// `vec_instance_registry`. `None` outside such evaluations.
216 pub(crate) comptime_ctor_fn: Option<Spur>,
217 /// Loop iteration counter for the current comptime block evaluation.
218 /// Reset to 0 at the start of each `evaluate_comptime_block` call.
219 /// Incremented once per loop iteration; triggers an error when it exceeds
220 /// `COMPTIME_MAX_STEPS` to prevent infinite loops at compile time.
221 pub(crate) comptime_steps_used: u64,
222 /// Pending return value for the comptime interpreter.
223 /// Set by `Ret` instructions inside comptime function bodies; consumed
224 /// immediately by the enclosing `Call` handler in `evaluate_comptime_inst`.
225 pub(crate) comptime_return_value: Option<ConstValue>,
226 /// Current call stack depth in the comptime interpreter.
227 /// Incremented on each comptime `Call`, decremented on return.
228 /// Triggers an error if it exceeds `COMPTIME_CALL_DEPTH_LIMIT`.
229 pub(crate) comptime_call_depth: u32,
230 /// Comptime heap: stores composite values (structs, arrays) created during
231 /// comptime evaluation. `ConstValue::Struct(idx)` and `ConstValue::Array(idx)`
232 /// index into this vec. Cleared at the start of each `evaluate_comptime_block`.
233 pub(crate) comptime_heap: Vec<ComptimeHeapItem>,
234 /// Type overrides for the comptime interpreter during generic function calls.
235 /// When a comptime generic call is executing, type parameters are stored here
236 /// so that enum/struct resolution can find them. Checked before `ctx.comptime_type_vars`.
237 pub(crate) comptime_type_overrides: HashMap<Spur, Type>,
238 /// Buffer for `@dbg` output collected during comptime evaluation.
239 /// Each entry is one formatted line (without trailing newline), matching
240 /// the format of the runtime `__gruel_dbg_*` functions.
241 pub(crate) comptime_dbg_output: Vec<String>,
242 /// Pending warnings for comptime `@dbg` calls. Each entry is (message, span).
243 pub(crate) comptime_log_output: Vec<(String, gruel_util::Span)>,
244 /// When true, comptime `@dbg` does not print to stderr on-the-fly. The output
245 /// is still appended to `comptime_dbg_output` and a warning is still emitted.
246 /// Set by the `--capture-comptime-dbg` CLI flag (used by the fuzzer).
247 pub(crate) suppress_comptime_dbg_print: bool,
248 /// ADR-0076: the in-scope `Self` type, set whenever we resolve types
249 /// inside a struct/enum body (its methods or its inline destructor),
250 /// inside a `derive` splice into a host type, or inside the body of a
251 /// method that was synthesized for a comptime-built anonymous type.
252 /// Consumed by `resolve_type` / `resolve_type_for_comptime_with_subst`
253 /// to substitute the literal symbol `Self` at any depth in a type
254 /// expression. `None` means `Self` is not in scope; using it is an
255 /// error.
256 pub(crate) current_self: Option<Type>,
257 /// The compilation target. Read by `@target_arch()` / `@target_os()`
258 /// so conditional code reflects the *compile* target, not the host.
259 /// Defaults to the host target; the driver overrides via
260 /// [`Sema::set_target`] when a different `--target` is requested.
261 pub(crate) target: gruel_target::Target,
262 /// ADR-0083: names of struct/enum declarations that carry
263 /// `@mark(affine)`. Affine is a Copy *suppressor*: a type whose
264 /// members would otherwise infer Copy is forced to remain Affine.
265 /// Tracked here as a side set because `StructDef.posture =
266 /// Posture::Affine` is the same shape "no declaration" produces.
267 pub(crate) mark_affine_decls: HashSet<Spur>,
268
269 /// ADR-0084: names of struct/enum declarations that carry one of
270 /// the thread-safety override markers (`@mark(unsend)` /
271 /// `@mark(checked_send)` / `@mark(checked_sync)`). Tracked as a
272 /// side map so `validate_consistency` can apply the override after
273 /// computing the structural minimum, mirroring the
274 /// `mark_affine_decls` carve-out for `@mark(affine)`.
275 pub(crate) mark_thread_safety_decls: rustc_hash::FxHashMap<Spur, gruel_builtins::ThreadSafety>,
276}
277
278impl<'a> Sema<'a> {
279 /// Create a new semantic analyzer.
280 pub fn new(
281 rir: &'a Rir,
282 interner: &'a ThreadedRodeo,
283 preview_features: PreviewFeatures,
284 ) -> Self {
285 let type_pool = TypeInternPool::new();
286 Self {
287 rir,
288 interner,
289 functions: HashMap::default(),
290 structs: HashMap::default(),
291 enums: HashMap::default(),
292 interfaces: HashMap::default(),
293 interface_defs: Vec::new(),
294 comptime_interface_bounds: HashMap::default(),
295 interface_vtables_needed: HashMap::default(),
296 methods: HashMap::default(),
297 enum_methods: HashMap::default(),
298 derives: HashMap::default(),
299 derive_bindings: Vec::new(),
300 pending_anon_derive_errors: Vec::new(),
301 pending_anon_eval_errors: Vec::new(),
302 constants: HashMap::default(),
303 preview_features,
304 builtin_string_id: None,
305 builtin_arch_id: None,
306 builtin_os_id: None,
307 builtin_typekind_id: None,
308 builtin_ownership_id: None,
309 builtin_thread_safety_id: None,
310 builtin_ordering_id: None,
311 lang_items: LangItems::default(),
312 known: KnownSymbols::new(interner),
313 type_pool,
314 module_registry: crate::sema_context::ModuleRegistry::new(),
315 file_paths: HashMap::default(),
316 param_arena: ParamArena::new(),
317 inline_struct_drops: HashMap::default(),
318 inline_enum_drops: HashMap::default(),
319 anon_struct_method_sigs: HashMap::default(),
320 anon_struct_captured_values: HashMap::default(),
321 anon_struct_type_subst: HashMap::default(),
322 anon_enum_method_sigs: HashMap::default(),
323 anon_enum_captured_values: HashMap::default(),
324 anon_enum_type_subst: HashMap::default(),
325 vec_instance_registry: HashMap::default(),
326 comptime_ctor_fn: None,
327 comptime_steps_used: 0,
328 comptime_return_value: None,
329 comptime_call_depth: 0,
330 comptime_heap: Vec::new(),
331 comptime_type_overrides: HashMap::default(),
332 comptime_dbg_output: Vec::new(),
333 comptime_log_output: Vec::new(),
334 suppress_comptime_dbg_print: false,
335 current_self: None,
336 target: gruel_target::Target::host(),
337 mark_affine_decls: HashSet::default(),
338 mark_thread_safety_decls: rustc_hash::FxHashMap::default(),
339 }
340 }
341
342 /// Override the compile target read by `@target_arch()` and
343 /// `@target_os()`. Defaults to [`gruel_target::Target::host()`].
344 pub fn set_target(&mut self, target: gruel_target::Target) {
345 self.target = target;
346 }
347
348 /// Configure whether comptime `@dbg` prints to stderr on-the-fly.
349 /// When suppressed, output is still buffered into `comptime_dbg_output`
350 /// and warnings are still emitted.
351 pub fn set_suppress_comptime_dbg_print(&mut self, suppress: bool) {
352 self.suppress_comptime_dbg_print = suppress;
353 }
354
355 /// Perform semantic analysis on the RIR.
356 ///
357 /// This is the main entry point for semantic analysis. It returns analyzed
358 /// functions, struct definitions, enum definitions, and any warnings.
359 pub fn analyze_all(mut self) -> MultiErrorResult<SemaOutput> {
360 // Phase 0: Inject built-in types (String, etc.) before user code
361 self.inject_builtin_types();
362
363 // Phase 1: Register type names
364 // Phase 2: Resolve all declarations (this also validates interface
365 // declarations between struct/enum field resolution and function
366 // gathering — ADR-0056).
367 self.register_type_names().map_err(CompileErrors::from)?;
368 self.resolve_declarations().map_err(CompileErrors::from)?;
369
370 // ADR-0078 Phase 3: cache EnumIds for the prelude-resident builtin
371 // enums (Arch, Os, TypeKind, Ownership) now that the prelude has
372 // been resolved. Intrinsics that produce values of these types
373 // read `builtin_arch_id` etc. directly.
374 self.cache_builtin_enum_ids();
375
376 // Phase 2.5: Evaluate const initializers (e.g., const x = @import(...))
377 self.evaluate_const_initializers()
378 .map_err(CompileErrors::from)?;
379
380 // Delegate to the analysis module for function body analysis
381 analysis::analyze_all_function_bodies(self)
382 }
383
384 /// Analyze all function bodies, assuming declarations are already collected.
385 pub fn analyze_all_bodies(self) -> MultiErrorResult<SemaOutput> {
386 analysis::analyze_all_function_bodies(self)
387 }
388}
389
390#[cfg(test)]
391mod consistency_tests;
392#[cfg(test)]
393mod tests;