Skip to main content

gruel_compiler/
unit.rs

1//! Unified compilation unit that owns all compilation artifacts.
2//!
//! The [`CompilationUnit`] provides a single source of truth for all compilation state,
//! from source files through to machine code. Phase ordering is checked at runtime:
//! each phase's artifacts are stored as `Option`s that the phase methods populate, and
//! running a phase before its prerequisite (e.g. `lower()` before `parse()`) panics.
6//!
7//! # Example
8//!
9//! ```ignore
10//! use gruel_compiler::{CompilationUnit, SourceFile, CompileOptions};
11//! use gruel_util::FileId;
12//!
13//! // Create source files
14//! let sources = vec![
15//!     SourceFile::new("main.gruel", "fn main() -> i32 { 42 }", FileId::new(1)),
16//! ];
17//!
18//! // Create compilation unit and run phases
//! let mut unit = CompilationUnit::new(sources, CompileOptions::default());
//! unit.parse()?;
//! unit.lower()?;
//! unit.analyze()?;
//! let output = unit.compile()?;
23//! ```
24
25use rustc_hash::FxHashMap as HashMap;
26
27use lasso::ThreadedRodeo;
28use rayon::prelude::*;
29use tracing::{info, info_span};
30
31/// ADR-0085 + ADR-0086: collect the deduplicated, lex-sorted set of
32/// library names declared via `link_extern("…")` / `static_link_extern("…")`
33/// blocks. Each contributes one `-l<name>` flag at the linker step.
34/// ADR-0086: the per-library `LinkMode` controls whether the linker
35/// gets `-Wl,-Bstatic` bracketing (ELF) or `-Wl,-search_paths_first`
36/// (Mach-O) around the `-l<name>`.
37///
38/// ADR-0087: `link_extern("gruel_runtime")` is the prelude's way of
39/// declaring extern fn signatures against the Rust runtime archive.
40/// The archive itself is linked by absolute path (see
41/// `link_system_with_warnings`), so emitting `-lgruel_runtime` would
42/// cause the linker to search for a separate copy and fail. Skip
43/// the name from the library-set; the source-level declaration is
44/// still load-bearing for sema's binding resolution.
45fn collect_extern_link_libraries(
46    rir: &gruel_rir::Rir,
47    interner: &ThreadedRodeo,
48) -> Vec<(String, crate::link::LinkMode)> {
49    use crate::link::LinkMode;
50    use gruel_rir::inst::RirLinkMode;
51    let mut libs: std::collections::BTreeMap<String, LinkMode> = std::collections::BTreeMap::new();
52    let convert = |m: RirLinkMode| match m {
53        RirLinkMode::Dynamic => LinkMode::Dynamic,
54        RirLinkMode::Static => LinkMode::Static,
55    };
56    let is_skipped = |name: &str| name == "gruel_runtime";
57    for ext in rir.extern_fns() {
58        let lib = interner.resolve(&ext.library).to_string();
59        if is_skipped(&lib) {
60            continue;
61        }
62        libs.insert(lib, convert(ext.link_mode));
63    }
64    for (lib, link_mode, _) in rir.empty_link_extern_blocks() {
65        let name = interner.resolve(lib).to_string();
66        if is_skipped(&name) {
67            continue;
68        }
69        libs.insert(name, convert(*link_mode));
70    }
71    libs.into_iter().collect()
72}
73
74use crate::{
75    AnalyzedFunction, Ast, AstGen, Cfg, CfgBuilder, CompileError, CompileErrors, CompileOptions,
76    CompileOutput, CompileWarning, ErrorKind, FunctionWithCfg, Lexer, MultiErrorResult, OptLevel,
77    Parser, Rir, Sema, SourceFile, Type, TypeInternPool, compile_backend,
78};
79use gruel_cache::{CacheKey, CacheStore, Hasher, compiler_fingerprint};
80use gruel_util::FileId;
81
82fn opt_level_to_u32(o: OptLevel) -> u32 {
83    match o {
84        OptLevel::O0 => 0,
85        OptLevel::O1 => 1,
86        OptLevel::O2 => 2,
87        OptLevel::O3 => 3,
88    }
89}
90
91/// Run sema on the merged RIR. Extracted so the AIR-cache miss path
92/// (ADR-0074 Phase 4) can call it in two places without duplicating
93/// the boilerplate.
94fn run_sema(
95    rir: &Rir,
96    interner: &ThreadedRodeo,
97    options: &CompileOptions,
98    file_paths: HashMap<FileId, String>,
99) -> MultiErrorResult<gruel_air::SemaOutput> {
100    let _span = info_span!("sema").entered();
101    let mut sema = Sema::new(rir, interner, options.preview_features.clone());
102    sema.set_file_paths(file_paths);
103    sema.set_suppress_comptime_dbg_print(options.capture_comptime_dbg);
104    sema.set_target(options.target.clone());
105    let output = sema.analyze_all()?;
106    info!(
107        function_count = output.functions.len(),
108        struct_count = output.type_pool.stats().struct_count,
109        "semantic analysis complete"
110    );
111    Ok(output)
112}
113
114/// Clone a slice of analyzed functions for the AIR cache write path.
115fn clone_functions(fns: &[gruel_air::AnalyzedFunction]) -> Vec<gruel_air::AnalyzedFunction> {
116    fns.to_vec()
117}
118
119/// Snapshot a TypeInternPool for the AIR cache write path.
120fn clone_type_pool(pool: &gruel_air::TypeInternPool) -> gruel_air::TypeInternPool {
121    pool.clone_snapshot()
122}
123
124// ADR-0078: the prelude content has moved to `prelude/*.gruel`. The
125// concatenated source is built at parse time via
126// `prelude_source::assemble_prelude_source`, which prefers the on-disk files
127// but falls back to embedded copies (`include_str!`). The virtual prelude
128// source still parses under `FileId::PRELUDE`, so all downstream code
129// (visibility carve-outs, span paths) is unchanged.
130
/// Result of parsing a single file within a compilation unit.
///
/// One entry is produced per parsed file, prelude files included.
#[derive(Debug)]
struct ParsedFileData {
    /// Path to the source file; quoted in the "first defined in …" labels of
    /// duplicate-definition diagnostics.
    path: String,
    /// The parsed abstract syntax tree.
    ast: Ast,
}
139
/// A unified compilation unit that owns all artifacts from source to machine code.
///
/// The compilation unit progresses through phases:
/// 1. **New**: Just source files
/// 2. **Parsed**: ASTs and interner from parsing
/// 3. **Lowered**: RIR (untyped intermediate representation)
/// 4. **Analyzed**: AIR (typed IR) and CFGs for all functions
///
/// Each phase builds on the previous one. Ordering is checked at runtime:
/// every phase stores its artifacts in `Option` fields, and a phase method
/// panics when the artifact it depends on has not been produced yet (for
/// example `lower()` before `parse()`).
///
/// # Thread Safety
///
/// The compilation unit uses [`ThreadedRodeo`] for string interning, which is
/// thread-safe. Parallel operations (like per-function CFG construction) can
/// safely share the interner.
#[derive(Debug)]
pub struct CompilationUnit<'src> {
    // === Configuration ===
    /// Compilation options (target, optimization level, etc.)
    options: CompileOptions,

    // === Source ===
    /// Source files being compiled.
    sources: Vec<SourceFile<'src>>,

    // === Phase 1: Parsing ===
    /// Parsed ASTs for each file (populated by `parse()`).
    parsed_files: Option<Vec<ParsedFileData>>,
    /// Merged AST containing all items (populated by `parse()`).
    merged_ast: Option<Ast>,
    /// String interner shared across all files (populated by `parse()`).
    interner: Option<ThreadedRodeo>,
    /// Maps FileId to source file path (for error messages). Seeded from
    /// `sources` in `new()`; `parse()` adds the prelude files.
    file_paths: HashMap<FileId, String>,

    // === Phase 2: RIR Generation ===
    /// Untyped intermediate representation (populated by `lower()`).
    rir: Option<Rir>,

    // === Phase 3: Semantic Analysis + CFG ===
    /// Analyzed functions with typed IR and control flow graphs.
    functions: Option<Vec<FunctionWithCfg>>,
    /// Type intern pool containing all struct and enum definitions.
    type_pool: Option<TypeInternPool>,
    /// String literals indexed by their string_const index.
    strings: Option<Vec<String>>,
    /// Byte-blob literals from `@embed_file`, indexed by bytes_const index.
    bytes: Option<Vec<Vec<u8>>>,
    /// Warnings collected during compilation.
    warnings: Vec<CompileWarning>,
    /// Interface definitions (ADR-0056), indexed by InterfaceId.0.
    interface_defs: Option<Vec<gruel_air::InterfaceDef>>,
    /// (StructId, InterfaceId) → conformance witness; codegen uses this to
    /// emit one vtable global per pair.
    interface_vtables: Option<gruel_air::InterfaceVtables>,
}
197
198impl<'src> CompilationUnit<'src> {
199    /// Create a new compilation unit from source files.
200    ///
201    /// This initializes the unit with source files but does not run any
202    /// compilation phases. Call [`parse()`](Self::parse), [`lower()`](Self::lower),
203    /// and [`analyze()`](Self::analyze) to progress through compilation.
204    ///
205    /// # Arguments
206    ///
207    /// * `sources` - Source files to compile
208    /// * `options` - Compilation options (target, optimization, etc.)
209    pub fn new(sources: Vec<SourceFile<'src>>, options: CompileOptions) -> Self {
210        let file_paths: HashMap<FileId, String> = sources
211            .iter()
212            .map(|s| (s.file_id, s.path.to_string()))
213            .collect();
214
215        Self {
216            options,
217            sources,
218            parsed_files: None,
219            merged_ast: None,
220            interner: None,
221            file_paths,
222            rir: None,
223            functions: None,
224            type_pool: None,
225            strings: None,
226            bytes: None,
227            warnings: Vec::new(),
228            interface_defs: None,
229            interface_vtables: None,
230        }
231    }
232
233    // =========================================================================
234    // Phase 1: Parsing
235    // =========================================================================
236
237    /// Open the AIR cache for whole-program AIR caching (ADR-0074
238    /// Phase 4). Returns `None` when the preview gate is off, the
239    /// cache_dir is not configured, or the underlying machinery
240    /// (CacheStore / compiler_fingerprint) fails. The cache key
241    /// concatenates every source file's content so any change to any
242    /// file invalidates the whole-program AIR — the per-file
243    /// granularity from the ADR design needs sema to run per-file,
244    /// which is its own refactor.
245    fn open_air_cache(&self) -> Option<(CacheStore, CacheKey)> {
246        let (store, build_fp) = self.open_parse_cache()?;
247        let mut h = Hasher::new();
248        h.update(build_fp.as_bytes());
249        for source in &self.sources {
250            h.update_str(source.path);
251            h.update_str(source.source);
252        }
253        Some((store, h.finalize()))
254    }
255
256    /// Open the parse cache when `cache_dir` is configured. Returns
257    /// `None` if no cache directory is set (e.g. `--no-cache`), or if
258    /// opening the store / hashing the compiler binary fails (in which
259    /// case the build silently continues uncached — correctness is
260    /// preserved).
261    fn open_parse_cache(&self) -> Option<(CacheStore, CacheKey)> {
262        let dir = self.options.cache_dir.as_ref()?;
263
264        let store = match CacheStore::open(dir) {
265            Ok(s) => s,
266            Err(e) => {
267                tracing::warn!(error = %e, dir = %dir.display(), "failed to open cache");
268                return None;
269            }
270        };
271
272        // Compose the compilation fingerprint from compiler-binary hash
273        // + target + opt level + sorted preview features.
274        let bin_path = match gruel_cache::current_binary_path() {
275            Ok(p) => p,
276            Err(e) => {
277                tracing::warn!(error = %e, "could not resolve current binary path");
278                return None;
279            }
280        };
281        let memo_dir = std::env::var_os("HOME")
282            .map(std::path::PathBuf::from)
283            .map(|h| h.join(".cache").join("gruel").join("binary-hash"))
284            .unwrap_or_else(|| std::env::temp_dir().join("gruel-binary-hash"));
285        let compiler_fp = match compiler_fingerprint(&bin_path, &memo_dir) {
286            Ok(fp) => fp,
287            Err(e) => {
288                tracing::warn!(error = %e, "compiler_fingerprint failed");
289                return None;
290            }
291        };
292
293        let mut h = Hasher::new();
294        h.update(compiler_fp.as_bytes());
295        h.update_str(&format!("{}", self.options.target));
296        h.update_u32(opt_level_to_u32(self.options.opt_level));
297        let mut feats: Vec<&'static str> = self
298            .options
299            .preview_features
300            .iter()
301            .map(|f| f.name())
302            .collect();
303        feats.sort_unstable();
304        for f in feats {
305            h.update_str(f);
306        }
307        let build_fp = h.finalize();
308        Some((store, build_fp))
309    }
310
    /// Parse all source files.
    ///
    /// This runs lexing and parsing on the prelude files and then on each
    /// source file, producing ASTs. All files share one string interner,
    /// which is handed to each lexer and parser by value and taken back
    /// afterwards. The ASTs are then merged into a single program, detecting
    /// any duplicate symbol definitions.
    ///
    /// On success the per-file ASTs, the merged AST, and the interner are
    /// stored on the unit; on error none of them are stored.
    ///
    /// # Errors
    ///
    /// Returns errors if:
    /// - Any file (prelude or user) fails to lex or parse
    /// - Duplicate function, struct, or enum definitions are found
    pub fn parse(&mut self) -> MultiErrorResult<()> {
        let _span = info_span!("parse", file_count = self.sources.len()).entered();

        // Parse all files with a shared interner
        let mut parsed_files = Vec::with_capacity(self.sources.len() + 1);
        let mut interner = ThreadedRodeo::new();

        // ADR-0065 / ADR-0078: load the prelude as an implicitly-imported
        // module rooted at `std/_prelude.gruel`. The root is the only file
        // the compiler hand-loads — it uses `@import` internally to pull
        // in `prelude/*.gruel` submodules and re-exports their pub
        // items. Submodules are pre-staged in `file_paths` (and pre-parsed
        // when their disk copy isn't available) so the @import resolver
        // finds them whether the host has an on-disk stdlib or not. Only
        // the root's pub items become globally available; submodule items
        // are accessed only through the root's `pub const` re-exports.
        let resolved = crate::prelude_source::resolved_prelude();
        // Prelude file ids start at `FileId::PRELUDE` and count downwards,
        // one per prelude file; the root takes the id after the last
        // submodule.
        let mut prelude_file_id = FileId::PRELUDE.index();

        // Stage prelude submodules: register their paths in `file_paths`
        // so @import resolution finds them, and pre-parse them so they
        // don't need to hit the disk. `other_std_files` (`_std.gruel`,
        // `math.gruel`, etc.) are NOT staged here — they get loaded only
        // when user code explicitly `@import("std")`s them, via the
        // existing on-disk module resolver.
        for file in &resolved.prelude_dir {
            let file_id = FileId::new(prelude_file_id);
            self.file_paths.insert(file_id, file.path.clone());
            // The lexer and parser each take the interner by value and hand
            // it back alongside their result.
            let lexer = Lexer::with_interner_and_file_id(&file.source, interner, file_id);
            let (tokens, returned_interner) = lexer.tokenize().map_err(CompileErrors::from)?;
            interner = returned_interner;
            let parser = Parser::new(tokens, interner)
                .with_preview_features(self.options.preview_features.clone())
                .with_source(&*file.source);
            let (ast, returned_interner) = parser.parse()?;
            interner = returned_interner;
            parsed_files.push(ParsedFileData {
                path: file.path.clone(),
                ast,
            });
            prelude_file_id = prelude_file_id.wrapping_sub(1);
        }

        // Parse the prelude root itself (`std/_prelude.gruel`).
        let root_file_id = FileId::new(prelude_file_id);
        self.file_paths
            .insert(root_file_id, resolved.root.path.clone());
        let root_lexer =
            Lexer::with_interner_and_file_id(&resolved.root.source, interner, root_file_id);
        let (root_tokens, returned_interner) =
            root_lexer.tokenize().map_err(CompileErrors::from)?;
        interner = returned_interner;
        let root_parser = Parser::new(root_tokens, interner)
            .with_preview_features(self.options.preview_features.clone())
            .with_source(&*resolved.root.source);
        let (root_ast, returned_interner) = root_parser.parse()?;
        interner = returned_interner;
        parsed_files.push(ParsedFileData {
            path: resolved.root.path,
            ast: root_ast,
        });

        // ADR-0074: when a cache_dir is configured, route user-file
        // parsing through the on-disk cache. The prelude (above) is
        // always parsed uncached because its source is a constant in
        // the binary; its Spurs are already in `interner`, which the
        // cache wiring then reuses as the build-shared interner.
        let cache_handle = self.open_parse_cache();
        if let Some((store, build_fp)) = cache_handle {
            let (cached_files, stats) = crate::parse_cache::parse_files_into(
                &interner,
                &self.sources,
                &self.options.preview_features,
                &store,
                &build_fp,
            )?;
            info!(
                hits = stats.hits,
                misses = stats.misses,
                files = self.sources.len(),
                "parse cache complete"
            );
            for file in cached_files {
                parsed_files.push(ParsedFileData {
                    path: file.path,
                    ast: file.ast,
                });
            }
        } else {
            // No usable cache: lex and parse every user file directly.
            for source in &self.sources {
                let _file_span = info_span!("parse_file", path = %source.path).entered();

                // Create lexer with shared interner and file ID
                let lexer =
                    Lexer::with_interner_and_file_id(source.source, interner, source.file_id);

                // Tokenize
                let (tokens, returned_interner) = lexer.tokenize().map_err(CompileErrors::from)?;
                interner = returned_interner;

                info!(token_count = tokens.len(), "lexing complete");

                // Parse
                let parser = Parser::new(tokens, interner)
                    .with_preview_features(self.options.preview_features.clone())
                    .with_source(source.source);
                let (ast, returned_interner) = parser.parse()?;
                interner = returned_interner;

                info!(item_count = ast.items.len(), "parsing complete");

                parsed_files.push(ParsedFileData {
                    path: source.path.to_string(),
                    ast,
                });
            }
        }

        // Merge symbols and check for duplicates
        let merged_ast = self.merge_symbols(&parsed_files, &interner)?;

        // Publish the phase results only once everything has succeeded.
        self.parsed_files = Some(parsed_files);
        self.merged_ast = Some(merged_ast);
        self.interner = Some(interner);

        Ok(())
    }
449
450    /// Merge symbols from all parsed files, checking for duplicates.
451    fn merge_symbols(
452        &self,
453        files: &[ParsedFileData],
454        interner: &ThreadedRodeo,
455    ) -> MultiErrorResult<Ast> {
456        use crate::{Item, Span};
457
458        /// Information about a symbol definition for duplicate detection.
459        struct SymbolDef {
460            span: Span,
461            file_path: String,
462        }
463
464        let _span = info_span!("merge_symbols", file_count = files.len()).entered();
465
466        let mut functions: HashMap<String, SymbolDef> = HashMap::default();
467        let mut structs: HashMap<String, SymbolDef> = HashMap::default();
468        let mut enums: HashMap<String, SymbolDef> = HashMap::default();
469        let mut all_items = Vec::new();
470        let mut errors = Vec::new();
471
472        for file in files {
473            for item in &file.ast.items {
474                match item {
475                    Item::Function(func) => {
476                        let name = interner.resolve(&func.name.name).to_string();
477                        if let Some(first) = functions.get(&name) {
478                            errors.push(
479                                CompileError::new(
480                                    ErrorKind::DuplicateTypeDefinition {
481                                        type_name: format!("function `{}`", name),
482                                    },
483                                    func.span,
484                                )
485                                .with_label(
486                                    format!("first defined in {}", first.file_path),
487                                    first.span,
488                                ),
489                            );
490                        } else {
491                            functions.insert(
492                                name,
493                                SymbolDef {
494                                    span: func.span,
495                                    file_path: file.path.clone(),
496                                },
497                            );
498                        }
499                    }
500                    Item::Struct(s) => {
501                        let name = interner.resolve(&s.name.name).to_string();
502                        if let Some(first) = structs.get(&name) {
503                            errors.push(
504                                CompileError::new(
505                                    ErrorKind::DuplicateTypeDefinition {
506                                        type_name: format!("struct `{}`", name),
507                                    },
508                                    s.span,
509                                )
510                                .with_label(
511                                    format!("first defined in {}", first.file_path),
512                                    first.span,
513                                ),
514                            );
515                        } else if let Some(first) = enums.get(&name) {
516                            errors.push(
517                                CompileError::new(
518                                    ErrorKind::DuplicateTypeDefinition {
519                                        type_name: format!(
520                                            "struct `{}` (conflicts with enum)",
521                                            name
522                                        ),
523                                    },
524                                    s.span,
525                                )
526                                .with_label(
527                                    format!("enum first defined in {}", first.file_path),
528                                    first.span,
529                                ),
530                            );
531                        } else {
532                            structs.insert(
533                                name,
534                                SymbolDef {
535                                    span: s.span,
536                                    file_path: file.path.clone(),
537                                },
538                            );
539                        }
540                    }
541                    Item::Enum(e) => {
542                        let name = interner.resolve(&e.name.name).to_string();
543                        if let Some(first) = enums.get(&name) {
544                            errors.push(
545                                CompileError::new(
546                                    ErrorKind::DuplicateTypeDefinition {
547                                        type_name: format!("enum `{}`", name),
548                                    },
549                                    e.span,
550                                )
551                                .with_label(
552                                    format!("first defined in {}", first.file_path),
553                                    first.span,
554                                ),
555                            );
556                        } else if let Some(first) = structs.get(&name) {
557                            errors.push(
558                                CompileError::new(
559                                    ErrorKind::DuplicateTypeDefinition {
560                                        type_name: format!(
561                                            "enum `{}` (conflicts with struct)",
562                                            name
563                                        ),
564                                    },
565                                    e.span,
566                                )
567                                .with_label(
568                                    format!("struct first defined in {}", first.file_path),
569                                    first.span,
570                                ),
571                            );
572                        } else {
573                            enums.insert(
574                                name,
575                                SymbolDef {
576                                    span: e.span,
577                                    file_path: file.path.clone(),
578                                },
579                            );
580                        }
581                    }
582                    Item::Interface(_) => {
583                        // Interfaces (ADR-0056) are validated in Sema; cross-file
584                        // duplicate detection is added in Phase 2.
585                    }
586                    Item::Derive(_) => {
587                        // Derives (ADR-0058) are validated in Sema; cross-file
588                        // duplicate detection follows the interface model.
589                    }
590                    Item::Const(_) => {
591                        // Validated in Sema
592                    }
593                    Item::LinkExtern(_) => {
594                        // ADR-0085: extern fn declarations live on the
595                        // RIR side-table; cross-file duplicate detection
596                        // happens in sema.
597                    }
598                    Item::Error(_) => {
599                        // Error nodes from parser recovery are skipped
600                    }
601                }
602                all_items.push(item.clone());
603            }
604        }
605
606        if !errors.is_empty() {
607            return Err(CompileErrors::from(errors));
608        }
609
610        info!(
611            function_count = functions.len(),
612            struct_count = structs.len(),
613            enum_count = enums.len(),
614            "symbol merging complete"
615        );
616
617        Ok(Ast {
618            module_doc: None,
619            items: all_items,
620        })
621    }
622
623    // =========================================================================
624    // Phase 2: RIR Generation
625    // =========================================================================
626
627    /// Generate untyped intermediate representation (RIR).
628    ///
629    /// This transforms the merged AST into RIR, which is a more uniform
630    /// representation suitable for semantic analysis.
631    ///
632    /// # Panics
633    ///
634    /// Panics if called before [`parse()`](Self::parse).
635    pub fn lower(&mut self) -> MultiErrorResult<()> {
636        let ast = self
637            .merged_ast
638            .as_ref()
639            .expect("lower() called before parse()");
640        let interner = self.interner.as_ref().expect("interner not initialized");
641
642        let _span = info_span!("astgen").entered();
643
644        let astgen = AstGen::new(ast, interner);
645        let rir = astgen.generate();
646
647        info!(instruction_count = rir.len(), "RIR generation complete");
648
649        self.rir = Some(rir);
650        Ok(())
651    }
652
653    // =========================================================================
654    // Phase 3: Semantic Analysis + CFG Construction
655    // =========================================================================
656
657    /// Perform semantic analysis and build control flow graphs.
658    ///
659    /// This runs type checking, symbol resolution, and other semantic checks,
660    /// then builds CFGs for each function. Optimizations are applied based
661    /// on the configured optimization level.
662    ///
663    /// # Panics
664    ///
665    /// Panics if called before [`lower()`](Self::lower).
666    pub fn analyze(&mut self) -> MultiErrorResult<()> {
667        let rir = self.rir.as_ref().expect("analyze() called before lower()");
668        let interner = self.interner.as_ref().expect("interner not initialized");
669
670        // ADR-0074 Phase 4: try the whole-program AIR cache first.
671        // Per-file AIR caching needs sema to run per-file (currently
672        // it runs program-wide on the merged AST). Whole-program is
673        // coarser but exercises the full cache pipeline end-to-end.
674        let air_cache_handle = self.open_air_cache();
675
676        // Semantic analysis
677        let sema_output = if let Some((store, key)) = &air_cache_handle {
678            match store.get(gruel_cache::CacheKind::Air, key) {
679                Ok(Some(bytes)) => match gruel_cache::CachedAirOutput::decode(&bytes) {
680                    Ok(cached) => {
681                        info!("air cache hit");
682                        // Restore the build's interner to the cached
683                        // state. This is sound because the AIR cache is
684                        // whole-program — every Spur in cached AIR was
685                        // produced against this snapshot. Replay
686                        // re-interns each cached string back into the
687                        // build's interner; for newly-empty interners
688                        // (typical at this point), this restores the
689                        // original Spur values.
690                        let _remap = cached.interner.restore_into(interner);
691                        // Replay comptime @dbg output to stderr so cache
692                        // hits are observably identical to cold builds
693                        // (ADR-0074 "Comptime side-effects replay").
694                        if !self.options.capture_comptime_dbg {
695                            for line in &cached.comptime_dbg_output {
696                                eprintln!("{}", line);
697                            }
698                        }
699                        // Note: warnings are not cached yet (DiagnosticWrapper
700                        // serde is its own follow-up); cache hits omit
701                        // them. Documented in ADR-0074.
702                        gruel_air::SemaOutput {
703                            functions: cached.functions,
704                            strings: cached.strings,
705                            bytes: cached.bytes,
706                            warnings: Vec::new(),
707                            type_pool: cached.type_pool,
708                            comptime_dbg_output: cached.comptime_dbg_output,
709                            interface_defs: cached.interface_defs,
710                            interface_vtables: cached.interface_vtables,
711                        }
712                    }
713                    Err(e) => {
714                        tracing::warn!(error = %e, "air cache decode failed; recomputing");
715                        run_sema(rir, interner, &self.options, self.file_paths.clone())?
716                    }
717                },
718                _ => {
719                    info!("air cache miss");
720                    let output = run_sema(rir, interner, &self.options, self.file_paths.clone())?;
721                    // Best-effort cache write; failure is not a build error.
722                    let cached = gruel_cache::CachedAirOutput {
723                        interner: gruel_cache::InternerSnapshot::capture(interner),
724                        functions: clone_functions(&output.functions),
725                        type_pool: clone_type_pool(&output.type_pool),
726                        strings: output.strings.clone(),
727                        bytes: output.bytes.clone(),
728                        interface_defs: output.interface_defs.clone(),
729                        interface_vtables: output.interface_vtables.clone(),
730                        comptime_dbg_output: output.comptime_dbg_output.clone(),
731                    };
732                    match cached.encode() {
733                        Ok(bytes) => {
734                            if let Err(e) = store.put(gruel_cache::CacheKind::Air, key, &bytes) {
735                                tracing::warn!(error = %e, "air cache write failed");
736                            }
737                        }
738                        Err(e) => tracing::warn!(error = %e, "air cache encode failed"),
739                    }
740                    output
741                }
742            }
743        } else {
744            run_sema(rir, interner, &self.options, self.file_paths.clone())?
745        };
746
747        // Synthesize drop glue functions
748        let drop_glue_functions =
749            crate::drop_glue::synthesize_drop_glue(&sema_output.type_pool, interner);
750        // ADR-0079: clone glue retired — prelude `derive Clone` emits
751        // the clone method via the standard derive-expansion path.
752
753        // Combine user functions with drop glue, filtering out comptime-only functions
754        let all_functions: Vec<_> = sema_output
755            .functions
756            .into_iter()
757            .filter(|f| f.air.return_type() != Type::COMPTIME_TYPE)
758            .chain(drop_glue_functions)
759            .collect();
760
761        // Build CFGs in parallel
762        let interner_ref = self.interner.as_ref().expect("interner not initialized");
763        let (functions, cfg_warnings) =
764            self.build_cfgs(all_functions, &sema_output.type_pool, interner_ref);
765
766        self.functions = Some(functions);
767        self.type_pool = Some(sema_output.type_pool);
768        self.strings = Some(sema_output.strings);
769        self.bytes = Some(sema_output.bytes);
770        self.warnings.extend(sema_output.warnings);
771        self.warnings.extend(cfg_warnings);
772        self.interface_defs = Some(sema_output.interface_defs);
773        self.interface_vtables = Some(sema_output.interface_vtables);
774
775        Ok(())
776    }
777
778    /// Build CFGs for all functions in parallel.
779    fn build_cfgs(
780        &self,
781        functions: Vec<AnalyzedFunction>,
782        type_pool: &TypeInternPool,
783        interner: &ThreadedRodeo,
784    ) -> (Vec<FunctionWithCfg>, Vec<CompileWarning>) {
785        let _span = info_span!("cfg_construction").entered();
786
787        let results: Vec<(FunctionWithCfg, Vec<CompileWarning>)> = functions
788            .into_par_iter()
789            .map(|func| {
790                let cfg_output = CfgBuilder::build(&func, type_pool, interner);
791
792                (
793                    FunctionWithCfg {
794                        analyzed: func,
795                        cfg: cfg_output.cfg,
796                    },
797                    cfg_output.warnings,
798                )
799            })
800            .collect();
801
802        let mut functions = Vec::with_capacity(results.len());
803        let mut warnings = Vec::new();
804        for (func, func_warnings) in results {
805            functions.push(func);
806            warnings.extend(func_warnings);
807        }
808
809        info!(
810            function_count = functions.len(),
811            "CFG construction complete"
812        );
813
814        (functions, warnings)
815    }
816
817    // =========================================================================
818    // Phase 4: Code Generation + Linking
819    // =========================================================================
820
821    /// Generate machine code and link into an executable.
822    ///
823    /// This is the final compilation phase. It generates machine code for
824    /// all functions and links them into an executable binary.
825    ///
826    /// # Panics
827    ///
828    /// Panics if called before [`analyze()`](Self::analyze).
829    pub fn compile(&self) -> MultiErrorResult<CompileOutput> {
830        let functions = self
831            .functions
832            .as_ref()
833            .expect("compile() called before analyze()");
834        let type_pool = self.type_pool.as_ref().expect("type_pool not available");
835        let strings = self.strings.as_ref().expect("strings not available");
836        let bytes = self.bytes.as_ref().expect("bytes not available");
837        let interner = self.interner.as_ref().expect("interner not available");
838
839        let empty_iface_defs: Vec<gruel_air::InterfaceDef> = Vec::new();
840        let empty_iface_vtables: gruel_air::InterfaceVtables = rustc_hash::FxHashMap::default();
841        let interface_defs = self.interface_defs.as_ref().unwrap_or(&empty_iface_defs);
842        let interface_vtables = self
843            .interface_vtables
844            .as_ref()
845            .unwrap_or(&empty_iface_vtables);
846        // ADR-0085: collect the deduplicated, lex-sorted set of library
847        // names declared by `link_extern("…")` blocks (including empty
848        // ones). Linker turns each into a `-l<name>` flag.
849        let rir = self.rir.as_ref().expect("compile() called before lower()");
850        let extra_link_libraries = collect_extern_link_libraries(rir, interner);
851
852        let inputs = crate::BackendInputs {
853            functions,
854            type_pool,
855            strings,
856            bytes,
857            interner,
858            interface_defs,
859            interface_vtables,
860            target: &self.options.target,
861            extra_link_libraries: &extra_link_libraries,
862        };
863
864        // ADR-0074 Phase 5: bitcode cache. If the AIR cache is configured
865        // and air_key matches, we may have cached pre-optimization LLVM
866        // bitcode that lets us skip the AIR→IR translation step. The
867        // LLVM optimizer + back-end + linker still run on every build.
868        if let Some((store, key)) = self.open_air_cache() {
869            return self.compile_with_bitcode_cache(&inputs, &store, &key);
870        }
871        compile_backend(&inputs, &self.options, &self.warnings)
872    }
873
874    /// Codegen path that consults the LLVM bitcode cache (ADR-0074
875    /// Phase 5). On hit, parses the cached bitcode and runs the
876    /// optimizer + back-end on it. On miss, generates bitcode, writes
877    /// it to the cache, then runs optimizer + back-end. Either way the
878    /// optimizer pipeline runs — the cache only saves the AIR→IR
879    /// translation step.
880    fn compile_with_bitcode_cache(
881        &self,
882        inputs: &crate::BackendInputs<'_>,
883        store: &CacheStore,
884        air_key: &CacheKey,
885    ) -> MultiErrorResult<CompileOutput> {
886        // Check for main function (matches compile_backend).
887        let _main_fn = inputs
888            .functions
889            .iter()
890            .find(|f| f.analyzed.name == "main")
891            .ok_or_else(|| {
892                CompileErrors::from(CompileError::without_span(ErrorKind::NoMainFunction))
893            })?;
894
895        // Bitcode cache key is the same as air_key — bitcode is a
896        // deterministic function of AIR, and cached AIR keys already
897        // factor in everything that influences codegen (target, opt
898        // level, preview features, source content, compiler binary).
899        let cfgs: Vec<&Cfg> = inputs.functions.iter().map(|f| &f.cfg).collect();
900        let codegen_inputs = inputs.to_codegen_inputs(&cfgs);
901
902        let object_bytes = match store.get(gruel_cache::CacheKind::LlvmIr, air_key) {
903            Ok(Some(bitcode)) => {
904                info!("bitcode cache hit");
905                gruel_codegen_llvm::compile_bitcode_to_object(
906                    &bitcode,
907                    self.options.opt_level,
908                    &self.options.target,
909                )
910                .map_err(CompileErrors::from)?
911            }
912            _ => {
913                info!("bitcode cache miss");
914                let bitcode = gruel_codegen_llvm::generate_bitcode(&codegen_inputs)
915                    .map_err(CompileErrors::from)?;
916                if let Err(e) = store.put(gruel_cache::CacheKind::LlvmIr, air_key, &bitcode) {
917                    tracing::warn!(error = %e, "bitcode cache write failed");
918                }
919                gruel_codegen_llvm::compile_bitcode_to_object(
920                    &bitcode,
921                    self.options.opt_level,
922                    &self.options.target,
923                )
924                .map_err(CompileErrors::from)?
925            }
926        };
927
928        // Reuse the same link tail as generate_llvm_objects_and_link.
929        let object_files = vec![object_bytes];
930        let linker_cmd = match &self.options.linker {
931            crate::LinkerMode::System(cmd) => cmd.clone(),
932            crate::LinkerMode::Internal => "cc".to_string(),
933        };
934        crate::link::link_system_with_warnings(
935            &self.options,
936            &object_files,
937            &linker_cmd,
938            &self.warnings,
939            inputs.extra_link_libraries,
940        )
941    }
942
943    // =========================================================================
944    // Convenience Methods
945    // =========================================================================
946
947    /// Run all frontend phases (parse, lower, analyze).
948    ///
949    /// This is a convenience method that runs the complete frontend pipeline.
950    /// Equivalent to calling `parse()`, `lower()`, and `analyze()` in sequence.
951    pub fn run_frontend(&mut self) -> MultiErrorResult<()> {
952        self.parse()?;
953        self.lower()?;
954        self.analyze()?;
955        Ok(())
956    }
957
958    /// Run all phases and produce a compiled binary.
959    ///
960    /// This is a convenience method that runs the complete compilation pipeline.
961    /// Equivalent to calling `run_frontend()` followed by `compile()`, with an
962    /// early `NoMainFunction` check after parsing so we surface the missing-main
963    /// error directly instead of letting sema's "analyze every top-level fn"
964    /// fallback churn through the prelude and report something unrelated first.
965    pub fn run_all(&mut self) -> MultiErrorResult<CompileOutput> {
966        self.parse()?;
967        self.require_user_main()?;
968        self.lower()?;
969        self.analyze()?;
970        self.compile()
971    }
972
973    /// Bail out with `NoMainFunction` if the merged AST has no `fn main` in any
974    /// user source file. Prelude files get auto-assigned `FileId`s during
975    /// `parse()` that don't appear in `self.sources`; restricting the search to
976    /// user-provided file IDs keeps the helper robust even if a future prelude
977    /// module ever defined an internal `main` helper.
978    fn require_user_main(&self) -> MultiErrorResult<()> {
979        use gruel_parser::ast::Item;
980        let ast = self
981            .merged_ast
982            .as_ref()
983            .expect("require_user_main called before parse()");
984        let interner = self.interner.as_ref().expect("interner not initialized");
985        let user_file_ids: rustc_hash::FxHashSet<u32> =
986            self.sources.iter().map(|s| s.file_id.index()).collect();
987        let has_user_main = ast.items.iter().any(|item| {
988            let Item::Function(f) = item else {
989                return false;
990            };
991            if !user_file_ids.contains(&f.name.span.file_id.index()) {
992                return false;
993            }
994            interner.resolve(&f.name.name) == "main"
995        });
996        if !has_user_main {
997            return Err(CompileErrors::from(CompileError::without_span(
998                ErrorKind::NoMainFunction,
999            )));
1000        }
1001        Ok(())
1002    }
1003
1004    /// Check if parsing has been completed.
1005    pub fn is_parsed(&self) -> bool {
1006        self.merged_ast.is_some()
1007    }
1008
1009    /// Check if RIR generation has been completed.
1010    pub fn is_lowered(&self) -> bool {
1011        self.rir.is_some()
1012    }
1013
1014    /// Check if semantic analysis has been completed.
1015    pub fn is_analyzed(&self) -> bool {
1016        self.functions.is_some()
1017    }
1018
1019    // =========================================================================
1020    // Accessors
1021    // =========================================================================
1022
1023    /// Get the compilation options.
1024    pub fn options(&self) -> &CompileOptions {
1025        &self.options
1026    }
1027
1028    /// Get the merged AST (after parsing).
1029    ///
1030    /// # Panics
1031    ///
1032    /// Panics if called before [`parse()`](Self::parse).
1033    pub fn ast(&self) -> &Ast {
1034        self.merged_ast
1035            .as_ref()
1036            .expect("ast() called before parse()")
1037    }
1038
1039    /// Get the string interner.
1040    ///
1041    /// # Panics
1042    ///
1043    /// Panics if called before [`parse()`](Self::parse).
1044    pub fn interner(&self) -> &ThreadedRodeo {
1045        self.interner
1046            .as_ref()
1047            .expect("interner() called before parse()")
1048    }
1049
1050    /// Get the RIR (after lowering).
1051    ///
1052    /// # Panics
1053    ///
1054    /// Panics if called before [`lower()`](Self::lower).
1055    pub fn rir(&self) -> &Rir {
1056        self.rir.as_ref().expect("rir() called before lower()")
1057    }
1058
1059    /// Get the analyzed functions with CFGs (after analysis).
1060    ///
1061    /// # Panics
1062    ///
1063    /// Panics if called before [`analyze()`](Self::analyze).
1064    pub fn functions(&self) -> &[FunctionWithCfg] {
1065        self.functions
1066            .as_ref()
1067            .expect("functions() called before analyze()")
1068    }
1069
1070    /// Get the type pool (after analysis).
1071    ///
1072    /// # Panics
1073    ///
1074    /// Panics if called before [`analyze()`](Self::analyze).
1075    pub fn type_pool(&self) -> &TypeInternPool {
1076        self.type_pool
1077            .as_ref()
1078            .expect("type_pool() called before analyze()")
1079    }
1080
1081    /// Get string literals (after analysis).
1082    ///
1083    /// # Panics
1084    ///
1085    /// Panics if called before [`analyze()`](Self::analyze).
1086    pub fn strings(&self) -> &[String] {
1087        self.strings
1088            .as_ref()
1089            .expect("strings() called before analyze()")
1090    }
1091
1092    /// Get all warnings collected during compilation.
1093    pub fn warnings(&self) -> &[CompileWarning] {
1094        &self.warnings
1095    }
1096
1097    /// Get the file paths map.
1098    pub fn file_paths(&self) -> &HashMap<FileId, String> {
1099        &self.file_paths
1100    }
1101
1102    /// Take the interner out of the compilation unit.
1103    ///
1104    /// This is useful when you need ownership of the interner (e.g., for
1105    /// code generation).
1106    ///
1107    /// # Panics
1108    ///
1109    /// Panics if called before [`parse()`](Self::parse) or if the interner
1110    /// has already been taken.
1111    pub fn take_interner(&mut self) -> ThreadedRodeo {
1112        self.interner
1113            .take()
1114            .expect("interner not available (not parsed or already taken)")
1115    }
1116
1117    /// Take the functions out of the compilation unit.
1118    ///
1119    /// # Panics
1120    ///
1121    /// Panics if called before [`analyze()`](Self::analyze) or if the
1122    /// functions have already been taken.
1123    pub fn take_functions(&mut self) -> Vec<FunctionWithCfg> {
1124        self.functions
1125            .take()
1126            .expect("functions not available (not analyzed or already taken)")
1127    }
1128
1129    /// Take the type pool out of the compilation unit.
1130    ///
1131    /// # Panics
1132    ///
1133    /// Panics if called before [`analyze()`](Self::analyze) or if the
1134    /// type pool has already been taken.
1135    pub fn take_type_pool(&mut self) -> TypeInternPool {
1136        self.type_pool
1137            .take()
1138            .expect("type_pool not available (not analyzed or already taken)")
1139    }
1140
1141    /// Take the strings out of the compilation unit.
1142    ///
1143    /// # Panics
1144    ///
1145    /// Panics if called before [`analyze()`](Self::analyze) or if the
1146    /// strings have already been taken.
1147    pub fn take_strings(&mut self) -> Vec<String> {
1148        self.strings
1149            .take()
1150            .expect("strings not available (not analyzed or already taken)")
1151    }
1152
1153    /// Take the warnings out of the compilation unit.
1154    pub fn take_warnings(&mut self) -> Vec<CompileWarning> {
1155        std::mem::take(&mut self.warnings)
1156    }
1157}
1158
1159#[cfg(test)]
1160mod tests {
1161    use super::*;
1162    use crate::FileId;
1163
1164    fn make_sources(source: &str) -> Vec<SourceFile<'_>> {
1165        vec![SourceFile::new("<test>", source, FileId::new(1))]
1166    }
1167
1168    #[test]
1169    fn test_compilation_unit_basic() {
1170        let sources = make_sources("fn main() -> i32 { 42 }");
1171        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1172
1173        assert!(!unit.is_parsed());
1174        assert!(!unit.is_lowered());
1175        assert!(!unit.is_analyzed());
1176
1177        unit.run_frontend().unwrap();
1178
1179        assert!(unit.is_parsed());
1180        assert!(unit.is_lowered());
1181        assert!(unit.is_analyzed());
1182        // ADR-0071 added char__from_u32 / char__is_ascii / char__len_utf8 /
1183        // char__encode_utf8 plus Option/Result methods to the prelude; the
1184        // analysed function count includes those plus user-defined `main`.
1185        assert!(unit.functions().len() >= 1);
1186    }
1187
1188    #[test]
1189    fn test_phase_ordering() {
1190        let sources = make_sources("fn main() -> i32 { 42 }");
1191        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1192
1193        // Parse first
1194        unit.parse().unwrap();
1195        assert!(unit.is_parsed());
1196        assert!(!unit.is_lowered());
1197
1198        // Then lower
1199        unit.lower().unwrap();
1200        assert!(unit.is_lowered());
1201        assert!(!unit.is_analyzed());
1202
1203        // Then analyze
1204        unit.analyze().unwrap();
1205        assert!(unit.is_analyzed());
1206    }
1207
1208    #[test]
1209    fn test_duplicate_function_error() {
1210        let sources = vec![
1211            SourceFile::new("a.gruel", "fn foo() -> i32 { 1 }", FileId::new(1)),
1212            SourceFile::new("b.gruel", "fn foo() -> i32 { 2 }", FileId::new(2)),
1213        ];
1214        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1215
1216        let result = unit.parse();
1217        assert!(result.is_err());
1218        let err = result.unwrap_err();
1219        assert!(err.to_string().contains("function"));
1220    }
1221
1222    #[test]
1223    fn test_warnings_collected() {
1224        let sources = make_sources("fn main() -> i32 { let x = 42; 0 }");
1225        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1226        unit.run_frontend().unwrap();
1227
1228        assert_eq!(unit.warnings().len(), 1);
1229        assert!(unit.warnings()[0].to_string().contains("unused"));
1230    }
1231
1232    #[test]
1233    #[should_panic(expected = "lower() called before parse()")]
1234    fn test_lower_before_parse_panics() {
1235        let sources = make_sources("fn main() -> i32 { 42 }");
1236        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1237        unit.lower().unwrap();
1238    }
1239
1240    #[test]
1241    #[should_panic(expected = "analyze() called before lower()")]
1242    fn test_analyze_before_lower_panics() {
1243        let sources = make_sources("fn main() -> i32 { 42 }");
1244        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1245        unit.parse().unwrap();
1246        unit.analyze().unwrap();
1247    }
1248
1249    #[test]
1250    fn test_llvm_optimization_wiring() {
1251        // Verify that -O2 produces a valid binary that runs correctly.
1252        // This exercises the LLVM pass pipeline end-to-end.
1253        use crate::{CompileOptions, OptLevel};
1254        let sources = make_sources("fn main() -> i32 { let x = 2 + 3; x }");
1255        let options = CompileOptions {
1256            opt_level: OptLevel::O2,
1257            ..CompileOptions::default()
1258        };
1259        let mut unit = CompilationUnit::new(sources, options);
1260        unit.run_frontend().unwrap();
1261        // The frontend should succeed; backend (LLVM codegen) is tested separately
1262        // via spec tests that run the resulting binary. The prelude contributes
1263        // additional functions (char__*, etc.) so we only assert that user
1264        // code analysed at all.
1265        assert!(unit.functions().len() >= 1);
1266    }
1267}