gruel_compiler/unit.rs
1//! Unified compilation unit that owns all compilation artifacts.
2//!
//! The [`CompilationUnit`] provides a single source of truth for all compilation state,
//! from source files through to machine code. Phase ordering is checked at runtime:
//! each phase stores its results in `Option` fields, and running a phase before its
//! prerequisite (for example `analyze()` before `lower()`) panics.
6//!
7//! # Example
8//!
9//! ```ignore
10//! use gruel_compiler::{CompilationUnit, SourceFile, CompileOptions};
11//! use gruel_util::FileId;
12//!
13//! // Create source files
14//! let sources = vec![
15//! SourceFile::new("main.gruel", "fn main() -> i32 { 42 }", FileId::new(1)),
16//! ];
17//!
18//! // Create compilation unit and run phases
//! let mut unit = CompilationUnit::new(sources, CompileOptions::default());
//! unit.parse()?;
//! unit.lower()?;
//! unit.analyze()?;
//! let output = unit.compile()?;
23//! ```
24
25use rustc_hash::FxHashMap as HashMap;
26
27use lasso::ThreadedRodeo;
28use rayon::prelude::*;
29use tracing::{info, info_span};
30
31/// ADR-0085 + ADR-0086: collect the deduplicated, lex-sorted set of
32/// library names declared via `link_extern("…")` / `static_link_extern("…")`
33/// blocks. Each contributes one `-l<name>` flag at the linker step.
34/// ADR-0086: the per-library `LinkMode` controls whether the linker
35/// gets `-Wl,-Bstatic` bracketing (ELF) or `-Wl,-search_paths_first`
36/// (Mach-O) around the `-l<name>`.
37///
38/// ADR-0087: `link_extern("gruel_runtime")` is the prelude's way of
39/// declaring extern fn signatures against the Rust runtime archive.
40/// The archive itself is linked by absolute path (see
41/// `link_system_with_warnings`), so emitting `-lgruel_runtime` would
42/// cause the linker to search for a separate copy and fail. Skip
43/// the name from the library-set; the source-level declaration is
44/// still load-bearing for sema's binding resolution.
45fn collect_extern_link_libraries(
46 rir: &gruel_rir::Rir,
47 interner: &ThreadedRodeo,
48) -> Vec<(String, crate::link::LinkMode)> {
49 use crate::link::LinkMode;
50 use gruel_rir::inst::RirLinkMode;
51 let mut libs: std::collections::BTreeMap<String, LinkMode> = std::collections::BTreeMap::new();
52 let convert = |m: RirLinkMode| match m {
53 RirLinkMode::Dynamic => LinkMode::Dynamic,
54 RirLinkMode::Static => LinkMode::Static,
55 };
56 let is_skipped = |name: &str| name == "gruel_runtime";
57 for ext in rir.extern_fns() {
58 let lib = interner.resolve(&ext.library).to_string();
59 if is_skipped(&lib) {
60 continue;
61 }
62 libs.insert(lib, convert(ext.link_mode));
63 }
64 for (lib, link_mode, _) in rir.empty_link_extern_blocks() {
65 let name = interner.resolve(lib).to_string();
66 if is_skipped(&name) {
67 continue;
68 }
69 libs.insert(name, convert(*link_mode));
70 }
71 libs.into_iter().collect()
72}
73
74use crate::{
75 AnalyzedFunction, Ast, AstGen, Cfg, CfgBuilder, CompileError, CompileErrors, CompileOptions,
76 CompileOutput, CompileWarning, ErrorKind, FunctionWithCfg, Lexer, MultiErrorResult, OptLevel,
77 Parser, Rir, Sema, SourceFile, Type, TypeInternPool, compile_backend,
78};
79use gruel_cache::{CacheKey, CacheStore, Hasher, compiler_fingerprint};
80use gruel_util::FileId;
81
82fn opt_level_to_u32(o: OptLevel) -> u32 {
83 match o {
84 OptLevel::O0 => 0,
85 OptLevel::O1 => 1,
86 OptLevel::O2 => 2,
87 OptLevel::O3 => 3,
88 }
89}
90
91/// Run sema on the merged RIR. Extracted so the AIR-cache miss path
92/// (ADR-0074 Phase 4) can call it in two places without duplicating
93/// the boilerplate.
94fn run_sema(
95 rir: &Rir,
96 interner: &ThreadedRodeo,
97 options: &CompileOptions,
98 file_paths: HashMap<FileId, String>,
99) -> MultiErrorResult<gruel_air::SemaOutput> {
100 let _span = info_span!("sema").entered();
101 let mut sema = Sema::new(rir, interner, options.preview_features.clone());
102 sema.set_file_paths(file_paths);
103 sema.set_suppress_comptime_dbg_print(options.capture_comptime_dbg);
104 sema.set_target(options.target.clone());
105 let output = sema.analyze_all()?;
106 info!(
107 function_count = output.functions.len(),
108 struct_count = output.type_pool.stats().struct_count,
109 "semantic analysis complete"
110 );
111 Ok(output)
112}
113
114/// Clone a slice of analyzed functions for the AIR cache write path.
115fn clone_functions(fns: &[gruel_air::AnalyzedFunction]) -> Vec<gruel_air::AnalyzedFunction> {
116 fns.to_vec()
117}
118
/// Snapshot a TypeInternPool for the AIR cache write path.
///
/// Thin wrapper over `TypeInternPool::clone_snapshot`; the caller keeps using
/// the original pool while the snapshot is stored in the cache entry.
fn clone_type_pool(pool: &gruel_air::TypeInternPool) -> gruel_air::TypeInternPool {
    pool.clone_snapshot()
}
123
124// ADR-0078: the prelude content has moved to `prelude/*.gruel`. The
125// concatenated source is built at parse time via
126// `prelude_source::assemble_prelude_source`, which prefers the on-disk files
127// but falls back to embedded copies (`include_str!`). The virtual prelude
128// source still parses under `FileId::PRELUDE`, so all downstream code
129// (visibility carve-outs, span paths) is unchanged.
130
/// Result of parsing a single file within a compilation unit.
///
/// `parse()` creates one of these for every prelude file it loads as well as
/// for every user source file.
#[derive(Debug)]
struct ParsedFileData {
    /// Path to the source file; used in duplicate-definition diagnostics to
    /// name the file holding the first definition.
    path: String,
    /// The parsed abstract syntax tree.
    ast: Ast,
}
139
/// A unified compilation unit that owns all artifacts from source to machine code.
///
/// The compilation unit progresses through phases:
/// 1. **New**: Just source files
/// 2. **Parsed**: ASTs and interner from parsing
/// 3. **Lowered**: RIR (untyped intermediate representation)
/// 4. **Analyzed**: AIR (typed IR) and CFGs for all functions
///
/// Each phase builds on the previous one. The unit validates that phases
/// are run in order - you can't analyze before parsing. The check happens at
/// runtime: each phase's artifacts live in `Option` fields that start as
/// `None`, and a phase panics if the artifacts it needs are missing.
///
/// # Thread Safety
///
/// The compilation unit uses [`ThreadedRodeo`] for string interning, which is
/// thread-safe. Parallel operations (like per-function CFG construction) can
/// safely share the interner.
#[derive(Debug)]
pub struct CompilationUnit<'src> {
    // === Configuration ===
    /// Compilation options (target, optimization level, etc.)
    options: CompileOptions,

    // === Source ===
    /// Source files being compiled.
    sources: Vec<SourceFile<'src>>,

    // === Phase 1: Parsing ===
    /// Parsed ASTs for each file (populated by `parse()`), prelude files first.
    parsed_files: Option<Vec<ParsedFileData>>,
    /// Merged AST containing all items (populated by `parse()`).
    merged_ast: Option<Ast>,
    /// String interner shared across all files (populated by `parse()`).
    interner: Option<ThreadedRodeo>,
    /// Maps FileId to source file path (for error messages). Seeded from
    /// `sources` in `new()`; `parse()` adds the prelude files it loads.
    file_paths: HashMap<FileId, String>,

    // === Phase 2: RIR Generation ===
    /// Untyped intermediate representation (populated by `lower()`).
    rir: Option<Rir>,

    // === Phase 3: Semantic Analysis + CFG ===
    /// Analyzed functions with typed IR and control flow graphs.
    functions: Option<Vec<FunctionWithCfg>>,
    /// Type intern pool containing all struct and enum definitions.
    type_pool: Option<TypeInternPool>,
    /// String literals indexed by their string_const index.
    strings: Option<Vec<String>>,
    /// Byte-blob literals from `@embed_file`, indexed by bytes_const index.
    bytes: Option<Vec<Vec<u8>>>,
    /// Warnings collected during compilation (`analyze()` appends sema and
    /// CFG-construction warnings here).
    warnings: Vec<CompileWarning>,
    /// Interface definitions (ADR-0056), indexed by InterfaceId.0.
    interface_defs: Option<Vec<gruel_air::InterfaceDef>>,
    /// (StructId, InterfaceId) → conformance witness; codegen uses this to
    /// emit one vtable global per pair.
    interface_vtables: Option<gruel_air::InterfaceVtables>,
}
197
198impl<'src> CompilationUnit<'src> {
199 /// Create a new compilation unit from source files.
200 ///
201 /// This initializes the unit with source files but does not run any
202 /// compilation phases. Call [`parse()`](Self::parse), [`lower()`](Self::lower),
203 /// and [`analyze()`](Self::analyze) to progress through compilation.
204 ///
205 /// # Arguments
206 ///
207 /// * `sources` - Source files to compile
208 /// * `options` - Compilation options (target, optimization, etc.)
209 pub fn new(sources: Vec<SourceFile<'src>>, options: CompileOptions) -> Self {
210 let file_paths: HashMap<FileId, String> = sources
211 .iter()
212 .map(|s| (s.file_id, s.path.to_string()))
213 .collect();
214
215 Self {
216 options,
217 sources,
218 parsed_files: None,
219 merged_ast: None,
220 interner: None,
221 file_paths,
222 rir: None,
223 functions: None,
224 type_pool: None,
225 strings: None,
226 bytes: None,
227 warnings: Vec::new(),
228 interface_defs: None,
229 interface_vtables: None,
230 }
231 }
232
233 // =========================================================================
234 // Phase 1: Parsing
235 // =========================================================================
236
237 /// Open the AIR cache for whole-program AIR caching (ADR-0074
238 /// Phase 4). Returns `None` when the preview gate is off, the
239 /// cache_dir is not configured, or the underlying machinery
240 /// (CacheStore / compiler_fingerprint) fails. The cache key
241 /// concatenates every source file's content so any change to any
242 /// file invalidates the whole-program AIR — the per-file
243 /// granularity from the ADR design needs sema to run per-file,
244 /// which is its own refactor.
245 fn open_air_cache(&self) -> Option<(CacheStore, CacheKey)> {
246 let (store, build_fp) = self.open_parse_cache()?;
247 let mut h = Hasher::new();
248 h.update(build_fp.as_bytes());
249 for source in &self.sources {
250 h.update_str(source.path);
251 h.update_str(source.source);
252 }
253 Some((store, h.finalize()))
254 }
255
256 /// Open the parse cache when `cache_dir` is configured. Returns
257 /// `None` if no cache directory is set (e.g. `--no-cache`), or if
258 /// opening the store / hashing the compiler binary fails (in which
259 /// case the build silently continues uncached — correctness is
260 /// preserved).
261 fn open_parse_cache(&self) -> Option<(CacheStore, CacheKey)> {
262 let dir = self.options.cache_dir.as_ref()?;
263
264 let store = match CacheStore::open(dir) {
265 Ok(s) => s,
266 Err(e) => {
267 tracing::warn!(error = %e, dir = %dir.display(), "failed to open cache");
268 return None;
269 }
270 };
271
272 // Compose the compilation fingerprint from compiler-binary hash
273 // + target + opt level + sorted preview features.
274 let bin_path = match gruel_cache::current_binary_path() {
275 Ok(p) => p,
276 Err(e) => {
277 tracing::warn!(error = %e, "could not resolve current binary path");
278 return None;
279 }
280 };
281 let memo_dir = std::env::var_os("HOME")
282 .map(std::path::PathBuf::from)
283 .map(|h| h.join(".cache").join("gruel").join("binary-hash"))
284 .unwrap_or_else(|| std::env::temp_dir().join("gruel-binary-hash"));
285 let compiler_fp = match compiler_fingerprint(&bin_path, &memo_dir) {
286 Ok(fp) => fp,
287 Err(e) => {
288 tracing::warn!(error = %e, "compiler_fingerprint failed");
289 return None;
290 }
291 };
292
293 let mut h = Hasher::new();
294 h.update(compiler_fp.as_bytes());
295 h.update_str(&format!("{}", self.options.target));
296 h.update_u32(opt_level_to_u32(self.options.opt_level));
297 let mut feats: Vec<&'static str> = self
298 .options
299 .preview_features
300 .iter()
301 .map(|f| f.name())
302 .collect();
303 feats.sort_unstable();
304 for f in feats {
305 h.update_str(f);
306 }
307 let build_fp = h.finalize();
308 Some((store, build_fp))
309 }
310
311 /// Parse all source files.
312 ///
313 /// This runs lexing and parsing on each source file, producing ASTs.
314 /// The ASTs are then merged into a single program, detecting any
315 /// duplicate symbol definitions.
316 ///
317 /// # Errors
318 ///
319 /// Returns errors if:
320 /// - Any file fails to lex or parse
321 /// - Duplicate function, struct, or enum definitions are found
322 pub fn parse(&mut self) -> MultiErrorResult<()> {
323 let _span = info_span!("parse", file_count = self.sources.len()).entered();
324
325 // Parse all files with a shared interner
326 let mut parsed_files = Vec::with_capacity(self.sources.len() + 1);
327 let mut interner = ThreadedRodeo::new();
328
329 // ADR-0065 / ADR-0078: load the prelude as an implicitly-imported
330 // module rooted at `std/_prelude.gruel`. The root is the only file
331 // the compiler hand-loads — it uses `@import` internally to pull
332 // in `prelude/*.gruel` submodules and re-exports their pub
333 // items. Submodules are pre-staged in `file_paths` (and pre-parsed
334 // when their disk copy isn't available) so the @import resolver
335 // finds them whether the host has an on-disk stdlib or not. Only
336 // the root's pub items become globally available; submodule items
337 // accessed only through the root's `pub const` re-exports.
338 let resolved = crate::prelude_source::resolved_prelude();
339 let mut prelude_file_id = FileId::PRELUDE.index();
340
341 // Stage prelude submodules: register their paths in `file_paths`
342 // so @import resolution finds them, and pre-parse them so they
343 // don't need to hit the disk. `other_std_files` (`_std.gruel`,
344 // `math.gruel`, etc.) are NOT staged here — they get loaded only
345 // when user code explicitly `@import("std")`s them, via the
346 // existing on-disk module resolver.
347 for file in &resolved.prelude_dir {
348 let file_id = FileId::new(prelude_file_id);
349 self.file_paths.insert(file_id, file.path.clone());
350 let lexer = Lexer::with_interner_and_file_id(&file.source, interner, file_id);
351 let (tokens, returned_interner) = lexer.tokenize().map_err(CompileErrors::from)?;
352 interner = returned_interner;
353 let parser = Parser::new(tokens, interner)
354 .with_preview_features(self.options.preview_features.clone())
355 .with_source(&*file.source);
356 let (ast, returned_interner) = parser.parse()?;
357 interner = returned_interner;
358 parsed_files.push(ParsedFileData {
359 path: file.path.clone(),
360 ast,
361 });
362 prelude_file_id = prelude_file_id.wrapping_sub(1);
363 }
364
365 // Parse the prelude root itself (`std/_prelude.gruel`).
366 let root_file_id = FileId::new(prelude_file_id);
367 self.file_paths
368 .insert(root_file_id, resolved.root.path.clone());
369 let root_lexer =
370 Lexer::with_interner_and_file_id(&resolved.root.source, interner, root_file_id);
371 let (root_tokens, returned_interner) =
372 root_lexer.tokenize().map_err(CompileErrors::from)?;
373 interner = returned_interner;
374 let root_parser = Parser::new(root_tokens, interner)
375 .with_preview_features(self.options.preview_features.clone())
376 .with_source(&*resolved.root.source);
377 let (root_ast, returned_interner) = root_parser.parse()?;
378 interner = returned_interner;
379 parsed_files.push(ParsedFileData {
380 path: resolved.root.path,
381 ast: root_ast,
382 });
383
384 // ADR-0074: when a cache_dir is configured, route user-file
385 // parsing through the on-disk cache. The prelude (above) is
386 // always parsed uncached because its source is a constant in
387 // the binary; its Spurs are already in `interner`, which the
388 // cache wiring then reuses as the build-shared interner.
389 let cache_handle = self.open_parse_cache();
390 if let Some((store, build_fp)) = cache_handle {
391 let (cached_files, stats) = crate::parse_cache::parse_files_into(
392 &interner,
393 &self.sources,
394 &self.options.preview_features,
395 &store,
396 &build_fp,
397 )?;
398 info!(
399 hits = stats.hits,
400 misses = stats.misses,
401 files = self.sources.len(),
402 "parse cache complete"
403 );
404 for file in cached_files {
405 parsed_files.push(ParsedFileData {
406 path: file.path,
407 ast: file.ast,
408 });
409 }
410 } else {
411 for source in &self.sources {
412 let _file_span = info_span!("parse_file", path = %source.path).entered();
413
414 // Create lexer with shared interner and file ID
415 let lexer =
416 Lexer::with_interner_and_file_id(source.source, interner, source.file_id);
417
418 // Tokenize
419 let (tokens, returned_interner) = lexer.tokenize().map_err(CompileErrors::from)?;
420 interner = returned_interner;
421
422 info!(token_count = tokens.len(), "lexing complete");
423
424 // Parse
425 let parser = Parser::new(tokens, interner)
426 .with_preview_features(self.options.preview_features.clone())
427 .with_source(source.source);
428 let (ast, returned_interner) = parser.parse()?;
429 interner = returned_interner;
430
431 info!(item_count = ast.items.len(), "parsing complete");
432
433 parsed_files.push(ParsedFileData {
434 path: source.path.to_string(),
435 ast,
436 });
437 }
438 }
439
440 // Merge symbols and check for duplicates
441 let merged_ast = self.merge_symbols(&parsed_files, &interner)?;
442
443 self.parsed_files = Some(parsed_files);
444 self.merged_ast = Some(merged_ast);
445 self.interner = Some(interner);
446
447 Ok(())
448 }
449
450 /// Merge symbols from all parsed files, checking for duplicates.
451 fn merge_symbols(
452 &self,
453 files: &[ParsedFileData],
454 interner: &ThreadedRodeo,
455 ) -> MultiErrorResult<Ast> {
456 use crate::{Item, Span};
457
458 /// Information about a symbol definition for duplicate detection.
459 struct SymbolDef {
460 span: Span,
461 file_path: String,
462 }
463
464 let _span = info_span!("merge_symbols", file_count = files.len()).entered();
465
466 let mut functions: HashMap<String, SymbolDef> = HashMap::default();
467 let mut structs: HashMap<String, SymbolDef> = HashMap::default();
468 let mut enums: HashMap<String, SymbolDef> = HashMap::default();
469 let mut all_items = Vec::new();
470 let mut errors = Vec::new();
471
472 for file in files {
473 for item in &file.ast.items {
474 match item {
475 Item::Function(func) => {
476 let name = interner.resolve(&func.name.name).to_string();
477 if let Some(first) = functions.get(&name) {
478 errors.push(
479 CompileError::new(
480 ErrorKind::DuplicateTypeDefinition {
481 type_name: format!("function `{}`", name),
482 },
483 func.span,
484 )
485 .with_label(
486 format!("first defined in {}", first.file_path),
487 first.span,
488 ),
489 );
490 } else {
491 functions.insert(
492 name,
493 SymbolDef {
494 span: func.span,
495 file_path: file.path.clone(),
496 },
497 );
498 }
499 }
500 Item::Struct(s) => {
501 let name = interner.resolve(&s.name.name).to_string();
502 if let Some(first) = structs.get(&name) {
503 errors.push(
504 CompileError::new(
505 ErrorKind::DuplicateTypeDefinition {
506 type_name: format!("struct `{}`", name),
507 },
508 s.span,
509 )
510 .with_label(
511 format!("first defined in {}", first.file_path),
512 first.span,
513 ),
514 );
515 } else if let Some(first) = enums.get(&name) {
516 errors.push(
517 CompileError::new(
518 ErrorKind::DuplicateTypeDefinition {
519 type_name: format!(
520 "struct `{}` (conflicts with enum)",
521 name
522 ),
523 },
524 s.span,
525 )
526 .with_label(
527 format!("enum first defined in {}", first.file_path),
528 first.span,
529 ),
530 );
531 } else {
532 structs.insert(
533 name,
534 SymbolDef {
535 span: s.span,
536 file_path: file.path.clone(),
537 },
538 );
539 }
540 }
541 Item::Enum(e) => {
542 let name = interner.resolve(&e.name.name).to_string();
543 if let Some(first) = enums.get(&name) {
544 errors.push(
545 CompileError::new(
546 ErrorKind::DuplicateTypeDefinition {
547 type_name: format!("enum `{}`", name),
548 },
549 e.span,
550 )
551 .with_label(
552 format!("first defined in {}", first.file_path),
553 first.span,
554 ),
555 );
556 } else if let Some(first) = structs.get(&name) {
557 errors.push(
558 CompileError::new(
559 ErrorKind::DuplicateTypeDefinition {
560 type_name: format!(
561 "enum `{}` (conflicts with struct)",
562 name
563 ),
564 },
565 e.span,
566 )
567 .with_label(
568 format!("struct first defined in {}", first.file_path),
569 first.span,
570 ),
571 );
572 } else {
573 enums.insert(
574 name,
575 SymbolDef {
576 span: e.span,
577 file_path: file.path.clone(),
578 },
579 );
580 }
581 }
582 Item::Interface(_) => {
583 // Interfaces (ADR-0056) are validated in Sema; cross-file
584 // duplicate detection is added in Phase 2.
585 }
586 Item::Derive(_) => {
587 // Derives (ADR-0058) are validated in Sema; cross-file
588 // duplicate detection follows the interface model.
589 }
590 Item::Const(_) => {
591 // Validated in Sema
592 }
593 Item::LinkExtern(_) => {
594 // ADR-0085: extern fn declarations live on the
595 // RIR side-table; cross-file duplicate detection
596 // happens in sema.
597 }
598 Item::Error(_) => {
599 // Error nodes from parser recovery are skipped
600 }
601 }
602 all_items.push(item.clone());
603 }
604 }
605
606 if !errors.is_empty() {
607 return Err(CompileErrors::from(errors));
608 }
609
610 info!(
611 function_count = functions.len(),
612 struct_count = structs.len(),
613 enum_count = enums.len(),
614 "symbol merging complete"
615 );
616
617 Ok(Ast {
618 module_doc: None,
619 items: all_items,
620 })
621 }
622
623 // =========================================================================
624 // Phase 2: RIR Generation
625 // =========================================================================
626
627 /// Generate untyped intermediate representation (RIR).
628 ///
629 /// This transforms the merged AST into RIR, which is a more uniform
630 /// representation suitable for semantic analysis.
631 ///
632 /// # Panics
633 ///
634 /// Panics if called before [`parse()`](Self::parse).
635 pub fn lower(&mut self) -> MultiErrorResult<()> {
636 let ast = self
637 .merged_ast
638 .as_ref()
639 .expect("lower() called before parse()");
640 let interner = self.interner.as_ref().expect("interner not initialized");
641
642 let _span = info_span!("astgen").entered();
643
644 let astgen = AstGen::new(ast, interner);
645 let rir = astgen.generate();
646
647 info!(instruction_count = rir.len(), "RIR generation complete");
648
649 self.rir = Some(rir);
650 Ok(())
651 }
652
653 // =========================================================================
654 // Phase 3: Semantic Analysis + CFG Construction
655 // =========================================================================
656
657 /// Perform semantic analysis and build control flow graphs.
658 ///
659 /// This runs type checking, symbol resolution, and other semantic checks,
660 /// then builds CFGs for each function. Optimizations are applied based
661 /// on the configured optimization level.
662 ///
663 /// # Panics
664 ///
665 /// Panics if called before [`lower()`](Self::lower).
666 pub fn analyze(&mut self) -> MultiErrorResult<()> {
667 let rir = self.rir.as_ref().expect("analyze() called before lower()");
668 let interner = self.interner.as_ref().expect("interner not initialized");
669
670 // ADR-0074 Phase 4: try the whole-program AIR cache first.
671 // Per-file AIR caching needs sema to run per-file (currently
672 // it runs program-wide on the merged AST). Whole-program is
673 // coarser but exercises the full cache pipeline end-to-end.
674 let air_cache_handle = self.open_air_cache();
675
676 // Semantic analysis
677 let sema_output = if let Some((store, key)) = &air_cache_handle {
678 match store.get(gruel_cache::CacheKind::Air, key) {
679 Ok(Some(bytes)) => match gruel_cache::CachedAirOutput::decode(&bytes) {
680 Ok(cached) => {
681 info!("air cache hit");
682 // Restore the build's interner to the cached
683 // state. This is sound because the AIR cache is
684 // whole-program — every Spur in cached AIR was
685 // produced against this snapshot. Replay
686 // re-interns each cached string back into the
687 // build's interner; for newly-empty interners
688 // (typical at this point), this restores the
689 // original Spur values.
690 let _remap = cached.interner.restore_into(interner);
691 // Replay comptime @dbg output to stderr so cache
692 // hits are observably identical to cold builds
693 // (ADR-0074 "Comptime side-effects replay").
694 if !self.options.capture_comptime_dbg {
695 for line in &cached.comptime_dbg_output {
696 eprintln!("{}", line);
697 }
698 }
699 // Note: warnings are not cached yet (DiagnosticWrapper
700 // serde is its own follow-up); cache hits omit
701 // them. Documented in ADR-0074.
702 gruel_air::SemaOutput {
703 functions: cached.functions,
704 strings: cached.strings,
705 bytes: cached.bytes,
706 warnings: Vec::new(),
707 type_pool: cached.type_pool,
708 comptime_dbg_output: cached.comptime_dbg_output,
709 interface_defs: cached.interface_defs,
710 interface_vtables: cached.interface_vtables,
711 }
712 }
713 Err(e) => {
714 tracing::warn!(error = %e, "air cache decode failed; recomputing");
715 run_sema(rir, interner, &self.options, self.file_paths.clone())?
716 }
717 },
718 _ => {
719 info!("air cache miss");
720 let output = run_sema(rir, interner, &self.options, self.file_paths.clone())?;
721 // Best-effort cache write; failure is not a build error.
722 let cached = gruel_cache::CachedAirOutput {
723 interner: gruel_cache::InternerSnapshot::capture(interner),
724 functions: clone_functions(&output.functions),
725 type_pool: clone_type_pool(&output.type_pool),
726 strings: output.strings.clone(),
727 bytes: output.bytes.clone(),
728 interface_defs: output.interface_defs.clone(),
729 interface_vtables: output.interface_vtables.clone(),
730 comptime_dbg_output: output.comptime_dbg_output.clone(),
731 };
732 match cached.encode() {
733 Ok(bytes) => {
734 if let Err(e) = store.put(gruel_cache::CacheKind::Air, key, &bytes) {
735 tracing::warn!(error = %e, "air cache write failed");
736 }
737 }
738 Err(e) => tracing::warn!(error = %e, "air cache encode failed"),
739 }
740 output
741 }
742 }
743 } else {
744 run_sema(rir, interner, &self.options, self.file_paths.clone())?
745 };
746
747 // Synthesize drop glue functions
748 let drop_glue_functions =
749 crate::drop_glue::synthesize_drop_glue(&sema_output.type_pool, interner);
750 // ADR-0079: clone glue retired — prelude `derive Clone` emits
751 // the clone method via the standard derive-expansion path.
752
753 // Combine user functions with drop glue, filtering out comptime-only functions
754 let all_functions: Vec<_> = sema_output
755 .functions
756 .into_iter()
757 .filter(|f| f.air.return_type() != Type::COMPTIME_TYPE)
758 .chain(drop_glue_functions)
759 .collect();
760
761 // Build CFGs in parallel
762 let interner_ref = self.interner.as_ref().expect("interner not initialized");
763 let (functions, cfg_warnings) =
764 self.build_cfgs(all_functions, &sema_output.type_pool, interner_ref);
765
766 self.functions = Some(functions);
767 self.type_pool = Some(sema_output.type_pool);
768 self.strings = Some(sema_output.strings);
769 self.bytes = Some(sema_output.bytes);
770 self.warnings.extend(sema_output.warnings);
771 self.warnings.extend(cfg_warnings);
772 self.interface_defs = Some(sema_output.interface_defs);
773 self.interface_vtables = Some(sema_output.interface_vtables);
774
775 Ok(())
776 }
777
778 /// Build CFGs for all functions in parallel.
779 fn build_cfgs(
780 &self,
781 functions: Vec<AnalyzedFunction>,
782 type_pool: &TypeInternPool,
783 interner: &ThreadedRodeo,
784 ) -> (Vec<FunctionWithCfg>, Vec<CompileWarning>) {
785 let _span = info_span!("cfg_construction").entered();
786
787 let results: Vec<(FunctionWithCfg, Vec<CompileWarning>)> = functions
788 .into_par_iter()
789 .map(|func| {
790 let cfg_output = CfgBuilder::build(&func, type_pool, interner);
791
792 (
793 FunctionWithCfg {
794 analyzed: func,
795 cfg: cfg_output.cfg,
796 },
797 cfg_output.warnings,
798 )
799 })
800 .collect();
801
802 let mut functions = Vec::with_capacity(results.len());
803 let mut warnings = Vec::new();
804 for (func, func_warnings) in results {
805 functions.push(func);
806 warnings.extend(func_warnings);
807 }
808
809 info!(
810 function_count = functions.len(),
811 "CFG construction complete"
812 );
813
814 (functions, warnings)
815 }
816
817 // =========================================================================
818 // Phase 4: Code Generation + Linking
819 // =========================================================================
820
821 /// Generate machine code and link into an executable.
822 ///
823 /// This is the final compilation phase. It generates machine code for
824 /// all functions and links them into an executable binary.
825 ///
826 /// # Panics
827 ///
828 /// Panics if called before [`analyze()`](Self::analyze).
829 pub fn compile(&self) -> MultiErrorResult<CompileOutput> {
830 let functions = self
831 .functions
832 .as_ref()
833 .expect("compile() called before analyze()");
834 let type_pool = self.type_pool.as_ref().expect("type_pool not available");
835 let strings = self.strings.as_ref().expect("strings not available");
836 let bytes = self.bytes.as_ref().expect("bytes not available");
837 let interner = self.interner.as_ref().expect("interner not available");
838
839 let empty_iface_defs: Vec<gruel_air::InterfaceDef> = Vec::new();
840 let empty_iface_vtables: gruel_air::InterfaceVtables = rustc_hash::FxHashMap::default();
841 let interface_defs = self.interface_defs.as_ref().unwrap_or(&empty_iface_defs);
842 let interface_vtables = self
843 .interface_vtables
844 .as_ref()
845 .unwrap_or(&empty_iface_vtables);
846 // ADR-0085: collect the deduplicated, lex-sorted set of library
847 // names declared by `link_extern("…")` blocks (including empty
848 // ones). Linker turns each into a `-l<name>` flag.
849 let rir = self.rir.as_ref().expect("compile() called before lower()");
850 let extra_link_libraries = collect_extern_link_libraries(rir, interner);
851
852 let inputs = crate::BackendInputs {
853 functions,
854 type_pool,
855 strings,
856 bytes,
857 interner,
858 interface_defs,
859 interface_vtables,
860 target: &self.options.target,
861 extra_link_libraries: &extra_link_libraries,
862 };
863
864 // ADR-0074 Phase 5: bitcode cache. If the AIR cache is configured
865 // and air_key matches, we may have cached pre-optimization LLVM
866 // bitcode that lets us skip the AIR→IR translation step. The
867 // LLVM optimizer + back-end + linker still run on every build.
868 if let Some((store, key)) = self.open_air_cache() {
869 return self.compile_with_bitcode_cache(&inputs, &store, &key);
870 }
871 compile_backend(&inputs, &self.options, &self.warnings)
872 }
873
874 /// Codegen path that consults the LLVM bitcode cache (ADR-0074
875 /// Phase 5). On hit, parses the cached bitcode and runs the
876 /// optimizer + back-end on it. On miss, generates bitcode, writes
877 /// it to the cache, then runs optimizer + back-end. Either way the
878 /// optimizer pipeline runs — the cache only saves the AIR→IR
879 /// translation step.
880 fn compile_with_bitcode_cache(
881 &self,
882 inputs: &crate::BackendInputs<'_>,
883 store: &CacheStore,
884 air_key: &CacheKey,
885 ) -> MultiErrorResult<CompileOutput> {
886 // Check for main function (matches compile_backend).
887 let _main_fn = inputs
888 .functions
889 .iter()
890 .find(|f| f.analyzed.name == "main")
891 .ok_or_else(|| {
892 CompileErrors::from(CompileError::without_span(ErrorKind::NoMainFunction))
893 })?;
894
895 // Bitcode cache key is the same as air_key — bitcode is a
896 // deterministic function of AIR, and cached AIR keys already
897 // factor in everything that influences codegen (target, opt
898 // level, preview features, source content, compiler binary).
899 let cfgs: Vec<&Cfg> = inputs.functions.iter().map(|f| &f.cfg).collect();
900 let codegen_inputs = inputs.to_codegen_inputs(&cfgs);
901
902 let object_bytes = match store.get(gruel_cache::CacheKind::LlvmIr, air_key) {
903 Ok(Some(bitcode)) => {
904 info!("bitcode cache hit");
905 gruel_codegen_llvm::compile_bitcode_to_object(
906 &bitcode,
907 self.options.opt_level,
908 &self.options.target,
909 )
910 .map_err(CompileErrors::from)?
911 }
912 _ => {
913 info!("bitcode cache miss");
914 let bitcode = gruel_codegen_llvm::generate_bitcode(&codegen_inputs)
915 .map_err(CompileErrors::from)?;
916 if let Err(e) = store.put(gruel_cache::CacheKind::LlvmIr, air_key, &bitcode) {
917 tracing::warn!(error = %e, "bitcode cache write failed");
918 }
919 gruel_codegen_llvm::compile_bitcode_to_object(
920 &bitcode,
921 self.options.opt_level,
922 &self.options.target,
923 )
924 .map_err(CompileErrors::from)?
925 }
926 };
927
928 // Reuse the same link tail as generate_llvm_objects_and_link.
929 let object_files = vec![object_bytes];
930 let linker_cmd = match &self.options.linker {
931 crate::LinkerMode::System(cmd) => cmd.clone(),
932 crate::LinkerMode::Internal => "cc".to_string(),
933 };
934 crate::link::link_system_with_warnings(
935 &self.options,
936 &object_files,
937 &linker_cmd,
938 &self.warnings,
939 inputs.extra_link_libraries,
940 )
941 }
942
943 // =========================================================================
944 // Convenience Methods
945 // =========================================================================
946
947 /// Run all frontend phases (parse, lower, analyze).
948 ///
949 /// This is a convenience method that runs the complete frontend pipeline.
950 /// Equivalent to calling `parse()`, `lower()`, and `analyze()` in sequence.
951 pub fn run_frontend(&mut self) -> MultiErrorResult<()> {
952 self.parse()?;
953 self.lower()?;
954 self.analyze()?;
955 Ok(())
956 }
957
958 /// Run all phases and produce a compiled binary.
959 ///
960 /// This is a convenience method that runs the complete compilation pipeline.
961 /// Equivalent to calling `run_frontend()` followed by `compile()`, with an
962 /// early `NoMainFunction` check after parsing so we surface the missing-main
963 /// error directly instead of letting sema's "analyze every top-level fn"
964 /// fallback churn through the prelude and report something unrelated first.
965 pub fn run_all(&mut self) -> MultiErrorResult<CompileOutput> {
966 self.parse()?;
967 self.require_user_main()?;
968 self.lower()?;
969 self.analyze()?;
970 self.compile()
971 }
972
973 /// Bail out with `NoMainFunction` if the merged AST has no `fn main` in any
974 /// user source file. Prelude files get auto-assigned `FileId`s during
975 /// `parse()` that don't appear in `self.sources`; restricting the search to
976 /// user-provided file IDs keeps the helper robust even if a future prelude
977 /// module ever defined an internal `main` helper.
978 fn require_user_main(&self) -> MultiErrorResult<()> {
979 use gruel_parser::ast::Item;
980 let ast = self
981 .merged_ast
982 .as_ref()
983 .expect("require_user_main called before parse()");
984 let interner = self.interner.as_ref().expect("interner not initialized");
985 let user_file_ids: rustc_hash::FxHashSet<u32> =
986 self.sources.iter().map(|s| s.file_id.index()).collect();
987 let has_user_main = ast.items.iter().any(|item| {
988 let Item::Function(f) = item else {
989 return false;
990 };
991 if !user_file_ids.contains(&f.name.span.file_id.index()) {
992 return false;
993 }
994 interner.resolve(&f.name.name) == "main"
995 });
996 if !has_user_main {
997 return Err(CompileErrors::from(CompileError::without_span(
998 ErrorKind::NoMainFunction,
999 )));
1000 }
1001 Ok(())
1002 }
1003
1004 /// Check if parsing has been completed.
1005 pub fn is_parsed(&self) -> bool {
1006 self.merged_ast.is_some()
1007 }
1008
1009 /// Check if RIR generation has been completed.
1010 pub fn is_lowered(&self) -> bool {
1011 self.rir.is_some()
1012 }
1013
1014 /// Check if semantic analysis has been completed.
1015 pub fn is_analyzed(&self) -> bool {
1016 self.functions.is_some()
1017 }
1018
1019 // =========================================================================
1020 // Accessors
1021 // =========================================================================
1022
1023 /// Get the compilation options.
1024 pub fn options(&self) -> &CompileOptions {
1025 &self.options
1026 }
1027
1028 /// Get the merged AST (after parsing).
1029 ///
1030 /// # Panics
1031 ///
1032 /// Panics if called before [`parse()`](Self::parse).
1033 pub fn ast(&self) -> &Ast {
1034 self.merged_ast
1035 .as_ref()
1036 .expect("ast() called before parse()")
1037 }
1038
1039 /// Get the string interner.
1040 ///
1041 /// # Panics
1042 ///
1043 /// Panics if called before [`parse()`](Self::parse).
1044 pub fn interner(&self) -> &ThreadedRodeo {
1045 self.interner
1046 .as_ref()
1047 .expect("interner() called before parse()")
1048 }
1049
1050 /// Get the RIR (after lowering).
1051 ///
1052 /// # Panics
1053 ///
1054 /// Panics if called before [`lower()`](Self::lower).
1055 pub fn rir(&self) -> &Rir {
1056 self.rir.as_ref().expect("rir() called before lower()")
1057 }
1058
1059 /// Get the analyzed functions with CFGs (after analysis).
1060 ///
1061 /// # Panics
1062 ///
1063 /// Panics if called before [`analyze()`](Self::analyze).
1064 pub fn functions(&self) -> &[FunctionWithCfg] {
1065 self.functions
1066 .as_ref()
1067 .expect("functions() called before analyze()")
1068 }
1069
1070 /// Get the type pool (after analysis).
1071 ///
1072 /// # Panics
1073 ///
1074 /// Panics if called before [`analyze()`](Self::analyze).
1075 pub fn type_pool(&self) -> &TypeInternPool {
1076 self.type_pool
1077 .as_ref()
1078 .expect("type_pool() called before analyze()")
1079 }
1080
1081 /// Get string literals (after analysis).
1082 ///
1083 /// # Panics
1084 ///
1085 /// Panics if called before [`analyze()`](Self::analyze).
1086 pub fn strings(&self) -> &[String] {
1087 self.strings
1088 .as_ref()
1089 .expect("strings() called before analyze()")
1090 }
1091
1092 /// Get all warnings collected during compilation.
1093 pub fn warnings(&self) -> &[CompileWarning] {
1094 &self.warnings
1095 }
1096
1097 /// Get the file paths map.
1098 pub fn file_paths(&self) -> &HashMap<FileId, String> {
1099 &self.file_paths
1100 }
1101
1102 /// Take the interner out of the compilation unit.
1103 ///
1104 /// This is useful when you need ownership of the interner (e.g., for
1105 /// code generation).
1106 ///
1107 /// # Panics
1108 ///
1109 /// Panics if called before [`parse()`](Self::parse) or if the interner
1110 /// has already been taken.
1111 pub fn take_interner(&mut self) -> ThreadedRodeo {
1112 self.interner
1113 .take()
1114 .expect("interner not available (not parsed or already taken)")
1115 }
1116
1117 /// Take the functions out of the compilation unit.
1118 ///
1119 /// # Panics
1120 ///
1121 /// Panics if called before [`analyze()`](Self::analyze) or if the
1122 /// functions have already been taken.
1123 pub fn take_functions(&mut self) -> Vec<FunctionWithCfg> {
1124 self.functions
1125 .take()
1126 .expect("functions not available (not analyzed or already taken)")
1127 }
1128
1129 /// Take the type pool out of the compilation unit.
1130 ///
1131 /// # Panics
1132 ///
1133 /// Panics if called before [`analyze()`](Self::analyze) or if the
1134 /// type pool has already been taken.
1135 pub fn take_type_pool(&mut self) -> TypeInternPool {
1136 self.type_pool
1137 .take()
1138 .expect("type_pool not available (not analyzed or already taken)")
1139 }
1140
1141 /// Take the strings out of the compilation unit.
1142 ///
1143 /// # Panics
1144 ///
1145 /// Panics if called before [`analyze()`](Self::analyze) or if the
1146 /// strings have already been taken.
1147 pub fn take_strings(&mut self) -> Vec<String> {
1148 self.strings
1149 .take()
1150 .expect("strings not available (not analyzed or already taken)")
1151 }
1152
1153 /// Take the warnings out of the compilation unit.
1154 pub fn take_warnings(&mut self) -> Vec<CompileWarning> {
1155 std::mem::take(&mut self.warnings)
1156 }
1157}
1158
1159#[cfg(test)]
1160mod tests {
1161 use super::*;
1162 use crate::FileId;
1163
1164 fn make_sources(source: &str) -> Vec<SourceFile<'_>> {
1165 vec![SourceFile::new("<test>", source, FileId::new(1))]
1166 }
1167
1168 #[test]
1169 fn test_compilation_unit_basic() {
1170 let sources = make_sources("fn main() -> i32 { 42 }");
1171 let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1172
1173 assert!(!unit.is_parsed());
1174 assert!(!unit.is_lowered());
1175 assert!(!unit.is_analyzed());
1176
1177 unit.run_frontend().unwrap();
1178
1179 assert!(unit.is_parsed());
1180 assert!(unit.is_lowered());
1181 assert!(unit.is_analyzed());
1182 // ADR-0071 added char__from_u32 / char__is_ascii / char__len_utf8 /
1183 // char__encode_utf8 plus Option/Result methods to the prelude; the
1184 // analysed function count includes those plus user-defined `main`.
1185 assert!(unit.functions().len() >= 1);
1186 }
1187
1188 #[test]
1189 fn test_phase_ordering() {
1190 let sources = make_sources("fn main() -> i32 { 42 }");
1191 let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1192
1193 // Parse first
1194 unit.parse().unwrap();
1195 assert!(unit.is_parsed());
1196 assert!(!unit.is_lowered());
1197
1198 // Then lower
1199 unit.lower().unwrap();
1200 assert!(unit.is_lowered());
1201 assert!(!unit.is_analyzed());
1202
1203 // Then analyze
1204 unit.analyze().unwrap();
1205 assert!(unit.is_analyzed());
1206 }
1207
1208 #[test]
1209 fn test_duplicate_function_error() {
1210 let sources = vec![
1211 SourceFile::new("a.gruel", "fn foo() -> i32 { 1 }", FileId::new(1)),
1212 SourceFile::new("b.gruel", "fn foo() -> i32 { 2 }", FileId::new(2)),
1213 ];
1214 let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1215
1216 let result = unit.parse();
1217 assert!(result.is_err());
1218 let err = result.unwrap_err();
1219 assert!(err.to_string().contains("function"));
1220 }
1221
1222 #[test]
1223 fn test_warnings_collected() {
1224 let sources = make_sources("fn main() -> i32 { let x = 42; 0 }");
1225 let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1226 unit.run_frontend().unwrap();
1227
1228 assert_eq!(unit.warnings().len(), 1);
1229 assert!(unit.warnings()[0].to_string().contains("unused"));
1230 }
1231
1232 #[test]
1233 #[should_panic(expected = "lower() called before parse()")]
1234 fn test_lower_before_parse_panics() {
1235 let sources = make_sources("fn main() -> i32 { 42 }");
1236 let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1237 unit.lower().unwrap();
1238 }
1239
1240 #[test]
1241 #[should_panic(expected = "analyze() called before lower()")]
1242 fn test_analyze_before_lower_panics() {
1243 let sources = make_sources("fn main() -> i32 { 42 }");
1244 let mut unit = CompilationUnit::new(sources, CompileOptions::default());
1245 unit.parse().unwrap();
1246 unit.analyze().unwrap();
1247 }
1248
1249 #[test]
1250 fn test_llvm_optimization_wiring() {
1251 // Verify that -O2 produces a valid binary that runs correctly.
1252 // This exercises the LLVM pass pipeline end-to-end.
1253 use crate::{CompileOptions, OptLevel};
1254 let sources = make_sources("fn main() -> i32 { let x = 2 + 3; x }");
1255 let options = CompileOptions {
1256 opt_level: OptLevel::O2,
1257 ..CompileOptions::default()
1258 };
1259 let mut unit = CompilationUnit::new(sources, options);
1260 unit.run_frontend().unwrap();
1261 // The frontend should succeed; backend (LLVM codegen) is tested separately
1262 // via spec tests that run the resulting binary. The prelude contributes
1263 // additional functions (char__*, etc.) so we only assert that user
1264 // code analysed at all.
1265 assert!(unit.functions().len() >= 1);
1266 }
1267}