gruel_compiler/unit.rs
1//! Unified compilation unit that owns all compilation artifacts.
2//!
3//! The [`CompilationUnit`] provides a single source of truth for all compilation state,
4//! from source files through to machine code. It enforces phase ordering through the
5//! type system - you can't access AIR without first running semantic analysis.
6//!
7//! # Example
8//!
9//! ```ignore
10//! use gruel_compiler::{CompilationUnit, SourceFile, CompileOptions};
11//! use gruel_span::FileId;
12//!
13//! // Create source files
14//! let sources = vec![
15//! SourceFile::new("main.gruel", "fn main() -> i32 { 42 }", FileId::new(1)),
16//! ];
17//!
18//! // Create compilation unit and run phases
//! let mut unit = CompilationUnit::new(sources, CompileOptions::default());
20//! unit.parse()?;
21//! unit.analyze()?;
22//! let output = unit.compile()?;
23//! ```
24
25use std::collections::HashMap;
26
27use lasso::ThreadedRodeo;
28use rayon::prelude::*;
29use tracing::{info, info_span};
30
31use crate::{
32 AnalyzedFunction, Ast, AstGen, CfgBuilder, CompileError, CompileErrors, CompileOptions,
33 CompileOutput, CompileWarning, ErrorKind, FunctionWithCfg, Lexer, MultiErrorResult, Parser,
34 Rir, Sema, SourceFile, Type, TypeInternPool, compile_backend,
35};
36use gruel_span::FileId;
37
/// Result of parsing a single file within a compilation unit.
///
/// Pairs the file's path (retained for diagnostics during symbol merging)
/// with the AST the parser produced for that file.
#[derive(Debug)]
struct ParsedFileData {
    /// Path to the source file; used in duplicate-definition error labels.
    path: String,
    /// The parsed abstract syntax tree for this file.
    ast: Ast,
}
46
/// A unified compilation unit that owns all artifacts from source to machine code.
///
/// The compilation unit progresses through phases:
/// 1. **New**: Just source files
/// 2. **Parsed**: ASTs and interner from parsing
/// 3. **Lowered**: RIR (untyped intermediate representation)
/// 4. **Analyzed**: AIR (typed IR) and CFGs for all functions
///
/// Each phase builds on the previous one. The unit validates that phases
/// are run in order - you can't analyze before parsing. Phase state is
/// tracked via `Option` fields: `None` means the phase has not run (or its
/// artifact was taken via a `take_*` method).
///
/// # Thread Safety
///
/// The compilation unit uses [`ThreadedRodeo`] for string interning, which is
/// thread-safe. Parallel operations (like per-function CFG construction) can
/// safely share the interner.
#[derive(Debug)]
pub struct CompilationUnit<'src> {
    // === Configuration ===
    /// Compilation options (target, optimization level, etc.)
    options: CompileOptions,

    // === Source ===
    /// Source files being compiled; borrowed for the unit's lifetime `'src`.
    sources: Vec<SourceFile<'src>>,

    // === Phase 1: Parsing ===
    /// Parsed ASTs for each file (populated by `parse()`).
    parsed_files: Option<Vec<ParsedFileData>>,
    /// Merged AST containing all items from all files (populated by `parse()`).
    merged_ast: Option<Ast>,
    /// String interner shared across all files (populated by `parse()`).
    interner: Option<ThreadedRodeo>,
    /// Maps FileId to source file path (for error messages); built in `new()`.
    file_paths: HashMap<FileId, String>,

    // === Phase 2: RIR Generation ===
    /// Untyped intermediate representation (populated by `lower()`).
    rir: Option<Rir>,

    // === Phase 3: Semantic Analysis + CFG ===
    /// Analyzed functions with typed IR and control flow graphs
    /// (populated by `analyze()`).
    functions: Option<Vec<FunctionWithCfg>>,
    /// Type intern pool containing all struct and enum definitions.
    type_pool: Option<TypeInternPool>,
    /// String literals indexed by their string_const index.
    strings: Option<Vec<String>>,
    /// Warnings collected during compilation (sema + CFG construction).
    warnings: Vec<CompileWarning>,
}
97
98impl<'src> CompilationUnit<'src> {
99 /// Create a new compilation unit from source files.
100 ///
101 /// This initializes the unit with source files but does not run any
102 /// compilation phases. Call [`parse()`](Self::parse), [`lower()`](Self::lower),
103 /// and [`analyze()`](Self::analyze) to progress through compilation.
104 ///
105 /// # Arguments
106 ///
107 /// * `sources` - Source files to compile
108 /// * `options` - Compilation options (target, optimization, etc.)
109 pub fn new(sources: Vec<SourceFile<'src>>, options: CompileOptions) -> Self {
110 let file_paths: HashMap<FileId, String> = sources
111 .iter()
112 .map(|s| (s.file_id, s.path.to_string()))
113 .collect();
114
115 Self {
116 options,
117 sources,
118 parsed_files: None,
119 merged_ast: None,
120 interner: None,
121 file_paths,
122 rir: None,
123 functions: None,
124 type_pool: None,
125 strings: None,
126 warnings: Vec::new(),
127 }
128 }
129
130 // =========================================================================
131 // Phase 1: Parsing
132 // =========================================================================
133
134 /// Parse all source files.
135 ///
136 /// This runs lexing and parsing on each source file, producing ASTs.
137 /// The ASTs are then merged into a single program, detecting any
138 /// duplicate symbol definitions.
139 ///
140 /// # Errors
141 ///
142 /// Returns errors if:
143 /// - Any file fails to lex or parse
144 /// - Duplicate function, struct, or enum definitions are found
145 pub fn parse(&mut self) -> MultiErrorResult<()> {
146 let _span = info_span!("parse", file_count = self.sources.len()).entered();
147
148 // Parse all files with a shared interner
149 let mut parsed_files = Vec::with_capacity(self.sources.len());
150 let mut interner = ThreadedRodeo::new();
151
152 for source in &self.sources {
153 let _file_span = info_span!("parse_file", path = %source.path).entered();
154
155 // Create lexer with shared interner and file ID
156 let lexer = Lexer::with_interner_and_file_id(source.source, interner, source.file_id);
157
158 // Tokenize
159 let (tokens, returned_interner) = lexer.tokenize().map_err(CompileErrors::from)?;
160 interner = returned_interner;
161
162 info!(token_count = tokens.len(), "lexing complete");
163
164 // Parse
165 let parser = Parser::new(tokens, interner);
166 let (ast, returned_interner) = parser.parse()?;
167 interner = returned_interner;
168
169 info!(item_count = ast.items.len(), "parsing complete");
170
171 parsed_files.push(ParsedFileData {
172 path: source.path.to_string(),
173 ast,
174 });
175 }
176
177 // Merge symbols and check for duplicates
178 let merged_ast = self.merge_symbols(&parsed_files, &interner)?;
179
180 self.parsed_files = Some(parsed_files);
181 self.merged_ast = Some(merged_ast);
182 self.interner = Some(interner);
183
184 Ok(())
185 }
186
187 /// Merge symbols from all parsed files, checking for duplicates.
188 fn merge_symbols(
189 &self,
190 files: &[ParsedFileData],
191 interner: &ThreadedRodeo,
192 ) -> MultiErrorResult<Ast> {
193 use crate::{Item, Span};
194
195 /// Information about a symbol definition for duplicate detection.
196 struct SymbolDef {
197 span: Span,
198 file_path: String,
199 }
200
201 let _span = info_span!("merge_symbols", file_count = files.len()).entered();
202
203 let mut functions: HashMap<String, SymbolDef> = HashMap::new();
204 let mut structs: HashMap<String, SymbolDef> = HashMap::new();
205 let mut enums: HashMap<String, SymbolDef> = HashMap::new();
206 let mut all_items = Vec::new();
207 let mut errors = Vec::new();
208
209 for file in files {
210 for item in &file.ast.items {
211 match item {
212 Item::Function(func) => {
213 let name = interner.resolve(&func.name.name).to_string();
214 if let Some(first) = functions.get(&name) {
215 errors.push(
216 CompileError::new(
217 ErrorKind::DuplicateTypeDefinition {
218 type_name: format!("function `{}`", name),
219 },
220 func.span,
221 )
222 .with_label(
223 format!("first defined in {}", first.file_path),
224 first.span,
225 ),
226 );
227 } else {
228 functions.insert(
229 name,
230 SymbolDef {
231 span: func.span,
232 file_path: file.path.clone(),
233 },
234 );
235 }
236 }
237 Item::Struct(s) => {
238 let name = interner.resolve(&s.name.name).to_string();
239 if let Some(first) = structs.get(&name) {
240 errors.push(
241 CompileError::new(
242 ErrorKind::DuplicateTypeDefinition {
243 type_name: format!("struct `{}`", name),
244 },
245 s.span,
246 )
247 .with_label(
248 format!("first defined in {}", first.file_path),
249 first.span,
250 ),
251 );
252 } else if let Some(first) = enums.get(&name) {
253 errors.push(
254 CompileError::new(
255 ErrorKind::DuplicateTypeDefinition {
256 type_name: format!(
257 "struct `{}` (conflicts with enum)",
258 name
259 ),
260 },
261 s.span,
262 )
263 .with_label(
264 format!("enum first defined in {}", first.file_path),
265 first.span,
266 ),
267 );
268 } else {
269 structs.insert(
270 name,
271 SymbolDef {
272 span: s.span,
273 file_path: file.path.clone(),
274 },
275 );
276 }
277 }
278 Item::Enum(e) => {
279 let name = interner.resolve(&e.name.name).to_string();
280 if let Some(first) = enums.get(&name) {
281 errors.push(
282 CompileError::new(
283 ErrorKind::DuplicateTypeDefinition {
284 type_name: format!("enum `{}`", name),
285 },
286 e.span,
287 )
288 .with_label(
289 format!("first defined in {}", first.file_path),
290 first.span,
291 ),
292 );
293 } else if let Some(first) = structs.get(&name) {
294 errors.push(
295 CompileError::new(
296 ErrorKind::DuplicateTypeDefinition {
297 type_name: format!(
298 "enum `{}` (conflicts with struct)",
299 name
300 ),
301 },
302 e.span,
303 )
304 .with_label(
305 format!("struct first defined in {}", first.file_path),
306 first.span,
307 ),
308 );
309 } else {
310 enums.insert(
311 name,
312 SymbolDef {
313 span: e.span,
314 file_path: file.path.clone(),
315 },
316 );
317 }
318 }
319 Item::DropFn(_) | Item::Const(_) => {
320 // Validated in Sema
321 }
322 Item::Error(_) => {
323 // Error nodes from parser recovery are skipped
324 }
325 }
326 all_items.push(item.clone());
327 }
328 }
329
330 if !errors.is_empty() {
331 return Err(CompileErrors::from(errors));
332 }
333
334 info!(
335 function_count = functions.len(),
336 struct_count = structs.len(),
337 enum_count = enums.len(),
338 "symbol merging complete"
339 );
340
341 Ok(Ast { items: all_items })
342 }
343
344 // =========================================================================
345 // Phase 2: RIR Generation
346 // =========================================================================
347
348 /// Generate untyped intermediate representation (RIR).
349 ///
350 /// This transforms the merged AST into RIR, which is a more uniform
351 /// representation suitable for semantic analysis.
352 ///
353 /// # Panics
354 ///
355 /// Panics if called before [`parse()`](Self::parse).
356 pub fn lower(&mut self) -> MultiErrorResult<()> {
357 let ast = self
358 .merged_ast
359 .as_ref()
360 .expect("lower() called before parse()");
361 let interner = self.interner.as_ref().expect("interner not initialized");
362
363 let _span = info_span!("astgen").entered();
364
365 let astgen = AstGen::new(ast, interner);
366 let rir = astgen.generate();
367
368 info!(instruction_count = rir.len(), "RIR generation complete");
369
370 self.rir = Some(rir);
371 Ok(())
372 }
373
374 // =========================================================================
375 // Phase 3: Semantic Analysis + CFG Construction
376 // =========================================================================
377
378 /// Perform semantic analysis and build control flow graphs.
379 ///
380 /// This runs type checking, symbol resolution, and other semantic checks,
381 /// then builds CFGs for each function. Optimizations are applied based
382 /// on the configured optimization level.
383 ///
384 /// # Panics
385 ///
386 /// Panics if called before [`lower()`](Self::lower).
387 pub fn analyze(&mut self) -> MultiErrorResult<()> {
388 let rir = self.rir.as_ref().expect("analyze() called before lower()");
389 let interner = self.interner.as_ref().expect("interner not initialized");
390
391 // Semantic analysis
392 let sema_output = {
393 let _span = info_span!("sema").entered();
394 let mut sema = Sema::new(rir, interner, self.options.preview_features.clone());
395 sema.set_file_paths(self.file_paths.clone());
396 sema.set_suppress_comptime_dbg_print(self.options.capture_comptime_dbg);
397 let output = sema.analyze_all()?;
398 info!(
399 function_count = output.functions.len(),
400 struct_count = output.type_pool.stats().struct_count,
401 "semantic analysis complete"
402 );
403 output
404 };
405
406 // Synthesize drop glue functions
407 let drop_glue_functions =
408 crate::drop_glue::synthesize_drop_glue(&sema_output.type_pool, interner);
409
410 // Combine user functions with drop glue, filtering out comptime-only functions
411 let all_functions: Vec<_> = sema_output
412 .functions
413 .into_iter()
414 .filter(|f| f.air.return_type() != Type::COMPTIME_TYPE)
415 .chain(drop_glue_functions)
416 .collect();
417
418 // Build CFGs in parallel
419 let (functions, cfg_warnings) = self.build_cfgs(all_functions, &sema_output.type_pool);
420
421 self.functions = Some(functions);
422 self.type_pool = Some(sema_output.type_pool);
423 self.strings = Some(sema_output.strings);
424 self.warnings.extend(sema_output.warnings);
425 self.warnings.extend(cfg_warnings);
426
427 Ok(())
428 }
429
430 /// Build CFGs for all functions in parallel.
431 fn build_cfgs(
432 &self,
433 functions: Vec<AnalyzedFunction>,
434 type_pool: &TypeInternPool,
435 ) -> (Vec<FunctionWithCfg>, Vec<CompileWarning>) {
436 let _span = info_span!("cfg_construction").entered();
437
438 let results: Vec<(FunctionWithCfg, Vec<CompileWarning>)> = functions
439 .into_par_iter()
440 .map(|func| {
441 let cfg_output = CfgBuilder::build(&func, type_pool);
442
443 (
444 FunctionWithCfg {
445 analyzed: func,
446 cfg: cfg_output.cfg,
447 },
448 cfg_output.warnings,
449 )
450 })
451 .collect();
452
453 let mut functions = Vec::with_capacity(results.len());
454 let mut warnings = Vec::new();
455 for (func, func_warnings) in results {
456 functions.push(func);
457 warnings.extend(func_warnings);
458 }
459
460 info!(
461 function_count = functions.len(),
462 "CFG construction complete"
463 );
464
465 (functions, warnings)
466 }
467
468 // =========================================================================
469 // Phase 4: Code Generation + Linking
470 // =========================================================================
471
472 /// Generate machine code and link into an executable.
473 ///
474 /// This is the final compilation phase. It generates machine code for
475 /// all functions and links them into an executable binary.
476 ///
477 /// # Panics
478 ///
479 /// Panics if called before [`analyze()`](Self::analyze).
480 pub fn compile(&self) -> MultiErrorResult<CompileOutput> {
481 let functions = self
482 .functions
483 .as_ref()
484 .expect("compile() called before analyze()");
485 let type_pool = self.type_pool.as_ref().expect("type_pool not available");
486 let strings = self.strings.as_ref().expect("strings not available");
487 let interner = self.interner.as_ref().expect("interner not available");
488
489 compile_backend(
490 functions,
491 type_pool,
492 strings,
493 interner,
494 &self.options,
495 &self.warnings,
496 )
497 }
498
499 // =========================================================================
500 // Convenience Methods
501 // =========================================================================
502
503 /// Run all frontend phases (parse, lower, analyze).
504 ///
505 /// This is a convenience method that runs the complete frontend pipeline.
506 /// Equivalent to calling `parse()`, `lower()`, and `analyze()` in sequence.
507 pub fn run_frontend(&mut self) -> MultiErrorResult<()> {
508 self.parse()?;
509 self.lower()?;
510 self.analyze()?;
511 Ok(())
512 }
513
514 /// Run all phases and produce a compiled binary.
515 ///
516 /// This is a convenience method that runs the complete compilation pipeline.
517 /// Equivalent to calling `run_frontend()` followed by `compile()`.
518 pub fn run_all(&mut self) -> MultiErrorResult<CompileOutput> {
519 self.run_frontend()?;
520 self.compile()
521 }
522
523 /// Check if parsing has been completed.
524 pub fn is_parsed(&self) -> bool {
525 self.merged_ast.is_some()
526 }
527
528 /// Check if RIR generation has been completed.
529 pub fn is_lowered(&self) -> bool {
530 self.rir.is_some()
531 }
532
533 /// Check if semantic analysis has been completed.
534 pub fn is_analyzed(&self) -> bool {
535 self.functions.is_some()
536 }
537
538 // =========================================================================
539 // Accessors
540 // =========================================================================
541
542 /// Get the compilation options.
543 pub fn options(&self) -> &CompileOptions {
544 &self.options
545 }
546
547 /// Get the merged AST (after parsing).
548 ///
549 /// # Panics
550 ///
551 /// Panics if called before [`parse()`](Self::parse).
552 pub fn ast(&self) -> &Ast {
553 self.merged_ast
554 .as_ref()
555 .expect("ast() called before parse()")
556 }
557
558 /// Get the string interner.
559 ///
560 /// # Panics
561 ///
562 /// Panics if called before [`parse()`](Self::parse).
563 pub fn interner(&self) -> &ThreadedRodeo {
564 self.interner
565 .as_ref()
566 .expect("interner() called before parse()")
567 }
568
569 /// Get the RIR (after lowering).
570 ///
571 /// # Panics
572 ///
573 /// Panics if called before [`lower()`](Self::lower).
574 pub fn rir(&self) -> &Rir {
575 self.rir.as_ref().expect("rir() called before lower()")
576 }
577
578 /// Get the analyzed functions with CFGs (after analysis).
579 ///
580 /// # Panics
581 ///
582 /// Panics if called before [`analyze()`](Self::analyze).
583 pub fn functions(&self) -> &[FunctionWithCfg] {
584 self.functions
585 .as_ref()
586 .expect("functions() called before analyze()")
587 }
588
589 /// Get the type pool (after analysis).
590 ///
591 /// # Panics
592 ///
593 /// Panics if called before [`analyze()`](Self::analyze).
594 pub fn type_pool(&self) -> &TypeInternPool {
595 self.type_pool
596 .as_ref()
597 .expect("type_pool() called before analyze()")
598 }
599
600 /// Get string literals (after analysis).
601 ///
602 /// # Panics
603 ///
604 /// Panics if called before [`analyze()`](Self::analyze).
605 pub fn strings(&self) -> &[String] {
606 self.strings
607 .as_ref()
608 .expect("strings() called before analyze()")
609 }
610
611 /// Get all warnings collected during compilation.
612 pub fn warnings(&self) -> &[CompileWarning] {
613 &self.warnings
614 }
615
616 /// Get the file paths map.
617 pub fn file_paths(&self) -> &HashMap<FileId, String> {
618 &self.file_paths
619 }
620
621 /// Take the interner out of the compilation unit.
622 ///
623 /// This is useful when you need ownership of the interner (e.g., for
624 /// code generation).
625 ///
626 /// # Panics
627 ///
628 /// Panics if called before [`parse()`](Self::parse) or if the interner
629 /// has already been taken.
630 pub fn take_interner(&mut self) -> ThreadedRodeo {
631 self.interner
632 .take()
633 .expect("interner not available (not parsed or already taken)")
634 }
635
636 /// Take the functions out of the compilation unit.
637 ///
638 /// # Panics
639 ///
640 /// Panics if called before [`analyze()`](Self::analyze) or if the
641 /// functions have already been taken.
642 pub fn take_functions(&mut self) -> Vec<FunctionWithCfg> {
643 self.functions
644 .take()
645 .expect("functions not available (not analyzed or already taken)")
646 }
647
648 /// Take the type pool out of the compilation unit.
649 ///
650 /// # Panics
651 ///
652 /// Panics if called before [`analyze()`](Self::analyze) or if the
653 /// type pool has already been taken.
654 pub fn take_type_pool(&mut self) -> TypeInternPool {
655 self.type_pool
656 .take()
657 .expect("type_pool not available (not analyzed or already taken)")
658 }
659
660 /// Take the strings out of the compilation unit.
661 ///
662 /// # Panics
663 ///
664 /// Panics if called before [`analyze()`](Self::analyze) or if the
665 /// strings have already been taken.
666 pub fn take_strings(&mut self) -> Vec<String> {
667 self.strings
668 .take()
669 .expect("strings not available (not analyzed or already taken)")
670 }
671
672 /// Take the warnings out of the compilation unit.
673 pub fn take_warnings(&mut self) -> Vec<CompileWarning> {
674 std::mem::take(&mut self.warnings)
675 }
676}
677
#[cfg(test)]
mod tests {
    use super::*;
    use crate::FileId;

    /// Wrap a single source string as a one-file source list for a unit.
    fn make_sources(source: &str) -> Vec<SourceFile<'_>> {
        vec![SourceFile::new("<test>", source, FileId::new(1))]
    }

    /// Full frontend pipeline on a trivial program: phase flags flip in
    /// order and exactly one function comes out the other end.
    #[test]
    fn test_compilation_unit_basic() {
        let sources = make_sources("fn main() -> i32 { 42 }");
        let mut unit = CompilationUnit::new(sources, CompileOptions::default());

        assert!(!unit.is_parsed());
        assert!(!unit.is_lowered());
        assert!(!unit.is_analyzed());

        unit.run_frontend().unwrap();

        assert!(unit.is_parsed());
        assert!(unit.is_lowered());
        assert!(unit.is_analyzed());
        assert_eq!(unit.functions().len(), 1);
    }

    /// Running phases one at a time updates exactly one phase flag per call.
    #[test]
    fn test_phase_ordering() {
        let sources = make_sources("fn main() -> i32 { 42 }");
        let mut unit = CompilationUnit::new(sources, CompileOptions::default());

        // Parse first
        unit.parse().unwrap();
        assert!(unit.is_parsed());
        assert!(!unit.is_lowered());

        // Then lower
        unit.lower().unwrap();
        assert!(unit.is_lowered());
        assert!(!unit.is_analyzed());

        // Then analyze
        unit.analyze().unwrap();
        assert!(unit.is_analyzed());
    }

    /// The same function name in two files is rejected at parse/merge time,
    /// and the error message names the duplicated function.
    #[test]
    fn test_duplicate_function_error() {
        let sources = vec![
            SourceFile::new("a.gruel", "fn foo() -> i32 { 1 }", FileId::new(1)),
            SourceFile::new("b.gruel", "fn foo() -> i32 { 2 }", FileId::new(2)),
        ];
        let mut unit = CompilationUnit::new(sources, CompileOptions::default());

        let result = unit.parse();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.to_string().contains("function"));
    }

    /// An unused local produces exactly one "unused" warning on the unit.
    #[test]
    fn test_warnings_collected() {
        let sources = make_sources("fn main() -> i32 { let x = 42; 0 }");
        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
        unit.run_frontend().unwrap();

        assert_eq!(unit.warnings().len(), 1);
        assert!(unit.warnings()[0].to_string().contains("unused"));
    }

    /// Calling lower() without parse() hits the documented panic.
    #[test]
    #[should_panic(expected = "lower() called before parse()")]
    fn test_lower_before_parse_panics() {
        let sources = make_sources("fn main() -> i32 { 42 }");
        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
        unit.lower().unwrap();
    }

    /// Calling analyze() without lower() hits the documented panic.
    #[test]
    #[should_panic(expected = "analyze() called before lower()")]
    fn test_analyze_before_lower_panics() {
        let sources = make_sources("fn main() -> i32 { 42 }");
        let mut unit = CompilationUnit::new(sources, CompileOptions::default());
        unit.parse().unwrap();
        unit.analyze().unwrap();
    }

    /// Frontend smoke test with -O2 options set. Note this only runs the
    /// frontend; the LLVM pass pipeline and resulting binary are exercised
    /// by separate spec tests that execute the compiled program.
    #[test]
    fn test_llvm_optimization_wiring() {
        use crate::{CompileOptions, OptLevel};
        let sources = make_sources("fn main() -> i32 { let x = 2 + 3; x }");
        let options = CompileOptions {
            opt_level: OptLevel::O2,
            ..CompileOptions::default()
        };
        let mut unit = CompilationUnit::new(sources, options);
        unit.run_frontend().unwrap();
        // The frontend should succeed; backend (LLVM codegen) is tested separately
        // via spec tests that run the resulting binary.
        assert_eq!(unit.functions().len(), 1);
    }
}
781}