Skip to main content

gruel_lexer/
lib.rs

1//! Lexer for the Gruel programming language.
2//!
3//! Converts source text into a sequence of tokens for parsing.
4//! Uses logos for efficient tokenization.
5
6mod logos_lexer;
7
8pub use gruel_util::FileId;
9use gruel_util::Span;
10pub use lasso::{Key, Spur, ThreadedRodeo};
11pub use logos_lexer::LogosLexer as Lexer;
12
13/// Token kinds in the Gruel language.
14///
15/// This enum is `Copy` since all variants contain only small, copyable data:
16/// - Most variants are unit (no data)
17/// - `Int` contains a `u64` (8 bytes)
18/// - `Ident` and `String` contain a `Symbol` (4 bytes, an interned string handle)
19#[derive(Debug, Clone, Copy, PartialEq, Eq)]
20pub enum TokenKind {
21    // Keywords
22    Fn,
23    Let,
24    Mut,
25    If,
26    Else,
27    Match,
28    While,
29    For,
30    In,
31    Loop,
32    Break,
33    Continue,
34    Return,
35    True,
36    False,
37    Struct,
38    Enum,
39    Interface,
40    SelfValue,        // self (value, not type)
41    SelfType,         // Self (type, not value) - used in methods to refer to the struct type
42    Comptime,         // comptime (compile-time evaluation)
43    ComptimeUnroll,   // comptime_unroll (compile-time loop unrolling)
44    Derive,           // derive (user-defined derive items, ADR-0058)
45    Pub,              // pub visibility modifier (module system)
46    Const,            // const declaration (module system re-exports)
47    Checked,          // checked { } block for unchecked operations
48    LinkExtern,       // link_extern("…") { … } FFI block (ADR-0085)
49    StaticLinkExtern, // static_link_extern("…") { … } FFI block (ADR-0086)
50
51    // Type keywords
52    I8,
53    I16,
54    I32,
55    I64,
56    Isize,
57    U8,
58    U16,
59    U32,
60    U64,
61    Usize,
62    F16,
63    F32,
64    F64,
65    Bool,
66    Char,
67
68    // Patterns
69    Underscore, // _ (wildcard pattern)
70
71    // Literals
72    Int(u64),
73    /// Floating-point literal, stored as f64 bits for Eq/Copy compatibility.
74    /// Use `f64::from_bits()` to recover the value.
75    Float(u64),
76    String(Spur),
77    /// Char literal — the resolved Unicode scalar value (ADR-0071).
78    CharLit(u32),
79
80    // Identifiers
81    Ident(Spur),
82
83    // Operators
84    Plus,     // +
85    Minus,    // -
86    Star,     // *
87    Slash,    // /
88    Percent,  // %
89    Eq,       // =
90    EqEq,     // ==
91    Bang,     // !
92    BangEq,   // !=
93    Lt,       // <
94    Gt,       // >
95    LtEq,     // <=
96    GtEq,     // >=
97    AmpAmp,   // &&
98    PipePipe, // ||
99    Amp,      // &
100    Pipe,     // |
101    Caret,    // ^
102    Tilde,    // ~
103    LtLt,     // <<
104    GtGt,     // >>
105
106    // Punctuation
107    LParen,
108    RParen,
109    LBrace,
110    RBrace,
111    LBracket,   // [
112    RBracket,   // ]
113    Arrow,      // ->
114    FatArrow,   // =>
115    ColonColon, // ::
116    Colon,
117    Semi,
118    Comma,
119    Dot, // .
120    At,  // @
121
122    // Builtins
123    AtImport(Spur), // @import - contains interned "import" string
124
125    /// `///` line doc comment (ADR-0089). Carries the interned body text
126    /// (after the marker, with at most one leading space stripped).
127    LineDoc(Spur),
128
129    // Special
130    Eof,
131}
132
133impl TokenKind {
134    /// Get a human-readable name for this token kind.
135    pub fn name(&self) -> &'static str {
136        match self {
137            TokenKind::Fn => "'fn'",
138            TokenKind::Let => "'let'",
139            TokenKind::Mut => "'mut'",
140            TokenKind::If => "'if'",
141            TokenKind::Else => "'else'",
142            TokenKind::Match => "'match'",
143            TokenKind::While => "'while'",
144            TokenKind::For => "'for'",
145            TokenKind::In => "'in'",
146            TokenKind::Loop => "'loop'",
147            TokenKind::Break => "'break'",
148            TokenKind::Continue => "'continue'",
149            TokenKind::Return => "'return'",
150            TokenKind::True => "'true'",
151            TokenKind::False => "'false'",
152            TokenKind::Struct => "'struct'",
153            TokenKind::Enum => "'enum'",
154            TokenKind::Interface => "'interface'",
155            TokenKind::SelfValue => "'self'",
156            TokenKind::SelfType => "'Self'",
157            TokenKind::Comptime => "'comptime'",
158            TokenKind::ComptimeUnroll => "'comptime_unroll'",
159            TokenKind::Derive => "'derive'",
160            TokenKind::Pub => "'pub'",
161            TokenKind::Const => "'const'",
162            TokenKind::Checked => "'checked'",
163            TokenKind::LinkExtern => "'link_extern'",
164            TokenKind::StaticLinkExtern => "'static_link_extern'",
165            TokenKind::I8 => "type 'i8'",
166            TokenKind::I16 => "type 'i16'",
167            TokenKind::I32 => "type 'i32'",
168            TokenKind::I64 => "type 'i64'",
169            TokenKind::Isize => "type 'isize'",
170            TokenKind::U8 => "type 'u8'",
171            TokenKind::U16 => "type 'u16'",
172            TokenKind::U32 => "type 'u32'",
173            TokenKind::U64 => "type 'u64'",
174            TokenKind::Usize => "type 'usize'",
175            TokenKind::F16 => "type 'f16'",
176            TokenKind::F32 => "type 'f32'",
177            TokenKind::F64 => "type 'f64'",
178            TokenKind::Bool => "type 'bool'",
179            TokenKind::Char => "type 'char'",
180            TokenKind::Underscore => "'_'",
181            TokenKind::Int(_) => "integer",
182            TokenKind::Float(_) => "float",
183            TokenKind::String(_) => "string",
184            TokenKind::CharLit(_) => "char literal",
185            TokenKind::Ident(_) => "identifier",
186            TokenKind::Plus => "'+'",
187            TokenKind::Minus => "'-'",
188            TokenKind::Star => "'*'",
189            TokenKind::Slash => "'/'",
190            TokenKind::Percent => "'%'",
191            TokenKind::Eq => "'='",
192            TokenKind::EqEq => "'=='",
193            TokenKind::Bang => "'!'",
194            TokenKind::BangEq => "'!='",
195            TokenKind::Lt => "'<'",
196            TokenKind::Gt => "'>'",
197            TokenKind::LtEq => "'<='",
198            TokenKind::GtEq => "'>='",
199            TokenKind::AmpAmp => "'&&'",
200            TokenKind::PipePipe => "'||'",
201            TokenKind::Amp => "'&'",
202            TokenKind::Pipe => "'|'",
203            TokenKind::Caret => "'^'",
204            TokenKind::Tilde => "'~'",
205            TokenKind::LtLt => "'<<'",
206            TokenKind::GtGt => "'>>'",
207            TokenKind::LParen => "'('",
208            TokenKind::RParen => "')'",
209            TokenKind::LBrace => "'{'",
210            TokenKind::RBrace => "'}'",
211            TokenKind::LBracket => "'['",
212            TokenKind::RBracket => "']'",
213            TokenKind::Arrow => "'->'",
214            TokenKind::FatArrow => "'=>'",
215            TokenKind::ColonColon => "'::'",
216            TokenKind::Colon => "':'",
217            TokenKind::Semi => "';'",
218            TokenKind::Comma => "','",
219            TokenKind::Dot => "'.'",
220            TokenKind::At => "'@'",
221            TokenKind::AtImport(_) => "'@import'",
222            TokenKind::LineDoc(_) => "doc comment",
223            TokenKind::Eof => "end of file",
224        }
225    }
226}
227
228/// A token with its kind and source span.
229#[derive(Debug, Clone)]
230pub struct Token {
231    pub kind: TokenKind,
232    pub span: Span,
233}
234
235impl std::fmt::Display for Token {
236    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
237        write!(
238            f,
239            "{:>4}..{:<4} {}",
240            self.span.start, self.span.end, self.kind
241        )
242    }
243}
244
245impl std::fmt::Display for TokenKind {
246    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
247        match self {
248            TokenKind::Fn => write!(f, "FN"),
249            TokenKind::Let => write!(f, "LET"),
250            TokenKind::Mut => write!(f, "MUT"),
251            TokenKind::If => write!(f, "IF"),
252            TokenKind::Else => write!(f, "ELSE"),
253            TokenKind::Match => write!(f, "MATCH"),
254            TokenKind::While => write!(f, "WHILE"),
255            TokenKind::For => write!(f, "FOR"),
256            TokenKind::In => write!(f, "IN"),
257            TokenKind::Loop => write!(f, "LOOP"),
258            TokenKind::Break => write!(f, "BREAK"),
259            TokenKind::Continue => write!(f, "CONTINUE"),
260            TokenKind::Return => write!(f, "RETURN"),
261            TokenKind::True => write!(f, "TRUE"),
262            TokenKind::False => write!(f, "FALSE"),
263            TokenKind::Struct => write!(f, "STRUCT"),
264            TokenKind::Enum => write!(f, "ENUM"),
265            TokenKind::Interface => write!(f, "INTERFACE"),
266            TokenKind::SelfValue => write!(f, "SELF"),
267            TokenKind::SelfType => write!(f, "SELFTYPE"),
268            TokenKind::Comptime => write!(f, "COMPTIME"),
269            TokenKind::ComptimeUnroll => write!(f, "COMPTIME_UNROLL"),
270            TokenKind::Derive => write!(f, "DERIVE"),
271            TokenKind::Pub => write!(f, "PUB"),
272            TokenKind::Const => write!(f, "CONST"),
273            TokenKind::Checked => write!(f, "CHECKED"),
274            TokenKind::LinkExtern => write!(f, "LINK_EXTERN"),
275            TokenKind::StaticLinkExtern => write!(f, "STATIC_LINK_EXTERN"),
276            TokenKind::I8 => write!(f, "TYPE(i8)"),
277            TokenKind::I16 => write!(f, "TYPE(i16)"),
278            TokenKind::I32 => write!(f, "TYPE(i32)"),
279            TokenKind::I64 => write!(f, "TYPE(i64)"),
280            TokenKind::Isize => write!(f, "TYPE(isize)"),
281            TokenKind::U8 => write!(f, "TYPE(u8)"),
282            TokenKind::U16 => write!(f, "TYPE(u16)"),
283            TokenKind::U32 => write!(f, "TYPE(u32)"),
284            TokenKind::U64 => write!(f, "TYPE(u64)"),
285            TokenKind::Usize => write!(f, "TYPE(usize)"),
286            TokenKind::F16 => write!(f, "TYPE(f16)"),
287            TokenKind::F32 => write!(f, "TYPE(f32)"),
288            TokenKind::F64 => write!(f, "TYPE(f64)"),
289            TokenKind::Bool => write!(f, "TYPE(bool)"),
290            TokenKind::Char => write!(f, "TYPE(char)"),
291            TokenKind::Underscore => write!(f, "UNDERSCORE"),
292            TokenKind::Int(v) => write!(f, "INT({})", v),
293            TokenKind::Float(bits) => write!(f, "FLOAT({})", f64::from_bits(*bits)),
294            TokenKind::String(s) => write!(f, "STRING(sym:{})", s.into_usize()),
295            TokenKind::CharLit(c) => write!(f, "CHAR(U+{:04X})", c),
296            TokenKind::Ident(s) => write!(f, "IDENT(sym:{})", s.into_usize()),
297            TokenKind::Plus => write!(f, "PLUS"),
298            TokenKind::Minus => write!(f, "MINUS"),
299            TokenKind::Star => write!(f, "STAR"),
300            TokenKind::Slash => write!(f, "SLASH"),
301            TokenKind::Percent => write!(f, "PERCENT"),
302            TokenKind::Eq => write!(f, "EQ"),
303            TokenKind::EqEq => write!(f, "EQEQ"),
304            TokenKind::Bang => write!(f, "BANG"),
305            TokenKind::BangEq => write!(f, "BANGEQ"),
306            TokenKind::Lt => write!(f, "LT"),
307            TokenKind::Gt => write!(f, "GT"),
308            TokenKind::LtEq => write!(f, "LTEQ"),
309            TokenKind::GtEq => write!(f, "GTEQ"),
310            TokenKind::AmpAmp => write!(f, "AMPAMP"),
311            TokenKind::PipePipe => write!(f, "PIPEPIPE"),
312            TokenKind::Amp => write!(f, "AMP"),
313            TokenKind::Pipe => write!(f, "PIPE"),
314            TokenKind::Caret => write!(f, "CARET"),
315            TokenKind::Tilde => write!(f, "TILDE"),
316            TokenKind::LtLt => write!(f, "LTLT"),
317            TokenKind::GtGt => write!(f, "GTGT"),
318            TokenKind::LParen => write!(f, "LPAREN"),
319            TokenKind::RParen => write!(f, "RPAREN"),
320            TokenKind::LBrace => write!(f, "LBRACE"),
321            TokenKind::RBrace => write!(f, "RBRACE"),
322            TokenKind::LBracket => write!(f, "LBRACKET"),
323            TokenKind::RBracket => write!(f, "RBRACKET"),
324            TokenKind::Arrow => write!(f, "ARROW"),
325            TokenKind::FatArrow => write!(f, "FATARROW"),
326            TokenKind::ColonColon => write!(f, "COLONCOLON"),
327            TokenKind::Colon => write!(f, "COLON"),
328            TokenKind::Semi => write!(f, "SEMI"),
329            TokenKind::Comma => write!(f, "COMMA"),
330            TokenKind::Dot => write!(f, "DOT"),
331            TokenKind::At => write!(f, "AT"),
332            TokenKind::AtImport(_) => write!(f, "AT_IMPORT"),
333            TokenKind::LineDoc(s) => write!(f, "LINEDOC(sym:{})", s.into_usize()),
334            TokenKind::Eof => write!(f, "EOF"),
335        }
336    }
337}