Skip to main content

gruel_lexer/
lib.rs

//! Lexer for the Gruel programming language.
//!
//! Converts source text into a sequence of tokens for parsing.
//! Uses logos for efficient tokenization.
5
6mod logos_lexer;
7
8pub use gruel_span::FileId;
9use gruel_span::Span;
10pub use lasso::{Key, Spur, ThreadedRodeo};
11pub use logos_lexer::LogosLexer as Lexer;
12
13/// Token kinds in the Gruel language.
14///
15/// This enum is `Copy` since all variants contain only small, copyable data:
16/// - Most variants are unit (no data)
17/// - `Int` contains a `u64` (8 bytes)
18/// - `Ident` and `String` contain a `Symbol` (4 bytes, an interned string handle)
19#[derive(Debug, Clone, Copy, PartialEq, Eq)]
20pub enum TokenKind {
21    // Keywords
22    Fn,
23    Let,
24    Mut,
25    Inout,
26    Borrow,
27    If,
28    Else,
29    Match,
30    While,
31    For,
32    In,
33    Loop,
34    Break,
35    Continue,
36    Return,
37    True,
38    False,
39    Struct,
40    Enum,
41    Drop,
42    Linear,         // linear struct modifier
43    SelfValue,      // self (value, not type)
44    SelfType,       // Self (type, not value) - used in methods to refer to the struct type
45    Comptime,       // comptime (compile-time evaluation)
46    ComptimeUnroll, // comptime_unroll (compile-time loop unrolling)
47    Pub,            // pub visibility modifier (module system)
48    Const,          // const declaration (module system re-exports)
49    Checked,        // checked { } block for unchecked operations
50    Unchecked,      // unchecked fn modifier
51    Ptr,            // ptr const T / ptr mut T pointer types
52
53    // Type keywords
54    I8,
55    I16,
56    I32,
57    I64,
58    Isize,
59    U8,
60    U16,
61    U32,
62    U64,
63    Usize,
64    F16,
65    F32,
66    F64,
67    Bool,
68
69    // Patterns
70    Underscore, // _ (wildcard pattern)
71
72    // Literals
73    Int(u64),
74    /// Floating-point literal, stored as f64 bits for Eq/Copy compatibility.
75    /// Use `f64::from_bits()` to recover the value.
76    Float(u64),
77    String(Spur),
78
79    // Identifiers
80    Ident(Spur),
81
82    // Operators
83    Plus,     // +
84    Minus,    // -
85    Star,     // *
86    Slash,    // /
87    Percent,  // %
88    Eq,       // =
89    EqEq,     // ==
90    Bang,     // !
91    BangEq,   // !=
92    Lt,       // <
93    Gt,       // >
94    LtEq,     // <=
95    GtEq,     // >=
96    AmpAmp,   // &&
97    PipePipe, // ||
98    Amp,      // &
99    Pipe,     // |
100    Caret,    // ^
101    Tilde,    // ~
102    LtLt,     // <<
103    GtGt,     // >>
104
105    // Punctuation
106    LParen,
107    RParen,
108    LBrace,
109    RBrace,
110    LBracket,   // [
111    RBracket,   // ]
112    Arrow,      // ->
113    FatArrow,   // =>
114    ColonColon, // ::
115    Colon,
116    Semi,
117    Comma,
118    Dot, // .
119    At,  // @
120
121    // Builtins
122    AtImport(Spur), // @import - contains interned "import" string
123
124    // Special
125    Eof,
126}
127
128impl TokenKind {
129    /// Get a human-readable name for this token kind.
130    pub fn name(&self) -> &'static str {
131        match self {
132            TokenKind::Fn => "'fn'",
133            TokenKind::Let => "'let'",
134            TokenKind::Mut => "'mut'",
135            TokenKind::Inout => "'inout'",
136            TokenKind::Borrow => "'borrow'",
137            TokenKind::If => "'if'",
138            TokenKind::Else => "'else'",
139            TokenKind::Match => "'match'",
140            TokenKind::While => "'while'",
141            TokenKind::For => "'for'",
142            TokenKind::In => "'in'",
143            TokenKind::Loop => "'loop'",
144            TokenKind::Break => "'break'",
145            TokenKind::Continue => "'continue'",
146            TokenKind::Return => "'return'",
147            TokenKind::True => "'true'",
148            TokenKind::False => "'false'",
149            TokenKind::Struct => "'struct'",
150            TokenKind::Enum => "'enum'",
151            TokenKind::Drop => "'drop'",
152            TokenKind::Linear => "'linear'",
153            TokenKind::SelfValue => "'self'",
154            TokenKind::SelfType => "'Self'",
155            TokenKind::Comptime => "'comptime'",
156            TokenKind::ComptimeUnroll => "'comptime_unroll'",
157            TokenKind::Pub => "'pub'",
158            TokenKind::Const => "'const'",
159            TokenKind::Checked => "'checked'",
160            TokenKind::Unchecked => "'unchecked'",
161            TokenKind::Ptr => "'ptr'",
162            TokenKind::I8 => "type 'i8'",
163            TokenKind::I16 => "type 'i16'",
164            TokenKind::I32 => "type 'i32'",
165            TokenKind::I64 => "type 'i64'",
166            TokenKind::Isize => "type 'isize'",
167            TokenKind::U8 => "type 'u8'",
168            TokenKind::U16 => "type 'u16'",
169            TokenKind::U32 => "type 'u32'",
170            TokenKind::U64 => "type 'u64'",
171            TokenKind::Usize => "type 'usize'",
172            TokenKind::F16 => "type 'f16'",
173            TokenKind::F32 => "type 'f32'",
174            TokenKind::F64 => "type 'f64'",
175            TokenKind::Bool => "type 'bool'",
176            TokenKind::Underscore => "'_'",
177            TokenKind::Int(_) => "integer",
178            TokenKind::Float(_) => "float",
179            TokenKind::String(_) => "string",
180            TokenKind::Ident(_) => "identifier",
181            TokenKind::Plus => "'+'",
182            TokenKind::Minus => "'-'",
183            TokenKind::Star => "'*'",
184            TokenKind::Slash => "'/'",
185            TokenKind::Percent => "'%'",
186            TokenKind::Eq => "'='",
187            TokenKind::EqEq => "'=='",
188            TokenKind::Bang => "'!'",
189            TokenKind::BangEq => "'!='",
190            TokenKind::Lt => "'<'",
191            TokenKind::Gt => "'>'",
192            TokenKind::LtEq => "'<='",
193            TokenKind::GtEq => "'>='",
194            TokenKind::AmpAmp => "'&&'",
195            TokenKind::PipePipe => "'||'",
196            TokenKind::Amp => "'&'",
197            TokenKind::Pipe => "'|'",
198            TokenKind::Caret => "'^'",
199            TokenKind::Tilde => "'~'",
200            TokenKind::LtLt => "'<<'",
201            TokenKind::GtGt => "'>>'",
202            TokenKind::LParen => "'('",
203            TokenKind::RParen => "')'",
204            TokenKind::LBrace => "'{'",
205            TokenKind::RBrace => "'}'",
206            TokenKind::LBracket => "'['",
207            TokenKind::RBracket => "']'",
208            TokenKind::Arrow => "'->'",
209            TokenKind::FatArrow => "'=>'",
210            TokenKind::ColonColon => "'::'",
211            TokenKind::Colon => "':'",
212            TokenKind::Semi => "';'",
213            TokenKind::Comma => "','",
214            TokenKind::Dot => "'.'",
215            TokenKind::At => "'@'",
216            TokenKind::AtImport(_) => "'@import'",
217            TokenKind::Eof => "end of file",
218        }
219    }
220}
221
222/// A token with its kind and source span.
223#[derive(Debug, Clone)]
224pub struct Token {
225    pub kind: TokenKind,
226    pub span: Span,
227}
228
229impl std::fmt::Display for Token {
230    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
231        write!(
232            f,
233            "{:>4}..{:<4} {}",
234            self.span.start, self.span.end, self.kind
235        )
236    }
237}
238
239impl std::fmt::Display for TokenKind {
240    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241        match self {
242            TokenKind::Fn => write!(f, "FN"),
243            TokenKind::Let => write!(f, "LET"),
244            TokenKind::Mut => write!(f, "MUT"),
245            TokenKind::Inout => write!(f, "INOUT"),
246            TokenKind::Borrow => write!(f, "BORROW"),
247            TokenKind::If => write!(f, "IF"),
248            TokenKind::Else => write!(f, "ELSE"),
249            TokenKind::Match => write!(f, "MATCH"),
250            TokenKind::While => write!(f, "WHILE"),
251            TokenKind::For => write!(f, "FOR"),
252            TokenKind::In => write!(f, "IN"),
253            TokenKind::Loop => write!(f, "LOOP"),
254            TokenKind::Break => write!(f, "BREAK"),
255            TokenKind::Continue => write!(f, "CONTINUE"),
256            TokenKind::Return => write!(f, "RETURN"),
257            TokenKind::True => write!(f, "TRUE"),
258            TokenKind::False => write!(f, "FALSE"),
259            TokenKind::Struct => write!(f, "STRUCT"),
260            TokenKind::Enum => write!(f, "ENUM"),
261            TokenKind::Drop => write!(f, "DROP"),
262            TokenKind::Linear => write!(f, "LINEAR"),
263            TokenKind::SelfValue => write!(f, "SELF"),
264            TokenKind::SelfType => write!(f, "SELFTYPE"),
265            TokenKind::Comptime => write!(f, "COMPTIME"),
266            TokenKind::ComptimeUnroll => write!(f, "COMPTIME_UNROLL"),
267            TokenKind::Pub => write!(f, "PUB"),
268            TokenKind::Const => write!(f, "CONST"),
269            TokenKind::Checked => write!(f, "CHECKED"),
270            TokenKind::Unchecked => write!(f, "UNCHECKED"),
271            TokenKind::Ptr => write!(f, "PTR"),
272            TokenKind::I8 => write!(f, "TYPE(i8)"),
273            TokenKind::I16 => write!(f, "TYPE(i16)"),
274            TokenKind::I32 => write!(f, "TYPE(i32)"),
275            TokenKind::I64 => write!(f, "TYPE(i64)"),
276            TokenKind::Isize => write!(f, "TYPE(isize)"),
277            TokenKind::U8 => write!(f, "TYPE(u8)"),
278            TokenKind::U16 => write!(f, "TYPE(u16)"),
279            TokenKind::U32 => write!(f, "TYPE(u32)"),
280            TokenKind::U64 => write!(f, "TYPE(u64)"),
281            TokenKind::Usize => write!(f, "TYPE(usize)"),
282            TokenKind::F16 => write!(f, "TYPE(f16)"),
283            TokenKind::F32 => write!(f, "TYPE(f32)"),
284            TokenKind::F64 => write!(f, "TYPE(f64)"),
285            TokenKind::Bool => write!(f, "TYPE(bool)"),
286            TokenKind::Underscore => write!(f, "UNDERSCORE"),
287            TokenKind::Int(v) => write!(f, "INT({})", v),
288            TokenKind::Float(bits) => write!(f, "FLOAT({})", f64::from_bits(*bits)),
289            TokenKind::String(s) => write!(f, "STRING(sym:{})", s.into_usize()),
290            TokenKind::Ident(s) => write!(f, "IDENT(sym:{})", s.into_usize()),
291            TokenKind::Plus => write!(f, "PLUS"),
292            TokenKind::Minus => write!(f, "MINUS"),
293            TokenKind::Star => write!(f, "STAR"),
294            TokenKind::Slash => write!(f, "SLASH"),
295            TokenKind::Percent => write!(f, "PERCENT"),
296            TokenKind::Eq => write!(f, "EQ"),
297            TokenKind::EqEq => write!(f, "EQEQ"),
298            TokenKind::Bang => write!(f, "BANG"),
299            TokenKind::BangEq => write!(f, "BANGEQ"),
300            TokenKind::Lt => write!(f, "LT"),
301            TokenKind::Gt => write!(f, "GT"),
302            TokenKind::LtEq => write!(f, "LTEQ"),
303            TokenKind::GtEq => write!(f, "GTEQ"),
304            TokenKind::AmpAmp => write!(f, "AMPAMP"),
305            TokenKind::PipePipe => write!(f, "PIPEPIPE"),
306            TokenKind::Amp => write!(f, "AMP"),
307            TokenKind::Pipe => write!(f, "PIPE"),
308            TokenKind::Caret => write!(f, "CARET"),
309            TokenKind::Tilde => write!(f, "TILDE"),
310            TokenKind::LtLt => write!(f, "LTLT"),
311            TokenKind::GtGt => write!(f, "GTGT"),
312            TokenKind::LParen => write!(f, "LPAREN"),
313            TokenKind::RParen => write!(f, "RPAREN"),
314            TokenKind::LBrace => write!(f, "LBRACE"),
315            TokenKind::RBrace => write!(f, "RBRACE"),
316            TokenKind::LBracket => write!(f, "LBRACKET"),
317            TokenKind::RBracket => write!(f, "RBRACKET"),
318            TokenKind::Arrow => write!(f, "ARROW"),
319            TokenKind::FatArrow => write!(f, "FATARROW"),
320            TokenKind::ColonColon => write!(f, "COLONCOLON"),
321            TokenKind::Colon => write!(f, "COLON"),
322            TokenKind::Semi => write!(f, "SEMI"),
323            TokenKind::Comma => write!(f, "COMMA"),
324            TokenKind::Dot => write!(f, "DOT"),
325            TokenKind::At => write!(f, "AT"),
326            TokenKind::AtImport(_) => write!(f, "AT_IMPORT"),
327            TokenKind::Eof => write!(f, "EOF"),
328        }
329    }
330}