1use gruel_error::{CompileError, CompileResult, ErrorKind};
7use gruel_span::{FileId, Span};
8use lasso::{Spur, ThreadedRodeo};
9use logos::Logos;
10
/// Errors produced during lexing.
///
/// `Default` is required by logos: the default variant is the error emitted
/// when no token rule matches at the current position.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum LexError {
    /// No token rule matched (logos' default/fallback error).
    #[default]
    UnexpectedCharacter,
    /// Integer literal failed to parse as `u64` (e.g. overflow).
    InvalidInteger,
    /// Float literal failed to parse as `f64`.
    InvalidFloat,
    /// Unknown escape sequence inside a string literal.
    InvalidStringEscape,
    /// String literal hit a newline or end of input before the closing `"`.
    UnterminatedString,
}
21
22fn process_string_from_quote(lex: &mut logos::Lexer<'_, LogosTokenKind>) -> Result<Spur, LexError> {
26 let remainder = lex.remainder();
29 let mut chars = remainder.chars();
30 let mut consumed = 0;
31 let mut result = String::new();
32 let mut found_close = false;
33
34 while let Some(c) = chars.next() {
35 if c == '"' {
36 consumed += 1;
38 found_close = true;
39 break;
40 } else if c == '\\' {
41 consumed += c.len_utf8();
43 match chars.next() {
44 Some('\\') => {
45 consumed += 1;
46 result.push('\\');
47 }
48 Some('"') => {
49 consumed += 1;
50 result.push('"');
51 }
52 Some('n') => {
53 consumed += 1;
54 result.push('\n');
55 }
56 Some('t') => {
57 consumed += 1;
58 result.push('\t');
59 }
60 Some('r') => {
61 consumed += 1;
62 result.push('\r');
63 }
64 Some('0') => {
65 consumed += 1;
66 result.push('\0');
67 }
68 Some(other) => {
69 consumed += other.len_utf8();
71 lex.bump(consumed);
72 return Err(LexError::InvalidStringEscape);
73 }
74 None => {
75 lex.bump(consumed);
77 return Err(LexError::UnterminatedString);
78 }
79 }
80 } else if c == '\n' {
81 lex.bump(consumed);
84 return Err(LexError::UnterminatedString);
85 } else {
86 consumed += c.len_utf8();
87 result.push(c);
88 }
89 }
90
91 if !found_close {
92 lex.bump(consumed);
94 return Err(LexError::UnterminatedString);
95 }
96
97 lex.bump(consumed);
99
100 let spur = lex.extras.get_or_intern(&result);
102 Ok(spur)
103}
104
/// Raw token kinds produced by the logos-generated lexer.
///
/// Whitespace and `//` line comments are skipped. The lexer's `extras` is the
/// `ThreadedRodeo` interner used by the identifier and string callbacks.
#[derive(Logos, Debug, Clone, PartialEq, Eq)]
#[logos(error = LexError)]
#[logos(extras = ThreadedRodeo)]
#[logos(skip r"[ \t\n\r\f]+")]
#[logos(skip r"//[^\n]*")]
pub enum LogosTokenKind {
    // --- Keywords ---
    #[token("fn")]
    Fn,
    #[token("let")]
    Let,
    #[token("mut")]
    Mut,
    #[token("inout")]
    Inout,
    #[token("borrow")]
    Borrow,
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("while")]
    While,
    #[token("for")]
    For,
    #[token("in")]
    In,
    #[token("loop")]
    Loop,
    #[token("break")]
    Break,
    #[token("continue")]
    Continue,
    #[token("return")]
    Return,
    #[token("true")]
    True,
    #[token("false")]
    False,
    #[token("struct")]
    Struct,
    #[token("enum")]
    Enum,
    #[token("drop")]
    Drop,
    #[token("linear")]
    Linear,
    #[token("self")]
    SelfValue,
    #[token("Self")]
    SelfType,
    // NOTE: "comptime_unroll" is listed before "comptime" so the longer
    // keyword is preferred.
    #[token("comptime_unroll")]
    ComptimeUnroll,
    #[token("comptime")]
    Comptime,
    #[token("pub")]
    Pub,
    #[token("const")]
    Const,
    #[token("checked")]
    Checked,
    #[token("unchecked")]
    Unchecked,
    #[token("ptr")]
    Ptr,

    // --- Primitive type keywords ---
    #[token("i8")]
    I8,
    #[token("i16")]
    I16,
    #[token("i32")]
    I32,
    #[token("i64")]
    I64,
    #[token("isize")]
    Isize,
    #[token("u8")]
    U8,
    #[token("u16")]
    U16,
    #[token("u32")]
    U32,
    #[token("u64")]
    U64,
    #[token("usize")]
    Usize,
    #[token("f16")]
    F16,
    #[token("f32")]
    F32,
    #[token("f64")]
    F64,
    #[token("bool")]
    Bool,

    // Wildcard pattern.
    #[token("_")]
    Underscore,

    // Float literals: either `1.5` (optional exponent) or `1e5` (exponent
    // required). A bare `1.` is NOT a float, so `1.foo` lexes as
    // Int/Dot/Ident. Bits are stored so the variant can stay `Eq`.
    #[regex(r"[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?", |lex| {
        lex.slice().parse::<f64>().map(|v| v.to_bits()).map_err(|_| LexError::InvalidFloat)
    })]
    #[regex(r"[0-9]+[eE][+-]?[0-9]+", |lex| {
        lex.slice().parse::<f64>().map(|v| v.to_bits()).map_err(|_| LexError::InvalidFloat)
    })]
    Float(u64),

    // Integer literals; parse failure (overflow) becomes InvalidInteger.
    #[regex(r"[0-9]+", |lex| lex.slice().parse::<u64>().map_err(|_| LexError::InvalidInteger))]
    Int(u64),

    // String literals: the callback scans from the opening quote, decodes
    // escapes, and interns the contents.
    #[token("\"", process_string_from_quote)]
    String(Spur),

    // Identifiers are interned; priority = 1 lets keyword tokens win.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.extras.get_or_intern(lex.slice()), priority = 1)]
    Ident(Spur),

    // --- Multi-character operators ---
    #[token("==")]
    EqEq,
    #[token("!=")]
    BangEq,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("&&")]
    AmpAmp,
    #[token("||")]
    PipePipe,
    #[token("<<")]
    LtLt,
    #[token(">>")]
    GtGt,
    #[token("->")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("::")]
    ColonColon,

    // --- Single-character operators ---
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("=")]
    Eq,
    #[token("!")]
    Bang,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("&")]
    Amp,
    #[token("|")]
    Pipe,
    #[token("^")]
    Caret,
    #[token("~")]
    Tilde,

    // --- Delimiters and punctuation ---
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token(":")]
    Colon,
    #[token(";")]
    Semi,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("@")]
    At,

    // `@import` beats the bare `@` token as the longer match; the callback
    // rejects it when followed by an identifier character (e.g. `@importx`),
    // which then surfaces as a lex error. The interned payload is attached
    // later by `LogosLexer::tokenize`, not here.
    #[token("@import", at_import_callback)]
    AtImport,
}
312
313fn at_import_callback(lex: &mut logos::Lexer<'_, LogosTokenKind>) -> Option<()> {
316 match lex.remainder().chars().next() {
317 Some(c) if c.is_ascii_alphanumeric() || c == '_' => None,
318 _ => Some(()),
319 }
320}
321
322use crate::{Token, TokenKind};
323
/// One-to-one mapping from raw logos token kinds to crate-level [`TokenKind`].
///
/// `AtImport` is deliberately excluded: its `TokenKind` counterpart carries an
/// interned payload that only `LogosLexer::tokenize` can attach, so reaching
/// that arm here is a bug.
impl From<LogosTokenKind> for TokenKind {
    fn from(logos_kind: LogosTokenKind) -> Self {
        match logos_kind {
            LogosTokenKind::Fn => TokenKind::Fn,
            LogosTokenKind::Let => TokenKind::Let,
            LogosTokenKind::Mut => TokenKind::Mut,
            LogosTokenKind::Inout => TokenKind::Inout,
            LogosTokenKind::Borrow => TokenKind::Borrow,
            LogosTokenKind::If => TokenKind::If,
            LogosTokenKind::Else => TokenKind::Else,
            LogosTokenKind::Match => TokenKind::Match,
            LogosTokenKind::While => TokenKind::While,
            LogosTokenKind::For => TokenKind::For,
            LogosTokenKind::In => TokenKind::In,
            LogosTokenKind::Loop => TokenKind::Loop,
            LogosTokenKind::Break => TokenKind::Break,
            LogosTokenKind::Continue => TokenKind::Continue,
            LogosTokenKind::Return => TokenKind::Return,
            LogosTokenKind::True => TokenKind::True,
            LogosTokenKind::False => TokenKind::False,
            LogosTokenKind::Struct => TokenKind::Struct,
            LogosTokenKind::Enum => TokenKind::Enum,
            LogosTokenKind::Drop => TokenKind::Drop,
            LogosTokenKind::Linear => TokenKind::Linear,
            LogosTokenKind::SelfValue => TokenKind::SelfValue,
            LogosTokenKind::SelfType => TokenKind::SelfType,
            LogosTokenKind::ComptimeUnroll => TokenKind::ComptimeUnroll,
            LogosTokenKind::Comptime => TokenKind::Comptime,
            LogosTokenKind::Pub => TokenKind::Pub,
            LogosTokenKind::Const => TokenKind::Const,
            LogosTokenKind::Checked => TokenKind::Checked,
            LogosTokenKind::Unchecked => TokenKind::Unchecked,
            LogosTokenKind::Ptr => TokenKind::Ptr,
            LogosTokenKind::I8 => TokenKind::I8,
            LogosTokenKind::I16 => TokenKind::I16,
            LogosTokenKind::I32 => TokenKind::I32,
            LogosTokenKind::I64 => TokenKind::I64,
            LogosTokenKind::Isize => TokenKind::Isize,
            LogosTokenKind::U8 => TokenKind::U8,
            LogosTokenKind::U16 => TokenKind::U16,
            LogosTokenKind::U32 => TokenKind::U32,
            LogosTokenKind::U64 => TokenKind::U64,
            LogosTokenKind::Usize => TokenKind::Usize,
            LogosTokenKind::F16 => TokenKind::F16,
            LogosTokenKind::F32 => TokenKind::F32,
            LogosTokenKind::F64 => TokenKind::F64,
            LogosTokenKind::Bool => TokenKind::Bool,
            LogosTokenKind::Float(bits) => TokenKind::Float(bits),
            LogosTokenKind::Underscore => TokenKind::Underscore,
            LogosTokenKind::Int(n) => TokenKind::Int(n),
            LogosTokenKind::String(s) => TokenKind::String(s),
            LogosTokenKind::Ident(s) => TokenKind::Ident(s),
            LogosTokenKind::EqEq => TokenKind::EqEq,
            LogosTokenKind::BangEq => TokenKind::BangEq,
            LogosTokenKind::LtEq => TokenKind::LtEq,
            LogosTokenKind::GtEq => TokenKind::GtEq,
            LogosTokenKind::AmpAmp => TokenKind::AmpAmp,
            LogosTokenKind::PipePipe => TokenKind::PipePipe,
            LogosTokenKind::LtLt => TokenKind::LtLt,
            LogosTokenKind::GtGt => TokenKind::GtGt,
            LogosTokenKind::Arrow => TokenKind::Arrow,
            LogosTokenKind::FatArrow => TokenKind::FatArrow,
            LogosTokenKind::ColonColon => TokenKind::ColonColon,
            LogosTokenKind::Plus => TokenKind::Plus,
            LogosTokenKind::Minus => TokenKind::Minus,
            LogosTokenKind::Star => TokenKind::Star,
            LogosTokenKind::Slash => TokenKind::Slash,
            LogosTokenKind::Percent => TokenKind::Percent,
            LogosTokenKind::Eq => TokenKind::Eq,
            LogosTokenKind::Bang => TokenKind::Bang,
            LogosTokenKind::Lt => TokenKind::Lt,
            LogosTokenKind::Gt => TokenKind::Gt,
            LogosTokenKind::Amp => TokenKind::Amp,
            LogosTokenKind::Pipe => TokenKind::Pipe,
            LogosTokenKind::Caret => TokenKind::Caret,
            LogosTokenKind::Tilde => TokenKind::Tilde,
            LogosTokenKind::LParen => TokenKind::LParen,
            LogosTokenKind::RParen => TokenKind::RParen,
            LogosTokenKind::LBrace => TokenKind::LBrace,
            LogosTokenKind::RBrace => TokenKind::RBrace,
            LogosTokenKind::LBracket => TokenKind::LBracket,
            LogosTokenKind::RBracket => TokenKind::RBracket,
            LogosTokenKind::Colon => TokenKind::Colon,
            LogosTokenKind::Semi => TokenKind::Semi,
            LogosTokenKind::Comma => TokenKind::Comma,
            LogosTokenKind::Dot => TokenKind::Dot,
            LogosTokenKind::At => TokenKind::At,
            LogosTokenKind::AtImport => unreachable!("AtImport should be handled specially"),
        }
    }
}
416
/// Driver that runs the logos lexer over a source string and produces the
/// crate's [`Token`] stream.
pub struct LogosLexer<'a> {
    /// Source text being tokenized.
    source: &'a str,
    /// String interner threaded through the lexer as `extras`.
    interner: ThreadedRodeo,
    /// File that produced token spans are attributed to.
    file_id: FileId,
}
423
424impl<'a> LogosLexer<'a> {
425 pub fn new(source: &'a str) -> Self {
429 Self {
430 source,
431 interner: ThreadedRodeo::default(),
432 file_id: FileId::DEFAULT,
433 }
434 }
435
436 pub fn with_interner(source: &'a str, interner: ThreadedRodeo) -> Self {
438 Self {
439 source,
440 interner,
441 file_id: FileId::DEFAULT,
442 }
443 }
444
445 pub fn with_file_id(source: &'a str, file_id: FileId) -> Self {
447 Self {
448 source,
449 interner: ThreadedRodeo::default(),
450 file_id,
451 }
452 }
453
454 pub fn with_interner_and_file_id(
456 source: &'a str,
457 interner: ThreadedRodeo,
458 file_id: FileId,
459 ) -> Self {
460 Self {
461 source,
462 interner,
463 file_id,
464 }
465 }
466
467 pub fn tokenize(self) -> CompileResult<(Vec<Token>, ThreadedRodeo)> {
469 let mut tokens = Vec::with_capacity(self.source.len() / 4);
471
472 let mut lexer = LogosTokenKind::lexer_with_extras(self.source, self.interner);
473
474 while let Some(result) = lexer.next() {
475 let span = lexer.span();
476 match result {
477 Ok(logos_kind) => {
478 let token_kind = if matches!(logos_kind, LogosTokenKind::AtImport) {
481 let import_spur = lexer.extras.get_or_intern("import");
482 TokenKind::AtImport(import_spur)
483 } else {
484 logos_kind.into()
485 };
486 tokens.push(Token {
487 kind: token_kind,
488 span: Span::with_file(self.file_id, span.start as u32, span.end as u32),
489 });
490 }
491 Err(lex_error) => {
492 let gruel_span =
493 Span::with_file(self.file_id, span.start as u32, span.end as u32);
494 let slice = lexer.slice();
495 let error_char = slice.chars().next().unwrap_or('?');
496 let kind = match lex_error {
497 LexError::InvalidInteger => ErrorKind::InvalidInteger,
498 LexError::InvalidFloat => ErrorKind::InvalidFloat,
499 LexError::UnexpectedCharacter => ErrorKind::UnexpectedCharacter(error_char),
500 LexError::InvalidStringEscape => {
501 let escape_char = slice
503 .find('\\')
504 .and_then(|pos| slice[pos + 1..].chars().next())
505 .unwrap_or('?');
506 ErrorKind::InvalidStringEscape(escape_char)
507 }
508 LexError::UnterminatedString => ErrorKind::UnterminatedString,
509 };
510 return Err(CompileError::new(kind, gruel_span));
511 }
512 }
513 }
514
515 let eof_pos = self.source.len() as u32;
517 tokens.push(Token {
518 kind: TokenKind::Eof,
519 span: Span::point_in_file(self.file_id, eof_pos),
520 });
521
522 let interner = lexer.extras;
524
525 Ok((tokens, interner))
526 }
527}
528
#[cfg(test)]
mod tests {
    use super::*;

    /// Resolves an `Ident` token's interned symbol to its string, if any.
    fn get_ident_str<'a>(kind: &TokenKind, interner: &'a ThreadedRodeo) -> Option<&'a str> {
        match kind {
            TokenKind::Ident(sym) => Some(interner.resolve(sym)),
            _ => None,
        }
    }

    /// Resolves a `String` token's interned symbol to its string, if any.
    fn get_string_str<'a>(kind: &TokenKind, interner: &'a ThreadedRodeo) -> Option<&'a str> {
        match kind {
            TokenKind::String(sym) => Some(interner.resolve(sym)),
            _ => None,
        }
    }

    #[test]
    fn test_logos_basic_tokens() {
        let lexer = LogosLexer::new("fn main() -> i32 { 42 }");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Fn));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("main"));
        assert!(matches!(tokens[2].kind, TokenKind::LParen));
        assert!(matches!(tokens[3].kind, TokenKind::RParen));
        assert!(matches!(tokens[4].kind, TokenKind::Arrow));
        assert!(matches!(tokens[5].kind, TokenKind::I32));
        assert!(matches!(tokens[6].kind, TokenKind::LBrace));
        assert!(matches!(tokens[7].kind, TokenKind::Int(42)));
        assert!(matches!(tokens[8].kind, TokenKind::RBrace));
        // Stream is always terminated with Eof.
        assert!(matches!(tokens[9].kind, TokenKind::Eof));
    }

    #[test]
    fn test_logos_unexpected_character() {
        let lexer = LogosLexer::new("fn main() { $ }");
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::UnexpectedCharacter('$')));
    }

    #[test]
    fn test_logos_at_token() {
        // A bare `@` followed by an identifier lexes as two tokens.
        let lexer = LogosLexer::new("@dbg");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::At));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("dbg"));
    }

    #[test]
    fn test_logos_at_import_token() {
        // `@import` is a single token with the "import" symbol pre-interned.
        let lexer = LogosLexer::new("@import");
        let (tokens, interner) = lexer.tokenize().unwrap();
        if let TokenKind::AtImport(spur) = tokens[0].kind {
            assert_eq!(interner.resolve(&spur), "import");
        } else {
            panic!("Expected AtImport token");
        }
        assert!(matches!(tokens[1].kind, TokenKind::Eof));
    }

    #[test]
    fn test_logos_at_import_vs_at_other() {
        let lexer = LogosLexer::new("@import @other");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::AtImport(_)));
        assert!(matches!(tokens[1].kind, TokenKind::At));
        assert_eq!(get_ident_str(&tokens[2].kind, &interner), Some("other"));
    }

    #[test]
    fn test_logos_at_import_span() {
        let lexer = LogosLexer::new("@import");
        let (tokens, _) = lexer.tokenize().unwrap();
        // "@import" covers bytes 0..7.
        assert_eq!(tokens[0].span, Span::new(0, 7));
    }

    #[test]
    fn test_logos_at_import_with_parens() {
        let lexer = LogosLexer::new(r#"@import("math.gruel")"#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::AtImport(_)));
        assert!(matches!(tokens[1].kind, TokenKind::LParen));
        assert_eq!(
            get_string_str(&tokens[2].kind, &interner),
            Some("math.gruel")
        );
        assert!(matches!(tokens[3].kind, TokenKind::RParen));
    }

    #[test]
    fn test_logos_at_import_suffix_is_error() {
        // The at_import_callback rejects `@import` followed by an identifier
        // character, which surfaces as a lex error.
        let lexer = LogosLexer::new("@importx");
        let result = lexer.tokenize();
        assert!(result.is_err());
    }

    #[test]
    fn test_logos_spans() {
        let lexer = LogosLexer::new("fn main");
        let (tokens, _interner) = lexer.tokenize().unwrap();

        // "fn" is bytes 0..2; "main" is bytes 3..7.
        assert_eq!(tokens[0].span, Span::new(0, 2));
        assert_eq!(tokens[1].span, Span::new(3, 7));
    }

    #[test]
    fn test_logos_arithmetic_operators() {
        let lexer = LogosLexer::new("1 + 2 - 3 * 4 / 5 % 6");
        let (tokens, _interner) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Int(1)));
        assert!(matches!(tokens[1].kind, TokenKind::Plus));
        assert!(matches!(tokens[2].kind, TokenKind::Int(2)));
        assert!(matches!(tokens[3].kind, TokenKind::Minus));
        assert!(matches!(tokens[4].kind, TokenKind::Int(3)));
        assert!(matches!(tokens[5].kind, TokenKind::Star));
        assert!(matches!(tokens[6].kind, TokenKind::Int(4)));
        assert!(matches!(tokens[7].kind, TokenKind::Slash));
        assert!(matches!(tokens[8].kind, TokenKind::Int(5)));
        assert!(matches!(tokens[9].kind, TokenKind::Percent));
        assert!(matches!(tokens[10].kind, TokenKind::Int(6)));
    }

    #[test]
    fn test_logos_minus_vs_arrow() {
        let lexer = LogosLexer::new("a - b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Minus));

        let lexer = LogosLexer::new("-> i32");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::Arrow));

        // Negative literals are Minus followed by Int; no signed literals.
        let lexer = LogosLexer::new("-1");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::Minus));
        assert!(matches!(tokens[1].kind, TokenKind::Int(1)));
    }

    #[test]
    fn test_logos_let_binding() {
        let lexer = LogosLexer::new("let x = 42;");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Let));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("x"));
        assert!(matches!(tokens[2].kind, TokenKind::Eq));
        assert!(matches!(tokens[3].kind, TokenKind::Int(42)));
        assert!(matches!(tokens[4].kind, TokenKind::Semi));
    }

    #[test]
    fn test_logos_logical_operators() {
        let lexer = LogosLexer::new("!true && false || true");
        let (tokens, _) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Bang));
        assert!(matches!(tokens[1].kind, TokenKind::True));
        assert!(matches!(tokens[2].kind, TokenKind::AmpAmp));
        assert!(matches!(tokens[3].kind, TokenKind::False));
        assert!(matches!(tokens[4].kind, TokenKind::PipePipe));
        assert!(matches!(tokens[5].kind, TokenKind::True));
    }

    #[test]
    fn test_logos_comparison_operators() {
        let lexer = LogosLexer::new("a == b != c < d > e <= f >= g");
        let (tokens, _) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[1].kind, TokenKind::EqEq));
        assert!(matches!(tokens[3].kind, TokenKind::BangEq));
        assert!(matches!(tokens[5].kind, TokenKind::Lt));
        assert!(matches!(tokens[7].kind, TokenKind::Gt));
        assert!(matches!(tokens[9].kind, TokenKind::LtEq));
        assert!(matches!(tokens[11].kind, TokenKind::GtEq));
    }

    #[test]
    fn test_logos_line_comments() {
        // `//` comments are skipped entirely.
        let lexer = LogosLexer::new("fn // comment\nmain");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Fn));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("main"));
        assert!(matches!(tokens[2].kind, TokenKind::Eof));
    }

    #[test]
    fn test_logos_keywords_vs_identifiers() {
        let lexer = LogosLexer::new("fn let mut if else while break continue true false");
        let (tokens, _) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Fn));
        assert!(matches!(tokens[1].kind, TokenKind::Let));
        assert!(matches!(tokens[2].kind, TokenKind::Mut));
        assert!(matches!(tokens[3].kind, TokenKind::If));
        assert!(matches!(tokens[4].kind, TokenKind::Else));
        assert!(matches!(tokens[5].kind, TokenKind::While));
        assert!(matches!(tokens[6].kind, TokenKind::Break));
        assert!(matches!(tokens[7].kind, TokenKind::Continue));
        assert!(matches!(tokens[8].kind, TokenKind::True));
        assert!(matches!(tokens[9].kind, TokenKind::False));

        // Keyword prefixes of longer words must lex as identifiers.
        let lexer = LogosLexer::new("fns lets mutable iff elseif whileloop");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert_eq!(get_ident_str(&tokens[0].kind, &interner), Some("fns"));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("lets"));
        assert_eq!(get_ident_str(&tokens[2].kind, &interner), Some("mutable"));
        assert_eq!(get_ident_str(&tokens[3].kind, &interner), Some("iff"));
        assert_eq!(get_ident_str(&tokens[4].kind, &interner), Some("elseif"));
        assert_eq!(get_ident_str(&tokens[5].kind, &interner), Some("whileloop"));
    }

    #[test]
    fn test_logos_bitwise_operators() {
        let lexer = LogosLexer::new("a & b | c ^ d ~ e << f >> g");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert_eq!(get_ident_str(&tokens[0].kind, &interner), Some("a"));
        assert!(matches!(tokens[1].kind, TokenKind::Amp));
        assert_eq!(get_ident_str(&tokens[2].kind, &interner), Some("b"));
        assert!(matches!(tokens[3].kind, TokenKind::Pipe));
        assert_eq!(get_ident_str(&tokens[4].kind, &interner), Some("c"));
        assert!(matches!(tokens[5].kind, TokenKind::Caret));
        assert_eq!(get_ident_str(&tokens[6].kind, &interner), Some("d"));
        assert!(matches!(tokens[7].kind, TokenKind::Tilde));
        assert_eq!(get_ident_str(&tokens[8].kind, &interner), Some("e"));
        assert!(matches!(tokens[9].kind, TokenKind::LtLt));
        assert_eq!(get_ident_str(&tokens[10].kind, &interner), Some("f"));
        assert!(matches!(tokens[11].kind, TokenKind::GtGt));
        assert_eq!(get_ident_str(&tokens[12].kind, &interner), Some("g"));
    }

    #[test]
    fn test_logos_bitwise_vs_logical() {
        // Single vs doubled operator characters must not be confused.
        let lexer = LogosLexer::new("a & b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Amp));

        let lexer = LogosLexer::new("a && b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::AmpAmp));

        let lexer = LogosLexer::new("a | b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Pipe));

        let lexer = LogosLexer::new("a || b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::PipePipe));
    }

    #[test]
    fn test_logos_shift_vs_comparison() {
        let lexer = LogosLexer::new("a << b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::LtLt));

        let lexer = LogosLexer::new("a >> b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::GtGt));

        let lexer = LogosLexer::new("a < b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Lt));

        let lexer = LogosLexer::new("a > b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Gt));

        let lexer = LogosLexer::new("a <= b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::LtEq));

        let lexer = LogosLexer::new("a >= b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::GtEq));
    }

    #[test]
    fn test_logos_integer_overflow() {
        // Larger than u64::MAX, so the parse callback fails.
        let lexer = LogosLexer::new("99999999999999999999999");
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::InvalidInteger));
    }

    #[test]
    fn test_logos_type_keywords() {
        let lexer = LogosLexer::new("i8 i16 i32 i64 u8 u16 u32 u64 bool");
        let (tokens, _) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::I8));
        assert!(matches!(tokens[1].kind, TokenKind::I16));
        assert!(matches!(tokens[2].kind, TokenKind::I32));
        assert!(matches!(tokens[3].kind, TokenKind::I64));
        assert!(matches!(tokens[4].kind, TokenKind::U8));
        assert!(matches!(tokens[5].kind, TokenKind::U16));
        assert!(matches!(tokens[6].kind, TokenKind::U32));
        assert!(matches!(tokens[7].kind, TokenKind::U64));
        assert!(matches!(tokens[8].kind, TokenKind::Bool));

        // Type-keyword prefixes of longer words lex as identifiers.
        let lexer = LogosLexer::new("i32x i64ptr boolish u8_data");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert_eq!(get_ident_str(&tokens[0].kind, &interner), Some("i32x"));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("i64ptr"));
        assert_eq!(get_ident_str(&tokens[2].kind, &interner), Some("boolish"));
        assert_eq!(get_ident_str(&tokens[3].kind, &interner), Some("u8_data"));
    }

    #[test]
    fn test_logos_unterminated_string() {
        // Missing closing quote at end of input.
        let lexer = LogosLexer::new(r#""hello"#);
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::UnterminatedString));

        // Strings may not span a newline.
        let lexer = LogosLexer::new("\"hello\nworld");
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::UnterminatedString));

        // A lone opening quote.
        let lexer = LogosLexer::new("\"");
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::UnterminatedString));
    }

    #[test]
    fn test_logos_valid_strings() {
        let lexer = LogosLexer::new(r#""hello""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_string_str(&tokens[0].kind, &interner), Some("hello"));

        // Empty string.
        let lexer = LogosLexer::new(r#""""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_string_str(&tokens[0].kind, &interner), Some(""));

        // Escaped quote.
        let lexer = LogosLexer::new(r#""hello\"world""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("hello\"world")
        );

        // Escaped backslash.
        let lexer = LogosLexer::new(r#""hello\\world""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("hello\\world")
        );
    }

    #[test]
    fn test_logos_escape_newline() {
        let lexer = LogosLexer::new(r#""line1\nline2""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("line1\nline2")
        );
    }

    #[test]
    fn test_logos_escape_tab() {
        let lexer = LogosLexer::new(r#""col1\tcol2""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("col1\tcol2")
        );
    }

    #[test]
    fn test_logos_escape_carriage_return() {
        let lexer = LogosLexer::new(r#""line\r\n""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_string_str(&tokens[0].kind, &interner), Some("line\r\n"));
    }

    #[test]
    fn test_logos_escape_null() {
        let lexer = LogosLexer::new(r#""null\0byte""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("null\0byte")
        );
    }

    #[test]
    fn test_logos_invalid_escape_q() {
        let lexer = LogosLexer::new(r#""bad\qescape""#);
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        // The error carries the offending escape character.
        assert!(matches!(err.kind, ErrorKind::InvalidStringEscape('q')));
    }

    #[test]
    fn test_logos_all_escapes_combined() {
        let lexer = LogosLexer::new(r#""\\\"abc\n\t\r\0xyz""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("\\\"abc\n\t\r\0xyz")
        );
    }

    #[test]
    fn test_interning_deduplicates() {
        // The same identifier text must resolve to the same interned symbol.
        let lexer = LogosLexer::new("x x x");
        let (tokens, _interner) = lexer.tokenize().unwrap();

        let sym0 = match &tokens[0].kind {
            TokenKind::Ident(s) => *s,
            _ => panic!("expected Ident"),
        };
        let sym1 = match &tokens[1].kind {
            TokenKind::Ident(s) => *s,
            _ => panic!("expected Ident"),
        };
        let sym2 = match &tokens[2].kind {
            TokenKind::Ident(s) => *s,
            _ => panic!("expected Ident"),
        };

        assert_eq!(sym0, sym1);
        assert_eq!(sym1, sym2);
    }

    #[test]
    fn test_token_kind_is_copy() {
        let lexer = LogosLexer::new("x");
        let (tokens, _) = lexer.tokenize().unwrap();
        // Compiles only if TokenKind is Copy: the value is used three times.
        let kind = tokens[0].kind;
        let _kind2 = kind;
        let _kind3 = kind;
    }
}