1use gruel_util::{CompileError, CompileResult, ErrorKind};
7use gruel_util::{FileId, Span};
8use lasso::{Spur, ThreadedRodeo};
9use logos::Logos;
10
/// Errors the logos-generated lexer can report while scanning a single token.
///
/// The type is registered with logos through `#[logos(error = LexError)]` on
/// [`LogosTokenKind`]; `tokenize` translates each variant into an `ErrorKind`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum LexError {
    /// The `Default` value of this type. `tokenize` reports it together with
    /// the first character of the offending slice.
    #[default]
    UnexpectedCharacter,
    /// The digits of an integer literal could not be parsed as a `u64`.
    InvalidInteger,
    /// The text of a float literal could not be parsed as an `f64`.
    InvalidFloat,
    /// A string literal contains a backslash followed by an unsupported character.
    InvalidStringEscape,
    /// A string literal hit a raw newline or the end of input before its closing `"`.
    UnterminatedString,
    /// A char literal with nothing between the quotes (`''`).
    EmptyCharLit,
    /// A char literal hit a line break or the end of input before its closing `'`.
    UnterminatedCharLit,
    /// A char literal holds more than one character before its closing `'`.
    MultiCharLit,
    /// A char literal contains a backslash followed by an unsupported character.
    InvalidCharEscape,
    /// A `\u{...}` escape in a char literal is malformed: missing brace, empty,
    /// non-hex digit, more than six digits, or not a valid Unicode scalar value.
    InvalidUnicodeEscape,
}
27
28fn process_string_from_quote(lex: &mut logos::Lexer<'_, LogosTokenKind>) -> Result<Spur, LexError> {
32 let remainder = lex.remainder();
35 let mut chars = remainder.chars();
36 let mut consumed = 0;
37 let mut result = String::new();
38 let mut found_close = false;
39
40 while let Some(c) = chars.next() {
41 if c == '"' {
42 consumed += 1;
44 found_close = true;
45 break;
46 } else if c == '\\' {
47 consumed += c.len_utf8();
49 match chars.next() {
50 Some('\\') => {
51 consumed += 1;
52 result.push('\\');
53 }
54 Some('"') => {
55 consumed += 1;
56 result.push('"');
57 }
58 Some('n') => {
59 consumed += 1;
60 result.push('\n');
61 }
62 Some('t') => {
63 consumed += 1;
64 result.push('\t');
65 }
66 Some('r') => {
67 consumed += 1;
68 result.push('\r');
69 }
70 Some('0') => {
71 consumed += 1;
72 result.push('\0');
73 }
74 Some(other) => {
75 consumed += other.len_utf8();
77 lex.bump(consumed);
78 return Err(LexError::InvalidStringEscape);
79 }
80 None => {
81 lex.bump(consumed);
83 return Err(LexError::UnterminatedString);
84 }
85 }
86 } else if c == '\n' {
87 lex.bump(consumed);
90 return Err(LexError::UnterminatedString);
91 } else {
92 consumed += c.len_utf8();
93 result.push(c);
94 }
95 }
96
97 if !found_close {
98 lex.bump(consumed);
100 return Err(LexError::UnterminatedString);
101 }
102
103 lex.bump(consumed);
105
106 let spur = lex.extras.get_or_intern(&result);
108 Ok(spur)
109}
110
111fn process_char_from_quote(lex: &mut logos::Lexer<'_, LogosTokenKind>) -> Result<u32, LexError> {
115 let remainder = lex.remainder();
117 let mut chars = remainder.chars();
118 let mut consumed: usize = 0;
119
120 let scalar: u32 = match chars.next() {
122 None => {
123 return Err(LexError::UnterminatedCharLit);
124 }
125 Some('\'') => {
126 lex.bump(1);
128 return Err(LexError::EmptyCharLit);
129 }
130 Some('\n') | Some('\r') => {
131 return Err(LexError::UnterminatedCharLit);
134 }
135 Some('\\') => {
136 consumed += 1;
137 match chars.next() {
138 None => {
139 lex.bump(consumed);
140 return Err(LexError::UnterminatedCharLit);
141 }
142 Some('n') => {
143 consumed += 1;
144 '\n' as u32
145 }
146 Some('r') => {
147 consumed += 1;
148 '\r' as u32
149 }
150 Some('t') => {
151 consumed += 1;
152 '\t' as u32
153 }
154 Some('\\') => {
155 consumed += 1;
156 '\\' as u32
157 }
158 Some('\'') => {
159 consumed += 1;
160 '\'' as u32
161 }
162 Some('"') => {
163 consumed += 1;
164 '"' as u32
165 }
166 Some('0') => {
167 consumed += 1;
168 0u32
169 }
170 Some('u') => {
171 consumed += 1;
172 match chars.next() {
174 Some('{') => {
175 consumed += 1;
176 }
177 _ => {
178 lex.bump(consumed);
179 return Err(LexError::InvalidUnicodeEscape);
180 }
181 }
182 let mut hex = String::new();
184 let mut closed = false;
185 while let Some(c) = chars.next() {
186 consumed += c.len_utf8();
187 if c == '}' {
188 closed = true;
189 break;
190 }
191 if hex.len() >= 6 {
192 lex.bump(consumed);
193 return Err(LexError::InvalidUnicodeEscape);
194 }
195 if c.is_ascii_hexdigit() {
196 hex.push(c);
197 } else {
198 lex.bump(consumed);
199 return Err(LexError::InvalidUnicodeEscape);
200 }
201 }
202 if !closed || hex.is_empty() {
203 lex.bump(consumed);
204 return Err(LexError::InvalidUnicodeEscape);
205 }
206 let n = u32::from_str_radix(&hex, 16)
207 .map_err(|_| LexError::InvalidUnicodeEscape)?;
208 if (0xD800..=0xDFFF).contains(&n) || n > 0x10FFFF {
210 lex.bump(consumed);
211 return Err(LexError::InvalidUnicodeEscape);
212 }
213 n
214 }
215 Some(_) => {
216 lex.bump(consumed);
217 return Err(LexError::InvalidCharEscape);
218 }
219 }
220 }
221 Some(c) => {
222 consumed += c.len_utf8();
223 c as u32
224 }
225 };
226
227 match chars.next() {
229 Some('\'') => {
230 consumed += 1;
231 lex.bump(consumed);
232 Ok(scalar)
233 }
234 Some('\n') | Some('\r') | None => {
235 lex.bump(consumed);
236 Err(LexError::UnterminatedCharLit)
237 }
238 Some(_) => {
239 let new_remainder = &lex.remainder()[consumed..];
255 let mut extra = 0;
256 let mut found_close = false;
257 for c in new_remainder.chars() {
258 if c == '\n' || c == '\r' {
259 break;
260 }
261 extra += c.len_utf8();
262 if c == '\'' {
263 found_close = true;
264 break;
265 }
266 }
267 lex.bump(consumed + extra);
268 if found_close {
269 Err(LexError::MultiCharLit)
270 } else {
271 Err(LexError::UnterminatedCharLit)
272 }
273 }
274 }
275}
276
277fn process_line_doc(lex: &mut logos::Lexer<'_, LogosTokenKind>) -> Spur {
286 let remainder = lex.remainder();
287 let line_end = remainder.find('\n').unwrap_or(remainder.len());
288 let body = &remainder[..line_end];
289 let stripped = body.strip_prefix(' ').unwrap_or(body);
290 let spur = lex.extras.get_or_intern(stripped);
291 lex.bump(line_end);
292 spur
293}
294
/// Token kinds recognised by the logos-generated lexer.
///
/// Lexing errors are reported as [`LexError`], and a `ThreadedRodeo` interner
/// is carried as the lexer's `extras` so callbacks can intern identifier,
/// string and doc-comment text.
#[derive(Logos, Debug, Clone, PartialEq, Eq)]
#[logos(error = LexError)]
#[logos(extras = ThreadedRodeo)]
// Whitespace is skipped.
#[logos(skip r"[ \t\n\r\f]+")]
// Four or more slashes start a plain comment, not a doc comment.
#[logos(skip r"////+[^\n]*")]
// Two slashes followed by anything other than a third slash: plain comment.
#[logos(skip r"//[^/\n][^\n]*")]
// A bare `//` with nothing after it on the line.
#[logos(skip r"//")]
pub enum LogosTokenKind {
    /// `///` doc comment; the payload is the interned line text produced by
    /// `process_line_doc`.
    #[token("///", process_line_doc)]
    LineDoc(Spur),

    // Keyword tokens (fixed spellings).
    #[token("fn")]
    Fn,
    #[token("let")]
    Let,
    #[token("mut")]
    Mut,
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("while")]
    While,
    #[token("for")]
    For,
    #[token("in")]
    In,
    #[token("loop")]
    Loop,
    #[token("break")]
    Break,
    #[token("continue")]
    Continue,
    #[token("return")]
    Return,
    #[token("true")]
    True,
    #[token("false")]
    False,
    #[token("struct")]
    Struct,
    #[token("enum")]
    Enum,
    #[token("interface")]
    Interface,
    #[token("self")]
    SelfValue,
    #[token("Self")]
    SelfType,
    #[token("comptime_unroll")]
    ComptimeUnroll,
    #[token("comptime")]
    Comptime,
    #[token("derive")]
    Derive,
    #[token("pub")]
    Pub,
    #[token("const")]
    Const,
    #[token("checked")]
    Checked,
    #[token("link_extern")]
    LinkExtern,
    #[token("static_link_extern")]
    StaticLinkExtern,

    // Primitive type names.
    #[token("i8")]
    I8,
    #[token("i16")]
    I16,
    #[token("i32")]
    I32,
    #[token("i64")]
    I64,
    #[token("isize")]
    Isize,
    #[token("u8")]
    U8,
    #[token("u16")]
    U16,
    #[token("u32")]
    U32,
    #[token("u64")]
    U64,
    #[token("usize")]
    Usize,
    #[token("f16")]
    F16,
    #[token("f32")]
    F32,
    #[token("f64")]
    F64,
    #[token("bool")]
    Bool,
    #[token("char")]
    Char,

    /// A lone `_`.
    #[token("_")]
    Underscore,

    /// Float literal: digits with a fractional part and optional exponent, or
    /// digits with an exponent only. The payload is the bit pattern
    /// (`f64::to_bits`) of the parsed value.
    #[regex(r"[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?", |lex| {
        lex.slice().parse::<f64>().map(|v| v.to_bits()).map_err(|_| LexError::InvalidFloat)
    })]
    #[regex(r"[0-9]+[eE][+-]?[0-9]+", |lex| {
        lex.slice().parse::<f64>().map(|v| v.to_bits()).map_err(|_| LexError::InvalidFloat)
    })]
    Float(u64),

    /// Decimal integer literal; fails with `InvalidInteger` when the digits do
    /// not fit in a `u64`.
    #[regex(r"[0-9]+", |lex| lex.slice().parse::<u64>().map_err(|_| LexError::InvalidInteger))]
    Int(u64),

    /// String literal; the payload is the interned, escape-decoded contents.
    #[token("\"", process_string_from_quote)]
    String(Spur),

    /// Char literal; the payload is the character's scalar value.
    #[token("'", process_char_from_quote)]
    CharLit(u32),

    /// Identifier, interned. Declared with an explicit `priority = 1`
    /// (presumably so the fixed keyword tokens win over this regex — confirm
    /// against the logos priority rules).
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.extras.get_or_intern(lex.slice()), priority = 1)]
    Ident(Spur),

    // Two-character operators.
    #[token("==")]
    EqEq,
    #[token("!=")]
    BangEq,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("&&")]
    AmpAmp,
    #[token("||")]
    PipePipe,
    #[token("<<")]
    LtLt,
    #[token(">>")]
    GtGt,
    #[token("->")]
    Arrow,
    #[token("=>")]
    FatArrow,
    #[token("::")]
    ColonColon,

    // Single-character operators.
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Star,
    #[token("/")]
    Slash,
    #[token("%")]
    Percent,
    #[token("=")]
    Eq,
    #[token("!")]
    Bang,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("&")]
    Amp,
    #[token("|")]
    Pipe,
    #[token("^")]
    Caret,
    #[token("~")]
    Tilde,

    // Delimiters and punctuation.
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token(":")]
    Colon,
    #[token(";")]
    Semi,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("@")]
    At,

    /// The `@import` builtin. `at_import_callback` rejects the match when an
    /// identifier character follows directly (e.g. `@importx`). `tokenize`
    /// converts this variant itself rather than going through `From`.
    #[token("@import", at_import_callback)]
    AtImport,
}
519
520fn at_import_callback(lex: &mut logos::Lexer<'_, LogosTokenKind>) -> Option<()> {
523 match lex.remainder().chars().next() {
524 Some(c) if c.is_ascii_alphanumeric() || c == '_' => None,
525 _ => Some(()),
526 }
527}
528
529use crate::{Token, TokenKind};
530
531impl From<LogosTokenKind> for TokenKind {
532 fn from(logos_kind: LogosTokenKind) -> Self {
533 match logos_kind {
534 LogosTokenKind::Fn => TokenKind::Fn,
535 LogosTokenKind::Let => TokenKind::Let,
536 LogosTokenKind::Mut => TokenKind::Mut,
537 LogosTokenKind::If => TokenKind::If,
538 LogosTokenKind::Else => TokenKind::Else,
539 LogosTokenKind::Match => TokenKind::Match,
540 LogosTokenKind::While => TokenKind::While,
541 LogosTokenKind::For => TokenKind::For,
542 LogosTokenKind::In => TokenKind::In,
543 LogosTokenKind::Loop => TokenKind::Loop,
544 LogosTokenKind::Break => TokenKind::Break,
545 LogosTokenKind::Continue => TokenKind::Continue,
546 LogosTokenKind::Return => TokenKind::Return,
547 LogosTokenKind::True => TokenKind::True,
548 LogosTokenKind::False => TokenKind::False,
549 LogosTokenKind::Struct => TokenKind::Struct,
550 LogosTokenKind::Enum => TokenKind::Enum,
551 LogosTokenKind::Interface => TokenKind::Interface,
552 LogosTokenKind::SelfValue => TokenKind::SelfValue,
553 LogosTokenKind::SelfType => TokenKind::SelfType,
554 LogosTokenKind::ComptimeUnroll => TokenKind::ComptimeUnroll,
555 LogosTokenKind::Comptime => TokenKind::Comptime,
556 LogosTokenKind::Derive => TokenKind::Derive,
557 LogosTokenKind::Pub => TokenKind::Pub,
558 LogosTokenKind::Const => TokenKind::Const,
559 LogosTokenKind::Checked => TokenKind::Checked,
560 LogosTokenKind::LinkExtern => TokenKind::LinkExtern,
561 LogosTokenKind::StaticLinkExtern => TokenKind::StaticLinkExtern,
562 LogosTokenKind::I8 => TokenKind::I8,
563 LogosTokenKind::I16 => TokenKind::I16,
564 LogosTokenKind::I32 => TokenKind::I32,
565 LogosTokenKind::I64 => TokenKind::I64,
566 LogosTokenKind::Isize => TokenKind::Isize,
567 LogosTokenKind::U8 => TokenKind::U8,
568 LogosTokenKind::U16 => TokenKind::U16,
569 LogosTokenKind::U32 => TokenKind::U32,
570 LogosTokenKind::U64 => TokenKind::U64,
571 LogosTokenKind::Usize => TokenKind::Usize,
572 LogosTokenKind::F16 => TokenKind::F16,
573 LogosTokenKind::F32 => TokenKind::F32,
574 LogosTokenKind::F64 => TokenKind::F64,
575 LogosTokenKind::Bool => TokenKind::Bool,
576 LogosTokenKind::Char => TokenKind::Char,
577 LogosTokenKind::Float(bits) => TokenKind::Float(bits),
578 LogosTokenKind::Underscore => TokenKind::Underscore,
579 LogosTokenKind::Int(n) => TokenKind::Int(n),
580 LogosTokenKind::String(s) => TokenKind::String(s),
581 LogosTokenKind::CharLit(c) => TokenKind::CharLit(c),
582 LogosTokenKind::Ident(s) => TokenKind::Ident(s),
583 LogosTokenKind::EqEq => TokenKind::EqEq,
584 LogosTokenKind::BangEq => TokenKind::BangEq,
585 LogosTokenKind::LtEq => TokenKind::LtEq,
586 LogosTokenKind::GtEq => TokenKind::GtEq,
587 LogosTokenKind::AmpAmp => TokenKind::AmpAmp,
588 LogosTokenKind::PipePipe => TokenKind::PipePipe,
589 LogosTokenKind::LtLt => TokenKind::LtLt,
590 LogosTokenKind::GtGt => TokenKind::GtGt,
591 LogosTokenKind::Arrow => TokenKind::Arrow,
592 LogosTokenKind::FatArrow => TokenKind::FatArrow,
593 LogosTokenKind::ColonColon => TokenKind::ColonColon,
594 LogosTokenKind::Plus => TokenKind::Plus,
595 LogosTokenKind::Minus => TokenKind::Minus,
596 LogosTokenKind::Star => TokenKind::Star,
597 LogosTokenKind::Slash => TokenKind::Slash,
598 LogosTokenKind::Percent => TokenKind::Percent,
599 LogosTokenKind::Eq => TokenKind::Eq,
600 LogosTokenKind::Bang => TokenKind::Bang,
601 LogosTokenKind::Lt => TokenKind::Lt,
602 LogosTokenKind::Gt => TokenKind::Gt,
603 LogosTokenKind::Amp => TokenKind::Amp,
604 LogosTokenKind::Pipe => TokenKind::Pipe,
605 LogosTokenKind::Caret => TokenKind::Caret,
606 LogosTokenKind::Tilde => TokenKind::Tilde,
607 LogosTokenKind::LParen => TokenKind::LParen,
608 LogosTokenKind::RParen => TokenKind::RParen,
609 LogosTokenKind::LBrace => TokenKind::LBrace,
610 LogosTokenKind::RBrace => TokenKind::RBrace,
611 LogosTokenKind::LBracket => TokenKind::LBracket,
612 LogosTokenKind::RBracket => TokenKind::RBracket,
613 LogosTokenKind::Colon => TokenKind::Colon,
614 LogosTokenKind::Semi => TokenKind::Semi,
615 LogosTokenKind::Comma => TokenKind::Comma,
616 LogosTokenKind::Dot => TokenKind::Dot,
617 LogosTokenKind::At => TokenKind::At,
618 LogosTokenKind::AtImport => unreachable!("AtImport should be handled specially"),
620 LogosTokenKind::LineDoc(s) => TokenKind::LineDoc(s),
621 }
622 }
623}
624
/// Front end that runs the logos-generated lexer over a source string and
/// produces the crate's `Token` values.
pub struct LogosLexer<'a> {
    /// Source text to tokenize.
    source: &'a str,
    /// String interner handed to logos as its `extras`; `tokenize` returns it
    /// alongside the tokens.
    interner: ThreadedRodeo,
    /// File id stamped on every span produced for this source.
    file_id: FileId,
}
631
632impl<'a> LogosLexer<'a> {
633 pub fn new(source: &'a str) -> Self {
637 Self {
638 source,
639 interner: ThreadedRodeo::default(),
640 file_id: FileId::DEFAULT,
641 }
642 }
643
644 pub fn with_interner(source: &'a str, interner: ThreadedRodeo) -> Self {
646 Self {
647 source,
648 interner,
649 file_id: FileId::DEFAULT,
650 }
651 }
652
653 pub fn with_file_id(source: &'a str, file_id: FileId) -> Self {
655 Self {
656 source,
657 interner: ThreadedRodeo::default(),
658 file_id,
659 }
660 }
661
662 pub fn with_interner_and_file_id(
664 source: &'a str,
665 interner: ThreadedRodeo,
666 file_id: FileId,
667 ) -> Self {
668 Self {
669 source,
670 interner,
671 file_id,
672 }
673 }
674
675 pub fn tokenize(self) -> CompileResult<(Vec<Token>, ThreadedRodeo)> {
677 let mut tokens = Vec::with_capacity(self.source.len() / 4);
679
680 let mut lexer = LogosTokenKind::lexer_with_extras(self.source, self.interner);
681
682 while let Some(result) = lexer.next() {
683 let span = lexer.span();
684 match result {
685 Ok(logos_kind) => {
686 let token_kind = if matches!(logos_kind, LogosTokenKind::AtImport) {
689 let import_spur = lexer.extras.get_or_intern("import");
690 TokenKind::AtImport(import_spur)
691 } else {
692 logos_kind.into()
693 };
694 tokens.push(Token {
695 kind: token_kind,
696 span: Span::with_file(self.file_id, span.start as u32, span.end as u32),
697 });
698 }
699 Err(lex_error) => {
700 let gruel_util =
701 Span::with_file(self.file_id, span.start as u32, span.end as u32);
702 let slice = lexer.slice();
703 let error_char = slice.chars().next().unwrap_or('?');
704 let kind = match lex_error {
705 LexError::InvalidInteger => ErrorKind::InvalidInteger,
706 LexError::InvalidFloat => ErrorKind::InvalidFloat,
707 LexError::UnexpectedCharacter => ErrorKind::UnexpectedCharacter(error_char),
708 LexError::InvalidStringEscape => {
709 let escape_char = slice
711 .find('\\')
712 .and_then(|pos| slice[pos + 1..].chars().next())
713 .unwrap_or('?');
714 ErrorKind::InvalidStringEscape(escape_char)
715 }
716 LexError::UnterminatedString => ErrorKind::UnterminatedString,
717 LexError::EmptyCharLit => ErrorKind::EmptyCharLit,
718 LexError::UnterminatedCharLit => ErrorKind::UnterminatedCharLit,
719 LexError::MultiCharLit => ErrorKind::MultiCharLit,
720 LexError::InvalidCharEscape => ErrorKind::InvalidCharEscape,
721 LexError::InvalidUnicodeEscape => ErrorKind::InvalidUnicodeEscape,
722 };
723 return Err(CompileError::new(kind, gruel_util));
724 }
725 }
726 }
727
728 let eof_pos = self.source.len() as u32;
730 tokens.push(Token {
731 kind: TokenKind::Eof,
732 span: Span::point_in_file(self.file_id, eof_pos),
733 });
734
735 let interner = lexer.extras;
737
738 Ok((tokens, interner))
739 }
740}
741
#[cfg(test)]
mod tests {
    use super::*;

    /// Resolves the text of an `Ident` token, or `None` for any other kind.
    fn get_ident_str<'a>(kind: &TokenKind, interner: &'a ThreadedRodeo) -> Option<&'a str> {
        match kind {
            TokenKind::Ident(sym) => Some(interner.resolve(sym)),
            _ => None,
        }
    }

    /// Resolves the text of a `String` token, or `None` for any other kind.
    fn get_string_str<'a>(kind: &TokenKind, interner: &'a ThreadedRodeo) -> Option<&'a str> {
        match kind {
            TokenKind::String(sym) => Some(interner.resolve(sym)),
            _ => None,
        }
    }

    #[test]
    fn test_logos_basic_tokens() {
        let lexer = LogosLexer::new("fn main() -> i32 { 42 }");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Fn));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("main"));
        assert!(matches!(tokens[2].kind, TokenKind::LParen));
        assert!(matches!(tokens[3].kind, TokenKind::RParen));
        assert!(matches!(tokens[4].kind, TokenKind::Arrow));
        assert!(matches!(tokens[5].kind, TokenKind::I32));
        assert!(matches!(tokens[6].kind, TokenKind::LBrace));
        assert!(matches!(tokens[7].kind, TokenKind::Int(42)));
        assert!(matches!(tokens[8].kind, TokenKind::RBrace));
        assert!(matches!(tokens[9].kind, TokenKind::Eof));
    }

    #[test]
    fn test_logos_unexpected_character() {
        let lexer = LogosLexer::new("fn main() { $ }");
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::UnexpectedCharacter('$')));
    }

    #[test]
    fn test_logos_at_token() {
        let lexer = LogosLexer::new("@dbg");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::At));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("dbg"));
    }

    #[test]
    fn test_logos_at_import_token() {
        let lexer = LogosLexer::new("@import");
        let (tokens, interner) = lexer.tokenize().unwrap();
        if let TokenKind::AtImport(spur) = tokens[0].kind {
            assert_eq!(interner.resolve(&spur), "import");
        } else {
            panic!("Expected AtImport token");
        }
        assert!(matches!(tokens[1].kind, TokenKind::Eof));
    }

    #[test]
    fn test_logos_at_import_vs_at_other() {
        let lexer = LogosLexer::new("@import @other");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::AtImport(_)));
        assert!(matches!(tokens[1].kind, TokenKind::At));
        assert_eq!(get_ident_str(&tokens[2].kind, &interner), Some("other"));
    }

    #[test]
    fn test_logos_at_import_span() {
        let lexer = LogosLexer::new("@import");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert_eq!(tokens[0].span, Span::new(0, 7));
    }

    #[test]
    fn test_logos_at_import_with_parens() {
        let lexer = LogosLexer::new(r#"@import("math.gruel")"#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::AtImport(_)));
        assert!(matches!(tokens[1].kind, TokenKind::LParen));
        assert_eq!(
            get_string_str(&tokens[2].kind, &interner),
            Some("math.gruel")
        );
        assert!(matches!(tokens[3].kind, TokenKind::RParen));
    }

    #[test]
    fn test_logos_at_import_suffix_is_error() {
        // `@import` directly followed by an identifier character is rejected
        // by `at_import_callback`.
        let lexer = LogosLexer::new("@importx");
        let result = lexer.tokenize();
        assert!(result.is_err());
    }

    #[test]
    fn test_logos_spans() {
        let lexer = LogosLexer::new("fn main");
        let (tokens, _interner) = lexer.tokenize().unwrap();

        assert_eq!(tokens[0].span, Span::new(0, 2));
        assert_eq!(tokens[1].span, Span::new(3, 7));
    }

    #[test]
    fn test_logos_arithmetic_operators() {
        let lexer = LogosLexer::new("1 + 2 - 3 * 4 / 5 % 6");
        let (tokens, _interner) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Int(1)));
        assert!(matches!(tokens[1].kind, TokenKind::Plus));
        assert!(matches!(tokens[2].kind, TokenKind::Int(2)));
        assert!(matches!(tokens[3].kind, TokenKind::Minus));
        assert!(matches!(tokens[4].kind, TokenKind::Int(3)));
        assert!(matches!(tokens[5].kind, TokenKind::Star));
        assert!(matches!(tokens[6].kind, TokenKind::Int(4)));
        assert!(matches!(tokens[7].kind, TokenKind::Slash));
        assert!(matches!(tokens[8].kind, TokenKind::Int(5)));
        assert!(matches!(tokens[9].kind, TokenKind::Percent));
        assert!(matches!(tokens[10].kind, TokenKind::Int(6)));
    }

    #[test]
    fn test_logos_minus_vs_arrow() {
        let lexer = LogosLexer::new("a - b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Minus));

        let lexer = LogosLexer::new("-> i32");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::Arrow));

        // A leading minus is its own token, not part of the integer literal.
        let lexer = LogosLexer::new("-1");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::Minus));
        assert!(matches!(tokens[1].kind, TokenKind::Int(1)));
    }

    #[test]
    fn test_logos_let_binding() {
        let lexer = LogosLexer::new("let x = 42;");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Let));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("x"));
        assert!(matches!(tokens[2].kind, TokenKind::Eq));
        assert!(matches!(tokens[3].kind, TokenKind::Int(42)));
        assert!(matches!(tokens[4].kind, TokenKind::Semi));
    }

    #[test]
    fn test_logos_logical_operators() {
        let lexer = LogosLexer::new("!true && false || true");
        let (tokens, _) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Bang));
        assert!(matches!(tokens[1].kind, TokenKind::True));
        assert!(matches!(tokens[2].kind, TokenKind::AmpAmp));
        assert!(matches!(tokens[3].kind, TokenKind::False));
        assert!(matches!(tokens[4].kind, TokenKind::PipePipe));
        assert!(matches!(tokens[5].kind, TokenKind::True));
    }

    #[test]
    fn test_logos_comparison_operators() {
        let lexer = LogosLexer::new("a == b != c < d > e <= f >= g");
        let (tokens, _) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[1].kind, TokenKind::EqEq));
        assert!(matches!(tokens[3].kind, TokenKind::BangEq));
        assert!(matches!(tokens[5].kind, TokenKind::Lt));
        assert!(matches!(tokens[7].kind, TokenKind::Gt));
        assert!(matches!(tokens[9].kind, TokenKind::LtEq));
        assert!(matches!(tokens[11].kind, TokenKind::GtEq));
    }

    #[test]
    fn test_logos_line_comments() {
        let lexer = LogosLexer::new("fn // comment\nmain");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Fn));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("main"));
        assert!(matches!(tokens[2].kind, TokenKind::Eof));
    }

    #[test]
    fn test_logos_keywords_vs_identifiers() {
        let lexer = LogosLexer::new("fn let mut if else while break continue true false");
        let (tokens, _) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::Fn));
        assert!(matches!(tokens[1].kind, TokenKind::Let));
        assert!(matches!(tokens[2].kind, TokenKind::Mut));
        assert!(matches!(tokens[3].kind, TokenKind::If));
        assert!(matches!(tokens[4].kind, TokenKind::Else));
        assert!(matches!(tokens[5].kind, TokenKind::While));
        assert!(matches!(tokens[6].kind, TokenKind::Break));
        assert!(matches!(tokens[7].kind, TokenKind::Continue));
        assert!(matches!(tokens[8].kind, TokenKind::True));
        assert!(matches!(tokens[9].kind, TokenKind::False));

        // A keyword that is only a prefix of a longer word lexes as an identifier.
        let lexer = LogosLexer::new("fns lets mutable iff elseif whileloop");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert_eq!(get_ident_str(&tokens[0].kind, &interner), Some("fns"));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("lets"));
        assert_eq!(get_ident_str(&tokens[2].kind, &interner), Some("mutable"));
        assert_eq!(get_ident_str(&tokens[3].kind, &interner), Some("iff"));
        assert_eq!(get_ident_str(&tokens[4].kind, &interner), Some("elseif"));
        assert_eq!(get_ident_str(&tokens[5].kind, &interner), Some("whileloop"));
    }

    #[test]
    fn test_logos_bitwise_operators() {
        let lexer = LogosLexer::new("a & b | c ^ d ~ e << f >> g");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert_eq!(get_ident_str(&tokens[0].kind, &interner), Some("a"));
        assert!(matches!(tokens[1].kind, TokenKind::Amp));
        assert_eq!(get_ident_str(&tokens[2].kind, &interner), Some("b"));
        assert!(matches!(tokens[3].kind, TokenKind::Pipe));
        assert_eq!(get_ident_str(&tokens[4].kind, &interner), Some("c"));
        assert!(matches!(tokens[5].kind, TokenKind::Caret));
        assert_eq!(get_ident_str(&tokens[6].kind, &interner), Some("d"));
        assert!(matches!(tokens[7].kind, TokenKind::Tilde));
        assert_eq!(get_ident_str(&tokens[8].kind, &interner), Some("e"));
        assert!(matches!(tokens[9].kind, TokenKind::LtLt));
        assert_eq!(get_ident_str(&tokens[10].kind, &interner), Some("f"));
        assert!(matches!(tokens[11].kind, TokenKind::GtGt));
        assert_eq!(get_ident_str(&tokens[12].kind, &interner), Some("g"));
    }

    #[test]
    fn test_logos_bitwise_vs_logical() {
        let lexer = LogosLexer::new("a & b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Amp));

        let lexer = LogosLexer::new("a && b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::AmpAmp));

        let lexer = LogosLexer::new("a | b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Pipe));

        let lexer = LogosLexer::new("a || b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::PipePipe));
    }

    #[test]
    fn test_logos_shift_vs_comparison() {
        let lexer = LogosLexer::new("a << b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::LtLt));

        let lexer = LogosLexer::new("a >> b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::GtGt));

        let lexer = LogosLexer::new("a < b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Lt));

        let lexer = LogosLexer::new("a > b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::Gt));

        let lexer = LogosLexer::new("a <= b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::LtEq));

        let lexer = LogosLexer::new("a >= b");
        let (tokens, _) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[1].kind, TokenKind::GtEq));
    }

    #[test]
    fn test_logos_integer_overflow() {
        // Too many digits to fit in a u64.
        let lexer = LogosLexer::new("99999999999999999999999");
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::InvalidInteger));
    }

    #[test]
    fn test_logos_type_keywords() {
        let lexer = LogosLexer::new("i8 i16 i32 i64 u8 u16 u32 u64 bool");
        let (tokens, _) = lexer.tokenize().unwrap();

        assert!(matches!(tokens[0].kind, TokenKind::I8));
        assert!(matches!(tokens[1].kind, TokenKind::I16));
        assert!(matches!(tokens[2].kind, TokenKind::I32));
        assert!(matches!(tokens[3].kind, TokenKind::I64));
        assert!(matches!(tokens[4].kind, TokenKind::U8));
        assert!(matches!(tokens[5].kind, TokenKind::U16));
        assert!(matches!(tokens[6].kind, TokenKind::U32));
        assert!(matches!(tokens[7].kind, TokenKind::U64));
        assert!(matches!(tokens[8].kind, TokenKind::Bool));

        // A type name that is only a prefix of a longer word lexes as an identifier.
        let lexer = LogosLexer::new("i32x i64ptr boolish u8_data");
        let (tokens, interner) = lexer.tokenize().unwrap();

        assert_eq!(get_ident_str(&tokens[0].kind, &interner), Some("i32x"));
        assert_eq!(get_ident_str(&tokens[1].kind, &interner), Some("i64ptr"));
        assert_eq!(get_ident_str(&tokens[2].kind, &interner), Some("boolish"));
        assert_eq!(get_ident_str(&tokens[3].kind, &interner), Some("u8_data"));
    }

    #[test]
    fn test_logos_unterminated_string() {
        // Missing closing quote at end of input.
        let lexer = LogosLexer::new(r#""hello"#);
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::UnterminatedString));

        // A raw newline inside the literal.
        let lexer = LogosLexer::new("\"hello\nworld");
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::UnterminatedString));

        // Just an opening quote.
        let lexer = LogosLexer::new("\"");
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::UnterminatedString));
    }

    #[test]
    fn test_logos_valid_strings() {
        let lexer = LogosLexer::new(r#""hello""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_string_str(&tokens[0].kind, &interner), Some("hello"));

        let lexer = LogosLexer::new(r#""""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_string_str(&tokens[0].kind, &interner), Some(""));

        let lexer = LogosLexer::new(r#""hello\"world""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("hello\"world")
        );

        let lexer = LogosLexer::new(r#""hello\\world""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("hello\\world")
        );
    }

    #[test]
    fn test_logos_escape_newline() {
        let lexer = LogosLexer::new(r#""line1\nline2""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("line1\nline2")
        );
    }

    #[test]
    fn test_logos_escape_tab() {
        let lexer = LogosLexer::new(r#""col1\tcol2""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("col1\tcol2")
        );
    }

    #[test]
    fn test_logos_escape_carriage_return() {
        let lexer = LogosLexer::new(r#""line\r\n""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_string_str(&tokens[0].kind, &interner), Some("line\r\n"));
    }

    #[test]
    fn test_logos_escape_null() {
        let lexer = LogosLexer::new(r#""null\0byte""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("null\0byte")
        );
    }

    #[test]
    fn test_logos_invalid_escape_q() {
        let lexer = LogosLexer::new(r#""bad\qescape""#);
        let result = lexer.tokenize();
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(matches!(err.kind, ErrorKind::InvalidStringEscape('q')));
    }

    #[test]
    fn test_logos_all_escapes_combined() {
        let lexer = LogosLexer::new(r#""\\\"abc\n\t\r\0xyz""#);
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(
            get_string_str(&tokens[0].kind, &interner),
            Some("\\\"abc\n\t\r\0xyz")
        );
    }

    #[test]
    fn test_interning_deduplicates() {
        let lexer = LogosLexer::new("x x x");
        let (tokens, _interner) = lexer.tokenize().unwrap();

        let sym0 = match &tokens[0].kind {
            TokenKind::Ident(s) => *s,
            _ => panic!("expected Ident"),
        };
        let sym1 = match &tokens[1].kind {
            TokenKind::Ident(s) => *s,
            _ => panic!("expected Ident"),
        };
        let sym2 = match &tokens[2].kind {
            TokenKind::Ident(s) => *s,
            _ => panic!("expected Ident"),
        };

        // The same identifier text must map to the same symbol every time.
        assert_eq!(sym0, sym1);
        assert_eq!(sym1, sym2);
    }

    /// Resolves the text of a `LineDoc` token, or `None` for any other kind.
    fn get_line_doc<'a>(kind: &TokenKind, interner: &'a ThreadedRodeo) -> Option<&'a str> {
        match kind {
            TokenKind::LineDoc(sym) => Some(interner.resolve(sym)),
            _ => None,
        }
    }

    #[test]
    fn test_line_doc_basic() {
        let lexer = LogosLexer::new("/// hello\nfn main() {}");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_line_doc(&tokens[0].kind, &interner), Some("hello"));
        assert!(matches!(tokens[1].kind, TokenKind::Fn));
    }

    #[test]
    fn test_line_doc_no_space() {
        let lexer = LogosLexer::new("///x\n");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_line_doc(&tokens[0].kind, &interner), Some("x"));
    }

    #[test]
    fn test_line_doc_empty() {
        let lexer = LogosLexer::new("///\n");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_line_doc(&tokens[0].kind, &interner), Some(""));
    }

    #[test]
    fn test_line_doc_strips_one_space_only() {
        // Two spaces after the marker: exactly one is stripped and the second
        // must survive in the doc text.
        let lexer = LogosLexer::new("///  hello\n");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_line_doc(&tokens[0].kind, &interner), Some(" hello"));
    }

    #[test]
    fn test_four_slashes_is_plain_comment() {
        let lexer = LogosLexer::new("//// not a doc\nfn main() {}");
        let (tokens, _interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::Fn));
    }

    #[test]
    fn test_five_slashes_is_plain_comment() {
        let lexer = LogosLexer::new("///// also plain\nfn main() {}");
        let (tokens, _interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::Fn));
    }

    #[test]
    fn test_line_doc_bang_in_body() {
        // `///!` is an ordinary line doc whose body starts with `!`.
        let lexer = LogosLexer::new("///!boom\n");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_line_doc(&tokens[0].kind, &interner), Some("!boom"));
    }

    #[test]
    fn test_regular_double_slash_skipped() {
        let lexer = LogosLexer::new("// just a comment\nfn main() {}");
        let (tokens, _interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::Fn));
    }

    #[test]
    fn test_empty_double_slash_skipped() {
        let lexer = LogosLexer::new("//\nfn main() {}");
        let (tokens, _interner) = lexer.tokenize().unwrap();
        assert!(matches!(tokens[0].kind, TokenKind::Fn));
    }

    #[test]
    fn test_consecutive_line_docs() {
        let lexer = LogosLexer::new("/// line 1\n/// line 2\nfn main() {}");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_line_doc(&tokens[0].kind, &interner), Some("line 1"));
        assert_eq!(get_line_doc(&tokens[1].kind, &interner), Some("line 2"));
        assert!(matches!(tokens[2].kind, TokenKind::Fn));
    }

    #[test]
    fn test_line_doc_span_excludes_newline() {
        let lexer = LogosLexer::new("/// hi\n");
        let (tokens, _interner) = lexer.tokenize().unwrap();
        assert_eq!(tokens[0].span.start, 0);
        // "///" (3 bytes) + " hi" (3 bytes); the '\n' is not part of the token.
        assert_eq!(tokens[0].span.end, 6);
    }

    #[test]
    fn test_line_doc_at_eof_no_newline() {
        let lexer = LogosLexer::new("/// tail");
        let (tokens, interner) = lexer.tokenize().unwrap();
        assert_eq!(get_line_doc(&tokens[0].kind, &interner), Some("tail"));
    }

    #[test]
    fn test_token_kind_is_copy() {
        let lexer = LogosLexer::new("x");
        let (tokens, _) = lexer.tokenize().unwrap();
        // Using `kind` twice after moving out of the slice only compiles if
        // `TokenKind` is `Copy`.
        let kind = tokens[0].kind;
        let _kind2 = kind;
        let _kind3 = kind;
    }
}