Skip to main content

gruel_fmt/
trivia.rs

//! Trivia scanner (ADR-0093 Phase 4).
//!
//! Walks the raw source once and records `//` line comments and blank-line
//! runs as a sorted vector of [`TriviaEntry`] values. The emitter consults
//! this table to weave trivia back into the canonical output at the right
//! position.
//!
//! `///` doc comments are *not* trivia — they are parsed onto AST nodes and
//! emitted directly. `////+` runs are also ignored (lexer-skipped).
10
11/// One scanned trivia run.
12#[derive(Debug, Clone, Copy, PartialEq, Eq)]
13pub struct TriviaEntry {
14    pub kind: TriviaKind,
15    /// Inclusive start byte of the trivia in source.
16    pub start: u32,
17    /// Exclusive end byte of the trivia in source.
18    pub end: u32,
19}
20
/// The two categories of trivia the scanner records.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TriviaKind {
    /// A `// ...` line comment. `///` doc comments and runs of four or more
    /// slashes are not reported under this kind.
    Comment,
    /// One or more consecutive blank lines; the output collapses the whole
    /// run to a single blank line.
    Blank,
}
28
29/// Sorted-by-`start` trivia entries for `src`.
30#[derive(Debug, Clone)]
31pub struct TriviaTable {
32    pub entries: Vec<TriviaEntry>,
33}
34
35impl TriviaTable {
36    /// Walk `src` once and collect trivia entries.
37    pub fn scan(src: &str) -> Self {
38        let bytes = src.as_bytes();
39        let mut entries = Vec::new();
40        let mut i = 0;
41        // Count consecutive `\n` bytes; 2+ in a row means at least one blank line.
42        let mut blank_run_start: Option<u32> = None;
43        let mut newlines_in_run: u32 = 0;
44        while i < bytes.len() {
45            let b = bytes[i];
46            if b == b'/' && i + 1 < bytes.len() && bytes[i + 1] == b'/' {
47                // Distinguish `//`, `///` (doc), `////+` (skipped) by counting slashes.
48                let mut slash_count = 0;
49                let mut j = i;
50                while j < bytes.len() && bytes[j] == b'/' {
51                    slash_count += 1;
52                    j += 1;
53                }
54                // ADR-0089: `///` is a doc comment (already on AST); `////+` is
55                // lexer-skipped and not authored as trivia worth preserving.
56                let is_plain_comment = slash_count == 2;
57                // Walk to end of line.
58                let start = i as u32;
59                while j < bytes.len() && bytes[j] != b'\n' {
60                    j += 1;
61                }
62                let end = j as u32;
63                if is_plain_comment {
64                    Self::flush_blank(&mut entries, blank_run_start, newlines_in_run);
65                    blank_run_start = None;
66                    newlines_in_run = 0;
67                    entries.push(TriviaEntry {
68                        kind: TriviaKind::Comment,
69                        start,
70                        end,
71                    });
72                }
73                i = j;
74                continue;
75            }
76            if b == b'\n' {
77                if newlines_in_run == 0 {
78                    blank_run_start = Some(i as u32);
79                }
80                newlines_in_run += 1;
81                i += 1;
82                continue;
83            }
84            // Whitespace (other than newlines) doesn't break a newline run.
85            if b == b' ' || b == b'\t' || b == b'\r' {
86                i += 1;
87                continue;
88            }
89            // Any other byte starts non-trivia content — flush any pending
90            // blank-line run.
91            Self::flush_blank(&mut entries, blank_run_start, newlines_in_run);
92            blank_run_start = None;
93            newlines_in_run = 0;
94            // Skip a string literal so its embedded `//` doesn't trip the scanner.
95            if b == b'"' {
96                i = skip_string(bytes, i);
97                continue;
98            }
99            // Skip a char literal so embedded `'/'`, `'\''` don't confuse us.
100            if b == b'\'' {
101                i = skip_char(bytes, i);
102                continue;
103            }
104            i += 1;
105        }
106        // EOF flush.
107        Self::flush_blank(&mut entries, blank_run_start, newlines_in_run);
108        TriviaTable { entries }
109    }
110
111    fn flush_blank(entries: &mut Vec<TriviaEntry>, start: Option<u32>, newlines: u32) {
112        // 2+ consecutive newlines == at least one blank line.
113        if newlines >= 2
114            && let Some(s) = start
115        {
116            entries.push(TriviaEntry {
117                kind: TriviaKind::Blank,
118                start: s,
119                end: s + newlines,
120            });
121        }
122    }
123}
124
/// Return the index just past the string literal whose opening `"` sits at
/// `start`.
///
/// A backslash that is not the final byte consumes itself and the following
/// byte. If a `\n` is reached first the literal is unterminated and the index
/// of that `\n` is returned (the lexer reports the error). If input ends
/// first, the index where scanning stopped is returned.
fn skip_string(bytes: &[u8], start: usize) -> usize {
    let len = bytes.len();
    let mut pos = start + 1;
    loop {
        if pos >= len {
            return pos;
        }
        let here = bytes[pos];
        if here == b'"' {
            // Closing quote: resume right after it.
            return pos + 1;
        }
        if here == b'\\' && pos + 1 < len {
            // Escape sequence: skip the backslash and the escaped byte.
            pos += 2;
            continue;
        }
        if here == b'\n' {
            // Unterminated — stop at the newline so line tracking stays intact.
            return pos;
        }
        pos += 1;
    }
}
137
/// Return the index just past the char literal whose opening `'` sits at
/// `start`.
///
/// A backslash that is not the final byte consumes itself and the following
/// byte. Scanning stops early at a `\n` (returning its index) or at end of
/// input (returning the index where scanning stopped).
fn skip_char(bytes: &[u8], start: usize) -> usize {
    let mut cursor = start + 1;
    while let Some(&byte) = bytes.get(cursor) {
        if byte == b'\'' {
            // Closing quote: resume right after it.
            return cursor + 1;
        }
        // A backslash only forms an escape pair when another byte follows it.
        let escaped_pair = byte == b'\\' && cursor + 1 < bytes.len();
        if !escaped_pair && byte == b'\n' {
            // Unterminated literal: hand the newline back to the caller.
            return cursor;
        }
        cursor += if escaped_pair { 2 } else { 1 };
    }
    cursor
}
150
/// Lookup table from byte offset to 0-based line number, computed once for a
/// given source text.
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset at which each line begins; entry `i` belongs to line `i`.
    /// Always starts with `0` and is strictly increasing.
    line_starts: Vec<u32>,
}

impl LineIndex {
    /// Build the index for `src`: line 0 starts at offset 0, and every `\n`
    /// starts a new line at the byte immediately after it.
    pub fn new(src: &str) -> Self {
        let after_newlines = src
            .bytes()
            .enumerate()
            .filter(|&(_, byte)| byte == b'\n')
            .map(|(offset, _)| (offset + 1) as u32);
        let line_starts = std::iter::once(0u32).chain(after_newlines).collect();
        Self { line_starts }
    }

    /// 0-based number of the line that contains offset `byte`. Offsets past
    /// the end of the source map to the last line.
    pub fn line_of(&self, byte: u32) -> u32 {
        // Number of lines that begin at or before `byte`; the last of those is
        // the line holding `byte`.
        let begun = self.line_starts.partition_point(|&start| start <= byte);
        begun.saturating_sub(1) as u32
    }
}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Scan `src` and hand back only the entry list.
    fn scanned(src: &str) -> Vec<TriviaEntry> {
        TriviaTable::scan(src).entries
    }

    #[test]
    fn scan_no_comments() {
        assert!(scanned("fn main() -> i32 { 0 }").is_empty());
    }

    #[test]
    fn scan_single_line_comment() {
        let entries = scanned("// hello\nfn main() -> i32 { 0 }");
        // `end` is exclusive: it stops just before the `\n`.
        let expected = TriviaEntry {
            kind: TriviaKind::Comment,
            start: 0,
            end: 8,
        };
        assert_eq!(entries, [expected]);
    }

    #[test]
    fn scan_doc_comment_is_not_trivia() {
        assert!(scanned("/// doc\nfn main() -> i32 { 0 }").is_empty());
    }

    #[test]
    fn scan_quadruple_slash_is_not_trivia() {
        assert!(scanned("//// skipped\nfn main() -> i32 { 0 }").is_empty());
    }

    #[test]
    fn scan_blank_line() {
        let entries = scanned("fn a() -> i32 { 0 }\n\nfn b() -> i32 { 0 }");
        // Exactly one entry, and it is a blank-line run.
        let kinds: Vec<TriviaKind> = entries.iter().map(|entry| entry.kind).collect();
        assert_eq!(kinds, [TriviaKind::Blank]);
    }

    #[test]
    fn scan_comment_in_string_literal_ignored() {
        let src = r#"fn main() -> i32 { let s = "// not a comment"; 0 }"#;
        assert!(scanned(src).is_empty());
    }

    #[test]
    fn line_of() {
        let index = LineIndex::new("abc\ndef\nghi");
        // (byte offset, expected 0-based line)
        let cases: [(u32, u32); 4] = [(0, 0), (3, 0), (4, 1), (8, 2)];
        for &(byte, line) in cases.iter() {
            assert_eq!(index.line_of(byte), line);
        }
    }
}