1use gruel_util::span::Span;
8use lsp_types::{Position, Range};
9
/// Unit in which the `character` field of an LSP [`Position`] is counted.
///
/// The default is [`PositionEncoding::Utf16`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum PositionEncoding {
    /// Columns are UTF-8 code units, i.e. plain byte offsets within the line.
    Utf8,
    /// Columns are UTF-16 code units; characters outside the BMP count as two.
    #[default]
    Utf16,
}
22
/// Index of line start offsets for one source text.
///
/// Lines are split on `\n` only; a `\r` immediately before the `\n` is treated
/// as part of the terminator by [`LineMap::line_end`]. A lone `\r` is not
/// recognised as a line break.
#[derive(Debug, Clone)]
pub struct LineMap {
    /// Byte offset at which each line begins, in ascending order. The first
    /// entry is always 0, so the vector is never empty.
    line_starts: Vec<u32>,
    /// Length of the indexed source in bytes.
    source_len: u32,
}

impl LineMap {
    /// Builds the line index for `source`.
    ///
    /// Offsets are stored as `u32`; anything past `u32::MAX` saturates instead
    /// of wrapping, so the table stays sorted even for oversized inputs.
    pub fn new(source: &str) -> Self {
        let starts_after_newline = source
            .bytes()
            .enumerate()
            .filter(|&(_, b)| b == b'\n')
            .map(|(i, _)| Self::offset(i + 1));
        Self {
            line_starts: std::iter::once(0).chain(starts_after_newline).collect(),
            source_len: Self::offset(source.len()),
        }
    }

    /// Converts a `usize` byte offset to `u32`, saturating at `u32::MAX`.
    fn offset(n: usize) -> u32 {
        n.min(u32::MAX as usize) as u32
    }

    /// Number of lines in the source. Always at least 1; a trailing `\n`
    /// contributes a final empty line.
    pub fn line_count(&self) -> u32 {
        self.line_starts.len() as u32
    }

    /// Zero-based index of the line containing `byte`.
    ///
    /// Offsets past the end of the source are clamped to the source length.
    pub fn line_for_byte(&self, byte: u32) -> u32 {
        let byte = byte.min(self.source_len);
        // Count of lines that start at or before `byte`; the last of those
        // contains it. The count is >= 1 because the first line starts at 0.
        let started = self.line_starts.partition_point(|&start| start <= byte);
        started.saturating_sub(1) as u32
    }

    /// Byte offset at which `line` begins, or the source length when `line`
    /// is past the last line.
    pub fn line_start(&self, line: u32) -> u32 {
        self.line_starts
            .get(line as usize)
            .copied()
            .unwrap_or(self.source_len)
    }

    /// Byte offset just past the last content byte of `line`, i.e. excluding
    /// its `\n` or `\r\n` terminator.
    ///
    /// `source` must be the text this map was built from. For the last line,
    /// or a `line` past the end, this is the source length.
    pub fn line_end(&self, source: &str, line: u32) -> u32 {
        let next = line.saturating_add(1) as usize;
        let next_start = match self.line_starts.get(next) {
            Some(&start) => start,
            None => return self.source_len,
        };

        let bytes = source.as_bytes();
        let byte_before = |offset: u32| -> Option<u8> {
            offset
                .checked_sub(1)
                .and_then(|i| bytes.get(i as usize).copied())
        };

        if byte_before(next_start) != Some(b'\n') {
            return next_start;
        }
        let end = next_start - 1;
        // Drop the `\r` of a CRLF terminator as well. This cannot reach into
        // the previous line: the byte before a line's start is always `\n`.
        if byte_before(end) == Some(b'\r') {
            end - 1
        } else {
            end
        }
    }
}
87
88pub fn byte_to_position(
90 line_map: &LineMap,
91 source: &str,
92 byte: u32,
93 encoding: PositionEncoding,
94) -> Position {
95 let byte = byte.min(source.len() as u32);
96 let line = line_map.line_for_byte(byte);
97 let line_start = line_map.line_start(line) as usize;
98 let prefix = &source[line_start..byte as usize];
99 let character = match encoding {
100 PositionEncoding::Utf8 => prefix.len() as u32,
101 PositionEncoding::Utf16 => prefix.encode_utf16().count() as u32,
102 };
103 Position { line, character }
104}
105
106pub fn position_to_byte(
108 line_map: &LineMap,
109 source: &str,
110 pos: Position,
111 encoding: PositionEncoding,
112) -> u32 {
113 let line_start = line_map.line_start(pos.line) as usize;
114 let line_end = line_map.line_end(source, pos.line) as usize;
115 let line_text = &source[line_start..line_end];
116 let column_bytes = match encoding {
117 PositionEncoding::Utf8 => (pos.character as usize).min(line_text.len()),
118 PositionEncoding::Utf16 => {
119 let mut utf16_count = 0u32;
120 let mut byte_off = 0usize;
121 for c in line_text.chars() {
122 if utf16_count >= pos.character {
123 break;
124 }
125 let unit_len = c.len_utf16() as u32;
126 utf16_count += unit_len;
127 byte_off += c.len_utf8();
128 }
129 byte_off.min(line_text.len())
130 }
131 };
132 (line_start + column_bytes) as u32
133}
134
135pub fn span_to_range(
137 line_map: &LineMap,
138 source: &str,
139 span: Span,
140 encoding: PositionEncoding,
141) -> Range {
142 Range {
143 start: byte_to_position(line_map, source, span.start, encoding),
144 end: byte_to_position(line_map, source, span.end, encoding),
145 }
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a [`Position`].
    fn at(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// Byte offsets map to the expected line and column with UTF-8 columns.
    #[test]
    fn byte_to_position_utf8() {
        let text = "hello\nworld";
        let map = LineMap::new(text);
        let cases = [
            (0u32, at(0, 0)),
            (5, at(0, 5)),
            (6, at(1, 0)),
            (11, at(1, 5)),
        ];
        for (byte, expected) in cases {
            let actual = byte_to_position(&map, text, byte, PositionEncoding::Utf8);
            assert_eq!(actual, expected);
        }
    }

    /// Line and column pairs map back to the expected byte offsets.
    #[test]
    fn position_to_byte_utf8_roundtrip() {
        let text = "foo\nbar\nbaz";
        let map = LineMap::new(text);
        let cases = [
            (at(0, 0), 0u32),
            (at(0, 3), 3),
            (at(1, 0), 4),
            (at(1, 2), 6),
            (at(2, 3), 11),
        ];
        for (pos, expected) in cases {
            let actual = position_to_byte(&map, text, pos, PositionEncoding::Utf8);
            assert_eq!(actual, expected);
        }
    }

    /// A character outside the BMP takes two UTF-16 columns but four bytes.
    #[test]
    fn utf16_handles_surrogate_pairs() {
        let text = "ab🦀c";
        let map = LineMap::new(text);
        let locate = |byte: u32| byte_to_position(&map, text, byte, PositionEncoding::Utf16);

        let start = locate(0);
        let second = locate(1);
        let crab = locate(2);
        let past_crab = locate(6);
        assert_eq!(
            [
                start.character,
                second.character,
                crab.character,
                past_crab.character
            ],
            [0, 1, 2, 4]
        );

        let back_to_start = position_to_byte(&map, text, start, PositionEncoding::Utf16);
        assert_eq!(back_to_start, 0);
        let back_past_crab = position_to_byte(&map, text, past_crab, PositionEncoding::Utf16);
        assert_eq!(back_past_crab, 6);
    }

    /// A single-line span converts to a range on line 0.
    #[test]
    fn span_to_range_basic() {
        let text = "let x = 42;";
        let map = LineMap::new(text);
        let span = Span::with_file(gruel_util::span::FileId::DEFAULT, 4, 5);
        let Range { start, end } = span_to_range(&map, text, span, PositionEncoding::Utf8);
        assert_eq!(start, at(0, 4));
        assert_eq!(end, at(0, 5));
    }

    /// An empty source still has exactly one line, starting at offset 0.
    #[test]
    fn line_map_empty_source() {
        let map = LineMap::new("");
        assert_eq!(map.line_count(), 1);
        assert_eq!(map.line_for_byte(0), 0);
        assert_eq!(map.line_start(0), 0);
    }
}