Skip to main content

gruel_doc/
links.rs

1//! ADR-0089 Phase 5: intra-doc link rewriting.
2//!
3//! Rewrites bare reference-style links (`[Name]`, `[Name::method]`,
4//! `[fn name]`, `[struct Name]`, `[enum Name]`, `[interface Name]`,
5//! `[derive Name]`, `[const Name]`) in a doc body into ordinary
6//! Markdown links pointing at the rendered page for the named item.
7//!
8//! Anything that doesn't resolve to a known item is left alone, exactly
9//! as rustdoc does. This is a single pre-render pass — the doc bodies
10//! themselves are immutable inputs from the AST.
11
12use std::collections::HashMap;
13
14/// A name-resolution table: identifier → (kind label, slug).
15///
16/// `kind` is one of `"fn" | "struct" | "enum" | "interface" | "derive"
17/// | "const"`. The kind disambiguates `[fn foo]` from a hypothetical
18/// `[struct foo]` (Gruel disallows shadowing across kinds today, but
19/// the lookup table tolerates both spellings).
20#[derive(Debug, Default, Clone)]
21pub struct LinkTable {
22    by_name: HashMap<String, (String, String)>,
23}
24
25impl LinkTable {
26    pub fn new() -> Self {
27        Self::default()
28    }
29
30    /// Insert a single item; later entries with the same name overwrite
31    /// earlier ones, matching the parser's "last-defined wins" semantics.
32    pub fn insert(&mut self, name: &str, kind: &str, slug: &str) {
33        self.by_name
34            .insert(name.to_string(), (kind.to_string(), slug.to_string()));
35    }
36
37    fn lookup(&self, query: &str) -> Option<&(String, String)> {
38        // Strip optional "kind " prefix (e.g. `fn foo` → `foo`).
39        let trimmed = match query.split_once(' ') {
40            Some((_kind, name)) if is_known_kind(_kind) => name.trim(),
41            _ => query.trim(),
42        };
43        // Strip method suffix: `Name::method` → `Name` (we link to the
44        // type page; per-method anchors are out of scope for MVP).
45        let trimmed = match trimmed.split_once("::") {
46            Some((parent, _method)) => parent,
47            None => trimmed,
48        };
49        self.by_name.get(trimmed)
50    }
51}
52
/// Report whether `s` is one of the kind keywords that may prefix a
/// name inside a bracketed reference (as in `[fn name]`).
///
/// The comparison is exact: case-sensitive, with no trimming.
fn is_known_kind(s: &str) -> bool {
    const KINDS: [&str; 7] = [
        "fn",
        "struct",
        "enum",
        "interface",
        "derive",
        "const",
        "link_extern",
    ];
    KINDS.contains(&s)
}
59
60/// Rewrite intra-doc references in a markdown body.
61///
62/// `extension` is the file extension to append to the slug — `".md"`
63/// for the Markdown renderer, `".html"` for the HTML renderer.
64pub fn rewrite(body: &str, table: &LinkTable, extension: &str) -> String {
65    let bytes = body.as_bytes();
66    let mut out = String::with_capacity(body.len());
67    let mut i = 0;
68    // Indices we walk are byte offsets into the original UTF-8 string.
69    // ASCII characters `[` and `]` are single-byte, so this is safe for
70    // bracket matching; for everything else we slice the original `body`
71    // to copy through whole multi-byte chars intact (rather than byte
72    // by byte, which would corrupt non-ASCII as Latin-1).
73    while i < bytes.len() {
74        if bytes[i] == b'[' {
75            if let Some(end_rel) = find_balanced_bracket(&bytes[i + 1..]) {
76                let end = i + 1 + end_rel;
77                let inner = &body[i + 1..end];
78                // Reference-style links: skip if this is `[label][ref]`
79                // or `[label](url)` (already a real link). We only
80                // rewrite the bare `[Name]` shortcut form.
81                let after = bytes.get(end + 1).copied();
82                if after == Some(b'(') || after == Some(b'[') {
83                    out.push_str(&body[i..=end]);
84                    i = end + 1;
85                    continue;
86                }
87                if let Some((_, slug)) = table.lookup(inner) {
88                    out.push('[');
89                    out.push_str(inner);
90                    out.push_str("](");
91                    out.push_str(slug);
92                    out.push_str(extension);
93                    out.push(')');
94                    i = end + 1;
95                    continue;
96                }
97            }
98            out.push('[');
99            i += 1;
100            continue;
101        }
102        // Non-`[` byte: copy through one UTF-8 codepoint at the current
103        // byte offset and advance by its length.
104        let ch = body[i..].chars().next().expect("valid utf-8");
105        out.push(ch);
106        i += ch.len_utf8();
107    }
108    out
109}
110
/// Locate the `]` that closes a bracket group whose opening `[` has
/// already been consumed by the caller.
///
/// `rest` is the input immediately after that `[`. Nested `[…]` pairs
/// are skipped over. The result is an index into `rest`. `None` is
/// returned when a newline is reached first (multi-line groups are
/// never rewritten) or when `rest` ends without a matching `]`.
fn find_balanced_bracket(rest: &[u8]) -> Option<usize> {
    // Count of nested `[` seen so far that still await their own `]`.
    let mut open: i32 = 0;
    for (pos, &byte) in rest.iter().enumerate() {
        if byte == b'\n' {
            return None;
        }
        if byte == b'[' {
            open += 1;
        } else if byte == b']' {
            if open == 0 {
                return Some(pos);
            }
            open -= 1;
        }
    }
    None
}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a table that contains exactly one entry.
    fn single_entry(name: &str, kind: &str, slug: &str) -> LinkTable {
        let mut table = LinkTable::new();
        table.insert(name, kind, slug);
        table
    }

    /// The `foo` → `fn.foo` fixture shared by most tests below.
    fn foo_table() -> LinkTable {
        single_entry("foo", "fn", "fn.foo")
    }

    #[test]
    fn rewrites_bare_name() {
        let rewritten = rewrite("see [foo] for details", &foo_table(), ".html");
        assert_eq!(rewritten, "see [foo](fn.foo.html) for details");
    }

    #[test]
    fn rewrites_kind_prefix() {
        let rewritten = rewrite("call [fn foo]", &foo_table(), ".html");
        assert_eq!(rewritten, "call [fn foo](fn.foo.html)");
    }

    #[test]
    fn rewrites_method_to_parent() {
        let table = single_entry("Vec", "struct", "struct.Vec");
        let rewritten = rewrite("see [Vec::push]", &table, ".html");
        assert_eq!(rewritten, "see [Vec::push](struct.Vec.html)");
    }

    #[test]
    fn leaves_unknown_alone() {
        let input = "see [bar] for details";
        let rewritten = rewrite(input, &LinkTable::new(), ".html");
        assert_eq!(rewritten, "see [bar] for details");
    }

    #[test]
    fn leaves_explicit_links_alone() {
        // Inline links `[label](url)` and reference links `[label][ref]`
        // are already real links and must pass through; only the final
        // bare `[foo]` is rewritten.
        let input = "see [foo](other.html) and [foo][ref] and [foo] last";
        let expected = "see [foo](other.html) and [foo][ref] and [foo](fn.foo.html) last";
        assert_eq!(rewrite(input, &foo_table(), ".html"), expected);
    }

    #[test]
    fn extension_swaps_md_and_html() {
        let table = foo_table();
        let md = rewrite("[foo]", &table, ".md");
        let html = rewrite("[foo]", &table, ".html");
        assert_eq!(md, "[foo](fn.foo.md)");
        assert_eq!(html, "[foo](fn.foo.html)");
    }

    #[test]
    fn no_rewrite_across_newlines() {
        // The name is known, but a newline sits between the opening `[`
        // and the `]`, so the group is treated as not-a-link.
        let input = "weird [foo\n] nope";
        let rewritten = rewrite(input, &foo_table(), ".html");
        assert_eq!(rewritten, "weird [foo\n] nope");
    }
}