Track synthetic tokens, to be able to remove them again later

2022-02-08 18:13:18 +01:00 · 2022-02-08 18:13:18 +01:00 · 1a5aa84e9f
parent 1b5cd03a37
commit 1a5aa84e9f
6 changed files with 133 additions and 39 deletions
--- a/crates/hir_expand/src/db.rs
+++ b/crates/hir_expand/src/db.rs
@@ -5,8 +5,8 @@ use std::sync::Arc;
 use base_db::{salsa, SourceDatabase};
 use either::Either;
 use limit::Limit;
-use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult, SyntheticToken};
-use rustc_hash::{FxHashMap, FxHashSet};
+use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult};
+use rustc_hash::FxHashSet;
 use syntax::{
    algo::diff,
    ast::{self, HasAttrs, HasDocComments},
@@ -442,7 +442,7 @@ fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Ar
        ));
    }

-    fixup::reverse_fixups(&mut tt);
+    fixup::reverse_fixups(&mut tt, &macro_arg.1);

    ExpandResult { value: Some(Arc::new(tt)), err }
 }
--- a/crates/hir_expand/src/fixup.rs
+++ b/crates/hir_expand/src/fixup.rs
@@ -1,10 +1,10 @@
-use mbe::SyntheticToken;
+use mbe::{SyntheticToken, SyntheticTokenId, TokenMap};
 use rustc_hash::FxHashMap;
 use syntax::{
    ast::{self, AstNode},
-    match_ast, SyntaxKind, SyntaxNode, SyntaxToken,
+    match_ast, SyntaxKind, SyntaxNode, TextRange,
 };
-use tt::{Leaf, Subtree};
+use tt::Subtree;

 #[derive(Debug)]
 pub struct SyntaxFixups {
@@ -16,6 +16,7 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
    let mut append = FxHashMap::default();
    let mut replace = FxHashMap::default();
    let mut preorder = node.preorder();
+    let empty_id = SyntheticTokenId(0);
    while let Some(event) = preorder.next() {
        let node = match event {
            syntax::WalkEvent::Enter(node) => node,
@@ -27,12 +28,32 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
            preorder.skip_subtree();
            continue;
        }
+        let end_range = TextRange::empty(node.text_range().end());
        match_ast! {
            match node {
                ast::FieldExpr(it) => {
                    if it.name_ref().is_none() {
                        // incomplete field access: some_expr.|
-                        append.insert(node.clone(), vec![(SyntaxKind::IDENT, "__ra_fixup".into())]);
+                        append.insert(node.clone(), vec![
+                            SyntheticToken {
+                                kind: SyntaxKind::IDENT,
+                                text: "__ra_fixup".into(),
+                                range: end_range,
+                                id: empty_id,
+                            },
+                        ]);
+                    }
+                },
+                ast::ExprStmt(it) => {
+                    if it.semicolon_token().is_none() {
+                        append.insert(node.clone(), vec![
+                            SyntheticToken {
+                                kind: SyntaxKind::SEMICOLON,
+                                text: ";".into(),
+                                range: end_range,
+                                id: empty_id,
+                            },
+                        ]);
                    }
                },
                _ => (),
@@ -42,20 +63,21 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
    SyntaxFixups { append, replace }
 }

-pub fn reverse_fixups(tt: &mut Subtree) {
+pub fn reverse_fixups(tt: &mut Subtree, token_map: &TokenMap) {
+    eprintln!("token_map: {:?}", token_map);
    tt.token_trees.retain(|tt| match tt {
-        tt::TokenTree::Leaf(Leaf::Ident(ident)) => ident.text != "__ra_fixup",
+        tt::TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id()).is_none(),
        _ => true,
    });
    tt.token_trees.iter_mut().for_each(|tt| match tt {
-        tt::TokenTree::Subtree(tt) => reverse_fixups(tt),
+        tt::TokenTree::Subtree(tt) => reverse_fixups(tt, token_map),
        _ => {}
    });
 }

 #[cfg(test)]
 mod tests {
-    use expect_test::{Expect, expect};
+    use expect_test::{expect, Expect};

    use super::reverse_fixups;

@@ -63,7 +85,7 @@ mod tests {
    fn check(ra_fixture: &str, mut expect: Expect) {
        let parsed = syntax::SourceFile::parse(ra_fixture);
        let fixups = super::fixup_syntax(&parsed.syntax_node());
-        let (mut tt, _tmap) = mbe::syntax_node_to_token_tree_censored(
+        let (mut tt, tmap) = mbe::syntax_node_to_token_tree_censored(
            &parsed.syntax_node(),
            fixups.replace,
            fixups.append,
@@ -77,9 +99,14 @@ mod tests {

        // the fixed-up tree should be syntactically valid
        let (parse, _) = mbe::token_tree_to_syntax_node(&tt, ::mbe::TopEntryPoint::MacroItems);
-        assert_eq!(parse.errors(), &[], "parse has syntax errors. parse tree:\n{:#?}", parse.syntax_node());
+        assert_eq!(
+            parse.errors(),
+            &[],
+            "parse has syntax errors. parse tree:\n{:#?}",
+            parse.syntax_node()
+        );

-        reverse_fixups(&mut tt);
+        reverse_fixups(&mut tt, &tmap);

        // the fixed-up + reversed version should be equivalent to the original input
        // (but token IDs don't matter)
@@ -89,48 +116,60 @@ mod tests {

    #[test]
    fn incomplete_field_expr_1() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
    a.
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup}
-"#]])
+"#]],
+        )
    }

    #[test]
    fn incomplete_field_expr_2() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
    a. ;
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup ;}
-"#]])
+"#]],
+        )
    }

    #[test]
    fn incomplete_field_expr_3() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
    a. ;
    bar();
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup ; bar () ;}
-"#]])
+"#]],
+        )
    }

    #[test]
    fn field_expr_before_call() {
        // another case that easily happens while typing
-        check(r#"
+        check(
+            r#"
 fn foo() {
    a.b
    bar();
 }
-"#, expect![[r#"
-fn foo () {a . b bar () ;}
-"#]])
+"#,
+            expect![[r#"
+fn foo () {a . b ; bar () ;}
+"#]],
+        )
    }
 }
--- a/crates/mbe/src/lib.rs
+++ b/crates/mbe/src/lib.rs
@@ -31,6 +31,7 @@ pub use crate::{
    syntax_bridge::{
        parse_exprs_with_sep, parse_to_token_tree, syntax_node_to_token_tree,
        syntax_node_to_token_tree_censored, token_tree_to_syntax_node, SyntheticToken,
+        SyntheticTokenId,
    },
    token_map::TokenMap,
 };
--- a/crates/mbe/src/syntax_bridge.rs
+++ b/crates/mbe/src/syntax_bridge.rs
@@ -1,6 +1,6 @@
 //! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].

-use rustc_hash::{FxHashMap, FxHashSet};
+use rustc_hash::FxHashMap;
 use stdx::{always, non_empty_vec::NonEmptyVec};
 use syntax::{
    ast::{self, make::tokens::doc_comment},
@@ -35,7 +35,16 @@ pub fn syntax_node_to_token_tree_censored(
    (subtree, c.id_alloc.map)
 }

-pub type SyntheticToken = (SyntaxKind, SmolStr);
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct SyntheticTokenId(pub u32);
+
+#[derive(Debug, Clone)]
+pub struct SyntheticToken {
+    pub kind: SyntaxKind,
+    pub text: SmolStr,
+    pub range: TextRange,
+    pub id: SyntheticTokenId,
+}

 // The following items are what `rustc` macro can be parsed into :
 // link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
@@ -153,13 +162,14 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
            Some(it) => it,
            None => break,
        };
+        let synth_id = token.synthetic_id(&conv);

        let kind = token.kind(&conv);
        if kind == COMMENT {
            if let Some(tokens) = conv.convert_doc_comment(&token) {
                // FIXME: There has to be a better way to do this
                // Add the comments token id to the converted doc string
-                let id = conv.id_alloc().alloc(range);
+                let id = conv.id_alloc().alloc(range, synth_id);
                result.extend(tokens.into_iter().map(|mut tt| {
                    if let tt::TokenTree::Subtree(sub) = &mut tt {
                        if let Some(tt::TokenTree::Leaf(tt::Leaf::Literal(lit))) =
@@ -174,7 +184,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
            continue;
        }
        let tt = if kind.is_punct() && kind != UNDERSCORE {
-            assert_eq!(range.len(), TextSize::of('.'));
+            // assert_eq!(range.len(), TextSize::of('.'));

            if let Some(delim) = subtree.delimiter {
                let expected = match delim.kind {
@@ -226,11 +236,13 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
                    panic!("Token from lexer must be single char: token = {:#?}", token);
                }
            };
-            tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range) }).into()
+            tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range, synth_id) })
+                .into()
        } else {
            macro_rules! make_leaf {
                ($i:ident) => {
-                    tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text(conv) }.into()
+                    tt::$i { id: conv.id_alloc().alloc(range, synth_id), text: token.to_text(conv) }
+                        .into()
                };
            }
            let leaf: tt::Leaf = match kind {
@@ -245,14 +257,14 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
                    let apostrophe = tt::Leaf::from(tt::Punct {
                        char: '\'',
                        spacing: tt::Spacing::Joint,
-                        id: conv.id_alloc().alloc(r),
+                        id: conv.id_alloc().alloc(r, synth_id),
                    });
                    result.push(apostrophe.into());

                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
                    let ident = tt::Leaf::from(tt::Ident {
                        text: SmolStr::new(&token.to_text(conv)[1..]),
-                        id: conv.id_alloc().alloc(r),
+                        id: conv.id_alloc().alloc(r, synth_id),
                    });
                    result.push(ident.into());
                    continue;
@@ -273,7 +285,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {

        conv.id_alloc().close_delim(entry.idx, None);
        let leaf: tt::Leaf = tt::Punct {
-            id: conv.id_alloc().alloc(entry.open_range),
+            id: conv.id_alloc().alloc(entry.open_range, None),
            char: match entry.subtree.delimiter.unwrap().kind {
                tt::DelimiterKind::Parenthesis => '(',
                tt::DelimiterKind::Brace => '{',
@@ -367,11 +379,18 @@ struct TokenIdAlloc {
 }

 impl TokenIdAlloc {
-    fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
+    fn alloc(
+        &mut self,
+        absolute_range: TextRange,
+        synthetic_id: Option<SyntheticTokenId>,
+    ) -> tt::TokenId {
        let relative_range = absolute_range - self.global_offset;
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        self.map.insert(token_id, relative_range);
+        if let Some(id) = synthetic_id {
+            self.map.insert_synthetic(token_id, id);
+        }
        token_id
    }

@@ -411,6 +430,8 @@ trait SrcToken<Ctx>: std::fmt::Debug {
    fn to_char(&self, ctx: &Ctx) -> Option<char>;

    fn to_text(&self, ctx: &Ctx) -> SmolStr;
+
+    fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
 }

 trait TokenConvertor: Sized {
@@ -437,6 +458,10 @@ impl<'a> SrcToken<RawConvertor<'a>> for usize {
    fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
        ctx.lexed.text(*self).into()
    }
+
+    fn synthetic_id(&self, _ctx: &RawConvertor<'a>) -> Option<SyntheticTokenId> {
+        None
+    }
 }

 impl<'a> TokenConvertor for RawConvertor<'a> {
@@ -564,13 +589,14 @@ impl SrcToken<Convertor> for SynToken {
        match self {
            SynToken::Ordinary(token) => token.kind(),
            SynToken::Punch(token, _) => token.kind(),
-            SynToken::Synthetic((kind, _)) => *kind,
+            SynToken::Synthetic(token) => token.kind,
        }
    }
    fn to_char(&self, _ctx: &Convertor) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
+            SynToken::Synthetic(token) if token.text.len() == 1 => token.text.chars().next(),
            SynToken::Synthetic(_) => None,
        }
    }
@@ -578,7 +604,14 @@ impl SrcToken<Convertor> for SynToken {
        match self {
            SynToken::Ordinary(token) => token.text().into(),
            SynToken::Punch(token, _) => token.text().into(),
-            SynToken::Synthetic((_, text)) => text.clone(),
+            SynToken::Synthetic(token) => token.text.clone(),
+        }
+    }
+
+    fn synthetic_id(&self, _ctx: &Convertor) -> Option<SyntheticTokenId> {
+        match self {
+            SynToken::Synthetic(token) => Some(token.id),
+            _ => None,
        }
    }
 }
--- a/crates/mbe/src/token_map.rs
+++ b/crates/mbe/src/token_map.rs
@@ -5,6 +5,8 @@ use std::hash::Hash;
 use parser::{SyntaxKind, T};
 use syntax::{TextRange, TextSize};

+use crate::syntax_bridge::SyntheticTokenId;
+
 #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
 enum TokenTextRange {
    Token(TextRange),
@@ -31,6 +33,7 @@ impl TokenTextRange {
 pub struct TokenMap {
    /// Maps `tt::TokenId` to the *relative* source range.
    entries: Vec<(tt::TokenId, TokenTextRange)>,
+    pub synthetic_entries: Vec<(tt::TokenId, SyntheticTokenId)>,
 }

 impl TokenMap {
@@ -57,6 +60,10 @@ impl TokenMap {
            .filter_map(move |(_, range)| range.by_kind(kind))
    }

+    pub fn synthetic_token_id(&self, token_id: tt::TokenId) -> Option<SyntheticTokenId> {
+        self.synthetic_entries.iter().find(|(tid, _)| *tid == token_id).map(|(_, id)| *id)
+    }
+
    pub fn first_range_by_token(
        &self,
        token_id: tt::TokenId,
@@ -73,6 +80,10 @@ impl TokenMap {
        self.entries.push((token_id, TokenTextRange::Token(relative_range)));
    }

+    pub(crate) fn insert_synthetic(&mut self, token_id: tt::TokenId, id: SyntheticTokenId) {
+        self.synthetic_entries.push((token_id, id));
+    }
+
    pub(crate) fn insert_delim(
        &mut self,
        token_id: tt::TokenId,
--- a/crates/tt/src/lib.rs
+++ b/crates/tt/src/lib.rs
@@ -87,6 +87,16 @@ pub struct Ident {
    pub id: TokenId,
 }

+impl Leaf {
+    pub fn id(&self) -> TokenId {
+        match self {
+            Leaf::Literal(l) => l.id,
+            Leaf::Punct(p) => p.id,
+            Leaf::Ident(i) => i.id,
+        }
+    }
+}
+
 fn print_debug_subtree(f: &mut fmt::Formatter<'_>, subtree: &Subtree, level: usize) -> fmt::Result {
    let align = "  ".repeat(level);