From 1a5aa84e9f48f652e584cdbe0393616730ea73d8 Mon Sep 17 00:00:00 2001
From: Florian Diebold
Date: Tue, 8 Feb 2022 18:13:18 +0100
Subject: [PATCH] Track synthetic tokens, to be able to remove them again later

---
 crates/hir_expand/src/db.rs     |  6 +--
 crates/hir_expand/src/fixup.rs  | 87 ++++++++++++++++++++++++---------
 crates/mbe/src/lib.rs           |  1 +
 crates/mbe/src/syntax_bridge.rs | 57 ++++++++++++++++-----
 crates/mbe/src/token_map.rs     | 11 +++++
 crates/tt/src/lib.rs            | 10 ++++
 6 files changed, 133 insertions(+), 39 deletions(-)

diff --git a/crates/hir_expand/src/db.rs b/crates/hir_expand/src/db.rs
index 935fb30fa9f..7a21e3e8701 100644
--- a/crates/hir_expand/src/db.rs
+++ b/crates/hir_expand/src/db.rs
@@ -5,8 +5,8 @@ use std::sync::Arc;
 use base_db::{salsa, SourceDatabase};
 use either::Either;
 use limit::Limit;
-use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult, SyntheticToken};
-use rustc_hash::{FxHashMap, FxHashSet};
+use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult};
+use rustc_hash::FxHashSet;
 use syntax::{
     algo::diff,
     ast::{self, HasAttrs, HasDocComments},
@@ -442,7 +442,7 @@ fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Arc<tt::Subtree>>>
         ));
     }
 
-    fixup::reverse_fixups(&mut tt);
+    fixup::reverse_fixups(&mut tt, &macro_arg.1);
 
     ExpandResult { value: Some(Arc::new(tt)), err }
 }
diff --git a/crates/hir_expand/src/fixup.rs b/crates/hir_expand/src/fixup.rs
--- a/crates/hir_expand/src/fixup.rs
+++ b/crates/hir_expand/src/fixup.rs
@@ -21,6 +21,7 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
     let mut append = FxHashMap::default();
     let mut replace = FxHashMap::default();
     let mut preorder = node.preorder();
+    let empty_id = SyntheticTokenId(0);
     while let Some(event) = preorder.next() {
         let node = match event {
             syntax::WalkEvent::Enter(node) => node,
@@ -27,12 +28,32 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
             preorder.skip_subtree();
             continue;
         }
+        let end_range = TextRange::empty(node.text_range().end());
         match_ast! {
             match node {
                 ast::FieldExpr(it) => {
                     if it.name_ref().is_none() {
                         // incomplete field access: some_expr.|
-                        append.insert(node.clone(), vec![(SyntaxKind::IDENT, "__ra_fixup".into())]);
+                        append.insert(node.clone(), vec![
+                            SyntheticToken {
+                                kind: SyntaxKind::IDENT,
+                                text: "__ra_fixup".into(),
+                                range: end_range,
+                                id: empty_id,
+                            },
+                        ]);
                     }
                 },
+                ast::ExprStmt(it) => {
+                    if it.semicolon_token().is_none() {
+                        append.insert(node.clone(), vec![
+                            SyntheticToken {
+                                kind: SyntaxKind::SEMICOLON,
+                                text: ";".into(),
+                                range: end_range,
+                                id: empty_id,
+                            },
+                        ]);
+                    }
+                },
                 _ => (),
@@ -42,20 +63,21 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
     SyntaxFixups { append, replace }
 }
 
-pub fn reverse_fixups(tt: &mut Subtree) {
+pub fn reverse_fixups(tt: &mut Subtree, token_map: &TokenMap) {
+    eprintln!("token_map: {:?}", token_map);
     tt.token_trees.retain(|tt| match tt {
-        tt::TokenTree::Leaf(Leaf::Ident(ident)) => ident.text != "__ra_fixup",
+        tt::TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id()).is_none(),
         _ => true,
     });
     tt.token_trees.iter_mut().for_each(|tt| match tt {
-        tt::TokenTree::Subtree(tt) => reverse_fixups(tt),
+        tt::TokenTree::Subtree(tt) => reverse_fixups(tt, token_map),
         _ => {}
     });
 }
 
 #[cfg(test)]
 mod tests {
-    use expect_test::{Expect, expect};
+    use expect_test::{expect, Expect};
 
     use super::reverse_fixups;
 
@@ -63,7 +85,7 @@ mod tests {
     fn check(ra_fixture: &str, mut expect: Expect) {
         let parsed = syntax::SourceFile::parse(ra_fixture);
         let fixups = super::fixup_syntax(&parsed.syntax_node());
-        let (mut tt, _tmap) = mbe::syntax_node_to_token_tree_censored(
+        let (mut tt, tmap) = mbe::syntax_node_to_token_tree_censored(
            &parsed.syntax_node(),
            fixups.replace,
            fixups.append,
@@ -77,9 +99,14 @@ mod tests {
 
         // the fixed-up tree should be syntactically valid
         let (parse, _) = mbe::token_tree_to_syntax_node(&tt, ::mbe::TopEntryPoint::MacroItems);
-        assert_eq!(parse.errors(), &[], "parse has syntax errors. parse tree:\n{:#?}", parse.syntax_node());
+        assert_eq!(
+            parse.errors(),
+            &[],
+            "parse has syntax errors. parse tree:\n{:#?}",
+            parse.syntax_node()
+        );
 
-        reverse_fixups(&mut tt);
+        reverse_fixups(&mut tt, &tmap);
 
         // the fixed-up + reversed version should be equivalent to the original input
         // (but token IDs don't matter)
@@ -89,48 +116,60 @@
 
     #[test]
     fn incomplete_field_expr_1() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
     a.
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup}
-"#]])
+"#]],
+        )
     }
 
     #[test]
     fn incomplete_field_expr_2() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
     a. ;
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup ;}
-"#]])
+"#]],
+        )
     }
 
     #[test]
     fn incomplete_field_expr_3() {
-        check(r#"
+        check(
+            r#"
 fn foo() {
     a. ;
     bar();
 }
-"#, expect![[r#"
+"#,
+            expect![[r#"
 fn foo () {a . __ra_fixup ; bar () ;}
-"#]])
+"#]],
+        )
     }
 
     #[test]
     fn field_expr_before_call() {
         // another case that easily happens while typing
-        check(r#"
+        check(
+            r#"
 fn foo() {
     a.b
     bar();
 }
-"#, expect![[r#"
-fn foo () {a . b bar () ;}
-"#]])
+"#,
+            expect![[r#"
+fn foo () {a . b ; bar () ;}
+"#]],
+        )
     }
 }
diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs
index 3633624c641..a35c22c2e11 100644
--- a/crates/mbe/src/lib.rs
+++ b/crates/mbe/src/lib.rs
@@ -31,6 +31,7 @@ pub use crate::{
     syntax_bridge::{
         parse_exprs_with_sep, parse_to_token_tree, syntax_node_to_token_tree,
         syntax_node_to_token_tree_censored, token_tree_to_syntax_node, SyntheticToken,
+        SyntheticTokenId,
     },
     token_map::TokenMap,
 };
diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs
index d3489813e17..7feaaaa62d8 100644
--- a/crates/mbe/src/syntax_bridge.rs
+++ b/crates/mbe/src/syntax_bridge.rs
@@ -1,6 +1,6 @@
 //! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
 
-use rustc_hash::{FxHashMap, FxHashSet};
+use rustc_hash::FxHashMap;
 use stdx::{always, non_empty_vec::NonEmptyVec};
 use syntax::{
     ast::{self, make::tokens::doc_comment},
@@ -35,7 +35,16 @@ pub fn syntax_node_to_token_tree_censored(
     (subtree, c.id_alloc.map)
 }
 
-pub type SyntheticToken = (SyntaxKind, SmolStr);
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct SyntheticTokenId(pub u32);
+
+#[derive(Debug, Clone)]
+pub struct SyntheticToken {
+    pub kind: SyntaxKind,
+    pub text: SmolStr,
+    pub range: TextRange,
+    pub id: SyntheticTokenId,
+}
 
 // The following items are what `rustc` macro can be parsed into :
 // link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
@@ -153,13 +162,14 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
             Some(it) => it,
             None => break,
         };
+        let synth_id = token.synthetic_id(&conv);
 
         let kind = token.kind(&conv);
         if kind == COMMENT {
             if let Some(tokens) = conv.convert_doc_comment(&token) {
                 // FIXME: There has to be a better way to do this
                 // Add the comments token id to the converted doc string
-                let id = conv.id_alloc().alloc(range);
+                let id = conv.id_alloc().alloc(range, synth_id);
                 result.extend(tokens.into_iter().map(|mut tt| {
                     if let tt::TokenTree::Subtree(sub) = &mut tt {
                         if let Some(tt::TokenTree::Leaf(tt::Leaf::Literal(lit))) =
@@ -174,7 +184,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
             continue;
         }
         let tt = if kind.is_punct() && kind != UNDERSCORE {
-            assert_eq!(range.len(), TextSize::of('.'));
+            // assert_eq!(range.len(), TextSize::of('.'));
 
             if let Some(delim) = subtree.delimiter {
                 let expected = match delim.kind {
@@ -226,11 +236,13 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
                     panic!("Token from lexer must be single char: token = {:#?}", token);
                 }
             };
-            tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range) }).into()
+            tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range, synth_id) })
+                .into()
         } else {
             macro_rules! make_leaf {
make_leaf { ($i:ident) => { - tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text(conv) }.into() + tt::$i { id: conv.id_alloc().alloc(range, synth_id), text: token.to_text(conv) } + .into() }; } let leaf: tt::Leaf = match kind { @@ -245,14 +257,14 @@ fn convert_tokens(conv: &mut C) -> tt::Subtree { let apostrophe = tt::Leaf::from(tt::Punct { char: '\'', spacing: tt::Spacing::Joint, - id: conv.id_alloc().alloc(r), + id: conv.id_alloc().alloc(r, synth_id), }); result.push(apostrophe.into()); let r = TextRange::at(range.start() + char_unit, range.len() - char_unit); let ident = tt::Leaf::from(tt::Ident { text: SmolStr::new(&token.to_text(conv)[1..]), - id: conv.id_alloc().alloc(r), + id: conv.id_alloc().alloc(r, synth_id), }); result.push(ident.into()); continue; @@ -273,7 +285,7 @@ fn convert_tokens(conv: &mut C) -> tt::Subtree { conv.id_alloc().close_delim(entry.idx, None); let leaf: tt::Leaf = tt::Punct { - id: conv.id_alloc().alloc(entry.open_range), + id: conv.id_alloc().alloc(entry.open_range, None), char: match entry.subtree.delimiter.unwrap().kind { tt::DelimiterKind::Parenthesis => '(', tt::DelimiterKind::Brace => '{', @@ -367,11 +379,18 @@ struct TokenIdAlloc { } impl TokenIdAlloc { - fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId { + fn alloc( + &mut self, + absolute_range: TextRange, + synthetic_id: Option, + ) -> tt::TokenId { let relative_range = absolute_range - self.global_offset; let token_id = tt::TokenId(self.next_id); self.next_id += 1; self.map.insert(token_id, relative_range); + if let Some(id) = synthetic_id { + self.map.insert_synthetic(token_id, id); + } token_id } @@ -411,6 +430,8 @@ trait SrcToken: std::fmt::Debug { fn to_char(&self, ctx: &Ctx) -> Option; fn to_text(&self, ctx: &Ctx) -> SmolStr; + + fn synthetic_id(&self, ctx: &Ctx) -> Option; } trait TokenConvertor: Sized { @@ -437,6 +458,10 @@ impl<'a> SrcToken> for usize { fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr { ctx.lexed.text(*self).into() } + + fn synthetic_id(&self, _ctx: &RawConvertor<'a>) -> Option { + None + } } impl<'a> TokenConvertor for RawConvertor<'a> { @@ -564,13 +589,14 @@ impl SrcToken for SynToken { match self { SynToken::Ordinary(token) => token.kind(), SynToken::Punch(token, _) => token.kind(), - SynToken::Synthetic((kind, _)) => *kind, + SynToken::Synthetic(token) => token.kind, } } fn to_char(&self, _ctx: &Convertor) -> Option { match self { SynToken::Ordinary(_) => None, SynToken::Punch(it, i) => it.text().chars().nth((*i).into()), + SynToken::Synthetic(token) if token.text.len() == 1 => token.text.chars().next(), SynToken::Synthetic(_) => None, } } @@ -578,7 +604,14 @@ impl SrcToken for SynToken { match self { SynToken::Ordinary(token) => token.text().into(), SynToken::Punch(token, _) => token.text().into(), - SynToken::Synthetic((_, text)) => text.clone(), + SynToken::Synthetic(token) => token.text.clone(), + } + } + + fn synthetic_id(&self, _ctx: &Convertor) -> Option { + match self { + SynToken::Synthetic(token) => Some(token.id), + _ => None, } } } diff --git a/crates/mbe/src/token_map.rs b/crates/mbe/src/token_map.rs index 9053526d203..ee1090945cb 100644 --- a/crates/mbe/src/token_map.rs +++ b/crates/mbe/src/token_map.rs @@ -5,6 +5,8 @@ use std::hash::Hash; use parser::{SyntaxKind, T}; use syntax::{TextRange, TextSize}; +use crate::syntax_bridge::SyntheticTokenId; + #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] enum TokenTextRange { Token(TextRange), @@ -31,6 +33,7 @@ impl TokenTextRange { pub struct TokenMap { /// Maps `tt::TokenId` to the 
     entries: Vec<(tt::TokenId, TokenTextRange)>,
+    pub synthetic_entries: Vec<(tt::TokenId, SyntheticTokenId)>,
 }
 
 impl TokenMap {
@@ -57,6 +60,10 @@ impl TokenMap {
             .filter_map(move |(_, range)| range.by_kind(kind))
     }
 
+    pub fn synthetic_token_id(&self, token_id: tt::TokenId) -> Option<SyntheticTokenId> {
+        self.synthetic_entries.iter().find(|(tid, _)| *tid == token_id).map(|(_, id)| *id)
+    }
+
     pub fn first_range_by_token(
         &self,
         token_id: tt::TokenId,
@@ -73,6 +80,10 @@ impl TokenMap {
         self.entries.push((token_id, TokenTextRange::Token(relative_range)));
     }
 
+    pub(crate) fn insert_synthetic(&mut self, token_id: tt::TokenId, id: SyntheticTokenId) {
+        self.synthetic_entries.push((token_id, id));
+    }
+
     pub(crate) fn insert_delim(
         &mut self,
         token_id: tt::TokenId,
diff --git a/crates/tt/src/lib.rs b/crates/tt/src/lib.rs
index 9eca970ee21..0316b15038c 100644
--- a/crates/tt/src/lib.rs
+++ b/crates/tt/src/lib.rs
@@ -87,6 +87,16 @@ pub struct Ident {
     pub id: TokenId,
 }
 
+impl Leaf {
+    pub fn id(&self) -> TokenId {
+        match self {
+            Leaf::Literal(l) => l.id,
+            Leaf::Punct(p) => p.id,
+            Leaf::Ident(i) => i.id,
+        }
+    }
+}
+
 fn print_debug_subtree(f: &mut fmt::Formatter<'_>, subtree: &Subtree, level: usize) -> fmt::Result {
     let align = "  ".repeat(level);
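
Note on the mechanism (not part of the patch): every fixup token now gets a `SyntheticTokenId`, which is recorded in the `TokenMap` when the token is converted into a `tt` leaf, so `reverse_fixups` can later drop exactly the leaves whose `tt::TokenId` maps back to a synthetic id, instead of matching on the `__ra_fixup` text. Below is a minimal, self-contained sketch of that id-tracking pattern; the types are simplified stand-ins for rust-analyzer's `tt::TokenId`, `SyntheticTokenId`, and `TokenMap`, not the real API:

```rust
// Simplified stand-ins for tt::TokenId and mbe's SyntheticTokenId.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct TokenId(u32);
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct SyntheticTokenId(u32);

// Stand-in for TokenMap: remembers which allocated token ids were synthetic.
#[derive(Default, Debug)]
struct TokenMap {
    synthetic_entries: Vec<(TokenId, SyntheticTokenId)>,
}

impl TokenMap {
    fn insert_synthetic(&mut self, token_id: TokenId, id: SyntheticTokenId) {
        self.synthetic_entries.push((token_id, id));
    }

    fn synthetic_token_id(&self, token_id: TokenId) -> Option<SyntheticTokenId> {
        self.synthetic_entries.iter().find(|(tid, _)| *tid == token_id).map(|&(_, id)| id)
    }
}

// Stand-in for a tt::Leaf carrying a token id.
#[derive(Debug, PartialEq)]
struct Leaf {
    text: String,
    id: TokenId,
}

fn main() {
    let mut map = TokenMap::default();

    // "Fixup": append a synthetic token and record its token id in the map.
    let mut tokens = vec![Leaf { text: "a".into(), id: TokenId(0) }];
    let synthetic = Leaf { text: "__ra_fixup".into(), id: TokenId(1) };
    map.insert_synthetic(synthetic.id, SyntheticTokenId(0));
    tokens.push(synthetic);

    // "Reverse": drop exactly the leaves the map recorded as synthetic, even if
    // the input happened to contain a real identifier spelled `__ra_fixup`.
    tokens.retain(|leaf| map.synthetic_token_id(leaf.id).is_none());
    assert_eq!(tokens, vec![Leaf { text: "a".into(), id: TokenId(0) }]);
}
```

This is the reason the patch turns the tuple alias `pub type SyntheticToken = (SyntaxKind, SmolStr)` into a struct carrying a `range` and an `id`: the id survives the round trip through token-tree conversion and macro expansion, whereas the old text-based check (`ident.text != "__ra_fixup"`) would also strip identifiers a user legitimately named `__ra_fixup`, and could not remove non-identifier fixups such as the synthesized semicolons.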