Track synthetic tokens, to be able to remove them again later

This commit is contained in:
Florian Diebold 2022-02-08 18:13:18 +01:00
parent 1b5cd03a37
commit 1a5aa84e9f
6 changed files with 133 additions and 39 deletions

View file

@ -5,8 +5,8 @@ use std::sync::Arc;
use base_db::{salsa, SourceDatabase};
use either::Either;
use limit::Limit;
use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult, SyntheticToken};
use rustc_hash::{FxHashMap, FxHashSet};
use mbe::{syntax_node_to_token_tree, ExpandError, ExpandResult};
use rustc_hash::FxHashSet;
use syntax::{
algo::diff,
ast::{self, HasAttrs, HasDocComments},
@ -442,7 +442,7 @@ fn macro_expand(db: &dyn AstDatabase, id: MacroCallId) -> ExpandResult<Option<Ar
));
}
fixup::reverse_fixups(&mut tt);
fixup::reverse_fixups(&mut tt, &macro_arg.1);
ExpandResult { value: Some(Arc::new(tt)), err }
}

View file

@ -1,10 +1,10 @@
use mbe::SyntheticToken;
use mbe::{SyntheticToken, SyntheticTokenId, TokenMap};
use rustc_hash::FxHashMap;
use syntax::{
ast::{self, AstNode},
match_ast, SyntaxKind, SyntaxNode, SyntaxToken,
match_ast, SyntaxKind, SyntaxNode, TextRange,
};
use tt::{Leaf, Subtree};
use tt::Subtree;
#[derive(Debug)]
pub struct SyntaxFixups {
@ -16,6 +16,7 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
let mut append = FxHashMap::default();
let mut replace = FxHashMap::default();
let mut preorder = node.preorder();
let empty_id = SyntheticTokenId(0);
while let Some(event) = preorder.next() {
let node = match event {
syntax::WalkEvent::Enter(node) => node,
@ -27,12 +28,32 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
preorder.skip_subtree();
continue;
}
let end_range = TextRange::empty(node.text_range().end());
match_ast! {
match node {
ast::FieldExpr(it) => {
if it.name_ref().is_none() {
// incomplete field access: some_expr.|
append.insert(node.clone(), vec![(SyntaxKind::IDENT, "__ra_fixup".into())]);
append.insert(node.clone(), vec![
SyntheticToken {
kind: SyntaxKind::IDENT,
text: "__ra_fixup".into(),
range: end_range,
id: empty_id,
},
]);
}
},
ast::ExprStmt(it) => {
if it.semicolon_token().is_none() {
append.insert(node.clone(), vec![
SyntheticToken {
kind: SyntaxKind::SEMICOLON,
text: ";".into(),
range: end_range,
id: empty_id,
},
]);
}
},
_ => (),
@ -42,20 +63,21 @@ pub fn fixup_syntax(node: &SyntaxNode) -> SyntaxFixups {
SyntaxFixups { append, replace }
}
pub fn reverse_fixups(tt: &mut Subtree) {
pub fn reverse_fixups(tt: &mut Subtree, token_map: &TokenMap) {
eprintln!("token_map: {:?}", token_map);
tt.token_trees.retain(|tt| match tt {
tt::TokenTree::Leaf(Leaf::Ident(ident)) => ident.text != "__ra_fixup",
tt::TokenTree::Leaf(leaf) => token_map.synthetic_token_id(leaf.id()).is_none(),
_ => true,
});
tt.token_trees.iter_mut().for_each(|tt| match tt {
tt::TokenTree::Subtree(tt) => reverse_fixups(tt),
tt::TokenTree::Subtree(tt) => reverse_fixups(tt, token_map),
_ => {}
});
}
#[cfg(test)]
mod tests {
use expect_test::{Expect, expect};
use expect_test::{expect, Expect};
use super::reverse_fixups;
@ -63,7 +85,7 @@ mod tests {
fn check(ra_fixture: &str, mut expect: Expect) {
let parsed = syntax::SourceFile::parse(ra_fixture);
let fixups = super::fixup_syntax(&parsed.syntax_node());
let (mut tt, _tmap) = mbe::syntax_node_to_token_tree_censored(
let (mut tt, tmap) = mbe::syntax_node_to_token_tree_censored(
&parsed.syntax_node(),
fixups.replace,
fixups.append,
@ -77,9 +99,14 @@ mod tests {
// the fixed-up tree should be syntactically valid
let (parse, _) = mbe::token_tree_to_syntax_node(&tt, ::mbe::TopEntryPoint::MacroItems);
assert_eq!(parse.errors(), &[], "parse has syntax errors. parse tree:\n{:#?}", parse.syntax_node());
assert_eq!(
parse.errors(),
&[],
"parse has syntax errors. parse tree:\n{:#?}",
parse.syntax_node()
);
reverse_fixups(&mut tt);
reverse_fixups(&mut tt, &tmap);
// the fixed-up + reversed version should be equivalent to the original input
// (but token IDs don't matter)
@ -89,48 +116,60 @@ mod tests {
#[test]
fn incomplete_field_expr_1() {
check(r#"
check(
r#"
fn foo() {
a.
}
"#, expect![[r#"
"#,
expect![[r#"
fn foo () {a . __ra_fixup}
"#]])
"#]],
)
}
#[test]
fn incomplete_field_expr_2() {
check(r#"
check(
r#"
fn foo() {
a. ;
}
"#, expect![[r#"
"#,
expect![[r#"
fn foo () {a . __ra_fixup ;}
"#]])
"#]],
)
}
#[test]
fn incomplete_field_expr_3() {
check(r#"
check(
r#"
fn foo() {
a. ;
bar();
}
"#, expect![[r#"
"#,
expect![[r#"
fn foo () {a . __ra_fixup ; bar () ;}
"#]])
"#]],
)
}
#[test]
fn field_expr_before_call() {
// another case that easily happens while typing
check(r#"
check(
r#"
fn foo() {
a.b
bar();
}
"#, expect![[r#"
fn foo () {a . b bar () ;}
"#]])
"#,
expect![[r#"
fn foo () {a . b ; bar () ;}
"#]],
)
}
}

View file

@ -31,6 +31,7 @@ pub use crate::{
syntax_bridge::{
parse_exprs_with_sep, parse_to_token_tree, syntax_node_to_token_tree,
syntax_node_to_token_tree_censored, token_tree_to_syntax_node, SyntheticToken,
SyntheticTokenId,
},
token_map::TokenMap,
};

View file

@ -1,6 +1,6 @@
//! Conversions between [`SyntaxNode`] and [`tt::TokenTree`].
use rustc_hash::{FxHashMap, FxHashSet};
use rustc_hash::FxHashMap;
use stdx::{always, non_empty_vec::NonEmptyVec};
use syntax::{
ast::{self, make::tokens::doc_comment},
@ -35,7 +35,16 @@ pub fn syntax_node_to_token_tree_censored(
(subtree, c.id_alloc.map)
}
pub type SyntheticToken = (SyntaxKind, SmolStr);
/// Identifier attached to a [`SyntheticToken`].
///
/// When a token id is allocated for a synthetic token, this value is stored
/// alongside it in the token map so the token can be recognised as synthetic
/// later on.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SyntheticTokenId(pub u32);
/// A token supplied by the caller of the conversion rather than read from the
/// syntax tree (for example the tokens added by syntax fixups).
#[derive(Debug, Clone)]
pub struct SyntheticToken {
/// Syntax kind reported for this token during conversion.
pub kind: SyntaxKind,
/// Text reported for this token during conversion.
pub text: SmolStr,
/// Text range associated with the token.
// NOTE(review): the fixup code passes an empty range at the end of the node
// being fixed up; confirm whether other producers follow the same convention.
pub range: TextRange,
/// Id recorded in the token map for every token id allocated from this token.
pub id: SyntheticTokenId,
}
// The following items are what `rustc` macro can be parsed into :
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
@ -153,13 +162,14 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
Some(it) => it,
None => break,
};
let synth_id = token.synthetic_id(&conv);
let kind = token.kind(&conv);
if kind == COMMENT {
if let Some(tokens) = conv.convert_doc_comment(&token) {
// FIXME: There has to be a better way to do this
// Add the comments token id to the converted doc string
let id = conv.id_alloc().alloc(range);
let id = conv.id_alloc().alloc(range, synth_id);
result.extend(tokens.into_iter().map(|mut tt| {
if let tt::TokenTree::Subtree(sub) = &mut tt {
if let Some(tt::TokenTree::Leaf(tt::Leaf::Literal(lit))) =
@ -174,7 +184,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
continue;
}
let tt = if kind.is_punct() && kind != UNDERSCORE {
assert_eq!(range.len(), TextSize::of('.'));
// assert_eq!(range.len(), TextSize::of('.'));
if let Some(delim) = subtree.delimiter {
let expected = match delim.kind {
@ -226,11 +236,13 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
panic!("Token from lexer must be single char: token = {:#?}", token);
}
};
tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range) }).into()
tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range, synth_id) })
.into()
} else {
macro_rules! make_leaf {
($i:ident) => {
tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text(conv) }.into()
tt::$i { id: conv.id_alloc().alloc(range, synth_id), text: token.to_text(conv) }
.into()
};
}
let leaf: tt::Leaf = match kind {
@ -245,14 +257,14 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
let apostrophe = tt::Leaf::from(tt::Punct {
char: '\'',
spacing: tt::Spacing::Joint,
id: conv.id_alloc().alloc(r),
id: conv.id_alloc().alloc(r, synth_id),
});
result.push(apostrophe.into());
let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
let ident = tt::Leaf::from(tt::Ident {
text: SmolStr::new(&token.to_text(conv)[1..]),
id: conv.id_alloc().alloc(r),
id: conv.id_alloc().alloc(r, synth_id),
});
result.push(ident.into());
continue;
@ -273,7 +285,7 @@ fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
conv.id_alloc().close_delim(entry.idx, None);
let leaf: tt::Leaf = tt::Punct {
id: conv.id_alloc().alloc(entry.open_range),
id: conv.id_alloc().alloc(entry.open_range, None),
char: match entry.subtree.delimiter.unwrap().kind {
tt::DelimiterKind::Parenthesis => '(',
tt::DelimiterKind::Brace => '{',
@ -367,11 +379,18 @@ struct TokenIdAlloc {
}
impl TokenIdAlloc {
fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
/// Allocates the next `tt::TokenId` and records it in the token map.
///
/// `absolute_range` is made relative by subtracting `global_offset` before it
/// is stored. When `synthetic_id` is `Some`, the new token id is additionally
/// registered as belonging to that synthetic token.
fn alloc(
    &mut self,
    absolute_range: TextRange,
    synthetic_id: Option<SyntheticTokenId>,
) -> tt::TokenId {
    let range_in_node = absolute_range - self.global_offset;
    let fresh = tt::TokenId(self.next_id);
    self.next_id += 1;
    self.map.insert(fresh, range_in_node);
    match synthetic_id {
        Some(synthetic) => self.map.insert_synthetic(fresh, synthetic),
        None => {}
    }
    fresh
}
@ -411,6 +430,8 @@ trait SrcToken<Ctx>: std::fmt::Debug {
fn to_char(&self, ctx: &Ctx) -> Option<char>;
fn to_text(&self, ctx: &Ctx) -> SmolStr;
fn synthetic_id(&self, ctx: &Ctx) -> Option<SyntheticTokenId>;
}
trait TokenConvertor: Sized {
@ -437,6 +458,10 @@ impl<'a> SrcToken<RawConvertor<'a>> for usize {
fn to_text(&self, ctx: &RawConvertor<'_>) -> SmolStr {
ctx.lexed.text(*self).into()
}
fn synthetic_id(&self, _ctx: &RawConvertor<'a>) -> Option<SyntheticTokenId> {
None
}
}
impl<'a> TokenConvertor for RawConvertor<'a> {
@ -564,13 +589,14 @@ impl SrcToken<Convertor> for SynToken {
match self {
SynToken::Ordinary(token) => token.kind(),
SynToken::Punch(token, _) => token.kind(),
SynToken::Synthetic((kind, _)) => *kind,
SynToken::Synthetic(token) => token.kind,
}
}
/// Returns the single character this token stands for, if any.
///
/// Ordinary tokens yield `None`; a `Punch` yields the character at its index
/// within the underlying token text; a synthetic token yields its first
/// character only when its text is exactly one byte long.
fn to_char(&self, _ctx: &Convertor) -> Option<char> {
    match self {
        SynToken::Ordinary(_) => None,
        SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
        SynToken::Synthetic(token) => {
            if token.text.len() == 1 {
                token.text.chars().next()
            } else {
                None
            }
        }
    }
}
@ -578,7 +604,14 @@ impl SrcToken<Convertor> for SynToken {
match self {
SynToken::Ordinary(token) => token.text().into(),
SynToken::Punch(token, _) => token.text().into(),
SynToken::Synthetic((_, text)) => text.clone(),
SynToken::Synthetic(token) => token.text.clone(),
}
}
/// Returns the id of a synthetic token, or `None` for every other kind of
/// `SynToken`.
fn synthetic_id(&self, _ctx: &Convertor) -> Option<SyntheticTokenId> {
    if let SynToken::Synthetic(token) = self {
        Some(token.id)
    } else {
        None
    }
}
}

View file

@ -5,6 +5,8 @@ use std::hash::Hash;
use parser::{SyntaxKind, T};
use syntax::{TextRange, TextSize};
use crate::syntax_bridge::SyntheticTokenId;
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
enum TokenTextRange {
Token(TextRange),
@ -31,6 +33,7 @@ impl TokenTextRange {
pub struct TokenMap {
/// Maps `tt::TokenId` to the *relative* source range.
entries: Vec<(tt::TokenId, TokenTextRange)>,
/// Pairs each token id allocated for a synthetic token with that token's
/// `SyntheticTokenId`. Filled by `insert_synthetic` and searched linearly by
/// `synthetic_token_id`.
pub synthetic_entries: Vec<(tt::TokenId, SyntheticTokenId)>,
}
impl TokenMap {
@ -57,6 +60,10 @@ impl TokenMap {
.filter_map(move |(_, range)| range.by_kind(kind))
}
/// Looks up the synthetic-token id recorded for `token_id`.
///
/// Scans `synthetic_entries` in insertion order and returns the id of the
/// first entry whose token id matches, or `None` if there is no such entry.
pub fn synthetic_token_id(&self, token_id: tt::TokenId) -> Option<SyntheticTokenId> {
    self.synthetic_entries.iter().find_map(|(candidate, synthetic)| {
        if *candidate == token_id {
            Some(*synthetic)
        } else {
            None
        }
    })
}
pub fn first_range_by_token(
&self,
token_id: tt::TokenId,
@ -73,6 +80,10 @@ impl TokenMap {
self.entries.push((token_id, TokenTextRange::Token(relative_range)));
}
/// Records that `token_id` was allocated for the synthetic token identified
/// by `id`. Entries are appended; existing entries are not checked or replaced.
pub(crate) fn insert_synthetic(&mut self, token_id: tt::TokenId, id: SyntheticTokenId) {
self.synthetic_entries.push((token_id, id));
}
pub(crate) fn insert_delim(
&mut self,
token_id: tt::TokenId,

View file

@ -87,6 +87,16 @@ pub struct Ident {
pub id: TokenId,
}
impl Leaf {
pub fn id(&self) -> TokenId {
match self {
Leaf::Literal(l) => l.id,
Leaf::Punct(p) => p.id,
Leaf::Ident(i) => i.id,
}
}
}
fn print_debug_subtree(f: &mut fmt::Formatter<'_>, subtree: &Subtree, level: usize) -> fmt::Result {
let align = " ".repeat(level);