Make macro-system type and constructor names more uniform; more comments.

This commit is contained in:
Graydon Hoare 2012-07-27 19:14:46 -07:00
parent eabd233dcd
commit e11e90f31c
9 changed files with 230 additions and 194 deletions

View file

@ -362,33 +362,53 @@ type capture_item = @{
#[auto_serialize]
type capture_clause = @~[capture_item];
//
// When the main rust parser encounters a syntax-extension invocation, it
// parses the arguments to the invocation as a token-tree. This is a very
// loose structure, such that all sorts of different AST-fragments can
// be passed to syntax extensions using a uniform type.
//
// If the syntax extension is an MBE macro, it will attempt to match its
// LHS "matchers" against the provided token tree, and if it finds a
// match, will transcribe the RHS token tree, splicing in any captured
// early_parser::matched_nonterminals into the tt_nonterminals it finds.
//
// The RHS of an MBE macro is the only place a tt_nonterminal or tt_seq
// makes any real sense. You could write them elsewhere but nothing
// else knows what to do with them, so you'll probably get a syntax
// error.
//
#[auto_serialize]
#[doc="For macro invocations; parsing is delegated to the macro"]
enum token_tree {
tt_tok(span, token::token),
tt_delim(~[token_tree]),
tt_flat(span, token::token),
/* These only make sense for right-hand-sides of MBE macros*/
tt_dotdotdot(span, ~[token_tree], option<token::token>, bool),
tt_interpolate(span, ident)
// These only make sense for right-hand-sides of MBE macros
tt_seq(span, ~[token_tree], option<token::token>, bool),
tt_nonterminal(span, ident)
}
#[auto_serialize]
type matcher = spanned<matcher_>;
#[auto_serialize]
//
// Matchers are nodes defined-by and recognized-by the main rust parser and
// language, but they're only ever found inside syntax-extension invocations.
// They represent a small sub-language for pattern-matching token-trees, and
// are thus primarily used by the macro-defining extension itself.
// language, but they're only ever found inside syntax-extension invocations;
// indeed, the only thing that ever _activates_ the rules in the rust parser
// for parsing a matcher is a matcher looking for the 'matchers' nonterminal
// itself. Matchers represent a small sub-language for pattern-matching
// token-trees, and are thus primarily used by the macro-defining extension
// itself.
//
// mtc_tok ===> A matcher that matches a single token,
// denoted by the token itself. So long as
// there's no $ involved.
// match_tok
// ---------
//
// A matcher that matches a single token, denoted by the token itself. So
// long as there's no $ involved.
//
//
// mtc_rep ===> A matcher that matches a sequence of
// sub-matchers, denoted various ways:
// match_seq
// ---------
//
// A matcher that matches a sequence of sub-matchers, denoted various
// possible ways:
//
// $(M)* zero or more Ms
// $(M)+ one or more Ms
@ -396,12 +416,14 @@ type matcher = spanned<matcher_>;
// $(A B C);* zero or more semi-separated 'A B C' seqs
//
//
// mtc_bb ===> A matcher that matches one of a few interesting named rust
// nonterminals, such as types, expressions, items, or raw
// token-trees. A black-box matcher on expr, for example, binds an
// expr to a given ident, and that ident can re-occur as an
// interpolation in the RHS of a macro-by-example rule. For
// example:
// match_nonterminal
// -----------------
//
// A matcher that matches one of a few interesting named rust
// nonterminals, such as types, expressions, items, or raw token-trees. A
// black-box matcher on expr, for example, binds an expr to a given ident,
// and that ident can re-occur as an interpolation in the RHS of a
// macro-by-example rule. For example:
//
// $foo:expr => 1 + $foo // interpolate an expr
// $foo:tt => $foo // interpolate a token-tree
@ -411,21 +433,25 @@ type matcher = spanned<matcher_>;
//
// As a final, horrifying aside, note that macro-by-example's input is
// also matched by one of these matchers. Holy self-referential! It is matched
// by an mtc_rep, specifically this one:
// by a match_seq, specifically this one:
//
// $( $lhs:matchers => $rhs:tt );+
//
// If you understand that, you have closed the loop and understand the whole
// macro system. Congratulations.
//
#[auto_serialize]
type matcher = spanned<matcher_>;
#[auto_serialize]
enum matcher_ {
/* match one token */
mtc_tok(token::token),
/* match repetitions of a sequence: body, separator, zero ok?,
lo, hi position-in-match-array used: */
mtc_rep(~[matcher], option<token::token>, bool, uint, uint),
/* parse a Rust NT: name to bind, name of NT, position in match array : */
mtc_bb(ident, ident, uint)
// match one token
match_tok(token::token),
// match repetitions of a sequence: body, separator, zero ok?,
// lo, hi position-in-match-array used:
match_seq(~[matcher], option<token::token>, bool, uint, uint),
// parse a Rust NT: name to bind, name of NT, position in match array:
match_nonterminal(ident, ident, uint)
}
#[auto_serialize]

View file

@ -4,18 +4,6 @@ import diagnostic::span_handler;
import codemap::{codemap, span, expn_info, expanded_from};
import std::map::str_hash;
// Nomenclature / abbreviations in the ext modules:
//
// ms: matcher span, wraps a matcher with fake span
// mtc: matcher
// mtcs: matchers
// tt: token tree
// bt: backtrace
// cx: expansion context
// mr: macro result
//
// obsolete old-style #macro code:
//
// syntax_expander, normal, macro_defining, macro_definer,
@ -288,17 +276,18 @@ fn get_mac_body(cx: ext_ctxt, sp: span, args: ast::mac_body)
// using new syntax. This will be obsolete when #old_macros go away.
fn tt_args_to_original_flavor(cx: ext_ctxt, sp: span, arg: ~[ast::token_tree])
-> ast::mac_arg {
import ast::{matcher, matcher_, mtc_tok, mtc_rep, mtc_bb};
import ast::{matcher, matcher_, match_tok, match_seq, match_nonterminal};
import parse::lexer::{new_tt_reader, tt_reader_as_reader, reader};
import tt::earley_parser::{parse_or_else, seq, leaf};
import tt::earley_parser::{parse_or_else, matched_seq,
matched_nonterminal};
// these spans won't matter, anyways
fn ms(m: matcher_) -> matcher {
{node: m, span: {lo: 0u, hi: 0u, expn_info: none}}
}
let argument_gram = ~[ms(mtc_rep(~[
ms(mtc_bb(@~"arg",@~"expr", 0u))
let argument_gram = ~[ms(match_seq(~[
ms(match_nonterminal(@~"arg",@~"expr", 0u))
], some(parse::token::COMMA), true, 0u, 1u))];
let arg_reader = new_tt_reader(cx.parse_sess().span_diagnostic,
@ -306,10 +295,10 @@ fn tt_args_to_original_flavor(cx: ext_ctxt, sp: span, arg: ~[ast::token_tree])
let args =
alt parse_or_else(cx.parse_sess(), cx.cfg(), arg_reader as reader,
argument_gram).get(@~"arg") {
@seq(s, _) {
@matched_seq(s, _) {
do s.map() |lf| {
alt lf {
@leaf(parse::token::w_expr(arg)) {
@matched_nonterminal(parse::token::nt_expr(arg)) {
arg /* whew! list of exprs, here we come! */
}
_ { fail ~"badly-structured parse result"; }

View file

@ -1,7 +1,7 @@
import std::map::hashmap;
import ast::{crate, expr_, expr_mac, mac_invoc, mac_invoc_tt,
tt_delim, tt_flat, item_mac};
tt_delim, tt_tok, item_mac};
import fold::*;
import ext::base::*;
import ext::qquote::{qq_helper};

View file

@ -1,6 +1,6 @@
// Earley-like parser for macros.
import parse::token;
import parse::token::{token, EOF, to_str, whole_nt};
import parse::token::{token, EOF, to_str, nonterminal};
import parse::lexer::*; //resolve bug?
//import parse::lexer::{reader, tt_reader, tt_reader_as_reader};
import parse::parser::{parser,SOURCE_FILE};
@ -8,20 +8,22 @@ import parse::parser::{parser,SOURCE_FILE};
import parse::common::*; //resolve bug?
import parse::parse_sess;
import dvec::{dvec, extensions};
import ast::{matcher, mtc_tok, mtc_rep, mtc_bb, ident};
import ast::{matcher, match_tok, match_seq, match_nonterminal, ident};
import ast_util::mk_sp;
import std::map::{hashmap, box_str_hash};
/* This is an Earley-like parser, without support for nonterminals. This
means that there are no completer or predictor rules, and therefore no need to
store one column per token: instead, there's a set of current Earley items and
a set of next ones. Instead of NTs, we have a special case for Kleene
star. The big-O, in pathological cases, is worse than traditional Earley
parsing, but it's an easier fit for Macro-by-Example-style rules, and I think
the overhead is lower. */
/* This is an Earley-like parser, without support for in-grammar nonterminals,
only calling out to the main rust parser for named nonterminals (which it
commits to fully when it hits one in a grammar). This means that there are no
completer or predictor rules, and therefore no need to store one column per
token: instead, there's a set of current Earley items and a set of next
ones. Instead of NTs, we have a special case for Kleene star. The big-O, in
pathological cases, is worse than traditional Earley parsing, but it's an
easier fit for Macro-by-Example-style rules, and I think the overhead is
lower. */
/* to avoid costly uniqueness checks, we require that `mtc_rep` always has a
/* to avoid costly uniqueness checks, we require that `match_seq` always has a
nonempty body. */
enum matcher_pos_up { /* to break a circularity */
@ -40,7 +42,7 @@ type matcher_pos = ~{
sep: option<token>,
mut idx: uint,
mut up: matcher_pos_up, // mutable for swapping only
matches: ~[dvec<@arb_depth>],
matches: ~[dvec<@named_match>],
match_lo: uint, match_hi: uint,
sp_lo: uint,
};
@ -55,9 +57,9 @@ fn copy_up(&& mpu: matcher_pos_up) -> matcher_pos {
fn count_names(ms: &[matcher]) -> uint {
vec::foldl(0u, ms, |ct, m| {
ct + alt m.node {
mtc_tok(_) { 0u }
mtc_rep(more_ms, _, _, _, _) { count_names(more_ms) }
mtc_bb(_,_,_) { 1u }
match_tok(_) { 0u }
match_seq(more_ms, _, _, _, _) { count_names(more_ms) }
match_nonterminal(_,_,_) { 1u }
}})
}
@ -67,9 +69,13 @@ fn initial_matcher_pos(ms: ~[matcher], sep: option<token>, lo: uint)
let mut match_idx_hi = 0u;
for ms.each() |elt| {
alt elt.node {
mtc_tok(_) {}
mtc_rep(_,_,_,_,hi) { match_idx_hi = hi; } //it is monotonic...
mtc_bb(_,_,pos) { match_idx_hi = pos+1u; } //...so latest is highest
match_tok(_) {}
match_seq(_,_,_,_,hi) {
match_idx_hi = hi; // it is monotonic...
}
match_nonterminal(_,_,pos) {
match_idx_hi = pos+1u; // ...so latest is highest
}
}
}
~{elts: ms, sep: sep, mut idx: 0u, mut up: matcher_pos_up(none),
@ -77,38 +83,42 @@ fn initial_matcher_pos(ms: ~[matcher], sep: option<token>, lo: uint)
match_lo: 0u, match_hi: match_idx_hi, sp_lo: lo}
}
// arb_depth is a pattern-match result for a single black-box matcher
// (ast::mtc_bb): so it is associated with a single ident in a parse, and all
// leaves in the arb_depth have the same nonterminal type (expr, item,
// etc). All the leaves in a single arb_depth correspond to a single mtc_bb in
// the ast::matcher that produced it.
// named_match is a pattern-match result for a single ast::match_nonterminal:
// so it is associated with a single ident in a parse, and all
// matched_nonterminals in the named_match have the same nonterminal type
// (expr, item, etc). All the leaves in a single named_match correspond to a
// single match_nonterminal in the ast::matcher that produced it.
//
// It should probably be renamed, it has more or less exact correspondence to
// ast::match nodes, and the in-memory structure of a particular arb_depth
// ast::match nodes, and the in-memory structure of a particular named_match
// represents the match that occurred when a particular subset of an
// ast::match -- those ast::matcher nodes leading to a single mtc_bb -- was
// applied to a particular token tree.
// ast::match -- those ast::matcher nodes leading to a single
// match_nonterminal -- was applied to a particular token tree.
//
// The width of each seq in the arb_depth, and the identity of the leaf nodes,
// will depend on the token tree it was applied to: each seq corresponds to a
// single mtc_rep in the originating ast::matcher. The depth of the arb_depth
// structure will therefore depend only on the nesting depth of mtc_reps in
// the originating ast::matcher it was derived from.
// The width of each matched_seq in the named_match, and the identity of the
// matched_nonterminals, will depend on the token tree it was applied to: each
// matched_seq corresponds to a single match_seq in the originating
// ast::matcher. The depth of the named_match structure will therefore depend
// only on the nesting depth of ast::match_seqs in the originating
// ast::matcher it was derived from.
enum arb_depth { leaf(whole_nt), seq(~[@arb_depth], codemap::span) }
enum named_match {
matched_seq(~[@named_match], codemap::span),
matched_nonterminal(nonterminal)
}
type earley_item = matcher_pos;
fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@arb_depth])
-> hashmap<ident,@arb_depth> {
fn n_rec(p_s: parse_sess, m: matcher, res: ~[@arb_depth],
ret_val: hashmap<ident, @arb_depth>) {
fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@named_match])
-> hashmap<ident,@named_match> {
fn n_rec(p_s: parse_sess, m: matcher, res: ~[@named_match],
ret_val: hashmap<ident, @named_match>) {
alt m {
{node: mtc_tok(_), span: _} { }
{node: mtc_rep(more_ms, _, _, _, _), span: _} {
{node: match_tok(_), span: _} { }
{node: match_seq(more_ms, _, _, _, _), span: _} {
for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) };
}
{node: mtc_bb(bind_name, _, idx), span: sp} {
{node: match_nonterminal(bind_name, _, idx), span: sp} {
if ret_val.contains_key(bind_name) {
p_s.span_diagnostic.span_fatal(sp, ~"Duplicated bind name: "
+ *bind_name)
@ -117,18 +127,18 @@ fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@arb_depth])
}
}
}
let ret_val = box_str_hash::<@arb_depth>();
let ret_val = box_str_hash::<@named_match>();
for ms.each() |m| { n_rec(p_s, m, res, ret_val) }
ret ret_val;
}
enum parse_result {
success(hashmap<ident, @arb_depth>),
success(hashmap<ident, @named_match>),
failure(codemap::span, ~str)
}
fn parse_or_else(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader,
ms: ~[matcher]) -> hashmap<ident, @arb_depth> {
ms: ~[matcher]) -> hashmap<ident, @named_match> {
alt parse(sess, cfg, rdr, ms) {
success(m) { m }
failure(sp, str) {
@ -182,7 +192,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
for uint::range(ei.match_lo, ei.match_hi) |idx| {
let sub = ei.matches[idx].get();
new_pos.matches[idx]
.push(@seq(sub, mk_sp(ei.sp_lo,sp.hi)));
.push(@matched_seq(sub,
mk_sp(ei.sp_lo,
sp.hi)));
}
new_pos.idx += 1u;
@ -212,20 +224,21 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
} else {
alt copy ei.elts[idx].node {
/* need to descend into sequence */
mtc_rep(matchers, sep, zero_ok, match_idx_lo, match_idx_hi){
match_seq(matchers, sep, zero_ok,
match_idx_lo, match_idx_hi){
if zero_ok {
let new_ei = copy ei;
new_ei.idx += 1u;
//we specifically matched zero repeats.
for uint::range(match_idx_lo, match_idx_hi) |idx| {
new_ei.matches[idx].push(@seq(~[], sp));
new_ei.matches[idx].push(@matched_seq(~[], sp));
}
vec::push(cur_eis, new_ei);
}
let matches = vec::map(ei.matches, // fresh, same size:
|_m| dvec::<@arb_depth>());
|_m| dvec::<@named_match>());
let ei_t <- ei;
vec::push(cur_eis, ~{
elts: matchers, sep: sep, mut idx: 0u,
@ -235,8 +248,8 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
sp_lo: sp.lo
});
}
mtc_bb(_,_,_) { vec::push(bb_eis, ei) }
mtc_tok(t) {
match_nonterminal(_,_,_) { vec::push(bb_eis, ei) }
match_tok(t) {
let ei_t <- ei;
if t == tok { ei_t.idx += 1u; vec::push(next_eis, ei_t)}
}
@ -260,7 +273,7 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
|| bb_eis.len() > 1u {
let nts = str::connect(vec::map(bb_eis, |ei| {
alt ei.elts[ei.idx].node {
mtc_bb(bind,name,_) {
match_nonterminal(bind,name,_) {
#fmt["%s ('%s')", *name, *bind]
}
_ { fail; } } }), ~" or ");
@ -282,8 +295,8 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
let ei = vec::pop(bb_eis);
alt ei.elts[ei.idx].node {
mtc_bb(_, name, idx) {
ei.matches[idx].push(@leaf(
match_nonterminal(_, name, idx) {
ei.matches[idx].push(@matched_nonterminal(
parse_nt(rust_parser, *name)));
ei.idx += 1u;
}
@ -305,31 +318,31 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher])
}
}
fn parse_nt(p: parser, name: ~str) -> whole_nt {
fn parse_nt(p: parser, name: ~str) -> nonterminal {
alt name {
~"item" { alt p.parse_item(~[], ast::public) {
some(i) { token::w_item(i) }
some(i) { token::nt_item(i) }
none { p.fatal(~"expected an item keyword") }
}}
~"block" { token::w_block(p.parse_block()) }
~"stmt" { token::w_stmt(p.parse_stmt(~[])) }
~"pat" { token::w_pat(p.parse_pat()) }
~"expr" { token::w_expr(p.parse_expr()) }
~"ty" { token::w_ty(p.parse_ty(false /* no need to disambiguate*/)) }
~"block" { token::nt_block(p.parse_block()) }
~"stmt" { token::nt_stmt(p.parse_stmt(~[])) }
~"pat" { token::nt_pat(p.parse_pat()) }
~"expr" { token::nt_expr(p.parse_expr()) }
~"ty" { token::nt_ty(p.parse_ty(false /* no need to disambiguate*/)) }
// this could be handled like a token, since it is one
~"ident" { alt copy p.token {
token::IDENT(sn,b) { p.bump(); token::w_ident(sn,b) }
token::IDENT(sn,b) { p.bump(); token::nt_ident(sn,b) }
_ { p.fatal(~"expected ident, found "
+ token::to_str(*p.reader.interner(), copy p.token)) }
} }
~"path" { token::w_path(p.parse_path_with_tps(false)) }
~"path" { token::nt_path(p.parse_path_with_tps(false)) }
~"tt" {
p.quote_depth += 1u; //but in theory, non-quoted tts might be useful
let res = token::w_tt(@p.parse_token_tree());
let res = token::nt_tt(@p.parse_token_tree());
p.quote_depth -= 1u;
res
}
~"mtcs" { token::w_mtcs(p.parse_matchers()) }
~"matchers" { token::nt_matchers(p.parse_matchers()) }
_ { p.fatal(~"Unsupported builtin nonterminal parser: " + name)}
}
}

View file

@ -1,10 +1,12 @@
import base::{ext_ctxt, mac_result, mr_expr, mr_def, expr_tt};
import codemap::span;
import ast::{ident, matcher_, matcher, mtc_tok, mtc_bb, mtc_rep, tt_delim};
import ast::{ident, matcher_, matcher, match_tok,
match_nonterminal, match_seq, tt_delim};
import parse::lexer::{new_tt_reader, tt_reader_as_reader, reader};
import parse::token::{FAT_ARROW, SEMI, LBRACE, RBRACE, w_mtcs, w_tt};
import parse::token::{FAT_ARROW, SEMI, LBRACE, RBRACE, nt_matchers, nt_tt};
import parse::parser::{parser, SOURCE_FILE};
import earley_parser::{parse, success, failure, arb_depth, seq, leaf};
import earley_parser::{parse, success, failure, named_match,
matched_seq, matched_nonterminal};
import std::map::hashmap;
@ -17,10 +19,10 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident,
}
let argument_gram = ~[
ms(mtc_rep(~[
ms(mtc_bb(@~"lhs",@~"mtcs", 0u)),
ms(mtc_tok(FAT_ARROW)),
ms(mtc_bb(@~"rhs",@~"tt", 1u)),
ms(match_seq(~[
ms(match_nonterminal(@~"lhs",@~"matchers", 0u)),
ms(match_tok(FAT_ARROW)),
ms(match_nonterminal(@~"rhs",@~"tt", 1u)),
], some(SEMI), false, 0u, 2u))];
let arg_reader = new_tt_reader(cx.parse_sess().span_diagnostic,
@ -32,16 +34,16 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident,
};
let lhses = alt arguments.get(@~"lhs") {
@seq(s, sp) { s }
@matched_seq(s, sp) { s }
_ { cx.span_bug(sp, ~"wrong-structured lhs") }
};
let rhses = alt arguments.get(@~"rhs") {
@seq(s, sp) { s }
@matched_seq(s, sp) { s }
_ { cx.span_bug(sp, ~"wrong-structured rhs") }
};
fn generic_extension(cx: ext_ctxt, sp: span, arg: ~[ast::token_tree],
lhses: ~[@arb_depth], rhses: ~[@arb_depth])
lhses: ~[@named_match], rhses: ~[@named_match])
-> mac_result {
let mut best_fail_spot = {lo: 0u, hi: 0u, expn_info: none};
let mut best_fail_msg = ~"internal error: ran no matchers";
@ -51,12 +53,12 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident,
for lhses.eachi() |i, lhs| {
alt lhs {
@leaf(w_mtcs(mtcs)) {
@matched_nonterminal(nt_matchers(mtcs)) {
let arg_rdr = new_tt_reader(s_d, itr, none, arg) as reader;
alt parse(cx.parse_sess(), cx.cfg(), arg_rdr, mtcs) {
success(m) {
let rhs = alt rhses[i] {
@leaf(w_tt(@tt)) { tt }
@matched_nonterminal(nt_tt(@tt)) { tt }
_ { cx.span_bug(sp, ~"bad thing in rhs") }
};
let trncbr = new_tt_reader(s_d, itr, some(m), ~[rhs]);

View file

@ -1,10 +1,10 @@
import util::interner::interner;
import diagnostic::span_handler;
import ast::{token_tree,tt_delim,tt_flat,tt_dotdotdot,tt_interpolate,ident};
import earley_parser::{arb_depth,seq,leaf};
import ast::{token_tree, tt_delim, tt_tok, tt_seq, tt_nonterminal,ident};
import earley_parser::{named_match, matched_seq, matched_nonterminal};
import codemap::span;
import parse::token::{EOF,ACTUALLY,IDENT,token,w_ident};
import std::map::{hashmap,box_str_hash};
import parse::token::{EOF, INTERPOLATED, IDENT, token, nt_ident};
import std::map::{hashmap, box_str_hash};
export tt_reader, new_tt_reader, dup_tt_reader, tt_next_token;
@ -28,7 +28,7 @@ type tt_reader = @{
interner: @interner<@~str>,
mut cur: tt_frame,
/* for MBE-style macro transcription */
interpolations: std::map::hashmap<ident, @arb_depth>,
interpolations: std::map::hashmap<ident, @named_match>,
mut repeat_idx: ~[mut uint],
mut repeat_len: ~[uint],
/* cached: */
@ -37,17 +37,17 @@ type tt_reader = @{
};
/** This can do Macro-By-Example transcription. On the other hand, if
* `src` contains no `tt_dotdotdot`s and `tt_interpolate`s, `interp` can (and
* `src` contains no `tt_seq`s and `tt_nonterminal`s, `interp` can (and
* should) be none. */
fn new_tt_reader(sp_diag: span_handler, itr: @interner<@~str>,
interp: option<std::map::hashmap<ident,@arb_depth>>,
interp: option<std::map::hashmap<ident,@named_match>>,
src: ~[ast::token_tree])
-> tt_reader {
let r = @{sp_diag: sp_diag, interner: itr,
mut cur: @{readme: src, mut idx: 0u, dotdotdoted: false,
sep: none, up: tt_frame_up(option::none)},
interpolations: alt interp { /* just a convienience */
none { std::map::box_str_hash::<@arb_depth>() }
none { std::map::box_str_hash::<@named_match>() }
some(x) { x }
},
mut repeat_idx: ~[mut], mut repeat_len: ~[],
@ -79,18 +79,22 @@ pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader {
}
pure fn lookup_cur_ad_by_ad(r: tt_reader, start: @arb_depth) -> @arb_depth {
pure fn red(&&ad: @arb_depth, &&idx: uint) -> @arb_depth {
pure fn lookup_cur_matched_by_matched(r: tt_reader,
start: @named_match) -> @named_match {
pure fn red(&&ad: @named_match, &&idx: uint) -> @named_match {
alt *ad {
leaf(_) { ad /* end of the line; duplicate henceforth */ }
seq(ads, _) { ads[idx] }
matched_nonterminal(_) {
// end of the line; duplicate henceforth
ad
}
matched_seq(ads, _) { ads[idx] }
}
}
vec::foldl(start, r.repeat_idx, red)
}
fn lookup_cur_ad(r: tt_reader, name: ident) -> @arb_depth {
lookup_cur_ad_by_ad(r, r.interpolations.get(name))
fn lookup_cur_matched(r: tt_reader, name: ident) -> @named_match {
lookup_cur_matched_by_matched(r, r.interpolations.get(name))
}
enum lis {
lis_unconstrained, lis_constraint(uint, ident), lis_contradiction(~str)
@ -116,15 +120,15 @@ fn lockstep_iter_size(&&t: token_tree, &&r: tt_reader) -> lis {
}
}
alt t {
tt_delim(tts) | tt_dotdotdot(_, tts, _, _) {
tt_delim(tts) | tt_seq(_, tts, _, _) {
vec::foldl(lis_unconstrained, tts, {|lis, tt|
lis_merge(lis, lockstep_iter_size(tt, r)) })
}
tt_flat(*) { lis_unconstrained }
tt_interpolate(_, name) {
alt *lookup_cur_ad(r, name) {
leaf(_) { lis_unconstrained }
seq(ads, _) { lis_constraint(ads.len(), name) }
tt_tok(*) { lis_unconstrained }
tt_nonterminal(_, name) {
alt *lookup_cur_matched(r, name) {
matched_nonterminal(_) { lis_unconstrained }
matched_seq(ads, _) { lis_constraint(ads.len(), name) }
}
}
}
@ -166,20 +170,20 @@ fn tt_next_token(&&r: tt_reader) -> {tok: token, sp: span} {
}
}
loop { /* because it's easiest, this handles `tt_delim` not starting
with a `tt_flat`, even though it won't happen */
with a `tt_tok`, even though it won't happen */
alt r.cur.readme[r.cur.idx] {
tt_delim(tts) {
r.cur = @{readme: tts, mut idx: 0u, dotdotdoted: false,
sep: none, up: tt_frame_up(option::some(r.cur)) };
// if this could be 0-length, we'd need to potentially recur here
}
tt_flat(sp, tok) {
tt_tok(sp, tok) {
r.cur_span = sp; r.cur_tok = tok;
r.cur.idx += 1u;
ret ret_val;
}
tt_dotdotdot(sp, tts, sep, zerok) {
alt lockstep_iter_size(tt_dotdotdot(sp, tts, sep, zerok), r) {
tt_seq(sp, tts, sep, zerok) {
alt lockstep_iter_size(tt_seq(sp, tts, sep, zerok), r) {
lis_unconstrained {
r.sp_diag.span_fatal(
sp, /* blame macro writer */
@ -211,22 +215,22 @@ fn tt_next_token(&&r: tt_reader) -> {tok: token, sp: span} {
}
}
// FIXME #2887: think about span stuff here
tt_interpolate(sp, ident) {
alt *lookup_cur_ad(r, ident) {
tt_nonterminal(sp, ident) {
alt *lookup_cur_matched(r, ident) {
/* sidestep the interpolation tricks for ident because
(a) idents can be in lots of places, so it'd be a pain
(b) we actually can, since it's a token. */
leaf(w_ident(sn,b)) {
matched_nonterminal(nt_ident(sn,b)) {
r.cur_span = sp; r.cur_tok = IDENT(sn,b);
r.cur.idx += 1u;
ret ret_val;
}
leaf(w_nt) {
r.cur_span = sp; r.cur_tok = ACTUALLY(w_nt);
matched_nonterminal(nt) {
r.cur_span = sp; r.cur_tok = INTERPOLATED(nt);
r.cur.idx += 1u;
ret ret_val;
}
seq(*) {
matched_seq(*) {
r.sp_diag.span_fatal(
copy r.cur_span, /* blame the macro writer */
#fmt["variable '%s' is still repeating at this depth",

View file

@ -86,7 +86,7 @@ impl parser_common of parser_common for parser {
fn parse_ident() -> ast::ident {
alt copy self.token {
token::IDENT(i, _) { self.bump(); ret self.get_str(i); }
token::ACTUALLY(token::w_ident(*)) { self.bug(
token::INTERPOLATED(token::nt_ident(*)) { self.bug(
~"ident interpolation not converted to real token"); }
_ { self.fatal(~"expected ident, found `"
+ token_to_str(self.reader, self.token)

View file

@ -3,7 +3,7 @@ import print::pprust::expr_to_str;
import result::result;
import either::{either, left, right};
import std::map::{hashmap, str_hash};
import token::{can_begin_expr, is_ident, is_plain_ident, ACTUALLY};
import token::{can_begin_expr, is_ident, is_plain_ident, INTERPOLATED};
import codemap::{span,fss_none};
import util::interner;
import ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec};
@ -39,15 +39,15 @@ import ast::{_mod, add, alt_check, alt_exhaustive, arg, arm, attribute,
item_ty, lit, lit_, lit_bool, lit_float, lit_int,
lit_int_unsuffixed, lit_nil, lit_str, lit_uint, local, m_const,
m_imm, m_mutbl, mac_, mac_aq, mac_ellipsis,
mac_invoc, mac_invoc_tt, mac_var, matcher,
method, mode, mt, mtc_bb, mtc_rep, mtc_tok, mul, mutability, neg,
mac_invoc, mac_invoc_tt, mac_var, matcher, match_nonterminal,
match_seq, match_tok, method, mode, mt, mul, mutability, neg,
noreturn, not, pat, pat_box, pat_enum, pat_ident, pat_lit,
pat_range, pat_rec, pat_tup, pat_uniq, pat_wild, path, private,
proto, proto_any, proto_bare, proto_block, proto_box, proto_uniq,
provided, public, pure_fn, purity, re_anon, re_named, region,
rem, required, ret_style, return_val, shl, shr, stmt, stmt_decl,
stmt_expr, stmt_semi, subtract, token_tree, trait_method,
trait_ref, tt_delim, tt_dotdotdot, tt_flat, tt_interpolate, ty,
trait_ref, tt_delim, tt_seq, tt_tok, tt_nonterminal, ty,
ty_, ty_bot, ty_box, ty_field, ty_fn, ty_infer, ty_mac,
ty_method, ty_nil, ty_param, ty_path, ty_ptr, ty_rec, ty_rptr,
ty_tup, ty_u32, ty_uniq, ty_vec, ty_fixed_length, unchecked_blk,
@ -104,14 +104,14 @@ type item_info = (ident, item_, option<~[attribute]>);
/* The expr situation is not as complex as I thought it would be.
The important thing is to make sure that lookahead doesn't balk
at ACTUALLY tokens */
macro_rules! maybe_whole_expr{
at INTERPOLATED tokens */
macro_rules! maybe_whole_expr {
{$p:expr} => { alt copy $p.token {
ACTUALLY(token::w_expr(e)) {
INTERPOLATED(token::nt_expr(e)) {
$p.bump();
ret pexpr(e);
}
ACTUALLY(token::w_path(pt)) {
INTERPOLATED(token::nt_path(pt)) {
$p.bump();
ret $p.mk_pexpr($p.span.lo, $p.span.lo,
expr_path(pt));
@ -122,7 +122,7 @@ macro_rules! maybe_whole_expr{
macro_rules! maybe_whole {
{$p:expr, $constructor:path} => { alt copy $p.token {
ACTUALLY($constructor(x)) { $p.bump(); ret x; }
INTERPOLATED($constructor(x)) { $p.bump(); ret x; }
_ {}
}}
}
@ -133,7 +133,7 @@ fn dummy() {
/* we will need this to bootstrap maybe_whole! */
#macro[[#maybe_whole_path[p],
alt p.token {
ACTUALLY(token::w_path(pt)) { p.bump(); ret pt; }
INTERPOLATED(token::nt_path(pt)) { p.bump(); ret pt; }
_ {} }]];
}
@ -1090,7 +1090,7 @@ class parser {
}
}
fn parse_tt_flat(p: parser, delim_ok: bool) -> token_tree {
fn parse_tt_tok(p: parser, delim_ok: bool) -> token_tree {
alt p.token {
token::RPAREN | token::RBRACE | token::RBRACKET
if !delim_ok {
@ -1110,14 +1110,14 @@ class parser {
seq_sep_none(),
|p| p.parse_token_tree());
let (s, z) = p.parse_sep_and_zerok();
ret tt_dotdotdot(mk_sp(sp.lo ,p.span.hi), seq.node, s, z);
ret tt_seq(mk_sp(sp.lo ,p.span.hi), seq.node, s, z);
} else {
ret tt_interpolate(sp, p.parse_ident());
ret tt_nonterminal(sp, p.parse_ident());
}
}
_ { /* ok */ }
}
let res = tt_flat(p.span, p.token);
let res = tt_tok(p.span, p.token);
p.bump();
ret res;
}
@ -1126,14 +1126,14 @@ class parser {
token::LPAREN | token::LBRACE | token::LBRACKET {
let ket = flip(self.token);
tt_delim(vec::append(
~[parse_tt_flat(self, true)],
~[parse_tt_tok(self, true)],
vec::append(
self.parse_seq_to_before_end(
ket, seq_sep_none(),
|p| p.parse_token_tree()),
~[parse_tt_flat(self, true)])))
~[parse_tt_tok(self, true)])))
}
_ { parse_tt_flat(self, false) }
_ { parse_tt_tok(self, false) }
};
}
@ -1177,17 +1177,17 @@ class parser {
self.fatal(~"repetition body must be nonempty");
}
let (sep, zerok) = self.parse_sep_and_zerok();
mtc_rep(ms, sep, zerok, name_idx_lo, *name_idx)
match_seq(ms, sep, zerok, name_idx_lo, *name_idx)
} else {
let bound_to = self.parse_ident();
self.expect(token::COLON);
let nt_name = self.parse_ident();
let m = mtc_bb(bound_to, nt_name, *name_idx);
let m = match_nonterminal(bound_to, nt_name, *name_idx);
*name_idx += 1u;
m
}
} else {
let m = mtc_tok(self.token);
let m = match_tok(self.token);
self.bump();
m
};

View file

@ -79,7 +79,7 @@ enum token {
UNDERSCORE,
/* For interpolation */
ACTUALLY(whole_nt),
INTERPOLATED(nonterminal),
DOC_COMMENT(str_num),
EOF,
@ -87,17 +87,17 @@ enum token {
#[auto_serialize]
/// For interpolation during macro expansion.
enum whole_nt {
w_item(@ast::item),
w_block(ast::blk),
w_stmt(@ast::stmt),
w_pat( @ast::pat),
w_expr(@ast::expr),
w_ty( @ast::ty),
w_ident(str_num, bool),
w_path(@ast::path),
w_tt( @ast::token_tree), //needs @ed to break a circularity
w_mtcs(~[ast::matcher])
enum nonterminal {
nt_item(@ast::item),
nt_block(ast::blk),
nt_stmt(@ast::stmt),
nt_pat( @ast::pat),
nt_expr(@ast::expr),
nt_ty( @ast::ty),
nt_ident(str_num, bool),
nt_path(@ast::path),
nt_tt( @ast::token_tree), //needs @ed to break a circularity
nt_matchers(~[ast::matcher])
}
fn binop_to_str(o: binop) -> ~str {
@ -184,14 +184,14 @@ fn to_str(in: interner<@~str>, t: token) -> ~str {
/* Other */
DOC_COMMENT(s) { *interner::get(in, s) }
EOF { ~"<eof>" }
ACTUALLY(w_nt) {
INTERPOLATED(nt) {
~"an interpolated " +
alt w_nt {
w_item(*) { ~"item" } w_block(*) { ~"block" }
w_stmt(*) { ~"statement" } w_pat(*) { ~"pattern" }
w_expr(*) { ~"expression" } w_ty(*) { ~"type" }
w_ident(*) { ~"identifier" } w_path(*) { ~"path" }
w_tt(*) { ~"tt" } w_mtcs(*) { ~"matcher sequence" }
alt nt {
nt_item(*) { ~"item" } nt_block(*) { ~"block" }
nt_stmt(*) { ~"statement" } nt_pat(*) { ~"pattern" }
nt_expr(*) { ~"expression" } nt_ty(*) { ~"type" }
nt_ident(*) { ~"identifier" } nt_path(*) { ~"path" }
nt_tt(*) { ~"tt" } nt_matchers(*) { ~"matcher sequence" }
}
}
}
@ -219,8 +219,10 @@ pure fn can_begin_expr(t: token) -> bool {
BINOP(OR) { true } // in lambda syntax
OROR { true } // in lambda syntax
MOD_SEP { true }
ACTUALLY(w_expr(*)) | ACTUALLY(w_ident(*)) | ACTUALLY(w_block(*))
| ACTUALLY(w_path(*)) { true }
INTERPOLATED(nt_expr(*))
| INTERPOLATED(nt_ident(*))
| INTERPOLATED(nt_block(*))
| INTERPOLATED(nt_path(*)) { true }
_ { false }
}
}