From 047e3c45b3b502c63e4323343897cc777d698b6d Mon Sep 17 00:00:00 2001 From: Paul Stansifer Date: Fri, 15 Jun 2012 09:32:17 -0700 Subject: [PATCH] Lexers now emit spans, not chposes. --- src/libsyntax/ast.rs | 2 +- src/libsyntax/parse/comments.rs | 22 ++++++++------ src/libsyntax/parse/eval.rs | 4 +-- src/libsyntax/parse/lexer.rs | 51 +++++++++++++++++++++------------ src/libsyntax/parse/parser.rs | 31 +++++++++----------- 5 files changed, 62 insertions(+), 48 deletions(-) diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index ce674f53662..5f2a5b2accb 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -377,7 +377,7 @@ enum blk_sort { enum token_tree { /* for macro invocations; parsing is the macro's job */ tt_delim([token_tree]), - tt_flat(uint, token::token) + tt_flat(span, token::token) } #[auto_serialize] diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 53a6238d57f..69aa4775e43 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -1,7 +1,7 @@ import io::reader_util; import io::println;//XXXXXXXXxxx import util::interner; -import lexer::{ string_reader, bump, is_eof, nextch, new_string_reader, +import lexer::{ string_reader, bump, is_eof, nextch, is_whitespace, get_str_from, string_reader_as_reader }; export cmnt; @@ -176,8 +176,9 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler, {|x|str::hash(*x)}, {|x,y|str::eq(*x, *y)} ); - let rdr = new_string_reader(span_diagnostic, - codemap::new_filemap(path, src, 0u, 0u), itr); + let rdr = lexer::new_low_level_string_reader + (span_diagnostic, codemap::new_filemap(path, src, 0u, 0u), itr); + let mut comments: [cmnt] = []; let mut literals: [lit] = []; let mut first_read: bool = true; @@ -195,14 +196,17 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler, } break; } - let bpos = rdr.pos; - let tok = rdr.next_token(); - if token::is_lit(tok.tok) { - let s = get_str_from(rdr, bpos); - literals += [{lit: s, pos: tok.chpos}]; + + + let bstart = rdr.pos; + //discard, and look ahead; we're working with internal state + let {tok: tok, sp: sp} = rdr.next_token(); + if token::is_lit(tok) { + let s = get_str_from(rdr, bstart); + literals += [{lit: s, pos: sp.lo}]; log(debug, "tok lit: " + s); } else { - log(debug, "tok: " + token::to_str(*rdr.interner, tok.tok)); + log(debug, "tok: " + token::to_str(*rdr.interner, tok)); } first_read = false; } diff --git a/src/libsyntax/parse/eval.rs b/src/libsyntax/parse/eval.rs index 5ca9b22524b..5ef8417ec6f 100644 --- a/src/libsyntax/parse/eval.rs +++ b/src/libsyntax/parse/eval.rs @@ -68,7 +68,7 @@ fn parse_companion_mod(cx: ctx, prefix: str, suffix: option) modpath, SOURCE_FILE); let inner_attrs = p0.parse_inner_attrs_and_next(); let m0 = p0.parse_mod_items(token::EOF, inner_attrs.next); - cx.sess.chpos = p0.reader.chpos(); + cx.sess.chpos = r0.chpos; cx.sess.byte_pos = cx.sess.byte_pos + r0.pos; ret (m0.view_items, m0.items, inner_attrs.inner); } else { @@ -106,7 +106,7 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: str, /* FIXME: bad */ copy id, ast::item_mod(m0), ast::public, mod_attrs); // Thread defids, chpos and byte_pos through the parsers - cx.sess.chpos = p0.reader.chpos(); + cx.sess.chpos = r0.chpos; cx.sess.byte_pos = cx.sess.byte_pos + r0.pos; items += [i]; } diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 8849da5d270..14cb8b41473 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -2,17 +2,17 @@ import util::interner; import util::interner::intern; import diagnostic; import ast::{tt_delim,tt_flat}; +import codemap::span; export reader, string_reader, new_string_reader, is_whitespace; export tt_reader, new_tt_reader, dup_tt_reader; -export nextch, is_eof, bump, get_str_from; +export nextch, is_eof, bump, get_str_from, new_low_level_string_reader; export string_reader_as_reader, tt_reader_as_reader; iface reader { fn is_eof() -> bool; - fn next_token() -> {tok: token::token, chpos: uint}; + fn next_token() -> {tok: token::token, sp: span}; fn fatal(str) -> !; - fn chpos() -> uint; fn interner() -> @interner::interner<@str>; } @@ -33,7 +33,7 @@ type tt_reader = ~{ mut cur: tt_frame, /* cached: */ mut cur_tok: token::token, - mut cur_chpos: uint + mut cur_span: span }; fn new_tt_reader(span_diagnostic: diagnostic::span_handler, @@ -42,10 +42,11 @@ fn new_tt_reader(span_diagnostic: diagnostic::span_handler, let r = ~{span_diagnostic: span_diagnostic, interner: itr, mut cur: @{readme: src, mut idx: 0u, up: tt_frame_up(option::none)}, - mut cur_tok: token::EOF, /* dummy value, never read */ - mut cur_chpos: 0u /* dummy value, never read */ + /* dummy values, never read: */ + mut cur_tok: token::EOF, + mut cur_span: ast_util::mk_sp(0u,0u) }; - tt_next_token(r); /* get cur_tok and cur_chpos set up */ + tt_next_token(r); /* get cur_tok and cur_span set up */ ret r; } @@ -63,7 +64,7 @@ pure fn dup_tt_frame(&&f: tt_frame) -> tt_frame { pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader { ~{span_diagnostic: r.span_diagnostic, interner: r.interner, mut cur: dup_tt_frame(r.cur), - mut cur_tok: r.cur_tok, mut cur_chpos: r.cur_chpos} + mut cur_tok: r.cur_tok, mut cur_span: r.cur_span} } type string_reader = @{ @@ -80,6 +81,15 @@ type string_reader = @{ fn new_string_reader(span_diagnostic: diagnostic::span_handler, filemap: codemap::filemap, itr: @interner::interner<@str>) -> string_reader { + let r = new_low_level_string_reader(span_diagnostic, filemap, itr); + ret r; +} + +/* For comments.rs, which hackily pokes into 'pos' and 'curr' */ +fn new_low_level_string_reader(span_diagnostic: diagnostic::span_handler, + filemap: codemap::filemap, + itr: @interner::interner<@str>) + -> string_reader { let r = @{span_diagnostic: span_diagnostic, src: filemap.src, mut col: 0u, mut pos: 0u, mut curr: -1 as char, mut chpos: filemap.start_pos.ch, @@ -94,7 +104,7 @@ fn new_string_reader(span_diagnostic: diagnostic::span_handler, impl string_reader_as_reader of reader for string_reader { fn is_eof() -> bool { is_eof(self) } - fn next_token() -> {tok: token::token, chpos: uint} { + fn next_token() -> {tok: token::token, sp: span} { consume_whitespace_and_comments(self); let start_chpos = self.chpos; let tok = if is_eof(self) { @@ -102,19 +112,18 @@ impl string_reader_as_reader of reader for string_reader { } else { next_token_inner(self) }; - ret {tok: tok, chpos: start_chpos}; + ret {tok: tok, sp: ast_util::mk_sp(start_chpos, self.chpos)}; } fn fatal(m: str) -> ! { self.span_diagnostic.span_fatal( ast_util::mk_sp(self.chpos, self.chpos), m) } - fn chpos() -> uint { self.chpos } fn interner() -> @interner::interner<@str> { self.interner } } impl tt_reader_as_reader of reader for tt_reader { fn is_eof() -> bool { self.cur_tok == token::EOF } - fn next_token() -> {tok: token::token, chpos: uint} { + fn next_token() -> {tok: token::token, sp: span} { /* weird resolve bug: if the following `if`, or any of its statements are removed, we get resolution errors */ if false { @@ -124,15 +133,19 @@ impl tt_reader_as_reader of reader for tt_reader { tt_next_token(self) } fn fatal(m: str) -> ! { - self.span_diagnostic.span_fatal( - ast_util::mk_sp(self.chpos(), self.chpos()), m); + self.span_diagnostic.span_fatal(copy self.cur_span, m); } - fn chpos() -> uint { self.cur_chpos } fn interner() -> @interner::interner<@str> { self.interner } } -fn tt_next_token(&&r: tt_reader) -> {tok: token::token, chpos: uint} { - let ret_val = { tok: r.cur_tok, chpos: r.cur_chpos }; +fn string_advance_token(&&r: string_reader) { + consume_whitespace_and_comments(r); + + next_token_inner(r); +} + +fn tt_next_token(&&r: tt_reader) -> {tok: token::token, sp: span} { + let ret_val = { tok: r.cur_tok, sp: r.cur_span }; if r.cur.idx >= vec::len(r.cur.readme) { /* done with this set; pop */ alt r.cur.up { @@ -158,8 +171,8 @@ fn tt_next_token(&&r: tt_reader) -> {tok: token::token, chpos: uint} { r.cur = @{readme: tts, mut idx: 0u, up: tt_frame_up(option::some(copy r.cur)) }; } - tt_flat(chpos, tok) { - r.cur_chpos = chpos; r.cur_tok = tok; + tt_flat(sp, tok) { + r.cur_span = sp; r.cur_tok = tok; r.cur.idx += 1u; ret ret_val; } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index f2481e4adda..08eaa779291 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -63,7 +63,7 @@ class parser { let mut token: token::token; let mut span: span; let mut last_span: span; - let mut buffer: [mut {tok: token::token, span: span}]/4; + let mut buffer: [mut {tok: token::token, sp: span}]/4; let mut buffer_start: int; let mut buffer_end: int; let mut restriction: restriction; @@ -75,7 +75,7 @@ class parser { { self.reader <- rdr; let tok0 = self.reader.next_token(); - let span0 = ast_util::mk_sp(tok0.chpos, self.reader.chpos()); + let span0 = tok0.sp; self.sess = sess; self.cfg = cfg; self.file_type = ftype; @@ -83,10 +83,10 @@ class parser { self.span = span0; self.last_span = span0; self.buffer = [mut - {tok: tok0.tok, span: span0}, - {tok: tok0.tok, span: span0}, - {tok: tok0.tok, span: span0}, - {tok: tok0.tok, span: span0} + {tok: tok0.tok, sp: span0}, + {tok: tok0.tok, sp: span0}, + {tok: tok0.tok, sp: span0}, + {tok: tok0.tok, sp: span0} ]/4; self.buffer_start = 0; self.buffer_end = 0; @@ -100,16 +100,15 @@ class parser { fn bump() { self.last_span = self.span; - if self.buffer_start == self.buffer_end { - let next = self.reader.next_token(); - self.token = next.tok; - self.span = mk_sp(next.chpos, self.reader.chpos()); + let next = if self.buffer_start == self.buffer_end { + self.reader.next_token() } else { let next = self.buffer[self.buffer_start]; self.buffer_start = (self.buffer_start + 1) & 3; - self.token = next.tok; - self.span = next.span; - } + next + }; + self.token = next.tok; + self.span = next.sp; } fn swap(next: token::token, lo: uint, hi: uint) { self.token = next; @@ -124,9 +123,7 @@ class parser { fn look_ahead(distance: uint) -> token::token { let dist = distance as int; while self.buffer_length() < dist { - let next = self.reader.next_token(); - let sp = mk_sp(next.chpos, self.reader.chpos()); - self.buffer[self.buffer_end] = {tok: next.tok, span: sp}; + self.buffer[self.buffer_end] = self.reader.next_token(); self.buffer_end = (self.buffer_end + 1) & 3; } ret copy self.buffer[(self.buffer_start + dist - 1) & 3].tok; @@ -1082,7 +1079,7 @@ class parser { } _ { /* ok */ } } - let res = tt_flat(p.span.lo, p.token); + let res = tt_flat(p.span, p.token); p.bump(); ret res; }