diff --git a/src/comp/front/lexer.rs b/src/comp/front/lexer.rs index a17dd514951..2e5304e72a2 100644 --- a/src/comp/front/lexer.rs +++ b/src/comp/front/lexer.rs @@ -18,6 +18,8 @@ state type reader = state obj { fn bump(); fn mark(); fn get_mark_chpos() -> uint; + fn add_str(str) -> token.str_num; + fn get_str(token.str_num) -> str; fn get_chpos() -> uint; fn get_keywords() -> hashmap[str,token.token]; fn get_reserved() -> hashmap[str,()]; @@ -32,6 +34,7 @@ fn new_reader(IO.reader rdr, str filename, codemap.filemap filemap) mutable char ch, mutable uint mark_chpos, mutable uint chpos, + mutable vec[str] strs, hashmap[str,token.token] keywords, hashmap[str,()] reserved, codemap.filemap fm) { @@ -79,6 +82,15 @@ fn new_reader(IO.reader rdr, str filename, codemap.filemap filemap) ret keywords; } + fn add_str(str s) -> token.str_num { + strs += vec(s); + ret Vec.len[str](strs) - 1u; + } + + fn get_str(token.str_num i) -> str { + ret strs.(i); + } + fn get_reserved() -> hashmap[str,()] { ret reserved; } @@ -88,9 +100,10 @@ fn new_reader(IO.reader rdr, str filename, codemap.filemap filemap) } } auto file = Str.unsafe_from_bytes(rdr.read_whole_stream()); + let vec[str] strs = vec(); auto rd = reader(file, Str.byte_len(file), 0u, -1 as char, filemap.start_pos, filemap.start_pos, - keyword_table(), + strs, keyword_table(), reserved_word_table(), filemap); rd.init(); @@ -500,25 +513,25 @@ fn scan_number(char c, reader rdr) -> token.token { if (c == '3' && n == '2') { rdr.bump(); rdr.bump(); ret token.LIT_MACH_FLOAT(util.common.ty_f32, - float_str); + rdr.add_str(float_str)); } else if (c == '6' && n == '4') { rdr.bump(); rdr.bump(); ret token.LIT_MACH_FLOAT(util.common.ty_f64, - float_str); + rdr.add_str(float_str)); /* FIXME: if this is out of range for either a 32-bit or 64-bit float, it won't be noticed till the back-end */ } } else { - ret token.LIT_FLOAT(float_str); + ret token.LIT_FLOAT(rdr.add_str(float_str)); } } auto maybe_exponent = scan_exponent(rdr); alt(maybe_exponent) { case(some[str](?s)) { - ret token.LIT_FLOAT(dec_str + s); + ret token.LIT_FLOAT(rdr.add_str(dec_str + s)); } case(none[str]) { ret token.LIT_INT(accum_int); @@ -594,7 +607,7 @@ fn next_token(reader rdr) -> token.token { fail; } - ret token.IDENT(accum_str); + ret token.IDENT(rdr.add_str(accum_str)); } if (is_dec_digit(c)) { @@ -786,7 +799,7 @@ fn next_token(reader rdr) -> token.token { rdr.bump(); } rdr.bump(); - ret token.LIT_STR(accum_str); + ret token.LIT_STR(rdr.add_str(accum_str)); } case ('-') { diff --git a/src/comp/front/parser.rs b/src/comp/front/parser.rs index bcde167157e..570b09896ff 100644 --- a/src/comp/front/parser.rs +++ b/src/comp/front/parser.rs @@ -38,6 +38,8 @@ state type parser = fn next_def_id() -> ast.def_id; fn set_def(ast.def_num); fn get_prec_table() -> vec[op_spec]; + fn get_str(token.str_num) -> str; + fn get_reader() -> lexer.reader; fn get_filemap() -> codemap.filemap; fn get_chpos() -> uint; }; @@ -111,6 +113,14 @@ fn new_parser(session.session sess, ret precs; } + fn get_str(token.str_num i) -> str { + ret rdr.get_str(i); + } + + fn get_reader() -> lexer.reader { + ret rdr; + } + fn get_filemap() -> codemap.filemap { ret rdr.get_filemap(); } @@ -135,7 +145,7 @@ fn new_parser(session.session sess, fn unexpected(parser p, token.token t) { let str s = "unexpected token: "; - s += token.to_str(t); + s += token.to_str(p.get_reader(), t); p.err(s); } @@ -144,9 +154,9 @@ fn expect(parser p, token.token t) { p.bump(); } else { let str s = "expecting "; - s += token.to_str(t); + s += token.to_str(p.get_reader(), t); s += ", found "; - s += token.to_str(p.peek()); + s += token.to_str(p.get_reader(), p.peek()); p.err(s); } } @@ -157,7 +167,7 @@ fn spanned[T](uint lo, uint hi, &T node) -> ast.spanned[T] { fn parse_ident(parser p) -> ast.ident { alt (p.peek()) { - case (token.IDENT(?i)) { p.bump(); ret i; } + case (token.IDENT(?i)) { p.bump(); ret p.get_str(i); } case (_) { p.err("expecting ident"); fail; @@ -173,10 +183,10 @@ fn parse_ident(parser p) -> ast.ident { */ fn parse_str_lit_or_env_ident(parser p) -> ast.ident { alt (p.peek()) { - case (token.LIT_STR(?s)) { p.bump(); ret s; } + case (token.LIT_STR(?s)) { p.bump(); ret p.get_str(s); } case (token.IDENT(?i)) { auto v = eval.lookup(p.get_session(), p.get_env(), - p.get_span(), i); + p.get_span(), p.get_str(i)); if (!eval.val_is_str(v)) { p.err("expecting string-valued variable"); } @@ -549,7 +559,7 @@ fn parse_lit(parser p) -> ast.lit { } case (token.LIT_FLOAT(?s)) { p.bump(); - lit = ast.lit_float(s); + lit = ast.lit_float(p.get_str(s)); } case (token.LIT_MACH_INT(?tm, ?i)) { p.bump(); @@ -557,7 +567,7 @@ fn parse_lit(parser p) -> ast.lit { } case (token.LIT_MACH_FLOAT(?tm, ?s)) { p.bump(); - lit = ast.lit_mach_float(tm, s); + lit = ast.lit_mach_float(tm, p.get_str(s)); } case (token.LIT_CHAR(?c)) { p.bump(); @@ -569,7 +579,7 @@ fn parse_lit(parser p) -> ast.lit { } case (token.LIT_STR(?s)) { p.bump(); - lit = ast.lit_str(s); + lit = ast.lit_str(p.get_str(s)); } case (?t) { unexpected(p, t); @@ -617,7 +627,7 @@ fn parse_path(parser p, greed g) -> ast.path { alt (p.peek()) { case (token.IDENT(?i)) { hi = p.get_hi_pos(); - ids += vec(i); + ids += vec(p.get_str(i)); p.bump(); if (p.peek() == token.DOT) { if (g == GREEDY) { @@ -1025,7 +1035,7 @@ fn parse_dot_or_call_expr(parser p) -> @ast.expr { case (token.IDENT(?i)) { hi = p.get_hi_pos(); p.bump(); - e = extend_expr_by_ident(p, lo, hi, e, i); + e = extend_expr_by_ident(p, lo, hi, e, p.get_str(i)); } case (token.LPAREN) { @@ -1373,7 +1383,7 @@ fn parse_alt_expr(parser p) -> @ast.expr { case (token.RBRACE) { /* empty */ } case (?tok) { p.err("expected 'case' or '}' when parsing 'alt' statement " + - "but found " + token.to_str(tok)); + "but found " + token.to_str(p.get_reader(), tok)); } } } @@ -1483,16 +1493,17 @@ fn parse_pat(parser p) -> @ast.pat { case (token.IDENT(?id)) { hi = p.get_hi_pos(); p.bump(); - pat = ast.pat_bind(id, p.next_def_id(), ast.ann_none); + pat = ast.pat_bind(p.get_str(id), p.next_def_id(), + ast.ann_none); } case (?tok) { p.err("expected identifier after '?' in pattern but " + - "found " + token.to_str(tok)); + "found " + token.to_str(p.get_reader(), tok)); fail; } } } - case (token.IDENT(?id)) { + case (token.IDENT(_)) { auto tag_path = parse_path(p, GREEDY); hi = tag_path.span.hi; @@ -1723,7 +1734,7 @@ fn parse_block(parser p) -> ast.block { if (stmt_ends_with_semi(stmt)) { p.err("expected ';' or '}' after " + "expression but found " + - token.to_str(t)); + token.to_str(p.get_reader(), t)); fail; } stmts += vec(stmt); @@ -2102,13 +2113,14 @@ fn parse_item_tag(parser p) -> @ast.item { expect(p, token.SEMI); auto id = p.next_def_id(); - auto vr = rec(name=name, args=args, id=id, ann=ast.ann_none); + auto vr = rec(name=p.get_str(name), args=args, + id=id, ann=ast.ann_none); variants += vec(spanned[ast.variant_](vlo, vhi, vr)); } case (token.RBRACE) { /* empty */ } case (_) { p.err("expected name of variant or '}' but found " + - token.to_str(tok)); + token.to_str(p.get_reader(), tok)); } } } @@ -2210,7 +2222,8 @@ fn parse_item(parser p) -> @ast.item { ret parse_item_obj(p, lyr); } case (?t) { - p.err("expected item but found " + token.to_str(t)); + p.err("expected item but found " + + token.to_str(p.get_reader(), t)); } } fail; @@ -2224,7 +2237,8 @@ fn parse_meta_item(parser p) -> @ast.meta_item { case (token.LIT_STR(?s)) { auto hi = p.get_hi_pos(); p.bump(); - ret @spanned(lo, hi, rec(name = ident, value = s)); + ret @spanned(lo, hi, rec(name = ident, + value = p.get_str(s))); } case (_) { p.err("Metadata items must be string literals"); @@ -2294,9 +2308,9 @@ fn parse_rest_import_name(parser p, ast.ident first, fn parse_full_import_name(parser p, ast.ident def_ident) -> @ast.view_item { alt (p.peek()) { - case (token.IDENT(?ident)) { + case (token.IDENT(?i)) { p.bump(); - ret parse_rest_import_name(p, ident, some(def_ident)); + ret parse_rest_import_name(p, p.get_str(i), some(def_ident)); } case (_) { p.err("expecting an identifier"); @@ -2308,15 +2322,16 @@ fn parse_full_import_name(parser p, ast.ident def_ident) fn parse_import(parser p) -> @ast.view_item { expect(p, token.IMPORT); alt (p.peek()) { - case (token.IDENT(?ident)) { + case (token.IDENT(?i)) { p.bump(); alt (p.peek()) { case (token.EQ) { p.bump(); - ret parse_full_import_name(p, ident); + ret parse_full_import_name(p, p.get_str(i)); } case (_) { - ret parse_rest_import_name(p, ident, none[ast.ident]); + ret parse_rest_import_name(p, p.get_str(i), + none[ast.ident]); } } } diff --git a/src/comp/front/token.rs b/src/comp/front/token.rs index f0b6c4be352..4c2891f3263 100644 --- a/src/comp/front/token.rs +++ b/src/comp/front/token.rs @@ -5,6 +5,8 @@ import std.Int; import std.UInt; import std.Str; +type str_num = uint; + tag binop { PLUS; MINUS; @@ -127,14 +129,14 @@ tag token { LIT_INT(int); LIT_UINT(uint); LIT_MACH_INT(ty_mach, int); - LIT_FLOAT(str); - LIT_MACH_FLOAT(ty_mach, str); - LIT_STR(str); + LIT_FLOAT(str_num); + LIT_MACH_FLOAT(ty_mach, str_num); + LIT_STR(str_num); LIT_CHAR(char); LIT_BOOL(bool); /* Name components */ - IDENT(str); + IDENT(str_num); IDX(int); UNDERSCORE; @@ -168,7 +170,7 @@ tag token { PORT; TASK; - BRACEQUOTE(str); + BRACEQUOTE(str_num); EOF; } @@ -188,7 +190,7 @@ fn binop_to_str(binop o) -> str { } } -fn to_str(token t) -> str { +fn to_str(lexer.reader r, token t) -> str { alt (t) { case (EQ) { ret "="; } @@ -301,10 +303,14 @@ fn to_str(token t) -> str { ret Int.to_str(i, 10u) + "_" + ty_mach_to_str(tm); } - case (LIT_FLOAT(?s)) { ret s; } + case (LIT_MACH_FLOAT(?tm, ?s)) { + ret r.get_str(s) + "_" + ty_mach_to_str(tm); + } + + case (LIT_FLOAT(?s)) { ret r.get_str(s); } case (LIT_STR(?s)) { // FIXME: escape. - ret "\"" + s + "\""; + ret "\"" + r.get_str(s) + "\""; } case (LIT_CHAR(?c)) { // FIXME: escape. @@ -319,7 +325,11 @@ fn to_str(token t) -> str { } /* Name components */ - case (IDENT(?s)) { auto si = "ident:"; si += s; ret si; } + case (IDENT(?s)) { + auto si = "ident:"; + si += r.get_str(s); + ret si; + } case (IDX(?i)) { ret "_" + Int.to_str(i, 10u); } case (UNDERSCORE) { ret "_"; } @@ -360,7 +370,6 @@ fn to_str(token t) -> str { } - // Local Variables: // fill-column: 78; // indent-tabs-mode: nil