Make the parser more careful about keywords

Keywords are now only recognized in contexts where they are valid. The
lexer no longer recognizes them; all words are lexed as IDENT tokens,
which are then interpreted by the parser.
This commit is contained in:
Marijn Haverbeke 2011-05-13 21:30:08 +02:00
parent be9aa1cc5e
commit 57ffa2a487
6 changed files with 614 additions and 1154 deletions

View file

@ -22,8 +22,6 @@ state type reader = state obj {
fn add_str(str) -> token::str_num;
fn get_str(token::str_num) -> str;
fn get_chpos() -> uint;
fn get_keywords() -> hashmap[str,token::token];
fn get_reserved() -> hashmap[str,()];
fn get_filemap() -> codemap::filemap;
fn err(str m);
};
@ -39,8 +37,6 @@ fn new_reader(session sess, io::reader rdr,
mutable uint mark_chpos,
mutable uint chpos,
mutable vec[str] strs,
hashmap[str,token::token] keywords,
hashmap[str,()] reserved,
codemap::filemap fm) {
fn is_eof() -> bool {
@ -82,10 +78,6 @@ fn new_reader(session sess, io::reader rdr,
}
}
fn get_keywords() -> hashmap[str,token::token] {
ret keywords;
}
fn add_str(str s) -> token::str_num {
strs += vec(s);
ret _vec::len[str](strs) - 1u;
@ -95,10 +87,6 @@ fn new_reader(session sess, io::reader rdr,
ret strs.(i);
}
fn get_reserved() -> hashmap[str,()] {
ret reserved;
}
fn get_filemap() -> codemap::filemap {
ret fm;
}
@ -111,133 +99,11 @@ fn new_reader(session sess, io::reader rdr,
let vec[str] strs = vec();
auto rd = reader(sess, file, _str::byte_len(file), 0u, -1 as char,
filemap.start_pos, filemap.start_pos,
strs, keyword_table(),
reserved_word_table(),
filemap);
strs, filemap);
rd.init();
ret rd;
}
// Builds and returns a fresh string-keyed hash table that maps each keyword
// spelling to the token value the lexer should produce for it.
fn keyword_table() -> std::map::hashmap[str, token::token] {
auto keywords = new_str_hash[token::token]();
keywords.insert("mod", token::MOD);
keywords.insert("use", token::USE);
keywords.insert("meta", token::META);
keywords.insert("auth", token::AUTH);
keywords.insert("syntax", token::SYNTAX);
keywords.insert("if", token::IF);
keywords.insert("else", token::ELSE);
keywords.insert("while", token::WHILE);
keywords.insert("do", token::DO);
keywords.insert("alt", token::ALT);
keywords.insert("case", token::CASE);
keywords.insert("for", token::FOR);
keywords.insert("each", token::EACH);
keywords.insert("break", token::BREAK);
keywords.insert("cont", token::CONT);
keywords.insert("put", token::PUT);
keywords.insert("ret", token::RET);
keywords.insert("be", token::BE);
keywords.insert("fail", token::FAIL);
keywords.insert("drop", token::DROP);
keywords.insert("type", token::TYPE);
keywords.insert("check", token::CHECK);
keywords.insert("assert", token::ASSERT);
keywords.insert("claim", token::CLAIM);
keywords.insert("prove", token::PROVE);
keywords.insert("state", token::STATE);
keywords.insert("gc", token::GC);
keywords.insert("unsafe", token::UNSAFE);
keywords.insert("native", token::NATIVE);
keywords.insert("mutable", token::MUTABLE);
keywords.insert("auto", token::AUTO);
keywords.insert("fn", token::FN);
keywords.insert("pred", token::PRED);
keywords.insert("iter", token::ITER);
keywords.insert("import", token::IMPORT);
keywords.insert("export", token::EXPORT);
keywords.insert("let", token::LET);
keywords.insert("const", token::CONST);
keywords.insert("log", token::LOG);
keywords.insert("log_err", token::LOG_ERR);
keywords.insert("spawn", token::SPAWN);
keywords.insert("thread", token::THREAD);
keywords.insert("yield", token::YIELD);
keywords.insert("join", token::JOIN);
keywords.insert("bool", token::BOOL);
keywords.insert("int", token::INT);
keywords.insert("uint", token::UINT);
keywords.insert("float", token::FLOAT);
keywords.insert("char", token::CHAR);
keywords.insert("str", token::STR);
keywords.insert("rec", token::REC);
keywords.insert("tup", token::TUP);
keywords.insert("tag", token::TAG);
keywords.insert("vec", token::VEC);
keywords.insert("any", token::ANY);
keywords.insert("obj", token::OBJ);
keywords.insert("self", token::SELF);
keywords.insert("port", token::PORT);
keywords.insert("chan", token::CHAN);
keywords.insert("task", token::TASK);
// The boolean literals are stored as LIT_BOOL tokens carrying their value,
// not as dedicated keyword tokens.
keywords.insert("true", token::LIT_BOOL(true));
keywords.insert("false", token::LIT_BOOL(false));
keywords.insert("in", token::IN);
keywords.insert("as", token::AS);
keywords.insert("with", token::WITH);
keywords.insert("bind", token::BIND);
// Machine type names all map to a MACH token wrapping the matching
// common::ty_* value.
keywords.insert("u8", token::MACH(common::ty_u8));
keywords.insert("u16", token::MACH(common::ty_u16));
keywords.insert("u32", token::MACH(common::ty_u32));
keywords.insert("u64", token::MACH(common::ty_u64));
keywords.insert("i8", token::MACH(common::ty_i8));
keywords.insert("i16", token::MACH(common::ty_i16));
keywords.insert("i32", token::MACH(common::ty_i32));
keywords.insert("i64", token::MACH(common::ty_i64));
keywords.insert("f32", token::MACH(common::ty_f32));
keywords.insert("f64", token::MACH(common::ty_f64));
ret keywords;
}
// Builds and returns a fresh string-keyed hash table of reserved words.
// Every value is unit, so only key membership carries information and the
// table works as a set.
fn reserved_word_table() -> std::map::hashmap[str, ()] {
auto reserved = new_str_hash[()]();
reserved.insert("f16", ()); // IEEE 754-2008 'binary16' interchange fmt
reserved.insert("f80", ()); // IEEE 754-1985 'extended'
reserved.insert("f128", ()); // IEEE 754-2008 'binary128'
reserved.insert("m32", ()); // IEEE 754-2008 'decimal32'
reserved.insert("m64", ()); // IEEE 754-2008 'decimal64'
reserved.insert("m128", ()); // IEEE 754-2008 'decimal128'
reserved.insert("dec", ()); // One of m32, m64, m128
ret reserved;
}
// Returns true when `c` lies within the inclusive range from `lo` to `hi`.
fn in_range(char c, char lo, char hi) -> bool {
ret lo <= c && c <= hi;
}
@ -604,17 +470,6 @@ fn next_token(reader rdr) -> token::token {
ret token::UNDERSCORE;
}
auto kwds = rdr.get_keywords();
if (kwds.contains_key(accum_str)) {
ret kwds.get(accum_str);
}
auto rsvd = rdr.get_reserved();
if (rsvd.contains_key(accum_str)) {
rdr.err(#fmt("reserved keyword: %s", accum_str));
fail;
}
ret token::IDENT(rdr.add_str(accum_str));
}

File diff suppressed because it is too large Load diff

View file

@ -38,9 +38,6 @@ tag token {
BINOP(binop);
BINOPEQ(binop);
AS;
WITH;
/* Structural symbols */
AT;
DOT;
@ -59,73 +56,8 @@ tag token {
LBRACE;
RBRACE;
/* Module and crate keywords */
MOD;
USE;
AUTH;
META;
/* Metaprogramming keywords */
SYNTAX;
POUND;
/* Statement keywords */
IF;
ELSE;
DO;
WHILE;
ALT;
CASE;
BREAK;
CONT;
FAIL;
DROP;
IN;
FOR;
EACH;
PUT;
RET;
BE;
/* Type and type-state keywords */
TYPE;
ASSERT;
CHECK;
CLAIM;
PROVE;
/* Layer keywords */
STATE;
GC;
/* Unsafe-block keyword */
UNSAFE;
/* Type qualifiers */
NATIVE;
AUTO;
MUTABLE;
/* Name management */
IMPORT;
EXPORT;
/* Value / stmt declarators */
LET;
CONST;
/* Magic runtime services */
LOG;
LOG_ERR;
SPAWN;
BIND;
THREAD;
YIELD;
JOIN;
/* Literals */
LIT_INT(int);
LIT_UINT(uint);
@ -141,36 +73,6 @@ tag token {
IDX(int);
UNDERSCORE;
/* Reserved type names */
BOOL;
INT;
UINT;
FLOAT;
CHAR;
STR;
MACH(ty_mach);
/* Algebraic type constructors */
REC;
TUP;
TAG;
VEC;
ANY;
/* Callable type constructors */
FN;
PRED;
ITER;
/* Object type and related keywords */
OBJ;
SELF;
/* Comm and task types */
CHAN;
PORT;
TASK;
BRACEQUOTE(str_num);
EOF;
}
@ -209,10 +111,6 @@ fn to_str(lexer::reader r, token t) -> str {
case (BINOP(?op)) { ret binop_to_str(op); }
case (BINOPEQ(?op)) { ret binop_to_str(op) + "="; }
case (AS) { ret "as"; }
case (WITH) { ret "with"; }
/* Structural symbols */
case (AT) { ret "@"; }
case (DOT) { ret "."; }
@ -231,73 +129,8 @@ fn to_str(lexer::reader r, token t) -> str {
case (LBRACE) { ret "{"; }
case (RBRACE) { ret "}"; }
/* Module and crate keywords */
case (MOD) { ret "mod"; }
case (USE) { ret "use"; }
case (AUTH) { ret "auth"; }
case (META) { ret "meta"; }
/* Metaprogramming keywords */
case (SYNTAX) { ret "syntax"; }
case (POUND) { ret "#"; }
/* Statement keywords */
case (IF) { ret "if"; }
case (ELSE) { ret "else"; }
case (DO) { ret "do"; }
case (WHILE) { ret "while"; }
case (ALT) { ret "alt"; }
case (CASE) { ret "case"; }
case (BREAK) { ret "break"; }
case (CONT) { ret "cont"; }
case (FAIL) { ret "fail"; }
case (DROP) { ret "drop"; }
case (IN) { ret "in"; }
case (FOR) { ret "for"; }
case (EACH) { ret "each"; }
case (PUT) { ret "put"; }
case (RET) { ret "ret"; }
case (BE) { ret "be"; }
/* Type and type-state keywords */
case (TYPE) { ret "type"; }
case (ASSERT) { ret "assert"; }
case (CHECK) { ret "check"; }
case (CLAIM) { ret "claim"; }
case (PROVE) { ret "prove"; }
/* Layer keywords */
case (STATE) { ret "state"; }
case (GC) { ret "gc"; }
/* Unsafe-block keyword */
case (UNSAFE) { ret "unsafe"; }
/* Type qualifiers */
case (NATIVE) { ret "native"; }
case (AUTO) { ret "auto"; }
case (MUTABLE) { ret "mutable"; }
/* Name management */
case (IMPORT) { ret "import"; }
case (EXPORT) { ret "export"; }
/* Value / stmt declarators */
case (LET) { ret "let"; }
case (CONST) { ret "const"; }
/* Magic runtime services */
case (LOG) { ret "log"; }
case (LOG_ERR) { ret "log_err"; }
case (SPAWN) { ret "spawn"; }
case (BIND) { ret "bind"; }
case (THREAD) { ret "thread"; }
case (YIELD) { ret "yield"; }
case (JOIN) { ret "join"; }
/* Literals */
case (LIT_INT(?i)) { ret _int::to_str(i, 10u); }
case (LIT_UINT(?u)) { ret _uint::to_str(u, 10u); }
@ -328,44 +161,11 @@ fn to_str(lexer::reader r, token t) -> str {
/* Name components */
case (IDENT(?s)) {
auto si = "ident:";
si += r.get_str(s);
ret si;
ret r.get_str(s);
}
case (IDX(?i)) { ret "_" + _int::to_str(i, 10u); }
case (UNDERSCORE) { ret "_"; }
/* Reserved type names */
case (BOOL) { ret "bool"; }
case (INT) { ret "int"; }
case (UINT) { ret "uint"; }
case (FLOAT) { ret "float"; }
case (CHAR) { ret "char"; }
case (STR) { ret "str"; }
case (MACH(?tm)) { ret ty_mach_to_str(tm); }
/* Algebraic type constructors */
case (REC) { ret "rec"; }
case (TUP) { ret "tup"; }
case (TAG) { ret "tag"; }
case (VEC) { ret "vec"; }
case (ANY) { ret "any"; }
/* Callable type constructors */
case (FN) { ret "fn"; }
case (PRED) { ret "pred"; }
case (ITER) { ret "iter"; }
/* Object type */
case (OBJ) { ret "obj"; }
case (SELF) { ret "self"; }
/* Comm and task types */
case (CHAN) { ret "chan"; }
case (PORT) { ret "port"; }
case (TASK) { ret "task"; }
case (BRACEQUOTE(_)) { ret "<bracequote>"; }
case (EOF) { ret "<eof>"; }
}

View file

@ -9,7 +9,6 @@ import util::common;
import pp::end; import pp::wrd; import pp::space; import pp::line;
const uint indent_unit = 4u;
const int as_prec = 5;
const uint default_columns = 78u;
type ps = @rec(pp::ps s,
@ -494,7 +493,7 @@ fn print_expr(ps s, &@ast::expr expr) {
print_literal(s, lit);
}
case (ast::expr_cast(?expr,?ty,_)) {
print_maybe_parens(s, expr, as_prec);
print_maybe_parens(s, expr, front::parser::as_prec);
space(s.s);
wrd1(s, "as");
print_type(s, ty);
@ -888,7 +887,7 @@ fn print_maybe_parens(ps s, @ast::expr expr, int outer_prec) {
add_them = operator_prec(op) < outer_prec;
}
case (ast::expr_cast(_,_,_)) {
add_them = as_prec < outer_prec;
add_them = front::parser::as_prec < outer_prec;
}
case (_) {
add_them = false;

View file

@ -73,7 +73,7 @@ mod lib {
mod llvm;
}
}
else {
case (_) {
let (llvm_lib = "librustllvm.so") {
mod llvm;
}

View file

@ -51,7 +51,7 @@ alt (target_os) {
} case ("macos") {
mod os = "macos_os.rs";
mod os_fs = "posix_fs.rs";
} else {
} case (_) {
mod os = "linux_os.rs";
mod os_fs = "posix_fs.rs";
}