Expand rustc lexer to do almost-nearly-nontrivial stuff.

This commit is contained in:
Graydon Hoare 2010-08-20 11:41:34 -07:00
parent 9fc4fc6692
commit 0f224f977d
4 changed files with 201 additions and 4 deletions

View file

@ -17,7 +17,7 @@ fn write_module() {
llvm.LLVMDisposeModule(llmod);
}
fn main(vec[str] args) -> () {
fn main(vec[str] args) {
log "This is the rust 'self-hosted' compiler.";
log "The one written in rust.";
@ -30,6 +30,16 @@ fn main(vec[str] args) -> () {
auto p = parser.new_parser(filename);
log "opened file: " + filename;
auto tok = p.peek();
while (true) {
alt (tok) {
case (token.EOF()) { ret; }
case (_) {
log token.to_str(tok);
p.bump();
tok = p.peek();
}
}
}
}
i += 1;
}

View file

@ -38,8 +38,36 @@ fn next_token(stdio_reader rdr) -> token.token {
if (c == eof) { ret token.EOF(); }
if (is_alpha(c)) {
accum += (c as u8);
while (is_alpha(c)) {
accum += (c as u8);
c = rdr.getc() as char;
ret token.IDENT(accum);
}
}
if (is_dec_digit(c)) {
if (c == '0') {
} else {
while (is_dec_digit(c)) {
accum += (c as u8);
ret token.LIT_INT(0);
}
}
}
// One-byte structural symbols.
if (c == ';') { ret token.SEMI(); }
if (c == '.') { ret token.DOT(); }
if (c == '(') { ret token.LPAREN(); }
if (c == ')') { ret token.RPAREN(); }
if (c == '{') { ret token.LBRACE(); }
if (c == '}') { ret token.RBRACE(); }
if (c == '[') { ret token.LBRACKET(); }
if (c == ']') { ret token.RBRACKET(); }
if (c == '@') { ret token.AT(); }
if (c == '#') { ret token.POUND(); }
log "lexer stopping at ";
log c;
ret token.EOF();
}

View file

@ -1,4 +1,7 @@
import util.common.ty_mach;
import util.common.ty_mach_to_str;
import std._int;
import std._uint;
type op = tag
(PLUS(),
@ -107,10 +110,10 @@ type token = tag
/* Literals */
LIT_INT(int),
LIT_UINT(int),
LIT_UINT(uint),
LIT_MACH_INT(ty_mach, int),
LIT_STR(str),
LIT_CHAR(int),
LIT_CHAR(char),
LIT_BOOL(bool),
/* Name components */
@ -149,6 +152,149 @@ type token = tag
BRACEQUOTE(str),
EOF());
// Renders a token as a human-readable string, with one alt arm per token
// variant handled here.
//
// Keywords, structural symbols and reserved type names return their fixed
// spelling. OP and OPEQ ignore their payload and return the placeholders
// "<op>" and "<op>="; BRACEQUOTE and EOF return "<bracequote>" and "<eof>".
// Literal and name-component arms build the string from the payload; see
// the notes on those arms.
fn to_str(token t) -> str {
alt (t) {
// The specific operator is not printed; only a placeholder is returned.
case (OP(_)) { ret "<op>"; }
case (OPEQ(_)) { ret "<op>="; }
case (AS()) { ret "as"; }
case (WITH()) { ret "with"; }
/* Structural symbols */
case (AT()) { ret "@"; }
case (DOT()) { ret "."; }
case (COMMA()) { ret ","; }
case (SEMI()) { ret ";"; }
case (COLON()) { ret ":"; }
case (RARROW()) { ret "->"; }
case (SEND()) { ret "<|"; }
case (LARROW()) { ret "<-"; }
case (LPAREN()) { ret "("; }
case (RPAREN()) { ret ")"; }
case (LBRACKET()) { ret "["; }
case (RBRACKET()) { ret "]"; }
case (LBRACE()) { ret "{"; }
case (RBRACE()) { ret "}"; }
/* Module and crate keywords */
case (MOD()) { ret "mod"; }
case (USE()) { ret "use"; }
case (AUTH()) { ret "auth"; }
case (META()) { ret "meta"; }
/* Metaprogramming keywords */
case (SYNTAX()) { ret "syntax"; }
case (POUND()) { ret "#"; }
/* Statement keywords */
case (IF()) { ret "if"; }
case (ELSE()) { ret "else"; }
case (DO()) { ret "do"; }
case (WHILE()) { ret "while"; }
case (ALT()) { ret "alt"; }
case (CASE()) { ret "case"; }
case (FAIL()) { ret "fail"; }
case (DROP()) { ret "drop"; }
case (IN()) { ret "in"; }
case (FOR()) { ret "for"; }
case (EACH()) { ret "each"; }
case (PUT()) { ret "put"; }
case (RET()) { ret "ret"; }
case (BE()) { ret "be"; }
/* Type and type-state keywords */
case (TYPE()) { ret "type"; }
case (CHECK()) { ret "check"; }
case (CLAIM()) { ret "claim"; }
case (PROVE()) { ret "prove"; }
/* Effect keywords */
case (IO()) { ret "io"; }
case (STATE()) { ret "state"; }
case (UNSAFE()) { ret "unsafe"; }
/* Type qualifiers */
case (NATIVE()) { ret "native"; }
case (AUTO()) { ret "auto"; }
case (MUTABLE()) { ret "mutable"; }
/* Name management */
case (IMPORT()) { ret "import"; }
case (EXPORT()) { ret "export"; }
/* Value / stmt declarators */
case (LET()) { ret "let"; }
/* Magic runtime services */
case (LOG()) { ret "log"; }
case (SPAWN()) { ret "spawn"; }
case (BIND()) { ret "bind"; }
case (THREAD()) { ret "thread"; }
case (YIELD()) { ret "yield"; }
case (JOIN()) { ret "join"; }
/* Literals */
// Integer payloads are formatted by _int.to_str / _uint.to_str; the 10u
// argument is presumably the radix -- confirm against std._int / std._uint.
case (LIT_INT(i)) { ret _int.to_str(i, 10u); }
case (LIT_UINT(u)) { ret _uint.to_str(u, 10u); }
// The formatted value, then "_", then the machine-type name.
case (LIT_MACH_INT(tm, i)) {
ret _int.to_str(i, 10u)
+ "_" + ty_mach_to_str(tm);
}
// The payload is wrapped in double quotes and otherwise emitted verbatim.
case (LIT_STR(s)) {
// FIXME: escape.
ret "\"" + s + "\"";
}
// The char is cast to u8 and appended to an empty string; no surrounding
// quotes are added.
case (LIT_CHAR(c)) {
// FIXME: escape and encode.
auto tmp = "";
tmp += (c as u8);
ret tmp;
}
case (LIT_BOOL(b)) {
if (b) { ret "true"; } else { ret "false"; }
}
/* Name components */
// An identifier is returned as-is; an index is "_" followed by its value.
case (IDENT(s)) { ret s; }
case (IDX(i)) { ret "_" + _int.to_str(i, 10u); }
case (UNDERSCORE()) { ret "_"; }
/* Reserved type names */
case (BOOL()) { ret "bool"; }
case (INT()) { ret "int"; }
case (UINT()) { ret "uint"; }
case (FLOAT()) { ret "float"; }
case (CHAR()) { ret "char"; }
case (STR()) { ret "str"; }
case (MACH(tm)) { ret ty_mach_to_str(tm); }
/* Algebraic type constructors */
case (REC()) { ret "rec"; }
case (TUP()) { ret "tup"; }
case (TAG()) { ret "tag"; }
case (VEC()) { ret "vec"; }
case (ANY()) { ret "any"; }
/* Callable type constructors */
case (FN()) { ret "fn"; }
case (ITER()) { ret "iter"; }
/* Object type */
case (OBJ()) { ret "obj"; }
/* Comm and task types */
case (CHAN()) { ret "chan"; }
case (PORT()) { ret "port"; }
case (TASK()) { ret "task"; }
// The quoted contents are not printed; only a placeholder is returned.
case (BRACEQUOTE(_)) { ret "<bracequote>"; }
case (EOF()) { ret "<eof>"; }
}
}
// Local Variables:

View file

@ -3,6 +3,19 @@ type ty_mach = tag( ty_i8(), ty_i16(), ty_i32(), ty_i64(),
ty_u8(), ty_u16(), ty_u32(), ty_u64(),
ty_f32(), ty_f16() );
// Returns the textual name of a machine type, e.g. ty_u8() -> "u8".
//
// Every variant declared in the ty_mach tag has an arm, including the two
// floating-point variants, so no value falls through the alt without
// reaching a ret. Each string is the variant name with its "ty_" prefix
// removed.
fn ty_mach_to_str(ty_mach tm) -> str {
    alt (tm) {
        /* Integer machine types */
        case (ty_u8()) { ret "u8"; }
        case (ty_i8()) { ret "i8"; }
        case (ty_u16()) { ret "u16"; }
        case (ty_i16()) { ret "i16"; }
        case (ty_u32()) { ret "u32"; }
        case (ty_i32()) { ret "i32"; }
        case (ty_u64()) { ret "u64"; }
        case (ty_i64()) { ret "i64"; }
        /* Floating-point machine types */
        case (ty_f32()) { ret "f32"; }
        case (ty_f16()) { ret "f16"; }
    }
}
//
// Local Variables:
// mode: rust