rust/src/comp/fe/lexer.rs

import std._io.stdio_reader;

fn in_range(char c, char lo, char hi) -> bool {
    ret lo <= c && c <= hi;
}

fn is_alpha(char c) -> bool {
    ret in_range(c, 'a', 'z') ||
        in_range(c, 'A', 'Z');
}

fn is_dec_digit(char c) -> bool {
    ret in_range(c, '0', '9');
}

fn is_hex_digit(char c) -> bool {
    ret in_range(c, '0', '9') ||
        in_range(c, 'a', 'f') ||
        in_range(c, 'A', 'F');
}

fn is_bin_digit(char c) -> bool {
    ret c == '0' || c == '1';
}

fn is_whitespace(char c) -> bool {
    ret c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

// Reads characters from `rdr` and returns the next token.
//
// Leading whitespace is skipped. Then, in order:
//   - the end-of-input sentinel yields token.EOF();
//   - a run of ASCII letters yields token.IDENT with the accumulated text;
//   - a run of decimal digits yields token.LIT_INT with its base-10 value;
//   - single-character punctuation, plus `=` / `==`, yields its own token.
// Any other character is logged and reported as token.EOF(), which makes
// the lexer stop at that point.
fn next_token(stdio_reader rdr) -> token.token {
    // Sentinel compared against every character read; presumably the value
    // getc() produces at end of input.
    auto eof = (-1) as char;
    auto c = rdr.getc() as char;
    auto accum_str = "";
    auto accum_int = 0;

    while (is_whitespace(c) && c != eof) {
        c = rdr.getc() as char;
    }

    if (c == eof) { ret token.EOF(); }

    // Identifiers: one or more ASCII letters.
    if (is_alpha(c)) {
        while (is_alpha(c)) {
            accum_str += (c as u8);
            c = rdr.getc() as char;
        }
        // The character that ended the run belongs to the next token.
        rdr.ungetc(c as int);
        ret token.IDENT(accum_str);
    }

    // Integer literals: one or more decimal digits. A leading '0' is an
    // ordinary digit here, so `0` lexes as LIT_INT(0) rather than reaching
    // the "lexer stopping" fallback below. Radix prefixes such as 0x / 0b
    // are not recognized.
    if (is_dec_digit(c)) {
        while (is_dec_digit(c)) {
            accum_int *= 10;
            accum_int += (c as int) - ('0' as int);
            c = rdr.getc() as char;
        }
        // The character that ended the run belongs to the next token.
        rdr.ungetc(c as int);
        ret token.LIT_INT(accum_int);
    }

    // One-byte structural symbols.
    alt (c) {
        case (';') { ret token.SEMI(); }
        case (',') { ret token.COMMA(); }
        case ('.') { ret token.DOT(); }
        case ('(') { ret token.LPAREN(); }
        case (')') { ret token.RPAREN(); }
        case ('{') { ret token.LBRACE(); }
        case ('}') { ret token.RBRACE(); }
        case ('[') { ret token.LBRACKET(); }
        case (']') { ret token.RBRACKET(); }
        case ('@') { ret token.AT(); }
        case ('#') { ret token.POUND(); }
        case ('=') {
            // Look one character ahead to tell `==` from `=`.
            auto c2 = rdr.getc() as char;
            if (c2 == '=') {
                ret token.OP(token.EQEQ());
            } else {
                rdr.ungetc(c2 as int);
                ret token.OP(token.EQ());
            }
        }
    }

    // No rule matched: log the offending character and report EOF.
    log "lexer stopping at ";
    log c;
    ret token.EOF();
}


//
// Local Variables:
// mode: rust
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
// End:
//
Simplify lexer/parser structure to use stdio_reader. 2010-08-19 00:41:13 +02:00			`import std._io.stdio_reader;`
Sketch some not-quite-compiling code into comp/fe/lexer.rs. 2010-07-14 18:41:36 +02:00
Add some code to lexer in rustc. 2010-08-20 03:42:17 +02:00			`fn in_range(char c, char lo, char hi) -> bool {`
Fix some lexer bugs in rustc. Beginning to lex stuff now. 2010-08-20 21:12:37 +02:00			`ret lo <= c && c <= hi;`
Add some code to lexer in rustc. 2010-08-20 03:42:17 +02:00			`}`

			`fn is_alpha(char c) -> bool {`
			`ret in_range(c, 'a', 'z') \|\|`
			`in_range(c, 'A', 'Z');`
			`}`

			`fn is_dec_digit(char c) -> bool {`
			`ret in_range(c, '0', '9');`
			`}`

			`fn is_hex_digit(char c) -> bool {`
			`ret in_range(c, '0', '9') \|\|`
			`in_range(c, 'a', 'f') \|\|`
			`in_range(c, 'A', 'F');`
			`}`

			`fn is_bin_digit(char c) -> bool {`
			`ret c == '0' \|\| c == '1';`
			`}`

			`fn is_whitespace(char c) -> bool {`
Accumulate number tokens properly, handle newline, EQ and EQEQ in rustc lexer. 2010-08-21 00:36:48 +02:00			`ret c == ' ' \|\| c == '\t' \|\| c == '\r' \|\| c == '\n';`
Add some code to lexer in rustc. 2010-08-20 03:42:17 +02:00			`}`

Simplify lexer/parser structure to use stdio_reader. 2010-08-19 00:41:13 +02:00			`fn next_token(stdio_reader rdr) -> token.token {`
Add some code to lexer in rustc. 2010-08-20 03:42:17 +02:00			`auto eof = (-1) as char;`
			`auto c = rdr.getc() as char;`
Accumulate number tokens properly, handle newline, EQ and EQEQ in rustc lexer. 2010-08-21 00:36:48 +02:00			`auto accum_str = "";`
			`auto accum_int = 0;`
Add some code to lexer in rustc. 2010-08-20 03:42:17 +02:00
			`while (is_whitespace(c) && c != eof) {`
			`c = rdr.getc() as char;`
			`}`

			`if (c == eof) { ret token.EOF(); }`
Fix some lexer bugs in rustc. Beginning to lex stuff now. 2010-08-20 21:12:37 +02:00
Use str += u8 in rustc lexer. 2010-08-20 19:03:31 +02:00			`if (is_alpha(c)) {`
Expand rustc lexer to do almost-nearly-nontrivial stuff. 2010-08-20 20:41:34 +02:00			`while (is_alpha(c)) {`
Accumulate number tokens properly, handle newline, EQ and EQEQ in rustc lexer. 2010-08-21 00:36:48 +02:00			`accum_str += (c as u8);`
Expand rustc lexer to do almost-nearly-nontrivial stuff. 2010-08-20 20:41:34 +02:00			`c = rdr.getc() as char;`
			`}`
Fix some lexer bugs in rustc. Beginning to lex stuff now. 2010-08-20 21:12:37 +02:00			`rdr.ungetc(c as int);`
Accumulate number tokens properly, handle newline, EQ and EQEQ in rustc lexer. 2010-08-21 00:36:48 +02:00			`ret token.IDENT(accum_str);`
Use str += u8 in rustc lexer. 2010-08-20 19:03:31 +02:00			`}`
Expand rustc lexer to do almost-nearly-nontrivial stuff. 2010-08-20 20:41:34 +02:00
			`if (is_dec_digit(c)) {`
			`if (c == '0') {`
			`} else {`
			`while (is_dec_digit(c)) {`
Accumulate number tokens properly, handle newline, EQ and EQEQ in rustc lexer. 2010-08-21 00:36:48 +02:00			`accum_int *= 10;`
			`accum_int += (c as int) - ('0' as int);`
Fix some lexer bugs in rustc. Beginning to lex stuff now. 2010-08-20 21:12:37 +02:00			`c = rdr.getc() as char;`
Expand rustc lexer to do almost-nearly-nontrivial stuff. 2010-08-20 20:41:34 +02:00			`}`
Fix some lexer bugs in rustc. Beginning to lex stuff now. 2010-08-20 21:12:37 +02:00			`rdr.ungetc(c as int);`
Accumulate number tokens properly, handle newline, EQ and EQEQ in rustc lexer. 2010-08-21 00:36:48 +02:00			`ret token.LIT_INT(accum_int);`
Expand rustc lexer to do almost-nearly-nontrivial stuff. 2010-08-20 20:41:34 +02:00			`}`
			`}`

			`// One-byte structural symbols.`
Use pattern matching for the one-byte structural symbols in the self-hosted compiler 2010-08-20 23:34:48 +02:00			`alt (c) {`
Accumulate number tokens properly, handle newline, EQ and EQEQ in rustc lexer. 2010-08-21 00:36:48 +02:00			`case (';') { ret token.SEMI(); }`
			`case (',') { ret token.COMMA(); }`
			`case ('.') { ret token.DOT(); }`
			`case ('(') { ret token.LPAREN(); }`
			`case (')') { ret token.RPAREN(); }`
			`case ('{') { ret token.LBRACE(); }`
			`case ('}') { ret token.RBRACE(); }`
			`case ('[') { ret token.LBRACKET(); }`
			`case (']') { ret token.RBRACKET(); }`
			`case ('@') { ret token.AT(); }`
			`case ('#') { ret token.POUND(); }`
			`case ('=') {`
			`auto c2 = rdr.getc() as char;`
			`if (c2 == '=') {`
			`ret token.OP(token.EQEQ());`
			`} else {`
			`rdr.ungetc(c2 as int);`
			`ret token.OP(token.EQ());`
			`}`
			`}`
Use pattern matching for the one-byte structural symbols in the self-hosted compiler 2010-08-20 23:34:48 +02:00			`}`
Expand rustc lexer to do almost-nearly-nontrivial stuff. 2010-08-20 20:41:34 +02:00
			`log "lexer stopping at ";`
Simplify lexer/parser structure to use stdio_reader. 2010-08-19 00:41:13 +02:00			`log c;`
			`ret token.EOF();`
Sketch some not-quite-compiling code into comp/fe/lexer.rs. 2010-07-14 18:41:36 +02:00			`}`

Add do-nothing obj type for lexer to rustc. 2010-08-18 20:35:34 +02:00
Add modelines and related emacs chatter to rustc. 2010-08-12 19:29:23 +02:00			`//`
			`// Local Variables:`
			`// mode: rust`
			`// fill-column: 78;`
			`// indent-tabs-mode: nil`
			`// c-basic-offset: 4`
			`// buffer-file-coding-system: utf-8-unix`
			`// compile-command: "make -k -C ../.. 2>&1 \| sed -e 's/\\/x\\//x:\\//g'";`
			`// End:`
			`//`