rollup merge of #23872: huonw/eager-lexing

Conflicts:
	src/libsyntax/parse/lexer/mod.rs
This commit is contained in:
Alex Crichton 2015-03-31 10:16:54 -07:00
commit e3f2d45cb3
5 changed files with 76 additions and 16 deletions

View file

@ -620,8 +620,8 @@ impl<'a> StringReader<'a> {
let base = 10;
// find the integer representing the name
self.scan_digits(base);
let encoded_name: u32 = self.with_str_from(start_bpos, |s| {
self.scan_digits(base, base);
let encoded_name : u32 = self.with_str_from(start_bpos, |s| {
u32::from_str_radix(s, 10).unwrap_or_else(|_| {
panic!("expected digits representing a name, got {:?}, {}, range [{:?},{:?}]",
s, whence, start_bpos, self.last_pos);
@ -638,7 +638,7 @@ impl<'a> StringReader<'a> {
// find the integer representing the ctxt
let start_bpos = self.last_pos;
self.scan_digits(base);
self.scan_digits(base, base);
let encoded_ctxt : ast::SyntaxContext = self.with_str_from(start_bpos, |s| {
u32::from_str_radix(s, 10).unwrap_or_else(|_| {
panic!("expected digits representing a ctxt, got {:?}, {}", s, whence);
@ -652,16 +652,28 @@ impl<'a> StringReader<'a> {
ctxt: encoded_ctxt, }
}
/// Scan through any digits (base `radix`) or underscores, and return how
/// many digits there were.
fn scan_digits(&mut self, radix: u32) -> usize {
/// Scan through any digits (base `scan_radix`) or underscores,
/// and return how many digits there were.
///
/// `real_radix` represents the true radix of the number we're
/// interested in, and errors will be emitted for any digits
/// between `real_radix` and `scan_radix`.
fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
assert!(real_radix <= scan_radix);
let mut len = 0;
loop {
let c = self.curr;
if c == Some('_') { debug!("skipping a _"); self.bump(); continue; }
match c.and_then(|cc| cc.to_digit(radix)) {
match c.and_then(|cc| cc.to_digit(scan_radix)) {
Some(_) => {
debug!("{:?} in scan_digits", c);
// check that the hypothetical digit is actually
// in range for the true radix
if c.unwrap().to_digit(real_radix).is_none() {
self.err_span_(self.last_pos, self.pos,
&format!("invalid digit for a base {} literal",
real_radix));
}
len += 1;
self.bump();
}
@ -680,11 +692,11 @@ impl<'a> StringReader<'a> {
if c == '0' {
match self.curr.unwrap_or('\0') {
'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2); }
'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8); }
'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16); }
'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2, 10); }
'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8, 10); }
'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16, 16); }
'0'...'9' | '_' | '.' => {
num_digits = self.scan_digits(10) + 1;
num_digits = self.scan_digits(10, 10) + 1;
}
_ => {
// just a 0
@ -692,7 +704,7 @@ impl<'a> StringReader<'a> {
}
}
} else if c.is_digit(10) {
num_digits = self.scan_digits(10) + 1;
num_digits = self.scan_digits(10, 10) + 1;
} else {
num_digits = 0;
}
@ -711,7 +723,7 @@ impl<'a> StringReader<'a> {
// with a number
self.bump();
if self.curr.unwrap_or('\0').is_digit(10) {
self.scan_digits(10);
self.scan_digits(10, 10);
self.scan_float_exponent();
}
let last_pos = self.last_pos;
@ -934,7 +946,7 @@ impl<'a> StringReader<'a> {
if self.curr_is('-') || self.curr_is('+') {
self.bump();
}
if self.scan_digits(10) == 0 {
if self.scan_digits(10, 10) == 0 {
self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent")
}
}

View file

@ -736,7 +736,20 @@ pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) ->
let res = match u64::from_str_radix(s, base).ok() {
Some(r) => r,
None => { sd.span_err(sp, "int literal is too large"); 0 }
None => {
// small bases are lexed as if they were base 10, e.g., the string
// might be `0b10201`. This will cause the conversion above to fail,
// but these cases have errors in the lexer: we don't want to emit
// two errors, and we especially don't want to emit this error since
// it isn't necessarily true.
let already_errored = base < 10 &&
s.chars().any(|c| c.to_digit(10).map_or(false, |d| d >= base));
if !already_errored {
sd.span_err(sp, "int literal is too large");
}
0
}
};
// adjust the sign

View file

@ -10,5 +10,5 @@
// error-pattern:no valid digits found for number
fn main() {
log(error, 0b42);
log(error, 0b);
}

View file

@ -0,0 +1,21 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Compile-fail test for binary (`0b`) integer literals.
//
// Every statement below is a `0b` literal containing a decimal digit
// outside 0-1. The trailing annotation on each line names the diagnostic
// expected for that line.
fn main() {
0b121; //~ ERROR invalid digit for a base 2 literal
// Same check, with a `_` separator inside the literal.
0b10_10301; //~ ERROR invalid digit for a base 2 literal
0b30; //~ ERROR invalid digit for a base 2 literal
0b41; //~ ERROR invalid digit for a base 2 literal
// Single-digit literals covering 5 through 9.
0b5; //~ ERROR invalid digit for a base 2 literal
0b6; //~ ERROR invalid digit for a base 2 literal
0b7; //~ ERROR invalid digit for a base 2 literal
0b8; //~ ERROR invalid digit for a base 2 literal
0b9; //~ ERROR invalid digit for a base 2 literal
}

View file

@ -0,0 +1,14 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Compile-fail test for octal (`0o`) integer literals.
//
// Each statement below is a `0o` literal containing a decimal digit
// outside 0-7. The trailing annotation on each line names the diagnostic
// expected for that line.
fn main() {
0o18; //~ ERROR invalid digit for a base 8 literal
// Same check, with `_` separators around the out-of-range digit.
0o1234_9_5670; //~ ERROR invalid digit for a base 8 literal
}