Rollup merge of #65849 - popzxc:document-librustc_lexer, r=petrochenkov

librustc_lexer: Enhance documentation This PR enhances documentation state of the `librustc_lexer` (as initiative caused by [rustc-guide#474](https://github.com/rust-lang/rustc-guide/issues/474)), by adding: - Module documentation. - Doc-comments (and a bit of usual comments) in non-obvious (as for me) places. r? @petrochenkov cc @Centril
2019-10-28 04:53:09 +01:00 · 2019-10-28 04:53:09 +01:00 · 575058f3d7
commit 575058f3d7
parent 83260d5c43 993b920032
3 changed files with 280 additions and 79 deletions
--- a/src/librustc_lexer/src/cursor.rs
+++ b/src/librustc_lexer/src/cursor.rs
@ -1,5 +1,9 @@
 use std::str::Chars;
 /// Peekable iterator over a char sequence.
 ///
 /// Next characters can be peeked via `nth_char` method,
 /// and position can be shifted forward via `bump` method.
 pub(crate) struct Cursor<'a> {
    initial_len: usize,
    chars: Chars<'a>,
@ -18,7 +22,9 @@ impl<'a> Cursor<'a> {
            prev: EOF_CHAR,
        }
    }
    /// For debug assertions only.
    /// Returns the last eaten symbol (or '\0' in release builds).
    pub(crate) fn prev(&self) -> char {
        #[cfg(debug_assertions)]
        {
@ -30,19 +36,30 @@ impl<'a> Cursor<'a> {
            '\0'
        }
    }
    /// Peeks at the character `n` positions ahead of the cursor without
    /// consuming anything.
    /// `EOF_CHAR` is returned when there is no such position; since that value
    /// alone does not prove the input has ended, confirm with `is_eof`.
    pub(crate) fn nth_char(&self, n: usize) -> char {
        match self.chars().nth(n) {
            Some(ch) => ch,
            None => EOF_CHAR,
        }
    }
    /// Reports whether the whole input has been consumed.
    pub(crate) fn is_eof(&self) -> bool {
        let remaining = self.chars.as_str();
        remaining.is_empty()
    }
    /// Returns how much of the input has been consumed so far: the initial
    /// length minus the length of the text that is still remaining.
    /// The remaining part is measured with `str::len`, i.e. in bytes.
    pub(crate) fn len_consumed(&self) -> usize {
        let remaining_len = self.chars.as_str().len();
        self.initial_len - remaining_len
    }
-    /// Returns an iterator over the remaining characters.
+
    /// Hands out an independent `Chars` iterator over the characters that
    /// have not been consumed yet. It is a clone, so advancing it leaves the
    /// cursor itself where it was.
    fn chars(&self) -> Chars<'a> {
        Chars::clone(&self.chars)
    }
    /// Moves to the next character.
    pub(crate) fn bump(&mut self) -> Option<char> {
        let c = self.chars.next()?;
--- a/src/librustc_lexer/src/lib.rs
+++ b/src/librustc_lexer/src/lib.rs
@ -1,3 +1,16 @@
 //! Low-level Rust lexer.
 //!
 //! Tokens produced by this lexer are not yet ready for parsing the Rust syntax;
 //! for that, see `libsyntax::parse::lexer`, which converts this basic token stream
 //! into the wide tokens used by the actual parser.
 //!
 //! The purpose of this crate is to convert raw sources into a labeled sequence
 //! of well-known token types, so building an actual Rust token stream will
 //! be easier.
 //!
 //! The main entity of this crate is the [`TokenKind`] enum, which represents
 //! common lexeme types.
 // We want to be able to build this crate with a stable compiler, so no
 // `#![feature]` attributes should be added.
@ -6,78 +19,144 @@ pub mod unescape;
 use crate::cursor::{Cursor, EOF_CHAR};
 /// Parsed token.
 /// It doesn't contain information about data that has been parsed,
 /// only the type of the token and its size.
 pub struct Token {
    /// Which kind of lexeme this token is.
    pub kind: TokenKind,
    /// Size of the token.
    /// NOTE(review): presumably the byte length of the source text the token
    /// covers — confirm against `Cursor::len_consumed`.
    pub len: usize,
 }
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum TokenKind {
    LineComment,
    BlockComment { terminated: bool },
    Whitespace,
    Ident,
    RawIdent,
    Literal { kind: LiteralKind, suffix_start: usize },
    Lifetime { starts_with_number: bool },
    Semi,
    Comma,
    Dot,
    OpenParen,
    CloseParen,
    OpenBrace,
    CloseBrace,
    OpenBracket,
    CloseBracket,
    At,
    Pound,
    Tilde,
    Question,
    Colon,
    Dollar,
    Eq,
    Not,
    Lt,
    Gt,
    Minus,
    And,
    Or,
    Plus,
    Star,
    Slash,
    Caret,
    Percent,
    Unknown,
 }
 use self::TokenKind::*;
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum LiteralKind {
    Int { base: Base, empty_int: bool },
    Float { base: Base, empty_exponent: bool },
    Char { terminated: bool },
    Byte { terminated: bool },
    Str { terminated: bool },
    ByteStr { terminated: bool },
    RawStr { n_hashes: usize, started: bool, terminated: bool },
    RawByteStr { n_hashes: usize, started: bool, terminated: bool },
 }
 use self::LiteralKind::*;
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum Base {
    Binary,
    Octal,
    Hexadecimal,
    Decimal,
 }
 impl Token {
    fn new(kind: TokenKind, len: usize) -> Token {
        Token { kind, len }
    }
 }
 /// Enum representing common lexeme types.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum TokenKind {
    // Multi-char tokens:
    /// "// comment"
    LineComment,
    /// "/* block comment */"
    /// Block comments can be nested, so a sequence like "/* /* */"
    /// will not be considered terminated and will result in a parsing error.
    BlockComment { terminated: bool },
    /// Any sequence of whitespace characters.
    Whitespace,
    /// "ident" or "continue"
    /// At this step keywords are also considered identifiers.
    Ident,
    /// "r#ident"
    RawIdent,
    /// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
    /// `suffix_start` is the consumed length recorded right after the literal
    /// body, i.e. the position where an optional suffix begins.
    Literal { kind: LiteralKind, suffix_start: usize },
    /// "'a"
    /// `starts_with_number` is set when the character after the quote is a digit.
    Lifetime { starts_with_number: bool },

    // One-char tokens:
    /// ";"
    Semi,
    /// ","
    Comma,
    /// "."
    Dot,
    /// "("
    OpenParen,
    /// ")"
    CloseParen,
    /// "{"
    OpenBrace,
    /// "}"
    CloseBrace,
    /// "["
    OpenBracket,
    /// "]"
    CloseBracket,
    /// "@"
    At,
    /// "#"
    Pound,
    /// "~"
    Tilde,
    /// "?"
    Question,
    /// ":"
    Colon,
    /// "$"
    Dollar,
    /// "="
    Eq,
    /// "!"
    Not,
    /// "<"
    Lt,
    /// ">"
    Gt,
    /// "-"
    Minus,
    /// "&"
    And,
    /// "|"
    Or,
    /// "+"
    Plus,
    /// "*"
    Star,
    /// "/"
    Slash,
    /// "^"
    Caret,
    /// "%"
    Percent,

    /// Unknown token, not expected by the lexer, e.g. "№"
    Unknown,
 }
 use self::TokenKind::*;
 /// Enum representing the kinds of literals recognized by the lexer.
 /// Each variant's documentation lists example source texts for that kind.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum LiteralKind {
    /// "12_u8", "0o100", "0b120i99"
    /// `empty_int` is set when a base prefix was provided but no digits
    /// followed it, e.g. "0x".
    Int { base: Base, empty_int: bool },
    /// "12.34f32", "0b100.100"
    /// NOTE(review): `empty_exponent` presumably marks an exponent marker with
    /// no digits after it — confirm in the number-parsing code.
    Float { base: Base, empty_exponent: bool },
    /// "'a'", "'\\'", "'''", "';"
    Char { terminated: bool },
    /// "b'a'", "b'\\'", "b'''", "b';"
    Byte { terminated: bool },
    /// ""abc"", ""abc"
    Str { terminated: bool },
    /// "b"abc"", "b"abc"
    ByteStr { terminated: bool },
    /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
    /// `n_hashes` is the number of '#' symbols; `started` and `terminated`
    /// report whether the raw string was started and terminated.
    RawStr { n_hashes: usize, started: bool, terminated: bool },
    /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
    /// Fields have the same meaning as in `RawStr`.
    RawByteStr { n_hashes: usize, started: bool, terminated: bool },
 }
 use self::LiteralKind::*;
 /// Base of numeric literal encoding according to its prefix.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum Base {
    /// Literal starts with "0b".
    Binary,
    /// Literal starts with "0o".
    Octal,
    /// Literal starts with "0x".
    Hexadecimal,
    /// Literal doesn't contain a prefix.
    /// This is the base assumed until a prefix is recognized.
    Decimal,
 }
 /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
 /// but shebang isn't a part of rust syntax, so this function
 /// skips the line if it starts with a shebang ("#!").
 /// Line won't be skipped if it represents a valid Rust syntax
 /// (e.g. "#![deny(missing_docs)]").
 pub fn strip_shebang(input: &str) -> Option<usize> {
    debug_assert!(!input.is_empty());
    if !input.starts_with("#!") || input.starts_with("#![") {
@ -86,11 +165,13 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
    Some(input.find('\n').unwrap_or(input.len()))
 }
 /// Parses the first token found at the start of `input`.
 /// The input is expected to be non-empty; this is checked with a debug
 /// assertion only.
 pub fn first_token(input: &str) -> Token {
    debug_assert!(!input.is_empty());
    let mut cursor = Cursor::new(input);
    cursor.advance_token()
 }
 /// Creates an iterator that produces tokens from the input string.
 pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ {
    std::iter::from_fn(move || {
        if input.is_empty() {
@ -102,10 +183,9 @@ pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ {
    })
 }
 // See [UAX #31](http://unicode.org/reports/tr31) for definitions of these
 // classes.
 /// True if `c` is considered a whitespace according to Rust language definition.
 /// See [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
 /// for definitions of these classes.
 pub fn is_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
@ -137,6 +217,8 @@ pub fn is_whitespace(c: char) -> bool {
 }
 /// True if `c` is valid as a first character of an identifier.
 /// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
 /// a formal definition of valid identifier name.
 pub fn is_id_start(c: char) -> bool {
    // This is XID_Start OR '_' (which formally is not a XID_Start).
    // We also add fast-path for ascii idents
@ -147,6 +229,8 @@ pub fn is_id_start(c: char) -> bool {
 }
 /// True if `c` is valid as a non-first character of an identifier.
 /// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
 /// a formal definition of valid identifier name.
 pub fn is_id_continue(c: char) -> bool {
    // This is exactly XID_Continue.
    // We also add fast-path for ascii idents
@ -159,15 +243,21 @@ pub fn is_id_continue(c: char) -> bool {
 impl Cursor<'_> {
    /// Parses a token from the input string.
    fn advance_token(&mut self) -> Token {
        let first_char = self.bump().unwrap();
        let token_kind = match first_char {
            // Slash, comment or block comment.
            '/' => match self.nth_char(0) {
                '/' => self.line_comment(),
                '*' => self.block_comment(),
                _ => Slash,
            },
            // Whitespace sequence.
            c if is_whitespace(c) => self.whitespace(),
            // Raw string literal or identifier.
            'r' => match (self.nth_char(0), self.nth_char(1)) {
                ('#', c1) if is_id_start(c1) => self.raw_ident(),
                ('#', _) | ('"', _) => {
@ -181,6 +271,8 @@ impl Cursor<'_> {
                }
                _ => self.ident(),
            },
            // Byte literal, byte string literal, raw byte string literal or identifier.
            'b' => match (self.nth_char(0), self.nth_char(1)) {
                ('\'', _) => {
                    self.bump();
@ -214,13 +306,20 @@ impl Cursor<'_> {
                }
                _ => self.ident(),
            },
            // Identifier (this should be checked after other variant that can
            // start as identifier).
            c if is_id_start(c) => self.ident(),
            // Numeric literal.
            c @ '0'..='9' => {
                let literal_kind = self.number(c);
                let suffix_start = self.len_consumed();
                self.eat_literal_suffix();
                TokenKind::Literal { kind: literal_kind, suffix_start }
            }
            // One-symbol tokens.
            ';' => Semi,
            ',' => Comma,
            '.' => Dot,
@ -247,7 +346,11 @@ impl Cursor<'_> {
            '*' => Star,
            '^' => Caret,
            '%' => Percent,
            // Lifetime or character literal.
            '\'' => self.lifetime_or_char(),
            // String literal.
            '"' => {
                let terminated = self.double_quoted_string();
                let suffix_start = self.len_consumed();
@ -291,6 +394,9 @@ impl Cursor<'_> {
                    self.bump();
                    depth -= 1;
                    if depth == 0 {
                        // This block comment is closed, so for a construction like "/* */ */"
                        // there will be a successfully parsed block comment "/* */"
                        // and " */" will be processed separately.
                        break;
                    }
                }
@ -335,6 +441,7 @@ impl Cursor<'_> {
        debug_assert!('0' <= self.prev() && self.prev() <= '9');
        let mut base = Base::Decimal;
        if first_digit == '0' {
            // Attempt to parse encoding base.
            let has_digits = match self.nth_char(0) {
                'b' => {
                    base = Base::Binary;
@ -351,17 +458,21 @@ impl Cursor<'_> {
                    self.bump();
                    self.eat_hexadecimal_digits()
                }
                // Not a base prefix.
                '0'..='9' | '_' | '.' | 'e' | 'E' => {
                    self.eat_decimal_digits();
                    true
                }
-                // just a 0
+                // Just a 0.
                _ => return Int { base, empty_int: false },
            };
            // Base prefix was provided, but there were no digits
            // after it, e.g. "0x".
            if !has_digits {
                return Int { base, empty_int: true };
            }
        } else {
            // No base prefix, parse number in the usual way.
            self.eat_decimal_digits();
        };
@ -400,6 +511,9 @@ impl Cursor<'_> {
    fn lifetime_or_char(&mut self) -> TokenKind {
        debug_assert!(self.prev() == '\'');
        let mut starts_with_number = false;
        // Check if the first symbol after '\'' is a valid identifier
        // character or a number (not a digit followed by '\'').
        if (is_id_start(self.nth_char(0))
            || self.nth_char(0).is_digit(10) && {
                starts_with_number = true;
@ -408,6 +522,8 @@ impl Cursor<'_> {
            && self.nth_char(1) != '\''
        {
            self.bump();
            // Skip the identifier.
            while is_id_continue(self.nth_char(0)) {
                self.bump();
            }
@ -420,6 +536,8 @@ impl Cursor<'_> {
                Lifetime { starts_with_number }
            };
        }
        // This is not a lifetime (checked above), parse a char literal.
        let terminated = self.single_quoted_string();
        let suffix_start = self.len_consumed();
        if terminated {
@ -431,24 +549,32 @@ impl Cursor<'_> {
    fn single_quoted_string(&mut self) -> bool {
        debug_assert!(self.prev() == '\'');
-        // parse `'''` as a single char literal
+        // Parse `'''` as a single char literal.
        if self.nth_char(0) == '\'' && self.nth_char(1) == '\'' {
            self.bump();
        }
        // Parse until either quotes are terminated or error is detected.
        let mut first = true;
        loop {
            match self.nth_char(0) {
                // Probably the beginning of a comment, which we don't want to include
                // in the error report.
                '/' if !first => break,
                // Newline without following '\'' means unclosed quote, stop parsing.
                '\n' if self.nth_char(1) != '\'' => break,
                // End of file, stop parsing.
                EOF_CHAR if self.is_eof() => break,
                // Quotes are terminated, finish parsing.
                '\'' => {
                    self.bump();
                    return true;
                }
                // Escaped slash is considered one character, so bump twice.
                '\\' => {
                    self.bump();
                    self.bump();
                }
                // Skip the character.
                _ => {
                    self.bump();
                }
@ -458,6 +584,8 @@ impl Cursor<'_> {
        false
    }
    /// Eats double-quoted string and returns true
    /// if string is terminated.
    fn double_quoted_string(&mut self) -> bool {
        debug_assert!(self.prev() == '"');
        loop {
@ -476,8 +604,11 @@ impl Cursor<'_> {
        }
    }
    /// Eats the double-quoted string and returns a tuple of
    /// (amount of the '#' symbols, raw string started, raw string terminated)
    fn raw_double_quoted_string(&mut self) -> (usize, bool, bool) {
        debug_assert!(self.prev() == 'r');
        // Count opening '#' symbols.
        let n_hashes = {
            let mut acc: usize = 0;
            loop {
@ -489,6 +620,8 @@ impl Cursor<'_> {
            }
        };
        // Skip the string itself and check that amount of closing '#'
        // symbols is equal to the amount of opening ones.
        loop {
            match self.bump() {
                Some('"') => {
@ -549,6 +682,7 @@ impl Cursor<'_> {
        if self.eat_decimal_digits() { Ok(()) } else { Err(()) }
    }
    // Eats the suffix if it's an identifier.
    fn eat_literal_suffix(&mut self) {
        if !is_id_start(self.nth_char(0)) {
            return;
--- a/src/librustc_lexer/src/unescape.rs
+++ b/src/librustc_lexer/src/unescape.rs
@ -7,32 +7,54 @@ use std::ops::Range;
 #[cfg(test)]
 mod tests;
 /// Errors that can occur during string unescaping.
 #[derive(Debug, PartialEq, Eq)]
 pub enum EscapeError {
    /// Expected 1 char, but 0 were found.
    ZeroChars,
    /// Expected 1 char, but more than 1 were found.
    MoreThanOneChar,
    /// Escaped '\' character without continuation.
    LoneSlash,
    /// Invalid escape character (e.g. '\z').
    InvalidEscape,
    /// Raw '\r' encountered.
    BareCarriageReturn,
    /// Raw '\r' encountered in raw string.
    BareCarriageReturnInRawString,
    /// Unescaped character that was expected to be escaped (e.g. raw '\t').
    EscapeOnlyChar,
    /// Numeric character escape is too short (e.g. '\x1').
    TooShortHexEscape,
    /// Invalid character in numeric escape (e.g. '\xz').
    InvalidCharInHexEscape,
    /// Character code in numeric escape is non-ascii (e.g. '\xFF').
    OutOfRangeHexEscape,
    /// '\u' not followed by '{'.
    NoBraceInUnicodeEscape,
    /// Non-hexadecimal value in '\u{..}'.
    InvalidCharInUnicodeEscape,
    /// '\u{}'
    EmptyUnicodeEscape,
    /// No closing brace in '\u{..}', e.g. '\u{12'.
    UnclosedUnicodeEscape,
    /// '\u{_12}'
    LeadingUnderscoreUnicodeEscape,
    /// More than 6 digits in '\u{..}', e.g. '\u{10FFFF_FF}'.
    OverlongUnicodeEscape,
    /// Invalid in-bound unicode character code, e.g. '\u{DFFF}'.
    LoneSurrogateUnicodeEscape,
    /// Out of bounds unicode character code, e.g. '\u{FFFFFF}'.
    OutOfRangeUnicodeEscape,
    /// Unicode escape code in byte literal.
    UnicodeEscapeInByte,
    /// Non-ascii character in byte literal.
    NonAsciiCharInByte,
    /// Non-ascii character in byte string literal.
    NonAsciiCharInByteString,
 }
@ -44,15 +66,8 @@ pub fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
        .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
 }
-/// Takes a contents of a string literal (without quotes) and produces a
+/// Takes a contents of a byte literal (without quotes), and returns an
-/// sequence of escaped characters or errors.
+/// unescaped byte or an error.
 pub fn unescape_str<F>(literal_text: &str, callback: &mut F)
 where
    F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
    unescape_str_or_byte_str(literal_text, Mode::Str, callback)
 }
 pub fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
    let mut chars = literal_text.chars();
    unescape_char_or_byte(&mut chars, Mode::Byte)
@ -62,6 +77,17 @@ pub fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
 /// Takes the contents of a string literal (without quotes) and produces a
 /// sequence of unescaped characters or errors.
 /// Nothing is returned directly: every result is reported by invoking
 /// `callback` with a `Range<usize>` and either the resulting `char` or an
 /// `EscapeError`.
 pub fn unescape_str<F>(literal_text: &str, callback: &mut F)
 where
    F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
    let mode = Mode::Str;
    unescape_str_or_byte_str(literal_text, mode, callback)
 }
 /// Takes a contents of a byte string literal (without quotes) and produces a
 /// sequence of bytes or errors.
 /// Values are returned through invoking of the provided callback.
 pub fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
 where
    F: FnMut(Range<usize>, Result<u8, EscapeError>),
@ -71,8 +97,9 @@ where
    })
 }
-/// Takes a contents of a string literal (without quotes) and produces a
+/// Takes a contents of a raw string literal (without quotes) and produces a
 /// sequence of characters or errors.
 /// Values are returned through invoking of the provided callback.
 /// NOTE: Raw strings do not perform any explicit character escaping, here we
 /// only translate CRLF to LF and produce errors on bare CR.
 pub fn unescape_raw_str<F>(literal_text: &str, callback: &mut F)
@ -82,8 +109,9 @@ where
    unescape_raw_str_or_byte_str(literal_text, Mode::Str, callback)
 }
-/// Takes a contents of a string literal (without quotes) and produces a
+/// Takes a contents of a raw byte string literal (without quotes) and produces a
-/// sequence of characters or errors.
+/// sequence of bytes or errors.
 /// Values are returned through invoking of the provided callback.
 /// NOTE: Raw strings do not perform any explicit character escaping, here we
 /// only translate CRLF to LF and produce errors on bare CR.
 pub fn unescape_raw_byte_str<F>(literal_text: &str, callback: &mut F)
@ -95,6 +123,7 @@ where
    })
 }
 /// What kind of literal do we parse.
 #[derive(Debug, Clone, Copy)]
 pub enum Mode {
    Char,
@ -126,6 +155,8 @@ impl Mode {
 fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
    if first_char != '\\' {
        // Previous character was not a slash, and we don't expect it to be
        // an escape-only character.
        return match first_char {
            '\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
            '\r' => Err(EscapeError::BareCarriageReturn),
@ -133,6 +164,7 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
            '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
            _ => {
                if mode.is_bytes() && !first_char.is_ascii() {
                    // Byte literal can't be a non-ascii character.
                    return Err(EscapeError::NonAsciiCharInByte);
                }
                Ok(first_char)
@ -140,6 +172,8 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
        };
    }
    // Previous character is '\\', try to unescape it.
    let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
    let res = match second_char {
@ -152,6 +186,8 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
        '0' => '\0',
        'x' => {
            // Parse hexadecimal character code.
            let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
            let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
@ -160,6 +196,7 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
            let value = hi * 16 + lo;
            // For a non-byte literal verify that it is within ASCII range.
            if !mode.is_bytes() && !is_ascii(value) {
                return Err(EscapeError::OutOfRangeHexEscape);
            }
@ -169,10 +206,13 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
        }
        'u' => {
            // We've parsed '\u', now we have to parse '{..}'.
            if chars.next() != Some('{') {
                return Err(EscapeError::NoBraceInUnicodeEscape);
            }
            // First character must be a hexadecimal digit.
            let mut n_digits = 1;
            let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
                '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
@ -180,6 +220,8 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
                c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
            };
            // First character is valid, now parse the rest of the number
            // and closing brace.
            loop {
                match chars.next() {
                    None => return Err(EscapeError::UnclosedUnicodeEscape),
@ -188,6 +230,9 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
                        if n_digits > 6 {
                            return Err(EscapeError::OverlongUnicodeEscape);
                        }
                        // Incorrect syntax has higher priority for error reporting
                        // than unallowed value for a literal.
                        if mode.is_bytes() {
                            return Err(EscapeError::UnicodeEscapeInByte);
                        }
@ -204,6 +249,7 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
                        let digit = c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
                        n_digits += 1;
                        if n_digits > 6 {
                            // Stop updating value since we're sure that it is incorrect already.
                            continue;
                        }
                        let digit = digit as u32;
@ -243,6 +289,10 @@ where
                let second_char = chars.clone().next();
                match second_char {
                    Some('\n') => {
                        // Rust language specification requires us to skip whitespaces
                        // if unescaped '\' character is followed by '\n'.
                        // For details see [Rust language reference]
                        // (https://doc.rust-lang.org/reference/tokens.html#string-literals).
                        skip_ascii_whitespace(&mut chars);
                        continue;
                    }