syntax: Move comment-extraction code to its own module
This commit is contained in:
parent
4f576275be
commit
932aa893fd
4 changed files with 213 additions and 208 deletions
195
src/librustsyntax/parse/comments.rs
Normal file
195
src/librustsyntax/parse/comments.rs
Normal file
|
@ -0,0 +1,195 @@
|
|||
import io::reader_util;
|
||||
import util::interner;
|
||||
import lexer::{ reader, new_reader, next_token, is_whitespace };
|
||||
|
||||
enum cmnt_style {
|
||||
isolated, // No code on either side of each line of the comment
|
||||
trailing, // Code exists to the left of the comment
|
||||
mixed, // Code before /* foo */ and after the comment
|
||||
blank_line, // Just a manual blank line "\n\n", for layout
|
||||
}
|
||||
|
||||
type cmnt = {style: cmnt_style, lines: [str], pos: uint};
|
||||
|
||||
fn read_to_eol(rdr: reader) -> str {
|
||||
let mut val = "";
|
||||
while rdr.curr != '\n' && !rdr.is_eof() {
|
||||
str::push_char(val, rdr.curr);
|
||||
rdr.bump();
|
||||
}
|
||||
if rdr.curr == '\n' { rdr.bump(); }
|
||||
ret val;
|
||||
}
|
||||
|
||||
fn read_one_line_comment(rdr: reader) -> str {
|
||||
let val = read_to_eol(rdr);
|
||||
assert (val[0] == '/' as u8 && val[1] == '/' as u8);
|
||||
ret val;
|
||||
}
|
||||
|
||||
fn consume_non_eol_whitespace(rdr: reader) {
|
||||
while is_whitespace(rdr.curr) && rdr.curr != '\n' && !rdr.is_eof() {
|
||||
rdr.bump();
|
||||
}
|
||||
}
|
||||
|
||||
fn push_blank_line_comment(rdr: reader, &comments: [cmnt]) {
|
||||
#debug(">>> blank-line comment");
|
||||
let v: [str] = [];
|
||||
comments += [{style: blank_line, lines: v, pos: rdr.chpos}];
|
||||
}
|
||||
|
||||
fn consume_whitespace_counting_blank_lines(rdr: reader, &comments: [cmnt]) {
|
||||
while is_whitespace(rdr.curr) && !rdr.is_eof() {
|
||||
if rdr.col == 0u && rdr.curr == '\n' {
|
||||
push_blank_line_comment(rdr, comments);
|
||||
}
|
||||
rdr.bump();
|
||||
}
|
||||
}
|
||||
|
||||
fn read_line_comments(rdr: reader, code_to_the_left: bool) -> cmnt {
|
||||
#debug(">>> line comments");
|
||||
let p = rdr.chpos;
|
||||
let mut lines: [str] = [];
|
||||
while rdr.curr == '/' && rdr.next() == '/' {
|
||||
let line = read_one_line_comment(rdr);
|
||||
log(debug, line);
|
||||
lines += [line];
|
||||
consume_non_eol_whitespace(rdr);
|
||||
}
|
||||
#debug("<<< line comments");
|
||||
ret {style: if code_to_the_left { trailing } else { isolated },
|
||||
lines: lines,
|
||||
pos: p};
|
||||
}
|
||||
|
||||
fn all_whitespace(s: str, begin: uint, end: uint) -> bool {
|
||||
let mut i: uint = begin;
|
||||
while i != end { if !is_whitespace(s[i] as char) { ret false; } i += 1u; }
|
||||
ret true;
|
||||
}
|
||||
|
||||
fn trim_whitespace_prefix_and_push_line(&lines: [str],
|
||||
s: str, col: uint) unsafe {
|
||||
let mut s1;
|
||||
if all_whitespace(s, 0u, col) {
|
||||
if col < str::len(s) {
|
||||
s1 = str::slice(s, col, str::len(s));
|
||||
} else { s1 = ""; }
|
||||
} else { s1 = s; }
|
||||
log(debug, "pushing line: " + s1);
|
||||
lines += [s1];
|
||||
}
|
||||
|
||||
fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt {
|
||||
#debug(">>> block comment");
|
||||
let p = rdr.chpos;
|
||||
let mut lines: [str] = [];
|
||||
let mut col: uint = rdr.col;
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
let mut curr_line = "/*";
|
||||
let mut level: int = 1;
|
||||
while level > 0 {
|
||||
#debug("=== block comment level %d", level);
|
||||
if rdr.is_eof() { rdr.fatal("unterminated block comment"); }
|
||||
if rdr.curr == '\n' {
|
||||
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
|
||||
curr_line = "";
|
||||
rdr.bump();
|
||||
} else {
|
||||
str::push_char(curr_line, rdr.curr);
|
||||
if rdr.curr == '/' && rdr.next() == '*' {
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
curr_line += "*";
|
||||
level += 1;
|
||||
} else {
|
||||
if rdr.curr == '*' && rdr.next() == '/' {
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
curr_line += "/";
|
||||
level -= 1;
|
||||
} else { rdr.bump(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
if str::len(curr_line) != 0u {
|
||||
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
|
||||
}
|
||||
let mut style = if code_to_the_left { trailing } else { isolated };
|
||||
consume_non_eol_whitespace(rdr);
|
||||
if !rdr.is_eof() && rdr.curr != '\n' && vec::len(lines) == 1u {
|
||||
style = mixed;
|
||||
}
|
||||
#debug("<<< block comment");
|
||||
ret {style: style, lines: lines, pos: p};
|
||||
}
|
||||
|
||||
fn peeking_at_comment(rdr: reader) -> bool {
|
||||
ret rdr.curr == '/' && rdr.next() == '/' ||
|
||||
rdr.curr == '/' && rdr.next() == '*';
|
||||
}
|
||||
|
||||
fn consume_comment(rdr: reader, code_to_the_left: bool, &comments: [cmnt]) {
|
||||
#debug(">>> consume comment");
|
||||
if rdr.curr == '/' && rdr.next() == '/' {
|
||||
comments += [read_line_comments(rdr, code_to_the_left)];
|
||||
} else if rdr.curr == '/' && rdr.next() == '*' {
|
||||
comments += [read_block_comment(rdr, code_to_the_left)];
|
||||
} else { fail; }
|
||||
#debug("<<< consume comment");
|
||||
}
|
||||
|
||||
fn is_lit(t: token::token) -> bool {
|
||||
ret alt t {
|
||||
token::LIT_INT(_, _) { true }
|
||||
token::LIT_UINT(_, _) { true }
|
||||
token::LIT_FLOAT(_, _) { true }
|
||||
token::LIT_STR(_) { true }
|
||||
token::LIT_BOOL(_) { true }
|
||||
_ { false }
|
||||
}
|
||||
}
|
||||
|
||||
type lit = {lit: str, pos: uint};
|
||||
|
||||
fn gather_comments_and_literals(cm: codemap::codemap,
|
||||
span_diagnostic: diagnostic::span_handler,
|
||||
path: str,
|
||||
srdr: io::reader) ->
|
||||
{cmnts: [cmnt], lits: [lit]} {
|
||||
let src = @str::from_bytes(srdr.read_whole_stream());
|
||||
let itr = @interner::mk::<str>(str::hash, str::eq);
|
||||
let rdr = new_reader(cm, span_diagnostic,
|
||||
codemap::new_filemap(path, src, 0u, 0u), itr);
|
||||
let mut comments: [cmnt] = [];
|
||||
let mut literals: [lit] = [];
|
||||
let mut first_read: bool = true;
|
||||
while !rdr.is_eof() {
|
||||
loop {
|
||||
let mut code_to_the_left = !first_read;
|
||||
consume_non_eol_whitespace(rdr);
|
||||
if rdr.curr == '\n' {
|
||||
code_to_the_left = false;
|
||||
consume_whitespace_counting_blank_lines(rdr, comments);
|
||||
}
|
||||
while peeking_at_comment(rdr) {
|
||||
consume_comment(rdr, code_to_the_left, comments);
|
||||
consume_whitespace_counting_blank_lines(rdr, comments);
|
||||
}
|
||||
break;
|
||||
}
|
||||
let tok = next_token(rdr);
|
||||
if is_lit(tok.tok) {
|
||||
let s = rdr.get_str_from(tok.bpos);
|
||||
literals += [{lit: s, pos: tok.chpos}];
|
||||
log(debug, "tok lit: " + s);
|
||||
} else {
|
||||
log(debug, "tok: " + token::to_str(rdr, tok.tok));
|
||||
}
|
||||
first_read = false;
|
||||
}
|
||||
ret {cmnts: comments, lits: literals};
|
||||
}
|
|
@ -1,4 +1,3 @@
|
|||
import io::reader_util;
|
||||
import util::interner;
|
||||
import util::interner::intern;
|
||||
import diagnostic;
|
||||
|
@ -541,201 +540,10 @@ fn next_token_inner(rdr: reader) -> token::token {
|
|||
}
|
||||
}
|
||||
|
||||
enum cmnt_style {
|
||||
isolated, // No code on either side of each line of the comment
|
||||
trailing, // Code exists to the left of the comment
|
||||
mixed, // Code before /* foo */ and after the comment
|
||||
blank_line, // Just a manual blank line "\n\n", for layout
|
||||
}
|
||||
|
||||
type cmnt = {style: cmnt_style, lines: [str], pos: uint};
|
||||
|
||||
fn read_to_eol(rdr: reader) -> str {
|
||||
let mut val = "";
|
||||
while rdr.curr != '\n' && !rdr.is_eof() {
|
||||
str::push_char(val, rdr.curr);
|
||||
rdr.bump();
|
||||
}
|
||||
if rdr.curr == '\n' { rdr.bump(); }
|
||||
ret val;
|
||||
}
|
||||
|
||||
fn read_one_line_comment(rdr: reader) -> str {
|
||||
let val = read_to_eol(rdr);
|
||||
assert (val[0] == '/' as u8 && val[1] == '/' as u8);
|
||||
ret val;
|
||||
}
|
||||
|
||||
fn consume_whitespace(rdr: reader) {
|
||||
while is_whitespace(rdr.curr) && !rdr.is_eof() { rdr.bump(); }
|
||||
}
|
||||
|
||||
fn consume_non_eol_whitespace(rdr: reader) {
|
||||
while is_whitespace(rdr.curr) && rdr.curr != '\n' && !rdr.is_eof() {
|
||||
rdr.bump();
|
||||
}
|
||||
}
|
||||
|
||||
fn push_blank_line_comment(rdr: reader, &comments: [cmnt]) {
|
||||
#debug(">>> blank-line comment");
|
||||
let v: [str] = [];
|
||||
comments += [{style: blank_line, lines: v, pos: rdr.chpos}];
|
||||
}
|
||||
|
||||
fn consume_whitespace_counting_blank_lines(rdr: reader, &comments: [cmnt]) {
|
||||
while is_whitespace(rdr.curr) && !rdr.is_eof() {
|
||||
if rdr.col == 0u && rdr.curr == '\n' {
|
||||
push_blank_line_comment(rdr, comments);
|
||||
}
|
||||
rdr.bump();
|
||||
}
|
||||
}
|
||||
|
||||
fn read_line_comments(rdr: reader, code_to_the_left: bool) -> cmnt {
|
||||
#debug(">>> line comments");
|
||||
let p = rdr.chpos;
|
||||
let mut lines: [str] = [];
|
||||
while rdr.curr == '/' && rdr.next() == '/' {
|
||||
let line = read_one_line_comment(rdr);
|
||||
log(debug, line);
|
||||
lines += [line];
|
||||
consume_non_eol_whitespace(rdr);
|
||||
}
|
||||
#debug("<<< line comments");
|
||||
ret {style: if code_to_the_left { trailing } else { isolated },
|
||||
lines: lines,
|
||||
pos: p};
|
||||
}
|
||||
|
||||
fn all_whitespace(s: str, begin: uint, end: uint) -> bool {
|
||||
let mut i: uint = begin;
|
||||
while i != end { if !is_whitespace(s[i] as char) { ret false; } i += 1u; }
|
||||
ret true;
|
||||
}
|
||||
|
||||
fn trim_whitespace_prefix_and_push_line(&lines: [str],
|
||||
s: str, col: uint) unsafe {
|
||||
let mut s1;
|
||||
if all_whitespace(s, 0u, col) {
|
||||
if col < str::len(s) {
|
||||
s1 = str::slice(s, col, str::len(s));
|
||||
} else { s1 = ""; }
|
||||
} else { s1 = s; }
|
||||
log(debug, "pushing line: " + s1);
|
||||
lines += [s1];
|
||||
}
|
||||
|
||||
fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt {
|
||||
#debug(">>> block comment");
|
||||
let p = rdr.chpos;
|
||||
let mut lines: [str] = [];
|
||||
let mut col: uint = rdr.col;
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
let mut curr_line = "/*";
|
||||
let mut level: int = 1;
|
||||
while level > 0 {
|
||||
#debug("=== block comment level %d", level);
|
||||
if rdr.is_eof() { rdr.fatal("unterminated block comment"); }
|
||||
if rdr.curr == '\n' {
|
||||
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
|
||||
curr_line = "";
|
||||
rdr.bump();
|
||||
} else {
|
||||
str::push_char(curr_line, rdr.curr);
|
||||
if rdr.curr == '/' && rdr.next() == '*' {
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
curr_line += "*";
|
||||
level += 1;
|
||||
} else {
|
||||
if rdr.curr == '*' && rdr.next() == '/' {
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
curr_line += "/";
|
||||
level -= 1;
|
||||
} else { rdr.bump(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
if str::len(curr_line) != 0u {
|
||||
trim_whitespace_prefix_and_push_line(lines, curr_line, col);
|
||||
}
|
||||
let mut style = if code_to_the_left { trailing } else { isolated };
|
||||
consume_non_eol_whitespace(rdr);
|
||||
if !rdr.is_eof() && rdr.curr != '\n' && vec::len(lines) == 1u {
|
||||
style = mixed;
|
||||
}
|
||||
#debug("<<< block comment");
|
||||
ret {style: style, lines: lines, pos: p};
|
||||
}
|
||||
|
||||
fn peeking_at_comment(rdr: reader) -> bool {
|
||||
ret rdr.curr == '/' && rdr.next() == '/' ||
|
||||
rdr.curr == '/' && rdr.next() == '*';
|
||||
}
|
||||
|
||||
fn consume_comment(rdr: reader, code_to_the_left: bool, &comments: [cmnt]) {
|
||||
#debug(">>> consume comment");
|
||||
if rdr.curr == '/' && rdr.next() == '/' {
|
||||
comments += [read_line_comments(rdr, code_to_the_left)];
|
||||
} else if rdr.curr == '/' && rdr.next() == '*' {
|
||||
comments += [read_block_comment(rdr, code_to_the_left)];
|
||||
} else { fail; }
|
||||
#debug("<<< consume comment");
|
||||
}
|
||||
|
||||
fn is_lit(t: token::token) -> bool {
|
||||
ret alt t {
|
||||
token::LIT_INT(_, _) { true }
|
||||
token::LIT_UINT(_, _) { true }
|
||||
token::LIT_FLOAT(_, _) { true }
|
||||
token::LIT_STR(_) { true }
|
||||
token::LIT_BOOL(_) { true }
|
||||
_ { false }
|
||||
}
|
||||
}
|
||||
|
||||
type lit = {lit: str, pos: uint};
|
||||
|
||||
fn gather_comments_and_literals(cm: codemap::codemap,
|
||||
span_diagnostic: diagnostic::span_handler,
|
||||
path: str,
|
||||
srdr: io::reader) ->
|
||||
{cmnts: [cmnt], lits: [lit]} {
|
||||
let src = @str::from_bytes(srdr.read_whole_stream());
|
||||
let itr = @interner::mk::<str>(str::hash, str::eq);
|
||||
let rdr = new_reader(cm, span_diagnostic,
|
||||
codemap::new_filemap(path, src, 0u, 0u), itr);
|
||||
let mut comments: [cmnt] = [];
|
||||
let mut literals: [lit] = [];
|
||||
let mut first_read: bool = true;
|
||||
while !rdr.is_eof() {
|
||||
loop {
|
||||
let mut code_to_the_left = !first_read;
|
||||
consume_non_eol_whitespace(rdr);
|
||||
if rdr.curr == '\n' {
|
||||
code_to_the_left = false;
|
||||
consume_whitespace_counting_blank_lines(rdr, comments);
|
||||
}
|
||||
while peeking_at_comment(rdr) {
|
||||
consume_comment(rdr, code_to_the_left, comments);
|
||||
consume_whitespace_counting_blank_lines(rdr, comments);
|
||||
}
|
||||
break;
|
||||
}
|
||||
let tok = next_token(rdr);
|
||||
if is_lit(tok.tok) {
|
||||
let s = rdr.get_str_from(tok.bpos);
|
||||
literals += [{lit: s, pos: tok.chpos}];
|
||||
log(debug, "tok lit: " + s);
|
||||
} else {
|
||||
log(debug, "tok: " + token::to_str(rdr, tok.tok));
|
||||
}
|
||||
first_read = false;
|
||||
}
|
||||
ret {cmnts: comments, lits: literals};
|
||||
}
|
||||
|
||||
//
|
||||
// Local Variables:
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import parse::comments;
|
||||
import parse::lexer;
|
||||
import codemap::codemap;
|
||||
import pp::{break_offset, word, printer,
|
||||
|
@ -22,8 +23,8 @@ fn no_ann() -> pp_ann {
|
|||
type ps =
|
||||
@{s: pp::printer,
|
||||
cm: option<codemap>,
|
||||
comments: option<[lexer::cmnt]>,
|
||||
literals: option<[lexer::lit]>,
|
||||
comments: option<[comments::cmnt]>,
|
||||
literals: option<[comments::lit]>,
|
||||
mut cur_cmnt: uint,
|
||||
mut cur_lit: uint,
|
||||
mut boxes: [pp::breaks],
|
||||
|
@ -37,8 +38,8 @@ fn rust_printer(writer: io::writer) -> ps {
|
|||
let boxes: [pp::breaks] = [];
|
||||
ret @{s: pp::mk_printer(writer, default_columns),
|
||||
cm: none::<codemap>,
|
||||
comments: none::<[lexer::cmnt]>,
|
||||
literals: none::<[lexer::lit]>,
|
||||
comments: none::<[comments::cmnt]>,
|
||||
literals: none::<[comments::lit]>,
|
||||
mut cur_cmnt: 0u,
|
||||
mut cur_lit: 0u,
|
||||
mut boxes: boxes,
|
||||
|
@ -57,8 +58,8 @@ fn print_crate(cm: codemap, span_diagnostic: diagnostic::span_handler,
|
|||
crate: @ast::crate, filename: str, in: io::reader,
|
||||
out: io::writer, ann: pp_ann) {
|
||||
let boxes: [pp::breaks] = [];
|
||||
let r = lexer::gather_comments_and_literals(cm, span_diagnostic, filename,
|
||||
in);
|
||||
let r = comments::gather_comments_and_literals(cm, span_diagnostic,
|
||||
filename, in);
|
||||
let s =
|
||||
@{s: pp::mk_printer(out, default_columns),
|
||||
cm: some(cm),
|
||||
|
@ -1570,7 +1571,7 @@ fn maybe_print_trailing_comment(s: ps, span: codemap::span,
|
|||
alt s.cm { some(ccm) { cm = ccm; } _ { ret; } }
|
||||
alt next_comment(s) {
|
||||
some(cmnt) {
|
||||
if cmnt.style != lexer::trailing { ret; }
|
||||
if cmnt.style != comments::trailing { ret; }
|
||||
let span_line = codemap::lookup_char_pos(cm, span.hi);
|
||||
let comment_line = codemap::lookup_char_pos(cm, cmnt.pos);
|
||||
let mut next = cmnt.pos + 1u;
|
||||
|
@ -1645,7 +1646,7 @@ fn print_literal(s: ps, &&lit: @ast::lit) {
|
|||
|
||||
fn lit_to_str(l: @ast::lit) -> str { be to_str(l, print_literal); }
|
||||
|
||||
fn next_lit(s: ps, pos: uint) -> option<lexer::lit> {
|
||||
fn next_lit(s: ps, pos: uint) -> option<comments::lit> {
|
||||
alt s.literals {
|
||||
some(lits) {
|
||||
while s.cur_lit < vec::len(lits) {
|
||||
|
@ -1674,15 +1675,15 @@ fn maybe_print_comment(s: ps, pos: uint) {
|
|||
}
|
||||
}
|
||||
|
||||
fn print_comment(s: ps, cmnt: lexer::cmnt) {
|
||||
fn print_comment(s: ps, cmnt: comments::cmnt) {
|
||||
alt cmnt.style {
|
||||
lexer::mixed {
|
||||
comments::mixed {
|
||||
assert (vec::len(cmnt.lines) == 1u);
|
||||
zerobreak(s.s);
|
||||
word(s.s, cmnt.lines[0]);
|
||||
zerobreak(s.s);
|
||||
}
|
||||
lexer::isolated {
|
||||
comments::isolated {
|
||||
pprust::hardbreak_if_not_bol(s);
|
||||
for cmnt.lines.each {|line|
|
||||
// Don't print empty lines because they will end up as trailing
|
||||
|
@ -1691,7 +1692,7 @@ fn print_comment(s: ps, cmnt: lexer::cmnt) {
|
|||
hardbreak(s.s);
|
||||
}
|
||||
}
|
||||
lexer::trailing {
|
||||
comments::trailing {
|
||||
word(s.s, " ");
|
||||
if vec::len(cmnt.lines) == 1u {
|
||||
word(s.s, cmnt.lines[0]);
|
||||
|
@ -1705,7 +1706,7 @@ fn print_comment(s: ps, cmnt: lexer::cmnt) {
|
|||
end(s);
|
||||
}
|
||||
}
|
||||
lexer::blank_line {
|
||||
comments::blank_line {
|
||||
// We need to do at least one, possibly two hardbreaks.
|
||||
let is_semi =
|
||||
alt s.s.last_token() {
|
||||
|
@ -1754,14 +1755,14 @@ fn to_str<T>(t: T, f: fn@(ps, T)) -> str {
|
|||
io::mem_buffer_str(buffer)
|
||||
}
|
||||
|
||||
fn next_comment(s: ps) -> option<lexer::cmnt> {
|
||||
fn next_comment(s: ps) -> option<comments::cmnt> {
|
||||
alt s.comments {
|
||||
some(cmnts) {
|
||||
if s.cur_cmnt < vec::len(cmnts) {
|
||||
ret some(cmnts[s.cur_cmnt]);
|
||||
} else { ret none::<lexer::cmnt>; }
|
||||
} else { ret none::<comments::cmnt>; }
|
||||
}
|
||||
_ { ret none::<lexer::cmnt>; }
|
||||
_ { ret none::<comments::cmnt>; }
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ mod parse {
|
|||
mod lexer;
|
||||
mod parser;
|
||||
mod token;
|
||||
mod comments;
|
||||
}
|
||||
|
||||
mod print {
|
||||
|
|
Loading…
Reference in a new issue