Merge pull request #756 from cassiersg/missed-comments

Detect (and recover) when comments disappear
2016-01-11 01:16:52 +01:00 · 2016-01-11 01:16:52 +01:00 · b236819f72
commit b236819f72
parent 0badc128f7 b117d7b2b8
5 changed files with 285 additions and 43 deletions
--- a/src/comment.rs
+++ b/src/comment.rs
@ -8,13 +8,17 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-// Format comments.
+// Formatting and tools for comments.
-use std::iter;
+use std::{self, iter};
 use syntax::codemap::Span;
 use Indent;
 use config::Config;
 use rewrite::RewriteContext;
 use string::{StringFormat, rewrite_string};
 use utils::wrap_str;
 pub fn rewrite_comment(orig: &str,
                       block_style: bool,
@ -150,7 +154,7 @@ impl FindUncommented for str {
                }
                Some(c) => {
                    match kind {
-                        CodeCharKind::Normal if b == c => {}
+                        FullCodeCharKind::Normal if b == c => {}
                        _ => {
                            needle_iter = pat.chars();
                        }
@ -174,7 +178,7 @@ impl FindUncommented for str {
 pub fn find_comment_end(s: &str) -> Option<usize> {
    let mut iter = CharClasses::new(s.char_indices());
    for (kind, (i, _c)) in &mut iter {
-        if kind == CodeCharKind::Normal {
+        if kind == FullCodeCharKind::Normal {
            return Some(i);
        }
    }
@ -189,7 +193,7 @@ pub fn find_comment_end(s: &str) -> Option<usize> {
 /// Returns true if text contains any comment.
 pub fn contains_comment(text: &str) -> bool {
-    CharClasses::new(text.chars()).any(|(kind, _)| kind == CodeCharKind::Comment)
+    CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
 }
 struct CharClasses<T>
@ -234,12 +238,47 @@ enum CharClassesStatus {
    LineComment,
 }
 /// Distinguish between functionnal part of code and comments
 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 pub enum CodeCharKind {
    Normal,
    Comment,
 }
 /// Distinguish between functionnal part of code and comments,
 /// describing opening and closing of comments for ease when chunking
 /// code from tagged characters
 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 enum FullCodeCharKind {
    Normal,
    /// The first character of a comment, there is only one for a comment (always '/')
    StartComment,
    /// Any character inside a comment including the second character of comment
    /// marks ("//", "/*")
    InComment,
    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
    EndComment,
 }
 impl FullCodeCharKind {
    fn is_comment(&self) -> bool {
        match *self {
            FullCodeCharKind::Normal => false,
            FullCodeCharKind::StartComment |
            FullCodeCharKind::InComment |
            FullCodeCharKind::EndComment => true,
        }
    }
    fn to_codecharkind(&self) -> CodeCharKind {
        if self.is_comment() {
            CodeCharKind::Comment
        } else {
            CodeCharKind::Normal
        }
    }
 }
 impl<T> CharClasses<T>
    where T: Iterator,
          T::Item: RichChar
@ -256,9 +295,9 @@ impl<T> Iterator for CharClasses<T>
    where T: Iterator,
          T::Item: RichChar
 {
-    type Item = (CodeCharKind, T::Item);
+    type Item = (FullCodeCharKind, T::Item);
-    fn next(&mut self) -> Option<(CodeCharKind, T::Item)> {
+    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
        let item = try_opt!(self.base.next());
        let chr = item.get_char();
        self.status = match self.status {
@ -286,11 +325,11 @@ impl<T> Iterator for CharClasses<T>
                        match self.base.peek() {
                            Some(next) if next.get_char() == '*' => {
                                self.status = CharClassesStatus::BlockCommentOpening(1);
-                                return Some((CodeCharKind::Comment, item));
+                                return Some((FullCodeCharKind::StartComment, item));
                            }
                            Some(next) if next.get_char() == '/' => {
                                self.status = CharClassesStatus::LineComment;
-                                return Some((CodeCharKind::Comment, item));
+                                return Some((FullCodeCharKind::StartComment, item));
                            }
                            _ => CharClassesStatus::Normal,
                        }
@ -299,12 +338,7 @@ impl<T> Iterator for CharClasses<T>
                }
            }
            CharClassesStatus::BlockComment(deepness) => {
-                if deepness == 0 {
+                assert!(deepness != 0);
                    // This is the closing '/'
                    assert_eq!(chr, '/');
                    self.status = CharClassesStatus::Normal;
                    return Some((CodeCharKind::Comment, item));
                }
                self.status = match self.base.peek() {
                    Some(next) if next.get_char() == '/' && chr == '*' => {
                        CharClassesStatus::BlockCommentClosing(deepness - 1)
@ -314,34 +348,92 @@ impl<T> Iterator for CharClasses<T>
                    }
                    _ => CharClassesStatus::BlockComment(deepness),
                };
-                return Some((CodeCharKind::Comment, item));
+                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentOpening(deepness) => {
                assert_eq!(chr, '*');
                self.status = CharClassesStatus::BlockComment(deepness);
-                return Some((CodeCharKind::Comment, item));
+                return Some((FullCodeCharKind::InComment, item));
            }
            CharClassesStatus::BlockCommentClosing(deepness) => {
                assert_eq!(chr, '/');
-                self.status = if deepness == 0 {
+                if deepness == 0 {
-                    CharClassesStatus::Normal
+                    self.status = CharClassesStatus::Normal;
                    return Some((FullCodeCharKind::EndComment, item));
                } else {
-                    CharClassesStatus::BlockComment(deepness)
+                    self.status = CharClassesStatus::BlockComment(deepness);
-                };
+                    return Some((FullCodeCharKind::InComment, item));
-                return Some((CodeCharKind::Comment, item));
+                }
            }
            CharClassesStatus::LineComment => {
-                self.status = match chr {
+                match chr {
-                    '\n' => CharClassesStatus::Normal,
+                    '\n' => {
-                    _ => CharClassesStatus::LineComment,
+                        self.status = CharClassesStatus::Normal;
-                };
+                        return Some((FullCodeCharKind::EndComment, item));
-                return Some((CodeCharKind::Comment, item));
+                    }
                    _ => {
                        self.status = CharClassesStatus::LineComment;
                        return Some((FullCodeCharKind::InComment, item));
                    }
                }
            }
        };
-        Some((CodeCharKind::Normal, item))
+        Some((FullCodeCharKind::Normal, item))
    }
 }
 /// Iterator over functional and commented parts of a string. Any part of a string is either
 /// functional code, either *one* block comment, either *one* line comment. Whitespace between
 /// comments is functional code. Line comments contain their ending newlines.
 struct UngroupedCommentCodeSlices<'a> {
    slice: &'a str,
    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
 }
 impl<'a> UngroupedCommentCodeSlices<'a> {
    fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
        UngroupedCommentCodeSlices {
            slice: code,
            iter: CharClasses::new(code.char_indices()).peekable(),
        }
    }
 }
 impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
    type Item = (CodeCharKind, usize, &'a str);
    fn next(&mut self) -> Option<Self::Item> {
        let (kind, (start_idx, _)) = try_opt!(self.iter.next());
        match kind {
            FullCodeCharKind::Normal => {
                // Consume all the Normal code
                while let Some(&(FullCodeCharKind::Normal, (_, _))) = self.iter.peek() {
                    let _ = self.iter.next();
                }
            }
            FullCodeCharKind::StartComment => {
                // Consume the whole comment
                while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
            }
            _ => panic!(),
        }
        let slice = match self.iter.peek() {
            Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
            None => &self.slice[start_idx..],
        };
        Some((if kind.is_comment() {
            CodeCharKind::Comment
        } else {
            CodeCharKind::Normal
        },
              start_idx,
              slice))
    }
 }
 /// Iterator over an alternating sequence of functional and commented parts of
 /// a string. The first item is always a, possibly zero length, subslice of
 /// functional text. Line style comments contain their ending newlines.
@ -383,7 +475,7 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
                first_whitespace = Some(i);
            }
-            if kind == self.last_slice_kind && !is_comment_connector {
+            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
                let last_index = match first_whitespace {
                    Some(j) => j,
                    None => i,
@ -419,20 +511,124 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
    }
 }
 /// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
 /// (if it fits in the width/offset, else return None), else return `new`
 pub fn recover_comment_removed(new: String,
                               span: Span,
                               context: &RewriteContext,
                               width: usize,
                               offset: Indent)
                               -> Option<String> {
    let snippet = context.snippet(span);
    if changed_comment_content(&snippet, &new) {
        // We missed some comments
        // Keep previous formatting if it satisfies the constrains
        return wrap_str(snippet, context.config.max_width, width, offset);
    } else {
        Some(new)
    }
 }
 /// Return true if the two strings of code have the same payload of comments.
 /// The payload of comments is everything in the string except:
 /// 	- actual code (not comments)
 /// 	- comment start/end marks
 ///		- whitespace
 ///		- '*' at the beginning of lines in block comments
 fn changed_comment_content(orig: &str, new: &str) -> bool {
    // Cannot write this as a fn since we cannot return types containing closures
    let code_comment_content = |code| {
        let slices = UngroupedCommentCodeSlices::new(code);
        slices.filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
              .flat_map(|(_, _, s)| CommentReducer::new(s))
    };
    let res = code_comment_content(orig).ne(code_comment_content(new));
    debug!("comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
           res,
           orig,
           new,
           code_comment_content(orig).collect::<String>(),
           code_comment_content(new).collect::<String>());
    res
 }
 /// Iterator over the 'payload' characters of a comment.
 /// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
 /// The comment must be one comment, ie not more than one start mark (no multiple line comments,
 /// for example).
 struct CommentReducer<'a> {
    is_block: bool,
    at_start_line: bool,
    iter: std::str::Chars<'a>,
 }
 impl<'a> CommentReducer<'a> {
    fn new(comment: &'a str) -> CommentReducer<'a> {
        let is_block = comment.starts_with("/*");
        let comment = remove_comment_header(comment);
        CommentReducer {
            is_block: is_block,
            at_start_line: false, // There are no supplementary '*' on the first line
            iter: comment.chars(),
        }
    }
 }
 impl<'a> Iterator for CommentReducer<'a> {
    type Item = char;
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let mut c = try_opt!(self.iter.next());
            if self.is_block && self.at_start_line {
                while c.is_whitespace() {
                    c = try_opt!(self.iter.next());
                }
                // Ignore leading '*'
                if c == '*' {
                    c = try_opt!(self.iter.next());
                }
            } else {
                if c == '\n' {
                    self.at_start_line = true;
                }
            }
            if !c.is_whitespace() {
                return Some(c);
            }
        }
    }
 }
 fn remove_comment_header(comment: &str) -> &str {
    if comment.starts_with("///") || comment.starts_with("//!") {
        &comment[3..]
    } else if comment.starts_with("//") {
        &comment[2..]
    } else if comment.starts_with("/**") || comment.starts_with("/*!") {
        &comment[3..comment.len() - 2]
    } else {
        assert!(comment.starts_with("/*"),
                format!("string '{}' is not a comment", comment));
        &comment[2..comment.len() - 2]
    }
 }
 #[cfg(test)]
 mod test {
-    use super::{CharClasses, CodeCharKind, contains_comment, rewrite_comment, FindUncommented,
+    use super::{CharClasses, CodeCharKind, FullCodeCharKind, contains_comment, rewrite_comment,
-                CommentCodeSlices};
+                FindUncommented, CommentCodeSlices};
    use Indent;
    #[test]
    fn char_classes() {
        let mut iter = CharClasses::new("//\n\n".chars());
-        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Comment, '\n'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Normal, '\n'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
        assert_eq!(None, iter.next());
    }
@ -507,8 +703,8 @@ mod test {
        CharClasses::new(text.chars())
            .filter_map(|(s, c)| {
                match s {
-                    CodeCharKind::Normal => Some(c),
+                    FullCodeCharKind::Normal => Some(c),
-                    CodeCharKind::Comment => None,
+                    _ => None,
                }
            })
            .collect()
--- a/src/expr.rs
+++ b/src/expr.rs
@ -23,7 +23,7 @@ use utils::{span_after, extra_offset, last_line_width, wrap_str, binary_search,
            semicolon_for_stmt};
 use visitor::FmtVisitor;
 use config::{Config, StructLitStyle, MultilineStyle};
-use comment::{FindUncommented, rewrite_comment, contains_comment};
+use comment::{FindUncommented, rewrite_comment, contains_comment, recover_comment_removed};
 use types::rewrite_path;
 use items::{span_lo_for_arg, span_hi_for_arg};
 use chains::rewrite_chain;
@ -35,7 +35,7 @@ use syntax::visit::Visitor;
 impl Rewrite for ast::Expr {
    fn rewrite(&self, context: &RewriteContext, width: usize, offset: Indent) -> Option<String> {
-        match self.node {
+        let result = match self.node {
            ast::Expr_::ExprVec(ref expr_vec) => {
                rewrite_array(expr_vec.iter().map(|e| &**e),
                              mk_sp(span_after(self.span, "[", context.codemap), self.span.hi),
@ -207,7 +207,8 @@ impl Rewrite for ast::Expr {
                         width,
                         offset)
            }
-        }
+        };
        result.and_then(|res| recover_comment_removed(res, self.span, context, width, offset))
    }
 }
@ -478,7 +479,7 @@ impl Rewrite for ast::Block {
 impl Rewrite for ast::Stmt {
    fn rewrite(&self, context: &RewriteContext, _width: usize, offset: Indent) -> Option<String> {
-        match self.node {
+        let result = match self.node {
            ast::Stmt_::StmtDecl(ref decl, _) => {
                if let ast::Decl_::DeclLocal(ref local) = decl.node {
                    local.rewrite(context, context.config.max_width, offset)
@ -499,7 +500,8 @@ impl Rewrite for ast::Stmt {
                  .map(|s| s + suffix)
            }
            ast::Stmt_::StmtMac(..) => None,
-        }
+        };
        result.and_then(|res| recover_comment_removed(res, self.span, context, _width, offset))
    }
 }
--- a/tests/source/paths.rs
+++ b/tests/source/paths.rs
@ -17,7 +17,8 @@ fn main() {
    < *mut JSObject >:: relocate(entry);
-    let x: Foo/*::*/<A   >;
+    let x: Foo<A   >;
    let x: Foo/*::*/<A>;
 }
 fn op(foo: Bar, key : &[u8], upd : Fn(Option<&memcache::Item> , Baz  ) -> Result) -> MapResult {}
--- a/tests/target/comment-not-disappear.rs
+++ b/tests/target/comment-not-disappear.rs
@ -0,0 +1,42 @@
 // All the comments here should not disappear.
 fn a() {
    match x {
        X |
        // A comment
        Y => {}
    };
 }
 fn b() {
    match x {
        X =>
            // A comment
            y
    }
 }
 fn c() {
    a() /* ... */;
 }
 fn foo() -> Vec<i32> {
    (0..11)
        .map(|x|
        // This comment disappears.
        if x % 2 == 0 { x } else { x * 2 })
        .collect()
 }
 fn d() {
    if true /* and ... */ {
        a();
    }
 }
 fn calc_page_len(prefix_len: usize, sofar: usize) -> usize {
    2 // page type and flags
    + 1 // stored depth
    + 2 // stored count
    + prefix_len + sofar // sum of size of all the actual items
 }
--- a/tests/target/paths.rs
+++ b/tests/target/paths.rs
@ -17,6 +17,7 @@ fn main() {
    <*mut JSObject>::relocate(entry);
    let x: Foo<A>;
    let x: Foo/*::*/<A>;
 }
 fn op(foo: Bar, key: &[u8], upd: Fn(Option<&memcache::Item>, Baz) -> Result) -> MapResult {}