From b6c2e82b71ebc6d596f6c7651b22dffa37c4e41b Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Mon, 6 Apr 2015 12:06:39 -0500 Subject: [PATCH 1/2] rustdoc: Add a custom callback for codespan to collapse whitespace Because the current style for `code` in rustdoc is to prewrap whitespace, code spans that are hard wrapped in the source documentation are prematurely wrapped when rendered in HTML. CommonMark 0.18 [[1]] specifies "interior spaces and line endings are collapsed into single spaces" for code spans, which would actually prevent this issue, but hoedown does not currently conform to the CommonMark spec. The added span-level callback attempts to adhere to how whitespace is handled as described by CommonMark, fixing the issue of early, unintentional wrapping of code spans in rendered HTML. [1]: http://spec.commonmark.org/0.18/ --- src/librustdoc/html/markdown.rs | 62 +++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index afc434eb2df..4b267982f3b 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -72,6 +72,9 @@ type blockcodefn = extern "C" fn(*mut hoedown_buffer, *const hoedown_buffer, type headerfn = extern "C" fn(*mut hoedown_buffer, *const hoedown_buffer, libc::c_int, *mut libc::c_void); +type codespanfn = extern "C" fn(*mut hoedown_buffer, *const hoedown_buffer, + *mut libc::c_void); + type linkfn = extern "C" fn (*mut hoedown_buffer, *const hoedown_buffer, *const hoedown_buffer, *const hoedown_buffer, *mut libc::c_void) -> libc::c_int; @@ -89,11 +92,12 @@ struct hoedown_renderer { blockhtml: Option, header: Option, - other_block_level_callbacks: [libc::size_t; 9], /* span level callbacks - NULL or return 0 prints the span verbatim */ - other_span_level_callbacks_1: [libc::size_t; 9], + autolink: libc::size_t, // unused + codespan: Option, + other_span_level_callbacks_1: [libc::size_t; 7], link: Option, 
other_span_level_callbacks_2: [libc::size_t; 5], // hoedown will add `math` callback here, but we use an old version of it. @@ -185,6 +189,30 @@ fn stripped_filtered_line<'a>(s: &'a str) -> Option<&'a str> { } } +/// Returns a new string with all consecutive whitespace collapsed into +/// single spaces. +/// +/// The input is assumed to be already trimmed. +fn collapse_whitespace(s: &str) -> String { + let mut buffer = String::with_capacity(s.len()); + let mut previous_char_is_whitespace = false; + + for c in s.chars() { + if c.is_whitespace() { + if !previous_char_is_whitespace { + buffer.push(' '); + } + + previous_char_is_whitespace = true; + } else { + buffer.push(c); + previous_char_is_whitespace = false; + } + } + + buffer +} + thread_local!(static USED_HEADER_MAP: RefCell<HashMap<String, usize>> = { RefCell::new(HashMap::new()) }); @@ -299,6 +327,20 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result { reset_headers(); + extern fn codespan(ob: *mut hoedown_buffer, text: *const hoedown_buffer, _: *mut libc::c_void) { + let content = if text.is_null() { + "".to_string() + } else { + let bytes = unsafe { (*text).as_bytes() }; + let s = str::from_utf8(bytes).unwrap(); + collapse_whitespace(s) + }; + + let content = format!("{}", Escape(&content)); + let element = CString::new(content).unwrap(); + unsafe { hoedown_buffer_puts(ob, element.as_ptr()); } + } + unsafe { let ob = hoedown_buffer_new(DEF_OUNIT); let renderer = hoedown_html_renderer_new(0, 0); @@ -310,6 +352,7 @@ pub fn render(w: &mut fmt::Formatter, s: &str, print_toc: bool) -> fmt::Result { = &mut opaque as *mut _ as *mut libc::c_void; (*renderer).blockcode = Some(block); (*renderer).header = Some(header); + (*renderer).codespan = Some(codespan); let document = hoedown_document_new(renderer, HOEDOWN_EXTENSIONS, 16); hoedown_document_render(document, ob, s.as_ptr(), @@ -523,7 +566,7 @@ pub fn plain_summary_line(md: &str) -> String { #[cfg(test)] mod tests { use super::{LangString, Markdown}; - use 
super::plain_summary_line; + use super::{collapse_whitespace, plain_summary_line}; #[test] fn test_lang_string_parse() { @@ -571,4 +614,17 @@ mod tests { t("# top header", "top header"); t("## header", "header"); } + + #[test] + fn test_collapse_whitespace() { + fn t(input: &str, expected: &str) { + let actual = collapse_whitespace(input); + assert_eq!(actual, expected); + } + + t("foo", "foo"); + t("foo bar", "foo bar"); + t("foo bar\nbaz", "foo bar baz"); + t("foo bar \n baz\t\tqux", "foo bar baz qux"); + } } From 46cc6e5fc31377c001a5a12c5079388253eecfbc Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Mon, 6 Apr 2015 13:56:39 -0500 Subject: [PATCH 2/2] rustdoc: Use iterators to collapse whitespace Thanks, @alexcrichton! --- src/librustdoc/html/markdown.rs | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index 4b267982f3b..49f6107869e 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -192,25 +192,11 @@ fn stripped_filtered_line<'a>(s: &'a str) -> Option<&'a str> { /// Returns a new string with all consecutive whitespace collapsed into /// single spaces. /// -/// The input is assumed to be already trimmed. +/// Any leading or trailing whitespace will be trimmed. 
fn collapse_whitespace(s: &str) -> String { - let mut buffer = String::with_capacity(s.len()); - let mut previous_char_is_whitespace = false; - - for c in s.chars() { - if c.is_whitespace() { - if !previous_char_is_whitespace { - buffer.push(' '); - } - - previous_char_is_whitespace = true; - } else { - buffer.push(c); - previous_char_is_whitespace = false; - } - } - - buffer + s.split(|c: char| c.is_whitespace()).filter(|s| { + !s.is_empty() + }).collect::<Vec<_>>().connect(" ") } thread_local!(static USED_HEADER_MAP: RefCell<HashMap<String, usize>> = { @@ -623,8 +609,9 @@ mod tests { } t("foo", "foo"); - t("foo bar", "foo bar"); - t("foo bar\nbaz", "foo bar baz"); - t("foo bar \n baz\t\tqux", "foo bar baz qux"); + t("foo bar baz", "foo bar baz"); + t(" foo bar", "foo bar"); + t("\tfoo bar\nbaz", "foo bar baz"); + t("foo bar \n baz\t\tqux\n", "foo bar baz qux"); } }