Auto merge of #80261 - GuillaumeGomez:attr-rework, r=jyn514

rustdoc DocFragment rework

Kind of a follow-up of #80119.

A few things are happening in this PR. I'm not sure about the impact on perf though so I'm very interested about that too (if the perf is worse, then we can just close this PR).

The idea here is mostly about reducing the memory usage by relying even more on `Symbol` instead of `String`. The only issue is that `DocFragment` has 3 modifications performed on it:
 1. Unindenting
 2. Collapsing similar comments into one
 3. "Beautifying" (weird JS-like comments handling).

To do so, I saved the information about unindent and the "collapse" is now on-demand (which is why I'm not sure the perf will be better, it has to be run multiple times...).

r? `@jyn514`
This commit is contained in:
bors 2021-01-03 06:29:42 +00:00
commit e821a6ef78
13 changed files with 145 additions and 164 deletions

View file

@ -122,7 +122,7 @@ impl Item {
/// Finds the `doc` attribute as a NameValue and returns the corresponding
/// value found.
crate fn doc_value(&self) -> Option<&str> {
crate fn doc_value(&self) -> Option<String> {
self.attrs.doc_value()
}
@ -469,11 +469,13 @@ crate struct DocFragment {
/// This allows distinguishing between the original documentation and a pub re-export.
/// If it is `None`, the item was not re-exported.
crate parent_module: Option<DefId>,
crate doc: String,
crate doc: Symbol,
crate kind: DocFragmentKind,
crate need_backline: bool,
crate indent: usize,
}
#[derive(Clone, PartialEq, Eq, Debug, Hash)]
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
crate enum DocFragmentKind {
/// A doc fragment created from a `///` or `//!` doc comment.
SugaredDoc,
@ -481,7 +483,33 @@ crate enum DocFragmentKind {
RawDoc,
/// A doc fragment created from a `#[doc(include="filename")]` attribute. Contains both the
/// given filename and the file contents.
Include { filename: String },
Include { filename: Symbol },
}
// The goal of this function is to apply the `DocFragment` transformations that are required when
// transforming into the final markdown. So the transformations in here are:
//
// * Applying the computed indent to each lines in each doc fragment (a `DocFragment` can contain
// multiple lines in case of `#[doc = ""]`).
// * Adding backlines between `DocFragment`s and adding an extra one if required (stored in the
// `need_backline` field).
fn add_doc_fragment(out: &mut String, frag: &DocFragment) {
let s = frag.doc.as_str();
let mut iter = s.lines().peekable();
while let Some(line) = iter.next() {
if line.chars().any(|c| !c.is_whitespace()) {
assert!(line.len() >= frag.indent);
out.push_str(&line[frag.indent..]);
} else {
out.push_str(line);
}
if iter.peek().is_some() {
out.push('\n');
}
}
if frag.need_backline {
out.push('\n');
}
}
impl<'a> FromIterator<&'a DocFragment> for String {
@ -489,11 +517,18 @@ impl<'a> FromIterator<&'a DocFragment> for String {
where
T: IntoIterator<Item = &'a DocFragment>,
{
let mut prev_kind: Option<DocFragmentKind> = None;
iter.into_iter().fold(String::new(), |mut acc, frag| {
if !acc.is_empty() {
if !acc.is_empty()
&& prev_kind
.take()
.map(|p| matches!(p, DocFragmentKind::Include { .. }) && p != frag.kind)
.unwrap_or(false)
{
acc.push('\n');
}
acc.push_str(&frag.doc);
add_doc_fragment(&mut acc, &frag);
prev_kind = Some(frag.kind);
acc
})
}
@ -565,7 +600,7 @@ impl Attributes {
/// Reads a `MetaItem` from within an attribute, looks for whether it is a
/// `#[doc(include="file")]`, and returns the filename and contents of the file as loaded from
/// its expansion.
crate fn extract_include(mi: &ast::MetaItem) -> Option<(String, String)> {
crate fn extract_include(mi: &ast::MetaItem) -> Option<(Symbol, Symbol)> {
mi.meta_item_list().and_then(|list| {
for meta in list {
if meta.has_name(sym::include) {
@ -573,17 +608,17 @@ impl Attributes {
// `#[doc(include(file="filename", contents="file contents")]` so we need to
// look for that instead
return meta.meta_item_list().and_then(|list| {
let mut filename: Option<String> = None;
let mut contents: Option<String> = None;
let mut filename: Option<Symbol> = None;
let mut contents: Option<Symbol> = None;
for it in list {
if it.has_name(sym::file) {
if let Some(name) = it.value_str() {
filename = Some(name.to_string());
filename = Some(name);
}
} else if it.has_name(sym::contents) {
if let Some(docs) = it.value_str() {
contents = Some(docs.to_string());
contents = Some(docs);
}
}
}
@ -622,15 +657,30 @@ impl Attributes {
attrs: &[ast::Attribute],
additional_attrs: Option<(&[ast::Attribute], DefId)>,
) -> Attributes {
let mut doc_strings = vec![];
let mut doc_strings: Vec<DocFragment> = vec![];
let mut sp = None;
let mut cfg = Cfg::True;
let mut doc_line = 0;
fn update_need_backline(doc_strings: &mut Vec<DocFragment>, frag: &DocFragment) {
if let Some(prev) = doc_strings.last_mut() {
if matches!(prev.kind, DocFragmentKind::Include { .. })
|| prev.kind != frag.kind
|| prev.parent_module != frag.parent_module
{
// add a newline for extra padding between segments
prev.need_backline = prev.kind == DocFragmentKind::SugaredDoc
|| prev.kind == DocFragmentKind::RawDoc
} else {
prev.need_backline = true;
}
}
}
let clean_attr = |(attr, parent_module): (&ast::Attribute, _)| {
if let Some(value) = attr.doc_str() {
trace!("got doc_str={:?}", value);
let value = beautify_doc_string(value).to_string();
let value = beautify_doc_string(value);
let kind = if attr.is_doc_comment() {
DocFragmentKind::SugaredDoc
} else {
@ -638,14 +688,20 @@ impl Attributes {
};
let line = doc_line;
doc_line += value.lines().count();
doc_strings.push(DocFragment {
doc_line += value.as_str().lines().count();
let frag = DocFragment {
line,
span: attr.span,
doc: value,
kind,
parent_module,
});
need_backline: false,
indent: 0,
};
update_need_backline(&mut doc_strings, &frag);
doc_strings.push(frag);
if sp.is_none() {
sp = Some(attr.span);
@ -663,14 +719,18 @@ impl Attributes {
} else if let Some((filename, contents)) = Attributes::extract_include(&mi)
{
let line = doc_line;
doc_line += contents.lines().count();
doc_strings.push(DocFragment {
doc_line += contents.as_str().lines().count();
let frag = DocFragment {
line,
span: attr.span,
doc: contents,
kind: DocFragmentKind::Include { filename },
parent_module,
});
need_backline: false,
indent: 0,
};
update_need_backline(&mut doc_strings, &frag);
doc_strings.push(frag);
}
}
}
@ -721,14 +781,41 @@ impl Attributes {
/// Finds the `doc` attribute as a NameValue and returns the corresponding
/// value found.
crate fn doc_value(&self) -> Option<&str> {
self.doc_strings.first().map(|s| s.doc.as_str())
crate fn doc_value(&self) -> Option<String> {
let mut iter = self.doc_strings.iter();
let ori = iter.next()?;
let mut out = String::new();
add_doc_fragment(&mut out, &ori);
while let Some(new_frag) = iter.next() {
if matches!(ori.kind, DocFragmentKind::Include { .. })
|| new_frag.kind != ori.kind
|| new_frag.parent_module != ori.parent_module
{
break;
}
add_doc_fragment(&mut out, &new_frag);
}
if out.is_empty() { None } else { Some(out) }
}
/// Return the doc-comments on this item, grouped by the module they came from.
///
/// The module can be different if this is a re-export with added documentation.
crate fn collapsed_doc_value_by_module_level(&self) -> FxHashMap<Option<DefId>, String> {
let mut ret = FxHashMap::default();
for new_frag in self.doc_strings.iter() {
let out = ret.entry(new_frag.parent_module).or_default();
add_doc_fragment(out, &new_frag);
}
ret
}
/// Finds all `doc` attributes as NameValues and returns their corresponding values, joined
/// with newlines.
crate fn collapsed_doc_value(&self) -> Option<String> {
if !self.doc_strings.is_empty() { Some(self.doc_strings.iter().collect()) } else { None }
if self.doc_strings.is_empty() { None } else { Some(self.doc_strings.iter().collect()) }
}
/// Gets links as a vector

View file

@ -525,7 +525,7 @@ crate fn run_global_ctxt(
let mut krate = tcx.sess.time("clean_crate", || clean::krate(&mut ctxt));
if let Some(ref m) = krate.module {
if let None | Some("") = m.doc_value() {
if m.doc_value().map(|d| d.is_empty()).unwrap_or(true) {
let help = "The following guide may be of use:\n\
https://doc.rust-lang.org/nightly/rustdoc/how-to-write-documentation.html";
tcx.struct_lint_node(
@ -623,6 +623,9 @@ crate fn run_global_ctxt(
ctxt.sess().abort_if_errors();
// The main crate doc comments are always collapsed.
krate.collapsed = true;
(krate, ctxt.renderinfo.into_inner(), ctxt.render_options)
}

View file

@ -987,7 +987,6 @@ impl<'a, 'hir, 'tcx> HirCollector<'a, 'hir, 'tcx> {
self.collector.names.push(name);
}
attrs.collapse_doc_comments();
attrs.unindent_doc_comments();
// The collapse-docs pass won't combine sugared/raw doc attributes, or included files with
// anything else, this will combine them for us.

View file

@ -316,7 +316,7 @@ impl DocFolder for Cache {
path: path.join("::"),
desc: item
.doc_value()
.map_or_else(|| String::new(), short_markdown_summary),
.map_or_else(String::new, |x| short_markdown_summary(&x.as_str())),
parent,
parent_idx: None,
search_type: get_index_search_type(&item),

View file

@ -78,7 +78,7 @@ crate fn build_index(krate: &clean::Crate, cache: &mut Cache) -> String {
ty: item.type_(),
name: item.name.unwrap().to_string(),
path: fqp[..fqp.len() - 1].join("::"),
desc: item.doc_value().map_or_else(|| String::new(), short_markdown_summary),
desc: item.doc_value().map_or_else(String::new, |s| short_markdown_summary(&s)),
parent: Some(did),
parent_idx: None,
search_type: get_index_search_type(&item),
@ -127,7 +127,7 @@ crate fn build_index(krate: &clean::Crate, cache: &mut Cache) -> String {
let crate_doc = krate
.module
.as_ref()
.map(|module| module.doc_value().map_or_else(|| String::new(), short_markdown_summary))
.map(|module| module.doc_value().map_or_else(String::new, |s| short_markdown_summary(&s)))
.unwrap_or_default();
#[derive(Serialize)]

View file

@ -30,7 +30,6 @@ crate mod cache;
#[cfg(test)]
mod tests;
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::cmp::Ordering;
use std::collections::{BTreeMap, VecDeque};
@ -198,12 +197,8 @@ impl SharedContext<'_> {
/// Based on whether the `collapse-docs` pass was run, return either the `doc_value` or the
/// `collapsed_doc_value` of the given item.
crate fn maybe_collapsed_doc_value<'a>(&self, item: &'a clean::Item) -> Option<Cow<'a, str>> {
if self.collapsed {
item.collapsed_doc_value().map(|s| s.into())
} else {
item.doc_value().map(|s| s.into())
}
crate fn maybe_collapsed_doc_value<'a>(&self, item: &'a clean::Item) -> Option<String> {
if self.collapsed { item.collapsed_doc_value() } else { item.doc_value() }
}
}
@ -1622,7 +1617,7 @@ impl Context<'_> {
let short = short.to_string();
map.entry(short).or_default().push((
myname,
Some(item.doc_value().map_or_else(|| String::new(), plain_text_summary)),
Some(item.doc_value().map_or_else(String::new, |s| plain_text_summary(&s))),
));
}
@ -1880,7 +1875,7 @@ fn document_short(
return;
}
if let Some(s) = item.doc_value() {
let mut summary_html = MarkdownSummaryLine(s, &item.links()).into_string();
let mut summary_html = MarkdownSummaryLine(&s, &item.links()).into_string();
if s.contains('\n') {
let link = format!(r#" <a href="{}">Read more</a>"#, naive_assoc_href(item, link));
@ -2197,7 +2192,7 @@ fn item_module(w: &mut Buffer, cx: &Context<'_>, item: &clean::Item, items: &[cl
let stab = myitem.stability_class(cx.tcx());
let add = if stab.is_some() { " " } else { "" };
let doc_value = myitem.doc_value().unwrap_or("");
let doc_value = myitem.doc_value().unwrap_or_default();
write!(
w,
"<tr class=\"{stab}{add}module-item\">\
@ -2207,7 +2202,7 @@ fn item_module(w: &mut Buffer, cx: &Context<'_>, item: &clean::Item, items: &[cl
</tr>",
name = *myitem.name.as_ref().unwrap(),
stab_tags = extra_info_tags(myitem, item, cx.tcx()),
docs = MarkdownSummaryLine(doc_value, &myitem.links()).into_string(),
docs = MarkdownSummaryLine(&doc_value, &myitem.links()).into_string(),
class = myitem.type_(),
add = add,
stab = stab.unwrap_or_else(String::new),

View file

@ -235,12 +235,7 @@ impl<'a, 'b> fold::DocFolder for CoverageCalculator<'a, 'b> {
let mut tests = Tests { found_tests: 0 };
find_testable_code(
&i.attrs
.doc_strings
.iter()
.map(|d| d.doc.as_str())
.collect::<Vec<_>>()
.join("\n"),
&i.attrs.collapsed_doc_value().unwrap_or_default(),
&mut tests,
ErrorCodes::No,
false,

View file

@ -1,72 +0,0 @@
use crate::clean::{self, DocFragment, DocFragmentKind, Item};
use crate::core::DocContext;
use crate::fold;
use crate::fold::DocFolder;
use crate::passes::Pass;
use std::mem::take;
crate const COLLAPSE_DOCS: Pass = Pass {
name: "collapse-docs",
run: collapse_docs,
description: "concatenates all document attributes into one document attribute",
};
crate fn collapse_docs(krate: clean::Crate, _: &DocContext<'_>) -> clean::Crate {
let mut krate = Collapser.fold_crate(krate);
krate.collapsed = true;
krate
}
struct Collapser;
impl fold::DocFolder for Collapser {
fn fold_item(&mut self, mut i: Item) -> Option<Item> {
i.attrs.collapse_doc_comments();
Some(self.fold_item_recur(i))
}
}
fn collapse(doc_strings: &mut Vec<DocFragment>) {
let mut docs = vec![];
let mut last_frag: Option<DocFragment> = None;
for frag in take(doc_strings) {
if let Some(mut curr_frag) = last_frag.take() {
let curr_kind = &curr_frag.kind;
let new_kind = &frag.kind;
if matches!(*curr_kind, DocFragmentKind::Include { .. })
|| curr_kind != new_kind
|| curr_frag.parent_module != frag.parent_module
{
if *curr_kind == DocFragmentKind::SugaredDoc
|| *curr_kind == DocFragmentKind::RawDoc
{
// add a newline for extra padding between segments
curr_frag.doc.push('\n');
}
docs.push(curr_frag);
last_frag = Some(frag);
} else {
curr_frag.doc.push('\n');
curr_frag.doc.push_str(&frag.doc);
curr_frag.span = curr_frag.span.to(frag.span);
last_frag = Some(curr_frag);
}
} else {
last_frag = Some(frag);
}
}
if let Some(frag) = last_frag.take() {
docs.push(frag);
}
*doc_strings = docs;
}
impl clean::Attributes {
crate fn collapse_doc_comments(&mut self) {
collapse(&mut self.doc_strings);
}
}

View file

@ -891,37 +891,20 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> {
// In the presence of re-exports, this is not the same as the module of the item.
// Rather than merging all documentation into one, resolve it one attribute at a time
// so we know which module it came from.
let mut attrs = item.attrs.doc_strings.iter().peekable();
while let Some(attr) = attrs.next() {
// `collapse_docs` does not have the behavior we want:
// we want `///` and `#[doc]` to count as the same attribute,
// but currently it will treat them as separate.
// As a workaround, combine all attributes with the same parent module into the same attribute.
let mut combined_docs = attr.doc.clone();
loop {
match attrs.peek() {
Some(next) if next.parent_module == attr.parent_module => {
combined_docs.push('\n');
combined_docs.push_str(&attrs.next().unwrap().doc);
}
_ => break,
}
}
debug!("combined_docs={}", combined_docs);
for (parent_module, doc) in item.attrs.collapsed_doc_value_by_module_level() {
debug!("combined_docs={}", doc);
let (krate, parent_node) = if let Some(id) = attr.parent_module {
trace!("docs {:?} came from {:?}", attr.doc, id);
let (krate, parent_node) = if let Some(id) = parent_module {
(id.krate, Some(id))
} else {
trace!("no parent found for {:?}", attr.doc);
(item.def_id.krate, parent_node)
};
// NOTE: if there are links that start in one crate and end in another, this will not resolve them.
// This is a degenerate case and it's not supported by rustdoc.
for (ori_link, link_range) in markdown_links(&combined_docs) {
for (ori_link, link_range) in markdown_links(&doc) {
let link = self.resolve_link(
&item,
&combined_docs,
&doc,
&self_name,
parent_node,
krate,

View file

@ -14,9 +14,6 @@ crate use stripper::*;
mod non_autolinks;
crate use self::non_autolinks::CHECK_NON_AUTOLINKS;
mod collapse_docs;
crate use self::collapse_docs::COLLAPSE_DOCS;
mod strip_hidden;
crate use self::strip_hidden::STRIP_HIDDEN;
@ -84,7 +81,6 @@ crate const PASSES: &[Pass] = &[
CHECK_PRIVATE_ITEMS_DOC_TESTS,
STRIP_HIDDEN,
UNINDENT_COMMENTS,
COLLAPSE_DOCS,
STRIP_PRIVATE,
STRIP_PRIV_IMPORTS,
PROPAGATE_DOC_CFG,
@ -99,7 +95,6 @@ crate const PASSES: &[Pass] = &[
/// The list of passes run by default.
crate const DEFAULT_PASSES: &[ConditionalPass] = &[
ConditionalPass::always(COLLECT_TRAIT_IMPLS),
ConditionalPass::always(COLLAPSE_DOCS),
ConditionalPass::always(UNINDENT_COMMENTS),
ConditionalPass::always(CHECK_PRIVATE_ITEMS_DOC_TESTS),
ConditionalPass::new(STRIP_HIDDEN, WhenNotDocumentHidden),

View file

@ -68,7 +68,7 @@ fn unindent_fragments(docs: &mut Vec<DocFragment>) {
let min_indent = match docs
.iter()
.map(|fragment| {
fragment.doc.lines().fold(usize::MAX, |min_indent, line| {
fragment.doc.as_str().lines().fold(usize::MAX, |min_indent, line| {
if line.chars().all(|c| c.is_whitespace()) {
min_indent
} else {
@ -87,7 +87,7 @@ fn unindent_fragments(docs: &mut Vec<DocFragment>) {
};
for fragment in docs {
if fragment.doc.lines().count() == 0 {
if fragment.doc.as_str().lines().count() == 0 {
continue;
}
@ -97,18 +97,6 @@ fn unindent_fragments(docs: &mut Vec<DocFragment>) {
min_indent
};
fragment.doc = fragment
.doc
.lines()
.map(|line| {
if line.chars().all(|c| c.is_whitespace()) {
line.to_string()
} else {
assert!(line.len() >= min_indent);
line[min_indent..].to_string()
}
})
.collect::<Vec<_>>()
.join("\n");
fragment.indent = min_indent;
}
}

View file

@ -1,21 +1,27 @@
use super::*;
use rustc_span::source_map::DUMMY_SP;
use rustc_span::symbol::Symbol;
use rustc_span::with_default_session_globals;
fn create_doc_fragment(s: &str) -> Vec<DocFragment> {
vec![DocFragment {
line: 0,
span: DUMMY_SP,
parent_module: None,
doc: s.to_string(),
doc: Symbol::intern(s),
kind: DocFragmentKind::SugaredDoc,
need_backline: false,
indent: 0,
}]
}
#[track_caller]
fn run_test(input: &str, expected: &str) {
let mut s = create_doc_fragment(input);
unindent_fragments(&mut s);
assert_eq!(s[0].doc, expected);
with_default_session_globals(|| {
let mut s = create_doc_fragment(input);
unindent_fragments(&mut s);
assert_eq!(&s.iter().collect::<String>(), expected);
});
}
#[test]

View file

@ -127,8 +127,10 @@ LL | /// ```text
warning: could not parse code block as Rust code
--> $DIR/invalid-syntax.rs:92:9
|
LL | /// \____/
| ^^^^^^
LL | /// \____/
| _________^
LL | | ///
| |_
|
= note: error from rustc: unknown start of token: \