From 47d1ed96902508108a00afa1865f5e40819db5c6 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sat, 10 Apr 2021 13:48:54 -0400 Subject: [PATCH] Preprocess intra-doc links consistently Previously, rustdoc would panic on links to external crates if they were surrounded by backticks. --- src/librustdoc/core.rs | 17 +- .../passes/collect_intra_doc_links.rs | 220 +++++++++++------- src/test/rustdoc/intra-doc/auxiliary/empty.rs | 1 + .../rustdoc/intra-doc/auxiliary/empty2.rs | 1 + .../extern-crate-only-used-in-link.rs | 13 +- 5 files changed, 160 insertions(+), 92 deletions(-) create mode 100644 src/test/rustdoc/intra-doc/auxiliary/empty.rs create mode 100644 src/test/rustdoc/intra-doc/auxiliary/empty2.rs diff --git a/src/librustdoc/core.rs b/src/librustdoc/core.rs index c9fdaa50534..1da06f78fa2 100644 --- a/src/librustdoc/core.rs +++ b/src/librustdoc/core.rs @@ -365,21 +365,20 @@ crate fn create_resolver<'a>( } impl ast::visit::Visitor<'_> for IntraLinkCrateLoader { fn visit_attribute(&mut self, attr: &ast::Attribute) { - use crate::html::markdown::{markdown_links, MarkdownLink}; - use crate::passes::collect_intra_doc_links::Disambiguator; + use crate::html::markdown::markdown_links; + use crate::passes::collect_intra_doc_links::preprocess_link; if let Some(doc) = attr.doc_str() { - for MarkdownLink { link, .. } in markdown_links(&doc.as_str()) { - // FIXME: this misses a *lot* of the preprocessing done in collect_intra_doc_links - // I think most of it shouldn't be necessary since we only need the crate prefix? - let path_str = match Disambiguator::from_str(&link) { - Ok(x) => x.map_or(link.as_str(), |(_, p)| p), - Err(_) => continue, + for link in markdown_links(&doc.as_str()) { + let path_str = if let Some(Ok(x)) = preprocess_link(&link) { + x.path_str + } else { + continue; }; self.resolver.borrow_mut().access(|resolver| { let _ = resolver.resolve_str_path_error( attr.span, - path_str, + &path_str, TypeNS, self.current_mod, ); diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index 4bc7544e33d..5d280b590bd 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -68,7 +68,7 @@ impl<'a> From> for ErrorKind<'a> { } #[derive(Copy, Clone, Debug, Hash)] -enum Res { +crate enum Res { Def(DefKind, DefId), Primitive(PrimitiveType), } @@ -134,7 +134,7 @@ impl TryFrom for Res { /// A link failed to resolve. #[derive(Debug)] -enum ResolutionFailure<'a> { +crate enum ResolutionFailure<'a> { /// This resolved, but with the wrong namespace. WrongNamespace { /// What the link resolved to. @@ -172,7 +172,7 @@ enum ResolutionFailure<'a> { } #[derive(Debug)] -enum MalformedGenerics { +crate enum MalformedGenerics { /// This link has unbalanced angle brackets. /// /// For example, `Vec>`. @@ -224,7 +224,7 @@ impl ResolutionFailure<'a> { } } -enum AnchorFailure { +crate enum AnchorFailure { /// User error: `[std#x#y]` is not valid MultipleAnchors, /// The anchor provided by the user conflicts with Rustdoc's generated anchor. @@ -892,6 +892,117 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> { } } +crate enum PreprocessingError<'a> { + Anchor(AnchorFailure), + Disambiguator(Range, String), + Resolution(ResolutionFailure<'a>, String, Option), +} + +impl From for PreprocessingError<'_> { + fn from(err: AnchorFailure) -> Self { + Self::Anchor(err) + } +} + +crate struct PreprocessingInfo { + crate path_str: String, + disambiguator: Option, + extra_fragment: Option, + link_text: String, +} + +/// Returns: +/// - `None` if the link should be ignored. +/// - `Some(Err)` if the link should emit an error +/// - `Some(Ok)` if the link is valid +/// +/// `link_buffer` is needed for lifetime reasons; it will always be overwritten and the contents ignored. +crate fn preprocess_link<'a>( + ori_link: &'a MarkdownLink, +) -> Option>> { + // [] is mostly likely not supposed to be a link + if ori_link.link.is_empty() { + return None; + } + + // Bail early for real links. + if ori_link.link.contains('/') { + return None; + } + + let stripped = ori_link.link.replace("`", ""); + let mut parts = stripped.split('#'); + + let link = parts.next().unwrap(); + if link.trim().is_empty() { + // This is an anchor to an element of the current page, nothing to do in here! + return None; + } + let extra_fragment = parts.next(); + if parts.next().is_some() { + // A valid link can't have multiple #'s + return Some(Err(AnchorFailure::MultipleAnchors.into())); + } + + // Parse and strip the disambiguator from the link, if present. + let (path_str, disambiguator) = match Disambiguator::from_str(&link) { + Ok(Some((d, path))) => (path.trim(), Some(d)), + Ok(None) => (link.trim(), None), + Err((err_msg, relative_range)) => { + // Only report error if we would not have ignored this link. See issue #83859. + if !should_ignore_link_with_disambiguators(link) { + let no_backticks_range = range_between_backticks(&ori_link); + let disambiguator_range = (no_backticks_range.start + relative_range.start) + ..(no_backticks_range.start + relative_range.end); + return Some(Err(PreprocessingError::Disambiguator(disambiguator_range, err_msg))); + } else { + return None; + } + } + }; + + if should_ignore_link(path_str) { + return None; + } + + // We stripped `()` and `!` when parsing the disambiguator. + // Add them back to be displayed, but not prefix disambiguators. + let link_text = + disambiguator.map(|d| d.display_for(path_str)).unwrap_or_else(|| path_str.to_owned()); + + // Strip generics from the path. + let path_str = if path_str.contains(['<', '>'].as_slice()) { + match strip_generics_from_path(&path_str) { + Ok(path) => path, + Err(err_kind) => { + debug!("link has malformed generics: {}", path_str); + return Some(Err(PreprocessingError::Resolution( + err_kind, + path_str.to_owned(), + disambiguator, + ))); + } + } + } else { + path_str.to_owned() + }; + + // Sanity check to make sure we don't have any angle brackets after stripping generics. + assert!(!path_str.contains(['<', '>'].as_slice())); + + // The link is not an intra-doc link if it still contains spaces after stripping generics. + if path_str.contains(' ') { + return None; + } + + Some(Ok(PreprocessingInfo { + path_str, + disambiguator, + extra_fragment: extra_fragment.map(String::from), + link_text, + })) +} + impl LinkCollector<'_, '_> { /// This is the entry point for resolving an intra-doc link. /// @@ -907,16 +1018,6 @@ impl LinkCollector<'_, '_> { ) -> Option { trace!("considering link '{}'", ori_link.link); - // Bail early for real links. - if ori_link.link.contains('/') { - return None; - } - - // [] is mostly likely not supposed to be a link - if ori_link.link.is_empty() { - return None; - } - let diag_info = DiagnosticInfo { item, dox, @@ -924,47 +1025,29 @@ impl LinkCollector<'_, '_> { link_range: ori_link.range.clone(), }; - let link = ori_link.link.replace("`", ""); - let no_backticks_range = range_between_backticks(&ori_link); - let parts = link.split('#').collect::>(); - let (link, extra_fragment) = if parts.len() > 2 { - // A valid link can't have multiple #'s - anchor_failure(self.cx, diag_info, AnchorFailure::MultipleAnchors); - return None; - } else if parts.len() == 2 { - if parts[0].trim().is_empty() { - // This is an anchor to an element of the current page, nothing to do in here! - return None; - } - (parts[0], Some(parts[1].to_owned())) - } else { - (parts[0], None) - }; - - // Parse and strip the disambiguator from the link, if present. - let (mut path_str, disambiguator) = match Disambiguator::from_str(&link) { - Ok(Some((d, path))) => (path.trim(), Some(d)), - Ok(None) => (link.trim(), None), - Err((err_msg, relative_range)) => { - if !should_ignore_link_with_disambiguators(link) { - // Only report error if we would not have ignored this link. - // See issue #83859. - let disambiguator_range = (no_backticks_range.start + relative_range.start) - ..(no_backticks_range.start + relative_range.end); - disambiguator_error(self.cx, diag_info, disambiguator_range, &err_msg); + let PreprocessingInfo { path_str, disambiguator, extra_fragment, link_text } = + match preprocess_link(&ori_link)? { + Ok(x) => x, + Err(err) => { + match err { + PreprocessingError::Anchor(err) => anchor_failure(self.cx, diag_info, err), + PreprocessingError::Disambiguator(range, msg) => { + disambiguator_error(self.cx, diag_info, range, &msg) + } + PreprocessingError::Resolution(err, path_str, disambiguator) => { + resolution_failure( + self, + diag_info, + &path_str, + disambiguator, + smallvec![err], + ); + } + } + return None; } - return None; - } - }; - - if should_ignore_link(path_str) { - return None; - } - - // We stripped `()` and `!` when parsing the disambiguator. - // Add them back to be displayed, but not prefix disambiguators. - let link_text = - disambiguator.map(|d| d.display_for(path_str)).unwrap_or_else(|| path_str.to_owned()); + }; + let mut path_str = &*path_str; // In order to correctly resolve intra-doc links we need to // pick a base AST node to work from. If the documentation for @@ -1029,39 +1112,12 @@ impl LinkCollector<'_, '_> { module_id = DefId { krate, index: CRATE_DEF_INDEX }; } - // Strip generics from the path. - let stripped_path_string; - if path_str.contains(['<', '>'].as_slice()) { - stripped_path_string = match strip_generics_from_path(path_str) { - Ok(path) => path, - Err(err_kind) => { - debug!("link has malformed generics: {}", path_str); - resolution_failure( - self, - diag_info, - path_str, - disambiguator, - smallvec![err_kind], - ); - return None; - } - }; - path_str = &stripped_path_string; - } - // Sanity check to make sure we don't have any angle brackets after stripping generics. - assert!(!path_str.contains(['<', '>'].as_slice())); - - // The link is not an intra-doc link if it still contains spaces after stripping generics. - if path_str.contains(' ') { - return None; - } - let (mut res, mut fragment) = self.resolve_with_disambiguator_cached( ResolutionInfo { module_id, dis: disambiguator, path_str: path_str.to_owned(), - extra_fragment, + extra_fragment: extra_fragment.map(String::from), }, diag_info.clone(), // this struct should really be Copy, but Range is not :( matches!(ori_link.kind, LinkType::Reference | LinkType::Shortcut), diff --git a/src/test/rustdoc/intra-doc/auxiliary/empty.rs b/src/test/rustdoc/intra-doc/auxiliary/empty.rs new file mode 100644 index 00000000000..d11c69f812a --- /dev/null +++ b/src/test/rustdoc/intra-doc/auxiliary/empty.rs @@ -0,0 +1 @@ +// intentionally empty diff --git a/src/test/rustdoc/intra-doc/auxiliary/empty2.rs b/src/test/rustdoc/intra-doc/auxiliary/empty2.rs new file mode 100644 index 00000000000..d11c69f812a --- /dev/null +++ b/src/test/rustdoc/intra-doc/auxiliary/empty2.rs @@ -0,0 +1 @@ +// intentionally empty diff --git a/src/test/rustdoc/intra-doc/extern-crate-only-used-in-link.rs b/src/test/rustdoc/intra-doc/extern-crate-only-used-in-link.rs index 0964c79de06..5d8dcf8bc1d 100644 --- a/src/test/rustdoc/intra-doc/extern-crate-only-used-in-link.rs +++ b/src/test/rustdoc/intra-doc/extern-crate-only-used-in-link.rs @@ -1,8 +1,19 @@ +// This test is just a little cursed. // aux-build:issue-66159-1.rs // aux-crate:priv:issue_66159_1=issue-66159-1.rs +// aux-build:empty.rs +// aux-crate:priv:empty=empty.rs +// aux-build:empty2.rs +// aux-crate:priv:empty2=empty2.rs // build-aux-docs -// compile-flags:-Z unstable-options +// compile-flags:-Z unstable-options --edition 2018 // @has extern_crate_only_used_in_link/index.html // @has - '//a[@href="../issue_66159_1/struct.Something.html"]' 'issue_66159_1::Something' //! [issue_66159_1::Something] + +// @has - '//a[@href="../empty/index.html"]' 'empty' +//! [`empty`] + +// @has - '//a[@href="../empty2/index.html"]' 'empty2' +//! [empty2]