From 9815010d8f6a46621ee96cb824f9939b9db0ae47 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 5 Jul 2020 23:38:31 -0400 Subject: [PATCH] Remove disambiguators from link text Related to https://github.com/rust-lang/rust/issues/65354 - Pass through the replacement text to `markdown.rs` - Add some tests - Add a state machine that actually replaces the text when parsing Markdown --- src/librustdoc/clean/types.rs | 16 +++- src/librustdoc/html/markdown.rs | 89 +++++++++++++++---- src/librustdoc/html/render/mod.rs | 3 +- .../passes/collect_intra_doc_links.rs | 22 ++++- src/test/rustdoc/disambiguator_removed.rs | 33 +++++++ 5 files changed, 141 insertions(+), 22 deletions(-) create mode 100644 src/test/rustdoc/disambiguator_removed.rs diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs index d79553a8be8..05c90a7c403 100644 --- a/src/librustdoc/clean/types.rs +++ b/src/librustdoc/clean/types.rs @@ -436,6 +436,11 @@ pub struct Attributes { pub struct ItemLink { /// The original link written in the markdown pub(crate) link: String, + /// The link text displayed in the HTML. + /// + /// This may not be the same as `link` if there was a disambiguator + /// in an intra-doc link (e.g. [`fn@f`]) + pub(crate) link_text: String, pub(crate) did: Option, /// The url fragment to append to the link pub(crate) fragment: Option, @@ -444,6 +449,8 @@ pub struct ItemLink { pub struct RenderedLink { /// The text the link was original written as pub(crate) original_text: String, + /// The text to display in the HTML + pub(crate) new_text: String, /// The URL to put in the `href` pub(crate) href: String, } @@ -630,7 +637,7 @@ impl Attributes { self.links .iter() - .filter_map(|ItemLink { link: s, did, fragment }| { + .filter_map(|ItemLink { link: s, link_text, did, fragment }| { match *did { Some(did) => { if let Some((mut href, ..)) = href(did) { @@ -638,7 +645,11 @@ impl Attributes { href.push_str("#"); href.push_str(fragment); } - Some(RenderedLink { original_text: s.clone(), href }) + Some(RenderedLink { + original_text: s.clone(), + new_text: link_text.clone(), + href, + }) } else { None } @@ -660,6 +671,7 @@ impl Attributes { let tail = fragment.find('#').unwrap_or_else(|| fragment.len()); Some(RenderedLink { original_text: s.clone(), + new_text: link_text.clone(), href: format!( "{}{}std/primitive.{}.html{}", url, diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index e6484cd1c2c..6d634bac762 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -340,29 +340,86 @@ impl<'a, I: Iterator>> Iterator for CodeBlocks<'_, 'a, I> { /// Make headings links with anchor IDs and build up TOC. struct LinkReplacer<'a, 'b, I: Iterator>> { inner: I, - links: &'b [RenderedLink], + links: &'a [RenderedLink], + shortcut_link: Option<&'b RenderedLink>, } -impl<'a, 'b, I: Iterator>> LinkReplacer<'a, 'b, I> { - fn new(iter: I, links: &'b [RenderedLink]) -> Self { - LinkReplacer { inner: iter, links } +impl<'a, I: Iterator>> LinkReplacer<'a, '_, I> { + fn new(iter: I, links: &'a [RenderedLink]) -> Self { + LinkReplacer { inner: iter, links, shortcut_link: None } } } -impl<'a, 'b, I: Iterator>> Iterator for LinkReplacer<'a, 'b, I> { +impl<'a: 'b, 'b, I: Iterator>> Iterator for LinkReplacer<'a, 'b, I> { type Item = Event<'a>; fn next(&mut self) -> Option { - let event = self.inner.next(); - if let Some(Event::Start(Tag::Link(kind, dest, text))) = event { - if let Some(link) = self.links.iter().find(|link| link.original_text == *dest) { - Some(Event::Start(Tag::Link(kind, link.href.clone().into(), text))) - } else { - Some(Event::Start(Tag::Link(kind, dest, text))) + let mut event = self.inner.next(); + + // Remove disambiguators from shortcut links (`[fn@f]`) + match &mut event { + Some(Event::Start(Tag::Link( + pulldown_cmark::LinkType::ShortcutUnknown, + dest, + title, + ))) => { + debug!("saw start of shortcut link to {} with title {}", dest, title); + let link = if let Some(link) = + self.links.iter().find(|&link| *link.original_text == **dest) + { + // Not sure why this is necessary - maybe the broken_link_callback doesn't always work? + *dest = CowStr::Borrowed(link.href.as_ref()); + Some(link) + } else { + self.links.iter().find(|&link| *link.href == **dest) + }; + if let Some(link) = link { + trace!("it matched"); + assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested"); + self.shortcut_link = Some(link); + } } - } else { - event + Some(Event::End(Tag::Link(pulldown_cmark::LinkType::ShortcutUnknown, dest, _))) => { + debug!("saw end of shortcut link to {}", dest); + if let Some(_link) = self.links.iter().find(|&link| *link.href == **dest) { + assert!(self.shortcut_link.is_some(), "saw closing link without opening tag"); + self.shortcut_link = None; + } + } + // Handle backticks in inline code blocks + Some(Event::Code(text)) => { + trace!("saw code {}", text); + if let Some(link) = self.shortcut_link { + trace!("original text was {}", link.original_text); + if **text == link.original_text[1..link.original_text.len() - 1] { + debug!("replacing {} with {}", text, link.new_text); + *text = link.new_text.clone().into(); + } + } + } + // Replace plain text in links + Some(Event::Text(text)) => { + trace!("saw text {}", text); + if let Some(link) = self.shortcut_link { + trace!("original text was {}", link.original_text); + if **text == *link.original_text { + debug!("replacing {} with {}", text, link.new_text); + *text = link.new_text.clone().into(); + } + } + } + Some(Event::Start(Tag::Link(_, dest, _))) => { + if let Some(link) = self.links.iter().find(|&link| *link.original_text == **dest) { + // Not sure why this is necessary - maybe the broken_link_callback doesn't always work? + *dest = CowStr::Borrowed(link.href.as_ref()); + } + } + // Anything else couldn't have been a valid Rust path + _ => {} } + + // Yield the modified event + event } } @@ -857,7 +914,7 @@ impl Markdown<'_> { } let replacer = |_: &str, s: &str| { if let Some(link) = links.iter().find(|link| &*link.original_text == s) { - Some((link.original_text.clone(), link.href.clone())) + Some((link.href.clone(), link.new_text.clone())) } else { None } @@ -934,8 +991,8 @@ impl MarkdownSummaryLine<'_> { } let replacer = |_: &str, s: &str| { - if let Some(rendered_link) = links.iter().find(|link| &*link.original_text == s) { - Some((rendered_link.original_text.clone(), rendered_link.href.clone())) + if let Some(link) = links.iter().find(|link| &*link.original_text == s) { + Some((link.href.clone(), link.new_text.clone())) } else { None } diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 9dc85881482..318b10e2af2 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -64,8 +64,7 @@ use serde::ser::SerializeSeq; use serde::{Serialize, Serializer}; use crate::clean::{self, AttributesExt, Deprecation, GetDefId, RenderedLink, SelfTy, TypeKind}; -use crate::config::RenderInfo; -use crate::config::RenderOptions; +use crate::config::{RenderInfo, RenderOptions}; use crate::docfs::{DocFS, PathError}; use crate::doctree; use crate::error::Error; diff --git a/src/librustdoc/passes/collect_intra_doc_links.rs b/src/librustdoc/passes/collect_intra_doc_links.rs index a0cffc92ce1..ff3c19da3cf 100644 --- a/src/librustdoc/passes/collect_intra_doc_links.rs +++ b/src/librustdoc/passes/collect_intra_doc_links.rs @@ -685,6 +685,7 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> { continue; } + //let had_backticks = ori_link.contains("`"); let link = ori_link.replace("`", ""); let parts = link.split('#').collect::>(); let (link, extra_fragment) = if parts.len() > 2 { @@ -700,6 +701,7 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> { (parts[0], None) }; let resolved_self; + let link_text; let mut path_str; let disambiguator; let (mut res, mut fragment) = { @@ -716,6 +718,12 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> { continue; } + // We stripped ` characters for `path_str`. + // The original link might have had multiple pairs of backticks, + // but we don't handle this case. + //link_text = if had_backticks { format!("`{}`", path_str) } else { path_str.to_owned() }; + link_text = path_str.to_owned(); + // In order to correctly resolve intra-doc-links we need to // pick a base AST node to work from. If the documentation for // this module came from an inner comment (//!) then we anchor @@ -904,7 +912,12 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> { if let Res::PrimTy(_) = res { match disambiguator { Some(Disambiguator::Primitive | Disambiguator::Namespace(_)) | None => { - item.attrs.links.push(ItemLink { link: ori_link, did: None, fragment }); + item.attrs.links.push(ItemLink { + link: ori_link, + link_text: path_str.to_owned(), + did: None, + fragment, + }); } Some(other) => { report_mismatch(other, Disambiguator::Primitive); @@ -955,7 +968,12 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> { } } let id = register_res(cx, res); - item.attrs.links.push(ItemLink { link: ori_link, did: Some(id), fragment }); + item.attrs.links.push(ItemLink { + link: ori_link, + link_text, + did: Some(id), + fragment, + }); } } diff --git a/src/test/rustdoc/disambiguator_removed.rs b/src/test/rustdoc/disambiguator_removed.rs new file mode 100644 index 00000000000..74411870e9f --- /dev/null +++ b/src/test/rustdoc/disambiguator_removed.rs @@ -0,0 +1,33 @@ +#![deny(intra_doc_link_resolution_failure)] +// first try backticks +/// Trait: [`trait@Name`], fn: [`fn@Name`], [`Name`][`macro@Name`] +// @has disambiguator_removed/struct.AtDisambiguator.html +// @has - '//a[@href="../disambiguator_removed/trait.Name.html"][code]' "Name" +// @has - '//a[@href="../disambiguator_removed/fn.Name.html"][code]' "Name" +// @has - '//a[@href="../disambiguator_removed/macro.Name.html"][code]' "Name" +pub struct AtDisambiguator; + +/// fn: [`Name()`], macro: [`Name!`] +// @has disambiguator_removed/struct.SymbolDisambiguator.html +// @has - '//a[@href="../disambiguator_removed/fn.Name.html"][code]' "Name()" +// @has - '//a[@href="../disambiguator_removed/macro.Name.html"][code]' "Name!" +pub struct SymbolDisambiguator; + +// Now make sure that backticks aren't added if they weren't already there +/// [fn@Name] +// @has disambiguator_removed/trait.Name.html +// @has - '//a[@href="../disambiguator_removed/fn.Name.html"]' "Name" +// @!has - '//a[@href="../disambiguator_removed/fn.Name.html"][code]' "Name" + +// FIXME: this will turn !() into ! alone +/// [Name!()] +// @has - '//a[@href="../disambiguator_removed/macro.Name.html"]' "Name!" +pub trait Name {} + +#[allow(non_snake_case)] +pub fn Name() {} + +#[macro_export] +macro_rules! Name { + () => () +}