Remove disambiguators from link text

Related to https://github.com/rust-lang/rust/issues/65354

- Pass through the replacement text to `markdown.rs`
- Add some tests
- Add a state machine that actually replaces the text when parsing Markdown
This commit is contained in:
Joshua Nelson 2020-07-05 23:38:31 -04:00
parent 31a7b6e832
commit 9815010d8f
5 changed files with 141 additions and 22 deletions

View file

@ -436,6 +436,11 @@ pub struct Attributes {
pub struct ItemLink {
/// The original link written in the markdown
pub(crate) link: String,
/// The link text displayed in the HTML.
///
/// This may not be the same as `link` if there was a disambiguator
/// in an intra-doc link (e.g. [`fn@f`])
pub(crate) link_text: String,
pub(crate) did: Option<DefId>,
/// The url fragment to append to the link
pub(crate) fragment: Option<String>,
@ -444,6 +449,8 @@ pub struct ItemLink {
pub struct RenderedLink {
/// The text the link was originally written as.
///
/// This is the key the Markdown renderer compares link destinations
/// and link text against when deciding whether to rewrite a link.
pub(crate) original_text: String,
/// The text to display in the HTML.
///
/// Copied from `ItemLink::link_text`; it may differ from `original_text`
/// when the link was written with a disambiguator (e.g. [`fn@f`]).
pub(crate) new_text: String,
/// The URL to put in the `href`
pub(crate) href: String,
}
@ -630,7 +637,7 @@ impl Attributes {
self.links
.iter()
.filter_map(|ItemLink { link: s, did, fragment }| {
.filter_map(|ItemLink { link: s, link_text, did, fragment }| {
match *did {
Some(did) => {
if let Some((mut href, ..)) = href(did) {
@ -638,7 +645,11 @@ impl Attributes {
href.push_str("#");
href.push_str(fragment);
}
Some(RenderedLink { original_text: s.clone(), href })
Some(RenderedLink {
original_text: s.clone(),
new_text: link_text.clone(),
href,
})
} else {
None
}
@ -660,6 +671,7 @@ impl Attributes {
let tail = fragment.find('#').unwrap_or_else(|| fragment.len());
Some(RenderedLink {
original_text: s.clone(),
new_text: link_text.clone(),
href: format!(
"{}{}std/primitive.{}.html{}",
url,

View file

@ -340,29 +340,86 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for CodeBlocks<'_, 'a, I> {
/// Rewrite link destinations to their resolved URLs and remove disambiguators from the text of shortcut links.
struct LinkReplacer<'a, 'b, I: Iterator<Item = Event<'a>>> {
inner: I,
links: &'b [RenderedLink],
links: &'a [RenderedLink],
shortcut_link: Option<&'b RenderedLink>,
}
impl<'a, 'b, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, 'b, I> {
fn new(iter: I, links: &'b [RenderedLink]) -> Self {
LinkReplacer { inner: iter, links }
impl<'a, I: Iterator<Item = Event<'a>>> LinkReplacer<'a, '_, I> {
fn new(iter: I, links: &'a [RenderedLink]) -> Self {
LinkReplacer { inner: iter, links, shortcut_link: None }
}
}
impl<'a, 'b, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, 'b, I> {
impl<'a: 'b, 'b, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, 'b, I> {
type Item = Event<'a>;
fn next(&mut self) -> Option<Self::Item> {
let event = self.inner.next();
if let Some(Event::Start(Tag::Link(kind, dest, text))) = event {
if let Some(link) = self.links.iter().find(|link| link.original_text == *dest) {
Some(Event::Start(Tag::Link(kind, link.href.clone().into(), text)))
} else {
Some(Event::Start(Tag::Link(kind, dest, text)))
let mut event = self.inner.next();
// Remove disambiguators from shortcut links (`[fn@f]`)
match &mut event {
Some(Event::Start(Tag::Link(
pulldown_cmark::LinkType::ShortcutUnknown,
dest,
title,
))) => {
debug!("saw start of shortcut link to {} with title {}", dest, title);
let link = if let Some(link) =
self.links.iter().find(|&link| *link.original_text == **dest)
{
// Not sure why this is necessary - maybe the broken_link_callback doesn't always work?
*dest = CowStr::Borrowed(link.href.as_ref());
Some(link)
} else {
self.links.iter().find(|&link| *link.href == **dest)
};
if let Some(link) = link {
trace!("it matched");
assert!(self.shortcut_link.is_none(), "shortcut links cannot be nested");
self.shortcut_link = Some(link);
}
}
} else {
event
Some(Event::End(Tag::Link(pulldown_cmark::LinkType::ShortcutUnknown, dest, _))) => {
debug!("saw end of shortcut link to {}", dest);
if let Some(_link) = self.links.iter().find(|&link| *link.href == **dest) {
assert!(self.shortcut_link.is_some(), "saw closing link without opening tag");
self.shortcut_link = None;
}
}
// Handle backticks in inline code blocks
Some(Event::Code(text)) => {
trace!("saw code {}", text);
if let Some(link) = self.shortcut_link {
trace!("original text was {}", link.original_text);
if **text == link.original_text[1..link.original_text.len() - 1] {
debug!("replacing {} with {}", text, link.new_text);
*text = link.new_text.clone().into();
}
}
}
// Replace plain text in links
Some(Event::Text(text)) => {
trace!("saw text {}", text);
if let Some(link) = self.shortcut_link {
trace!("original text was {}", link.original_text);
if **text == *link.original_text {
debug!("replacing {} with {}", text, link.new_text);
*text = link.new_text.clone().into();
}
}
}
Some(Event::Start(Tag::Link(_, dest, _))) => {
if let Some(link) = self.links.iter().find(|&link| *link.original_text == **dest) {
// Not sure why this is necessary - maybe the broken_link_callback doesn't always work?
*dest = CowStr::Borrowed(link.href.as_ref());
}
}
// Anything else couldn't have been a valid Rust path
_ => {}
}
// Yield the modified event
event
}
}
@ -857,7 +914,7 @@ impl Markdown<'_> {
}
let replacer = |_: &str, s: &str| {
if let Some(link) = links.iter().find(|link| &*link.original_text == s) {
Some((link.original_text.clone(), link.href.clone()))
Some((link.href.clone(), link.new_text.clone()))
} else {
None
}
@ -934,8 +991,8 @@ impl MarkdownSummaryLine<'_> {
}
let replacer = |_: &str, s: &str| {
if let Some(rendered_link) = links.iter().find(|link| &*link.original_text == s) {
Some((rendered_link.original_text.clone(), rendered_link.href.clone()))
if let Some(link) = links.iter().find(|link| &*link.original_text == s) {
Some((link.href.clone(), link.new_text.clone()))
} else {
None
}

View file

@ -64,8 +64,7 @@ use serde::ser::SerializeSeq;
use serde::{Serialize, Serializer};
use crate::clean::{self, AttributesExt, Deprecation, GetDefId, RenderedLink, SelfTy, TypeKind};
use crate::config::RenderInfo;
use crate::config::RenderOptions;
use crate::config::{RenderInfo, RenderOptions};
use crate::docfs::{DocFS, PathError};
use crate::doctree;
use crate::error::Error;

View file

@ -685,6 +685,7 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> {
continue;
}
//let had_backticks = ori_link.contains("`");
let link = ori_link.replace("`", "");
let parts = link.split('#').collect::<Vec<_>>();
let (link, extra_fragment) = if parts.len() > 2 {
@ -700,6 +701,7 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> {
(parts[0], None)
};
let resolved_self;
let link_text;
let mut path_str;
let disambiguator;
let (mut res, mut fragment) = {
@ -716,6 +718,12 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> {
continue;
}
// We stripped ` characters for `path_str`.
// The original link might have had multiple pairs of backticks,
// but we don't handle this case.
//link_text = if had_backticks { format!("`{}`", path_str) } else { path_str.to_owned() };
link_text = path_str.to_owned();
// In order to correctly resolve intra-doc-links we need to
// pick a base AST node to work from. If the documentation for
// this module came from an inner comment (//!) then we anchor
@ -904,7 +912,12 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> {
if let Res::PrimTy(_) = res {
match disambiguator {
Some(Disambiguator::Primitive | Disambiguator::Namespace(_)) | None => {
item.attrs.links.push(ItemLink { link: ori_link, did: None, fragment });
item.attrs.links.push(ItemLink {
link: ori_link,
link_text: path_str.to_owned(),
did: None,
fragment,
});
}
Some(other) => {
report_mismatch(other, Disambiguator::Primitive);
@ -955,7 +968,12 @@ impl<'a, 'tcx> DocFolder for LinkCollector<'a, 'tcx> {
}
}
let id = register_res(cx, res);
item.attrs.links.push(ItemLink { link: ori_link, did: Some(id), fragment });
item.attrs.links.push(ItemLink {
link: ori_link,
link_text,
did: Some(id),
fragment,
});
}
}

View file

@ -0,0 +1,33 @@
#![deny(intra_doc_link_resolution_failure)]
// first try backticks
// Every link below is written inside backticks and carries a prefix
// disambiguator (`trait`, `fn` or `macro`) selecting which item called
// `Name` is meant. The directives check that each anchor points at the
// page for that item kind, has a `<code>` child, and contains `Name`.
/// Trait: [`trait@Name`], fn: [`fn@Name`], [`Name`][`macro@Name`]
// @has disambiguator_removed/struct.AtDisambiguator.html
// @has - '//a[@href="../disambiguator_removed/trait.Name.html"][code]' "Name"
// @has - '//a[@href="../disambiguator_removed/fn.Name.html"][code]' "Name"
// @has - '//a[@href="../disambiguator_removed/macro.Name.html"][code]' "Name"
pub struct AtDisambiguator;
// Suffix-style disambiguators: a trailing `()` selects the function and a
// trailing `!` selects the macro. The expected link text below still
// includes the suffix (`Name()` and `Name!`).
/// fn: [`Name()`], macro: [`Name!`]
// @has disambiguator_removed/struct.SymbolDisambiguator.html
// @has - '//a[@href="../disambiguator_removed/fn.Name.html"][code]' "Name()"
// @has - '//a[@href="../disambiguator_removed/macro.Name.html"][code]' "Name!"
pub struct SymbolDisambiguator;
// Now make sure that backticks aren't added if they weren't already there
// The first directive after the page check requires an anchor to the
// function page containing `Name`; the negated directive requires that no
// such anchor has a `<code>` child.
/// [fn@Name]
// @has disambiguator_removed/trait.Name.html
// @has - '//a[@href="../disambiguator_removed/fn.Name.html"]' "Name"
// @!has - '//a[@href="../disambiguator_removed/fn.Name.html"][code]' "Name"
// FIXME: this will turn !() into ! alone
// The link is written with `!()`, but the expected text is only `Name!`.
/// [Name!()]
// @has - '//a[@href="../disambiguator_removed/macro.Name.html"]' "Name!"
pub trait Name {}
// Function sharing the name `Name` with the trait and the macro in this
// file, so links to it need a disambiguator. The capitalised name is why
// the `non_snake_case` lint is allowed here.
#[allow(non_snake_case)]
pub fn Name() {}
// Macro sharing the name `Name` with the trait and the function in this
// file; it is the target of the macro links in the doc comments above.
// It accepts no tokens and expands to `()`.
#[macro_export]
macro_rules! Name {
() => ()
}