Auto merge of #83515 - tamird:string-remove-matches-rev, r=m-ou-se

String::remove_matches O(n^2) -> O(n)

Copy only the non-matching bytes. Instead of collecting the matches into a
vector, iterate over the rejections, exploiting the guarantee that we only
mutate parts of the haystack that have already been searched.

r? `@joshtriplett`
This commit is contained in:
bors 2021-06-08 01:05:48 +00:00
commit dda4a881e0

View file

@ -48,7 +48,7 @@ use core::fmt;
use core::hash; use core::hash;
#[cfg(not(no_global_oom_handling))] #[cfg(not(no_global_oom_handling))]
use core::iter::FromIterator; use core::iter::FromIterator;
use core::iter::FusedIterator; use core::iter::{from_fn, FusedIterator};
#[cfg(not(no_global_oom_handling))] #[cfg(not(no_global_oom_handling))]
use core::ops::Add; use core::ops::Add;
#[cfg(not(no_global_oom_handling))] #[cfg(not(no_global_oom_handling))]
@ -1290,32 +1290,49 @@ impl String {
{ {
use core::str::pattern::Searcher; use core::str::pattern::Searcher;
let matches = { let rejections = {
let mut searcher = pat.into_searcher(self); let mut searcher = pat.into_searcher(self);
let mut matches = Vec::new(); // Per Searcher::next:
//
while let Some(m) = searcher.next_match() { // A Match result needs to contain the whole matched pattern,
matches.push(m); // however Reject results may be split up into arbitrary many
} // adjacent fragments. Both ranges may have zero length.
//
matches // In practice the implementation of Searcher::next_match tends to
// be more efficient, so we use it here and do some work to invert
// matches into rejections since that's what we want to copy below.
let mut front = 0;
let rejections: Vec<_> = from_fn(|| {
let (start, end) = searcher.next_match()?;
let prev_front = front;
front = end;
Some((prev_front, start))
})
.collect();
rejections.into_iter().chain(core::iter::once((front, self.len())))
}; };
let len = self.len(); let mut len = 0;
let mut shrunk_by = 0; let ptr = self.vec.as_mut_ptr();
// SAFETY: start and end will be on utf8 byte boundaries per for (start, end) in rejections {
// the Searcher docs let count = end - start;
unsafe { if start != len {
for (start, end) in matches { // SAFETY: per Searcher::next:
ptr::copy( //
self.vec.as_mut_ptr().add(end - shrunk_by), // The stream of Match and Reject values up to a Done will
self.vec.as_mut_ptr().add(start - shrunk_by), // contain index ranges that are adjacent, non-overlapping,
len - end, // covering the whole haystack, and laying on utf8
); // boundaries.
shrunk_by += end - start; unsafe {
ptr::copy(ptr.add(start), ptr.add(len), count);
}
} }
self.vec.set_len(len - shrunk_by); len += count;
}
unsafe {
self.vec.set_len(len);
} }
} }