Auto merge of #83515 - tamird:string-remove-matches-rev, r=m-ou-se

String::remove_matches O(n^2) -> O(n)

Copy only non-matching bytes. Replace collection of matches into a
vector with iteration over rejections, exploiting the guarantee that we
mutate parts of the haystack that have already been searched over.

r? `@joshtriplett`
This commit is contained in:
bors 2021-06-08 01:05:48 +00:00
commit dda4a881e0

View file

@ -48,7 +48,7 @@ use core::fmt;
use core::hash;
#[cfg(not(no_global_oom_handling))]
use core::iter::FromIterator;
use core::iter::FusedIterator;
use core::iter::{from_fn, FusedIterator};
#[cfg(not(no_global_oom_handling))]
use core::ops::Add;
#[cfg(not(no_global_oom_handling))]
@ -1290,32 +1290,49 @@ impl String {
{
use core::str::pattern::Searcher;
let matches = {
let rejections = {
let mut searcher = pat.into_searcher(self);
let mut matches = Vec::new();
while let Some(m) = searcher.next_match() {
matches.push(m);
}
matches
// Per Searcher::next:
//
// A Match result needs to contain the whole matched pattern,
// however Reject results may be split up into arbitrary many
// adjacent fragments. Both ranges may have zero length.
//
// In practice the implementation of Searcher::next_match tends to
// be more efficient, so we use it here and do some work to invert
// matches into rejections since that's what we want to copy below.
let mut front = 0;
let rejections: Vec<_> = from_fn(|| {
let (start, end) = searcher.next_match()?;
let prev_front = front;
front = end;
Some((prev_front, start))
})
.collect();
rejections.into_iter().chain(core::iter::once((front, self.len())))
};
let len = self.len();
let mut shrunk_by = 0;
let mut len = 0;
let ptr = self.vec.as_mut_ptr();
// SAFETY: start and end will be on utf8 byte boundaries per
// the Searcher docs
unsafe {
for (start, end) in matches {
ptr::copy(
self.vec.as_mut_ptr().add(end - shrunk_by),
self.vec.as_mut_ptr().add(start - shrunk_by),
len - end,
);
shrunk_by += end - start;
for (start, end) in rejections {
let count = end - start;
if start != len {
// SAFETY: per Searcher::next:
//
// The stream of Match and Reject values up to a Done will
// contain index ranges that are adjacent, non-overlapping,
// covering the whole haystack, and laying on utf8
// boundaries.
unsafe {
ptr::copy(ptr.add(start), ptr.add(len), count);
}
}
self.vec.set_len(len - shrunk_by);
len += count;
}
unsafe {
self.vec.set_len(len);
}
}