Auto merge of #83515 - tamird:string-remove-matches-rev, r=m-ou-se
String::remove_matches: O(n^2) -> O(n). Copy only non-matching bytes. Replace collection of matches into a vector with iteration over rejections, exploiting the guarantee that we only mutate parts of the haystack that have already been searched over. r? `@joshtriplett`
This commit is contained in:
commit
dda4a881e0
1 changed files with 39 additions and 22 deletions
|
@ -48,7 +48,7 @@ use core::fmt;
|
||||||
use core::hash;
|
use core::hash;
|
||||||
#[cfg(not(no_global_oom_handling))]
|
#[cfg(not(no_global_oom_handling))]
|
||||||
use core::iter::FromIterator;
|
use core::iter::FromIterator;
|
||||||
use core::iter::FusedIterator;
|
use core::iter::{from_fn, FusedIterator};
|
||||||
#[cfg(not(no_global_oom_handling))]
|
#[cfg(not(no_global_oom_handling))]
|
||||||
use core::ops::Add;
|
use core::ops::Add;
|
||||||
#[cfg(not(no_global_oom_handling))]
|
#[cfg(not(no_global_oom_handling))]
|
||||||
|
@ -1290,32 +1290,49 @@ impl String {
|
||||||
{
|
{
|
||||||
use core::str::pattern::Searcher;
|
use core::str::pattern::Searcher;
|
||||||
|
|
||||||
let matches = {
|
let rejections = {
|
||||||
let mut searcher = pat.into_searcher(self);
|
let mut searcher = pat.into_searcher(self);
|
||||||
let mut matches = Vec::new();
|
// Per Searcher::next:
|
||||||
|
//
|
||||||
while let Some(m) = searcher.next_match() {
|
// A Match result needs to contain the whole matched pattern,
|
||||||
matches.push(m);
|
// however Reject results may be split up into arbitrary many
|
||||||
}
|
// adjacent fragments. Both ranges may have zero length.
|
||||||
|
//
|
||||||
matches
|
// In practice the implementation of Searcher::next_match tends to
|
||||||
|
// be more efficient, so we use it here and do some work to invert
|
||||||
|
// matches into rejections since that's what we want to copy below.
|
||||||
|
let mut front = 0;
|
||||||
|
let rejections: Vec<_> = from_fn(|| {
|
||||||
|
let (start, end) = searcher.next_match()?;
|
||||||
|
let prev_front = front;
|
||||||
|
front = end;
|
||||||
|
Some((prev_front, start))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
rejections.into_iter().chain(core::iter::once((front, self.len())))
|
||||||
};
|
};
|
||||||
|
|
||||||
let len = self.len();
|
let mut len = 0;
|
||||||
let mut shrunk_by = 0;
|
let ptr = self.vec.as_mut_ptr();
|
||||||
|
|
||||||
// SAFETY: start and end will be on utf8 byte boundaries per
|
for (start, end) in rejections {
|
||||||
// the Searcher docs
|
let count = end - start;
|
||||||
unsafe {
|
if start != len {
|
||||||
for (start, end) in matches {
|
// SAFETY: per Searcher::next:
|
||||||
ptr::copy(
|
//
|
||||||
self.vec.as_mut_ptr().add(end - shrunk_by),
|
// The stream of Match and Reject values up to a Done will
|
||||||
self.vec.as_mut_ptr().add(start - shrunk_by),
|
// contain index ranges that are adjacent, non-overlapping,
|
||||||
len - end,
|
// covering the whole haystack, and laying on utf8
|
||||||
);
|
// boundaries.
|
||||||
shrunk_by += end - start;
|
unsafe {
|
||||||
|
ptr::copy(ptr.add(start), ptr.add(len), count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
self.vec.set_len(len - shrunk_by);
|
len += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
self.vec.set_len(len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue