Auto merge of #83515 - tamird:string-remove-matches-rev, r=m-ou-se
String::remove_matches O(n^2) -> O(n)

Copy only non-matching bytes. Replace collection of matches into a vector with iteration over rejections, exploiting the guarantee that we mutate parts of the haystack that have already been searched over.

r? `@joshtriplett`
This commit is contained in:
commit
dda4a881e0
1 changed file with 39 additions and 22 deletions
|
@@ -48,7 +48,7 @@ use core::fmt;
|
|||
use core::hash;
|
||||
#[cfg(not(no_global_oom_handling))]
|
||||
use core::iter::FromIterator;
|
||||
use core::iter::FusedIterator;
|
||||
use core::iter::{from_fn, FusedIterator};
|
||||
#[cfg(not(no_global_oom_handling))]
|
||||
use core::ops::Add;
|
||||
#[cfg(not(no_global_oom_handling))]
|
||||
|
@@ -1290,32 +1290,49 @@ impl String {
|
|||
{
|
||||
use core::str::pattern::Searcher;
|
||||
|
||||
let matches = {
|
||||
let rejections = {
|
||||
let mut searcher = pat.into_searcher(self);
|
||||
let mut matches = Vec::new();
|
||||
|
||||
while let Some(m) = searcher.next_match() {
|
||||
matches.push(m);
|
||||
}
|
||||
|
||||
matches
|
||||
// Per Searcher::next:
|
||||
//
|
||||
// A Match result needs to contain the whole matched pattern,
|
||||
// however Reject results may be split up into arbitrary many
|
||||
// adjacent fragments. Both ranges may have zero length.
|
||||
//
|
||||
// In practice the implementation of Searcher::next_match tends to
|
||||
// be more efficient, so we use it here and do some work to invert
|
||||
// matches into rejections since that's what we want to copy below.
|
||||
let mut front = 0;
|
||||
let rejections: Vec<_> = from_fn(|| {
|
||||
let (start, end) = searcher.next_match()?;
|
||||
let prev_front = front;
|
||||
front = end;
|
||||
Some((prev_front, start))
|
||||
})
|
||||
.collect();
|
||||
rejections.into_iter().chain(core::iter::once((front, self.len())))
|
||||
};
|
||||
|
||||
let len = self.len();
|
||||
let mut shrunk_by = 0;
|
||||
let mut len = 0;
|
||||
let ptr = self.vec.as_mut_ptr();
|
||||
|
||||
// SAFETY: start and end will be on utf8 byte boundaries per
|
||||
// the Searcher docs
|
||||
unsafe {
|
||||
for (start, end) in matches {
|
||||
ptr::copy(
|
||||
self.vec.as_mut_ptr().add(end - shrunk_by),
|
||||
self.vec.as_mut_ptr().add(start - shrunk_by),
|
||||
len - end,
|
||||
);
|
||||
shrunk_by += end - start;
|
||||
for (start, end) in rejections {
|
||||
let count = end - start;
|
||||
if start != len {
|
||||
// SAFETY: per Searcher::next:
|
||||
//
|
||||
// The stream of Match and Reject values up to a Done will
|
||||
// contain index ranges that are adjacent, non-overlapping,
|
||||
// covering the whole haystack, and laying on utf8
|
||||
// boundaries.
|
||||
unsafe {
|
||||
ptr::copy(ptr.add(start), ptr.add(len), count);
|
||||
}
|
||||
}
|
||||
self.vec.set_len(len - shrunk_by);
|
||||
len += count;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
self.vec.set_len(len);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue