Auto merge of #25028 - bluss:drain-string, r=alexcrichton
collections: Implement String::drain(range) according to RFC 574. `.drain(range)` is unstable and gated under feature(collections_drain). This adds a safe way to remove any range of a String as efficiently as possible. As noted in the code, this drain iterator has none of the memory safety issues of the vector version. The RFC tracking issue is #23055.
This commit is contained in:
commit
700b4c160b
5 changed files with 131 additions and 9 deletions
|
@ -26,7 +26,8 @@ use rustc_unicode::str as unicode_str;
|
|||
use rustc_unicode::str::Utf16Item;
|
||||
|
||||
use borrow::{Cow, IntoCow};
|
||||
use str::{self, FromStr, Utf8Error};
|
||||
use range::RangeArgument;
|
||||
use str::{self, FromStr, Utf8Error, Chars};
|
||||
use vec::{DerefVec, Vec, as_vec};
|
||||
|
||||
/// A growable string stored as a UTF-8 encoded buffer.
|
||||
|
@ -695,6 +696,59 @@ impl String {
|
|||
pub fn clear(&mut self) {
|
||||
self.vec.clear()
|
||||
}
|
||||
|
||||
/// Create a draining iterator that removes the specified range in the string
|
||||
/// and yields the removed chars from start to end. The element range is
|
||||
/// removed even if the iterator is not consumed until the end.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the starting point or end point are not on character boundaries,
|
||||
/// or if they are out of bounds.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(collections_drain)]
|
||||
///
|
||||
/// let mut s = String::from("α is alpha, β is beta");
|
||||
/// let beta_offset = s.find('β').unwrap_or(s.len());
|
||||
///
|
||||
/// // Remove the range up until the β from the string
|
||||
/// let t: String = s.drain(..beta_offset).collect();
|
||||
/// assert_eq!(t, "α is alpha, ");
|
||||
/// assert_eq!(s, "β is beta");
|
||||
///
|
||||
/// // A full range clears the string
|
||||
/// s.drain(..);
|
||||
/// assert_eq!(s, "");
|
||||
/// ```
|
||||
#[unstable(feature = "collections_drain",
|
||||
reason = "recently added, matches RFC")]
|
||||
pub fn drain<R>(&mut self, range: R) -> Drain where R: RangeArgument<usize> {
|
||||
// Memory safety
|
||||
//
|
||||
// The String version of Drain does not have the memory safety issues
|
||||
// of the vector version. The data is just plain bytes.
|
||||
// Because the range removal happens in Drop, if the Drain iterator is leaked,
|
||||
// the removal will not happen.
|
||||
let len = self.len();
|
||||
let start = *range.start().unwrap_or(&0);
|
||||
let end = *range.end().unwrap_or(&len);
|
||||
|
||||
// Take out two simultaneous borrows. The &mut String won't be accessed
|
||||
// until iteration is over, in Drop.
|
||||
let self_ptr = self as *mut _;
|
||||
// slicing does the appropriate bounds checks
|
||||
let chars_iter = self[start..end].chars();
|
||||
|
||||
Drain {
|
||||
start: start,
|
||||
end: end,
|
||||
iter: chars_iter,
|
||||
string: self_ptr,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromUtf8Error {
|
||||
|
@ -1075,3 +1129,55 @@ impl fmt::Write for String {
|
|||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A draining iterator for `String`.
|
||||
#[unstable(feature = "collections_drain", reason = "recently added")]
|
||||
pub struct Drain<'a> {
|
||||
/// Will be used as &'a mut String in the destructor
|
||||
string: *mut String,
|
||||
/// Start of part to remove
|
||||
start: usize,
|
||||
/// End of part to remove
|
||||
end: usize,
|
||||
/// Current remaining range to remove
|
||||
iter: Chars<'a>,
|
||||
}
|
||||
|
||||
unsafe impl<'a> Sync for Drain<'a> {}
|
||||
unsafe impl<'a> Send for Drain<'a> {}
|
||||
|
||||
#[unstable(feature = "collections_drain", reason = "recently added")]
|
||||
impl<'a> Drop for Drain<'a> {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
// Use Vec::drain. "Reaffirm" the bounds checks to avoid
|
||||
// panic code being inserted again.
|
||||
let self_vec = (*self.string).as_mut_vec();
|
||||
if self.start <= self.end && self.end <= self_vec.len() {
|
||||
self_vec.drain(self.start..self.end);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[unstable(feature = "collections_drain", reason = "recently added")]
|
||||
impl<'a> Iterator for Drain<'a> {
|
||||
type Item = char;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<char> {
|
||||
self.iter.next()
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.iter.size_hint()
|
||||
}
|
||||
}
|
||||
|
||||
#[unstable(feature = "collections_drain", reason = "recently added")]
|
||||
impl<'a> DoubleEndedIterator for Drain<'a> {
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<char> {
|
||||
self.iter.next_back()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -348,6 +348,23 @@ fn test_from_iterator() {
|
|||
assert_eq!(s, d);
|
||||
}
|
||||
|
||||
/// Exercises `String::drain`: multi-byte ranges, empty ranges at either end,
/// the full range, a partially consumed drain, and back-to-front iteration.
#[test]
fn test_drain() {
    // Each Greek letter occupies two bytes, so bytes 2..4 are exactly "β".
    let mut s = String::from("αβγ");
    assert_eq!(s.drain(2..4).collect::<String>(), "β");
    assert_eq!(s, "αγ");

    // The range is removed even when the iterator is dropped unconsumed.
    let mut t = String::from("abcd");
    t.drain(..0);
    assert_eq!(t, "abcd");
    t.drain(..1);
    assert_eq!(t, "bcd");
    t.drain(3..);
    assert_eq!(t, "bcd");
    t.drain(..);
    assert_eq!(t, "");

    // A partially consumed drain still removes the whole range on drop.
    let mut u = String::from("αβγδ");
    {
        let mut partial = u.drain(2..6);
        assert_eq!(partial.next(), Some('β'));
    }
    assert_eq!(u, "αδ");

    // Draining from the back yields the chars in reverse order.
    let mut v = String::from("abc");
    let reversed: String = v.drain(..).rev().collect();
    assert_eq!(reversed, "cba");
    assert_eq!(v, "");
}
|
||||
|
||||
#[bench]
|
||||
fn bench_with_capacity(b: &mut Bencher) {
|
||||
b.iter(|| {
|
||||
|
|
|
@ -543,7 +543,7 @@ impl CodeMap {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
|
||||
pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
|
||||
let mut files = self.files.borrow_mut();
|
||||
let start_pos = match files.last() {
|
||||
None => 0,
|
||||
|
@ -551,13 +551,9 @@ impl CodeMap {
|
|||
};
|
||||
|
||||
// Remove utf-8 BOM if any.
|
||||
// FIXME #12884: no efficient/safe way to remove from the start of a string
|
||||
// and reuse the allocation.
|
||||
let mut src = if src.starts_with("\u{feff}") {
|
||||
String::from(&src[3..])
|
||||
} else {
|
||||
String::from(&src[..])
|
||||
};
|
||||
if src.starts_with("\u{feff}") {
|
||||
src.drain(..3);
|
||||
}
|
||||
|
||||
// Append '\n' in case it's not already there.
|
||||
// This is a workaround to prevent CodeMap.lookup_filemap_idx from
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#![feature(associated_consts)]
|
||||
#![feature(collections)]
|
||||
#![feature(collections_drain)]
|
||||
#![feature(core)]
|
||||
#![feature(libc)]
|
||||
#![feature(rustc_private)]
|
||||
|
|
|
@ -21,6 +21,7 @@ use collections::{BitSet, BitVec};
|
|||
use collections::{BTreeMap, BTreeSet};
|
||||
use collections::EnumSet;
|
||||
use collections::LinkedList;
|
||||
use collections::String;
|
||||
use collections::Vec;
|
||||
use collections::VecDeque;
|
||||
use collections::VecMap;
|
||||
|
@ -99,4 +100,5 @@ fn main() {
|
|||
|
||||
all_sync_send!(Vec::<usize>::new(), into_iter);
|
||||
is_sync_send!(Vec::<usize>::new(), drain(..));
|
||||
is_sync_send!(String::new(), drain(..));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue