Auto merge of #25028 - bluss:drain-string, r=alexcrichton

collections: Implement String::drain(range) according to RFC 574

`.drain(range)` is unstable and under feature(collections_drain).

This adds a safe way to remove any range of a String as efficiently as
possible.

As noted in the code, this drain iterator has none of the memory safety
issues of the vector version.

RFC tracking issue is #23055
This commit is contained in:
bors 2015-05-02 04:35:33 +00:00
commit 700b4c160b
5 changed files with 131 additions and 9 deletions

View file

@ -26,7 +26,8 @@ use rustc_unicode::str as unicode_str;
use rustc_unicode::str::Utf16Item;
use borrow::{Cow, IntoCow};
use str::{self, FromStr, Utf8Error};
use range::RangeArgument;
use str::{self, FromStr, Utf8Error, Chars};
use vec::{DerefVec, Vec, as_vec};
/// A growable string stored as a UTF-8 encoded buffer.
@ -695,6 +696,59 @@ impl String {
pub fn clear(&mut self) {
self.vec.clear()
}
/// Creates a draining iterator over the given range of the string.
///
/// The iterator yields the removed chars in order, from the start of the
/// range to its end. The range is taken out of the string once the iterator
/// is dropped, whether or not it was consumed until the end.
///
/// # Panics
///
/// Panics if the starting point or end point are not on character boundaries,
/// or if they are out of bounds.
///
/// # Examples
///
/// ```
/// # #![feature(collections_drain)]
///
/// let mut s = String::from("α is alpha, β is beta");
/// let beta_offset = s.find('β').unwrap_or(s.len());
///
/// // Remove the range up until the β from the string
/// let t: String = s.drain(..beta_offset).collect();
/// assert_eq!(t, "α is alpha, ");
/// assert_eq!(s, "β is beta");
///
/// // A full range clears the string
/// s.drain(..);
/// assert_eq!(s, "");
/// ```
#[unstable(feature = "collections_drain",
           reason = "recently added, matches RFC")]
pub fn drain<R>(&mut self, range: R) -> Drain
    where R: RangeArgument<usize>
{
    // Memory safety
    //
    // Unlike the vector version of Drain, the String version has no memory
    // safety issues: the data is just plain bytes. The removal itself is
    // performed in Drop, so leaking the Drain iterator simply means the
    // range is never removed.

    // An absent bound defaults to the matching end of the string.
    let len = self.len();
    let start = range.start().map_or(0, |&s| s);
    let end = range.end().map_or(len, |&e| e);

    // Two simultaneous borrows are taken out here. The raw pointer must be
    // created first; the &mut String behind it is not accessed until
    // iteration is over, in Drop.
    let string_ptr = self as *mut _;

    Drain {
        string: string_ptr,
        start: start,
        end: end,
        // Slicing performs the bounds and char-boundary checks.
        iter: self[start..end].chars(),
    }
}
}
impl FromUtf8Error {
@ -1075,3 +1129,55 @@ impl fmt::Write for String {
Ok(())
}
}
/// A draining iterator for `String`.
///
/// Returned by `String::drain`. Iterating yields the `char`s of the selected
/// range; the range itself is removed from the string when this value is
/// dropped.
#[unstable(feature = "collections_drain", reason = "recently added")]
pub struct Drain<'a> {
/// Raw pointer to the string being drained.
/// Will be used as &'a mut String in the destructor
string: *mut String,
/// Start of part to remove (an index into the string's bytes)
start: usize,
/// End of part to remove (an index into the string's bytes)
end: usize,
/// Current remaining range to remove: the chars not yet yielded
iter: Chars<'a>,
}
// `Drain` stores a raw `*mut String`, and raw pointers are neither `Sync`
// nor `Send`, so both marker traits have to be implemented by hand.
// NOTE(review): presumably sound because the pointer only stands in for a
// `&'a mut String` that is dereferenced in `Drop` — confirm.
unsafe impl<'a> Sync for Drain<'a> {}
unsafe impl<'a> Send for Drain<'a> {}
#[unstable(feature = "collections_drain", reason = "recently added")]
impl<'a> Drop for Drain<'a> {
    /// Removes the bytes `start..end` from the underlying string.
    fn drop(&mut self) {
        let (lo, hi) = (self.start, self.end);

        // Reach the string's byte vector through the raw pointer stored
        // when the iterator was created.
        let bytes = unsafe { (*self.string).as_mut_vec() };

        // "Reaffirm" the bounds checks so that no panic code is inserted
        // again by the call to Vec::drain below.
        if lo > hi || hi > bytes.len() {
            return;
        }

        // The removal is delegated to Vec::drain; its iterator is dropped
        // immediately.
        bytes.drain(lo..hi);
    }
}
#[unstable(feature = "collections_drain", reason = "recently added")]
impl<'a> Iterator for Drain<'a> {
    type Item = char;

    /// Yields the next char of the drained range, front to back.
    #[inline]
    fn next(&mut self) -> Option<char> {
        let chars = &mut self.iter;
        chars.next()
    }

    /// Forwards the size hint of the wrapped `Chars` iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let chars = &self.iter;
        chars.size_hint()
    }
}
#[unstable(feature = "collections_drain", reason = "recently added")]
impl<'a> DoubleEndedIterator for Drain<'a> {
    /// Yields the next char of the drained range, taken from the back.
    #[inline]
    fn next_back(&mut self) -> Option<char> {
        let chars = &mut self.iter;
        chars.next_back()
    }
}

View file

@ -348,6 +348,23 @@ fn test_from_iterator() {
assert_eq!(s, d);
}
#[test]
fn test_drain() {
    // Draining from the middle of a string of multi-byte chars.
    let mut greek = String::from("αβγ");
    let removed = greek.drain(2..4).collect::<String>();
    assert_eq!(removed, "β");
    assert_eq!(greek, "αγ");

    let mut ascii = String::from("abcd");

    // Empty range at the front: nothing is removed.
    ascii.drain(..0);
    assert_eq!(ascii, "abcd");

    // Open start: the first byte is removed.
    ascii.drain(..1);
    assert_eq!(ascii, "bcd");

    // Open end starting at the string's length: nothing is removed.
    ascii.drain(3..);
    assert_eq!(ascii, "bcd");

    // Full range: the string is emptied.
    ascii.drain(..);
    assert_eq!(ascii, "");
}
#[bench]
fn bench_with_capacity(b: &mut Bencher) {
b.iter(|| {

View file

@ -543,7 +543,7 @@ impl CodeMap {
}
}
pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
let mut files = self.files.borrow_mut();
let start_pos = match files.last() {
None => 0,
@ -551,13 +551,9 @@ impl CodeMap {
};
// Remove utf-8 BOM if any.
// FIXME #12884: no efficient/safe way to remove from the start of a string
// and reuse the allocation.
let mut src = if src.starts_with("\u{feff}") {
String::from(&src[3..])
} else {
String::from(&src[..])
};
if src.starts_with("\u{feff}") {
src.drain(..3);
}
// Append '\n' in case it's not already there.
// This is a workaround to prevent CodeMap.lookup_filemap_idx from

View file

@ -27,6 +27,7 @@
#![feature(associated_consts)]
#![feature(collections)]
#![feature(collections_drain)]
#![feature(core)]
#![feature(libc)]
#![feature(rustc_private)]

View file

@ -21,6 +21,7 @@ use collections::{BitSet, BitVec};
use collections::{BTreeMap, BTreeSet};
use collections::EnumSet;
use collections::LinkedList;
use collections::String;
use collections::Vec;
use collections::VecDeque;
use collections::VecMap;
@ -99,4 +100,5 @@ fn main() {
all_sync_send!(Vec::<usize>::new(), into_iter);
is_sync_send!(Vec::<usize>::new(), drain(..));
is_sync_send!(String::new(), drain(..));
}