diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 72560e17250..be3f93992d9 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -168,17 +168,27 @@ impl ToOwned for str { } } -/// Any string that can be represented as a slice. +/// Methods for string slices. #[lang = "str"] #[cfg(not(test))] impl str { - /// Returns the length of `self` in bytes. + /// Returns the length of `self`. + /// + /// This length is in bytes, not [`char`]s or graphemes. In other words, + /// it may not be what a human considers the length of the string. + /// + /// [`char`]: primitive.char.html /// /// # Examples /// + /// Basic usage: + /// /// ``` - /// assert_eq!("foo".len(), 3); - /// assert_eq!("ƒoo".len(), 4); // fancy f! + /// let len = "foo".len(); + /// assert_eq!(3, len); + /// + /// let len = "ƒoo".len(); // fancy f! + /// assert_eq!(4, len); /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] @@ -190,8 +200,14 @@ impl str { /// /// # Examples /// + /// Basic usage: + /// /// ``` - /// assert!("".is_empty()); + /// let s = ""; + /// assert!(s.is_empty()); + /// + /// let s = "not empty"; + /// assert!(!s.is_empty()); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -236,12 +252,15 @@ impl str { core_str::StrExt::is_char_boundary(self, index) } - /// Converts `self` to a byte slice. + /// Converts a string slice to a byte slice. /// /// # Examples /// + /// Basic usage: + /// /// ``` - /// assert_eq!("bors".as_bytes(), b"bors"); + /// let bytes = "bors".as_bytes(); + /// assert_eq!(b"bors", bytes); /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline(always)] @@ -249,17 +268,19 @@ impl str { core_str::StrExt::as_bytes(self) } - /// Returns a raw pointer to the `&str`'s buffer. + /// Converts a string slice to a raw pointer. /// - /// The caller must ensure that the string outlives this pointer, and - /// that it is not - /// reallocated (e.g. by pushing to the string). 
+ /// As string slices are a slice of bytes, the raw pointer points to a + /// `u8`. This pointer will be pointing to the first byte of the string + /// slice. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = "Hello"; - /// let p = s.as_ptr(); + /// let ptr = s.as_ptr(); /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] @@ -267,22 +288,41 @@ impl str { core_str::StrExt::as_ptr(self) } - /// Takes a bytewise slice from a string. + /// Creates a string slice from another string slice, bypassing safety + /// checks. /// - /// Returns the substring from [`begin`..`end`). + /// This new slice goes from `begin` to `end`, including `begin` but + /// excluding `end`. + /// + /// To get a mutable string slice instead, see the + /// [`slice_mut_unchecked()`] method. + /// + /// [`slice_mut_unchecked()`]: #method.slice_mut_unchecked /// /// # Safety /// - /// Caller must check both UTF-8 sequence boundaries and the boundaries - /// of the entire slice as well. + /// Callers of this function are responsible that three preconditions are + /// satisfied: + /// + /// * `begin` must come before `end`. + /// * `begin` and `end` must be byte positions within the string slice. + /// * `begin` and `end` must lie on UTF-8 sequence boundaries. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = "Löwe 老虎 Léopard"; /// /// unsafe { - /// assert_eq!(s.slice_unchecked(0, 21), "Löwe 老虎 Léopard"); + /// assert_eq!("Löwe 老虎 Léopard", s.slice_unchecked(0, 21)); + /// } + /// + /// let s = "Hello, world!"; + /// + /// unsafe { + /// assert_eq!("world", s.slice_unchecked(7, 12)); /// } /// ``` #[stable(feature = "rust1", since = "1.0.0")] @@ -291,18 +331,32 @@ impl str { core_str::StrExt::slice_unchecked(self, begin, end) } - /// Takes a bytewise mutable slice from a string. + /// Creates a string slice from another string slice, bypassing safety + /// checks. /// - /// Same as `slice_unchecked`, but works with `&mut str` instead of `&str`.
+ /// This new slice goes from `begin` to `end`, including `begin` but + /// excluding `end`. + /// + /// To get an immutable string slice instead, see the + /// [`slice_unchecked()`] method. + /// + /// [`slice_unchecked()`]: #method.slice_unchecked + /// + /// # Safety + /// + /// Callers of this function are responsible that three preconditions are + /// satisfied: + /// + /// * `begin` must come before `end`. + /// * `begin` and `end` must be byte positions within the string slice. + /// * `begin` and `end` must lie on UTF-8 sequence boundaries. #[stable(feature = "str_slice_mut", since = "1.5.0")] #[inline] pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str { core_str::StrExt::slice_mut_unchecked(self, begin, end) } - /// Given a byte position, return the next code point and its index. - /// - /// This can be used to iterate over the Unicode code points of a string. + /// Given a byte position, returns the next `char` and its index. /// /// # Panics /// @@ -355,9 +409,7 @@ impl str { core_str::StrExt::char_range_at(self, start) } - /// Given a byte position, return the previous `char` and its position. - /// - /// This function can be used to iterate over a Unicode code points in reverse. + /// Given a byte position, returns the previous `char` and its position. /// /// Note that Unicode has many features, such as combining marks, ligatures, /// and direction marks, that need to be taken into account to correctly reverse a string. @@ -415,7 +467,7 @@ impl str { core_str::StrExt::char_range_at_reverse(self, start) } - /// Given a byte position, return the `char` at that position. + /// Given a byte position, returns the `char` at that position. /// /// # Panics /// @@ -444,7 +496,7 @@ impl str { core_str::StrExt::char_at(self, i) } - /// Given a byte position, return the `char` at that position, counting + /// Given a byte position, returns the `char` at that position, counting /// from the end.
/// /// # Panics @@ -472,7 +524,7 @@ impl str { core_str::StrExt::char_at_reverse(self, i) } - /// Retrieves the first code point from a `&str` and returns it. + /// Retrieves the first `char` from a `&str` and returns it. /// /// Note that a single Unicode character (grapheme cluster) /// can be composed of multiple `char`s. @@ -510,24 +562,33 @@ impl str { /// Divide one string slice into two at an index. /// - /// The index `mid` is a byte offset from the start of the string - /// that must be on a `char` boundary. + /// The argument, `mid`, should be a byte offset from the start of the + /// string. It must also be on the boundary of a UTF-8 code point. /// - /// Return slices `&self[..mid]` and `&self[mid..]`. + /// The two slices returned go from the start of the string slice to `mid`, + /// and from `mid` to the end of the string slice. + /// + /// To get mutable string slices instead, see the [`split_at_mut()`] + /// method. + /// + /// [`split_at_mut()`]: #method.split_at_mut /// /// # Panics /// - /// Panics if `mid` is beyond the last code point of the string, - /// or if it is not on a `char` boundary. + /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is + /// beyond the last code point of the string slice. /// /// # Examples - /// ``` - /// let s = "Löwe 老虎 Léopard"; - /// let first_space = s.find(' ').unwrap_or(s.len()); - /// let (a, b) = s.split_at(first_space); /// - /// assert_eq!(a, "Löwe"); - /// assert_eq!(b, " 老虎 Léopard"); + /// Basic usage: + /// + /// ``` + /// let s = "Per Martin-Löf"; + /// + /// let (first, last) = s.split_at(3); + /// + /// assert_eq!("Per", first); + /// assert_eq!(" Martin-Löf", last); /// ``` #[inline] #[stable(feature = "str_split_at", since = "1.4.0")] @@ -536,48 +597,135 @@ impl str { } /// Divide one mutable string slice into two at an index. + /// + /// The argument, `mid`, should be a byte offset from the start of the + /// string. It must also be on the boundary of a UTF-8 code point. 
+ /// + /// The two slices returned go from the start of the string slice to `mid`, + /// and from `mid` to the end of the string slice. + /// + /// To get immutable string slices instead, see the [`split_at()`] method. + /// + /// [`split_at()`]: #method.split_at + /// + /// # Panics + /// + /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is + /// beyond the last code point of the string slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = "Per Martin-Löf".to_string(); + /// + /// let (first, last) = s.split_at_mut(3); + /// + /// assert_eq!("Per", first); + /// assert_eq!(" Martin-Löf", last); + /// ``` #[inline] #[stable(feature = "str_split_at", since = "1.4.0")] pub fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) { core_str::StrExt::split_at_mut(self, mid) } - /// An iterator over the code points of `self`. + /// Returns an iterator over the `char`s of a string slice. /// - /// In Unicode relationship between code points and characters is complex. - /// A single character may be composed of multiple code points - /// (e.g. diacritical marks added to a letter), and a single code point - /// (e.g. Hangul syllable) may contain multiple characters. + /// As a string slice consists of valid UTF-8, we can iterate through a + /// string slice by [`char`]. This method returns such an iterator. /// - /// For iteration over human-readable characters a grapheme cluster iterator - /// may be more appropriate. See the [unicode-segmentation crate][1]. + /// It's important to remember that [`char`] represents a Unicode Scalar + /// Value, and may not match your idea of what a 'character' is. Iteration + /// over grapheme clusters may be what you actually want.
/// - /// [1]: https://crates.io/crates/unicode-segmentation + /// [`char`]: ../primitive.char.html /// /// # Examples /// - /// ``` - /// let v: Vec<char> = "ASCII żółć 🇨🇭 한".chars().collect(); + /// Basic usage: /// - /// assert_eq!(v, ['A', 'S', 'C', 'I', 'I', ' ', - /// 'z', '\u{307}', 'o', '\u{301}', 'ł', 'c', '\u{301}', ' ', - /// '\u{1f1e8}', '\u{1f1ed}', ' ', '한']); + /// ``` + /// let word = "goodbye"; + /// + /// let count = word.chars().count(); + /// assert_eq!(7, count); + /// + /// let mut chars = word.chars(); + /// + /// assert_eq!(Some('g'), chars.next()); + /// assert_eq!(Some('o'), chars.next()); + /// assert_eq!(Some('o'), chars.next()); + /// assert_eq!(Some('d'), chars.next()); + /// assert_eq!(Some('b'), chars.next()); + /// assert_eq!(Some('y'), chars.next()); + /// assert_eq!(Some('e'), chars.next()); + /// + /// assert_eq!(None, chars.next()); + /// ``` + /// + /// Remember, `char`s may not match your human intuition about characters: + /// + /// ``` + /// let y = "y̆"; + /// + /// let mut chars = y.chars(); + /// + /// assert_eq!(Some('y'), chars.next()); // not 'y̆' + /// assert_eq!(Some('\u{0306}'), chars.next()); + /// + /// assert_eq!(None, chars.next()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn chars(&self) -> Chars { core_str::StrExt::chars(self) } - - /// An iterator over the `char`s of `self` and their byte offsets. + /// Returns an iterator over the `char`s of a string slice, and their + /// positions. + /// + /// As a string slice consists of valid UTF-8, we can iterate through a + /// string slice by `char`. This method returns an iterator of both + /// these `char`s, as well as their byte positions. + /// + /// The iterator yields tuples. The position is first, the `char` is + /// second.
/// /// # Examples /// - /// ``` - /// let v: Vec<(usize, char)> = "A🇨🇭".char_indices().collect(); - /// let b = vec![(0, 'A'), (1, '\u{1f1e8}'), (5, '\u{1f1ed}')]; + /// Basic usage: /// - /// assert_eq!(v, b); + /// ``` + /// let word = "goodbye"; + /// + /// let count = word.char_indices().count(); + /// assert_eq!(7, count); + /// + /// let mut char_indices = word.char_indices(); + /// + /// assert_eq!(Some((0, 'g')), char_indices.next()); + /// assert_eq!(Some((1, 'o')), char_indices.next()); + /// assert_eq!(Some((2, 'o')), char_indices.next()); + /// assert_eq!(Some((3, 'd')), char_indices.next()); + /// assert_eq!(Some((4, 'b')), char_indices.next()); + /// assert_eq!(Some((5, 'y')), char_indices.next()); + /// assert_eq!(Some((6, 'e')), char_indices.next()); + /// + /// assert_eq!(None, char_indices.next()); + /// ``` + /// + /// Remember, `char`s may not match your human intuition about characters: + /// + /// ``` + /// let y = "y̆"; + /// + /// let mut char_indices = y.char_indices(); + /// + /// assert_eq!(Some((0, 'y')), char_indices.next()); // not (0, 'y̆') + /// assert_eq!(Some((1, '\u{0306}')), char_indices.next()); + /// + /// assert_eq!(None, char_indices.next()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] @@ -585,14 +733,24 @@ impl str { core_str::StrExt::char_indices(self) } - /// An iterator over the bytes of `self`. + /// An iterator over the bytes of a string slice. + /// + /// As a string slice consists of a sequence of bytes, we can iterate + /// through a string slice by byte. This method returns such an iterator. 
/// /// # Examples /// - /// ``` - /// let v: Vec<u8> = "bors".bytes().collect(); + /// Basic usage: /// - /// assert_eq!(v, b"bors".to_vec()); + /// ``` + /// let mut bytes = "bors".bytes(); + /// + /// assert_eq!(Some(b'b'), bytes.next()); + /// assert_eq!(Some(b'o'), bytes.next()); + /// assert_eq!(Some(b'r'), bytes.next()); + /// assert_eq!(Some(b's'), bytes.next()); + /// + /// assert_eq!(None, bytes.next()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] @@ -600,16 +758,39 @@ impl str { core_str::StrExt::bytes(self) } - /// An iterator over the non-empty substrings of `self` which contain no whitespace, - /// and which are separated by any amount of whitespace. + /// Split a string slice by whitespace. + /// + /// The iterator returned will return string slices that are sub-slices of + /// the original string slice, separated by any amount of whitespace. + /// + /// 'Whitespace' is defined according to the terms of the Unicode Derived + /// Core Property `White_Space`.
/// /// # Examples /// - /// ``` - /// let some_words = " Mary had\ta\u{2009}little \n\t lamb"; - /// let v: Vec<&str> = some_words.split_whitespace().collect(); + /// Basic usage: /// - /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]); + /// ``` + /// let mut iter = "A few words".split_whitespace(); + /// + /// assert_eq!(Some("A"), iter.next()); + /// assert_eq!(Some("few"), iter.next()); + /// assert_eq!(Some("words"), iter.next()); + /// + /// assert_eq!(None, iter.next()); + /// ``` + /// + /// All kinds of whitespace are considered: + /// + /// ``` + /// let mut iter = " Mary had\ta\u{2009}little \n\t lamb".split_whitespace(); + /// assert_eq!(Some("Mary"), iter.next()); + /// assert_eq!(Some("had"), iter.next()); + /// assert_eq!(Some("a"), iter.next()); + /// assert_eq!(Some("little"), iter.next()); + /// assert_eq!(Some("lamb"), iter.next()); + /// + /// assert_eq!(None, iter.next()); /// ``` #[stable(feature = "split_whitespace", since = "1.1.0")] #[inline] @@ -617,26 +798,41 @@ impl str { UnicodeStr::split_whitespace(self) } - /// An iterator over the lines of a string, separated by `\n` or `\r\n`. + /// An iterator over the lines of a string, as string slices. /// - /// This does not include the empty string after a trailing newline or CRLF. + /// Lines are ended with either a newline (`\n`) or a carriage return with + /// a line feed (`\r\n`). + /// + /// The final line ending is optional. 
/// /// # Examples /// - /// ``` - /// let four_lines = "foo\nbar\n\r\nbaz"; - /// let v: Vec<&str> = four_lines.lines().collect(); - /// - /// assert_eq!(v, ["foo", "bar", "", "baz"]); - /// ``` - /// - /// Leaving off the trailing character: + /// Basic usage: /// /// ``` - /// let four_lines = "foo\r\nbar\n\nbaz\n"; - /// let v: Vec<&str> = four_lines.lines().collect(); + /// let text = "foo\r\nbar\n\nbaz\n"; + /// let mut lines = text.lines(); /// - /// assert_eq!(v, ["foo", "bar", "", "baz"]); + /// assert_eq!(Some("foo"), lines.next()); + /// assert_eq!(Some("bar"), lines.next()); + /// assert_eq!(Some(""), lines.next()); + /// assert_eq!(Some("baz"), lines.next()); + /// + /// assert_eq!(None, lines.next()); + /// ``` + /// + /// The final line ending isn't required: + /// + /// ``` + /// let text = "foo\nbar\n\r\nbaz"; + /// let mut lines = text.lines(); + /// + /// assert_eq!(Some("foo"), lines.next()); + /// assert_eq!(Some("bar"), lines.next()); + /// assert_eq!(Some(""), lines.next()); + /// assert_eq!(Some("baz"), lines.next()); + /// + /// assert_eq!(None, lines.next()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] #[inline] @@ -644,30 +840,7 @@ impl str { core_str::StrExt::lines(self) } - /// An iterator over the lines of a string, separated by either - /// `\n` or `\r\n`. - /// - /// As with `.lines()`, this does not include an empty trailing line. - /// - /// # Examples - /// - /// ``` - /// # #![allow(deprecated)] - /// let four_lines = "foo\r\nbar\n\r\nbaz"; - /// let v: Vec<&str> = four_lines.lines_any().collect(); - /// - /// assert_eq!(v, ["foo", "bar", "", "baz"]); - /// ``` - /// - /// Leaving off the trailing character: - /// - /// ``` - /// # #![allow(deprecated)] - /// let four_lines = "foo\r\nbar\n\r\nbaz\n"; - /// let v: Vec<&str> = four_lines.lines_any().collect(); - /// - /// assert_eq!(v, ["foo", "bar", "", "baz"]); - /// ``` + /// An iterator over the lines of a string. 
#[stable(feature = "rust1", since = "1.0.0")] #[rustc_deprecated(since = "1.4.0", reason = "use lines() instead now")] #[inline] @@ -684,38 +857,57 @@ impl str { Utf16Units { encoder: Utf16Encoder::new(self[..].chars()) } } - /// Returns `true` if `self` contains another `&str`. + /// Returns `true` if the given `&str` is a sub-slice of this string slice. + /// + /// Returns `false` if it's not. /// /// # Examples /// - /// ``` - /// assert!("bananas".contains("nana")); + /// Basic usage: /// - /// assert!(!"bananas".contains("foobar")); + /// ``` + /// let bananas = "bananas"; + /// + /// assert!(bananas.contains("nana")); + /// assert!(!bananas.contains("apples")); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { core_str::StrExt::contains(self, pat) } - /// Returns `true` if the given `&str` is a prefix of the string. + /// Returns `true` if the given `&str` is a prefix of this string slice. + /// + /// Returns `false` if it's not. /// /// # Examples /// + /// Basic usage: + /// /// ``` - /// assert!("banana".starts_with("ba")); + /// let bananas = "bananas"; + /// + /// assert!(bananas.starts_with("bana")); + /// assert!(!bananas.starts_with("nana")); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool { core_str::StrExt::starts_with(self, pat) } - /// Returns true if the given `&str` is a suffix of the string. + /// Returns `true` if the given `&str` is a suffix of this string slice. + /// + /// Returns `false` if not. 
/// /// # Examples /// + /// Basic usage: + /// /// ```rust - /// assert!("banana".ends_with("nana")); + /// let bananas = "bananas"; + /// + /// assert!(bananas.ends_with("anas")); + /// assert!(!bananas.ends_with("nana")); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool @@ -724,14 +916,15 @@ impl str { core_str::StrExt::ends_with(self, pat) } - /// Returns the byte index of the first character of `self` that matches - /// the pattern, if it - /// exists. + /// Returns the byte index of the first character of this string slice that + /// matches the pattern. /// - /// Returns `None` if it doesn't exist. + /// Returns `None` if the pattern doesn't match. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines if a character matches. + /// The pattern can be a `&str`, [`char`], or a closure that determines if + /// a character matches. + /// + /// [`char`]: primitive.char.html /// /// # Examples /// @@ -743,7 +936,6 @@ impl str { /// assert_eq!(s.find('L'), Some(0)); /// assert_eq!(s.find('é'), Some(14)); /// assert_eq!(s.find("Léopard"), Some(13)); - /// /// ``` /// /// More complex patterns with closures: @@ -768,14 +960,15 @@ impl str { core_str::StrExt::find(self, pat) } - /// Returns the byte index of the last character of `self` that - /// matches the pattern, if it - /// exists. + /// Returns the byte index of the last character of this string slice that + /// matches the pattern. /// - /// Returns `None` if it doesn't exist. + /// Returns `None` if the pattern doesn't match. /// - /// The pattern can be a simple `&str`, `char`, - /// or a closure that determines if a character matches. + /// The pattern can be a `&str`, [`char`], or a closure that determines if + /// a character matches. 
+ /// + /// [`char`]: primitive.char.html /// /// # Examples /// @@ -812,22 +1005,25 @@ impl str { core_str::StrExt::rfind(self, pat) } - /// An iterator over substrings of `self`, separated by characters - /// matched by a pattern. + /// An iterator over substrings of this string slice, separated by + /// characters matched by a pattern. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. Additional libraries might provide more complex - /// patterns like regular expressions. + /// The pattern can be a `&str`, [`char`], or a closure that determines the + /// split. /// /// # Iterator behavior /// - /// The returned iterator will be double ended if the pattern allows a - /// reverse search and forward/reverse search yields the same elements. - /// This is true for, eg, `char` but not - /// for `&str`. + /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern + /// allows a reverse search and forward/reverse search yields the same + /// elements. This is true for, eg, [`char`] but not for `&str`. + /// + /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html /// /// If the pattern allows a reverse search but its results might differ - /// from a forward search, `rsplit()` can be used. + /// from a forward search, the [`rsplit()`] method can be used. + /// + /// [`char`]: primitive.char.html + /// [`rsplit()`]: #method.rsplit /// /// # Examples /// @@ -886,36 +1082,39 @@ impl str { /// assert_eq!(d, &["a", "b", "c"]); /// ``` /// - /// Use [`.split_whitespace()`][split_whitespace] for this behavior. + /// Use [`split_whitespace()`] for this behavior. 
/// - /// [split_whitespace]: #method.split_whitespace + /// [`split_whitespace()`]: #method.split_whitespace #[stable(feature = "rust1", since = "1.0.0")] pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> { core_str::StrExt::split(self, pat) } - /// An iterator over substrings of `self`, separated by characters - /// matched by a pattern and yielded in reverse order. + /// An iterator over substrings of the given string slice, separated by + /// characters matched by a pattern and yielded in reverse order. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// The pattern can be a `&str`, [`char`], or a closure that determines the + /// split. + /// + /// [`char`]: primitive.char.html /// /// # Iterator behavior /// - /// The returned iterator requires that the pattern supports a - /// reverse search, - /// and it will be double ended if a forward/reverse search yields - /// the same elements. + /// The returned iterator requires that the pattern supports a reverse + /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse + /// search yields the same elements. /// - /// For iterating from the front, `split()` can be used. + /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html + /// + /// For iterating from the front, the [`split()`] method can be used. + /// + /// [`split()`]: #method.split /// /// # Examples /// /// Simple patterns: /// - /// ```rust + /// ``` /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect(); /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]); /// @@ -942,32 +1141,38 @@ impl str { core_str::StrExt::rsplit(self, pat) } - /// An iterator over substrings of `self`, separated by characters - /// matched by a pattern. + /// An iterator over substrings of the given string slice, separated by + /// characters matched by a pattern. 
/// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns - /// like regular expressions. + /// The pattern can be a `&str`, [`char`], or a closure that determines the + /// split. /// - /// Equivalent to `split`, except that the trailing substring + /// Equivalent to [`split()`], except that the trailing substring /// is skipped if empty. /// + /// [`split()`]: #method.split + /// /// This method can be used for string data that is _terminated_, /// rather than _separated_ by a pattern. /// /// # Iterator behavior /// - /// The returned iterator will be double ended if the pattern allows a - /// reverse search - /// and forward/reverse search yields the same elements. This is true - /// for, eg, `char` but not for `&str`. + /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern + /// allows a reverse search and forward/reverse search yields the same + /// elements. This is true for, eg, [`char`] but not for `&str`. + /// + /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html + /// [`char`]: primitive.char.html /// /// If the pattern allows a reverse search but its results might differ - /// from a forward search, `rsplit_terminator()` can be used. + /// from a forward search, the [`rsplit_terminator()`] method can be used. + /// + /// [`rsplit_terminator()`]: #method.rsplit_terminator /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let v: Vec<&str> = "A.B.".split_terminator('.').collect(); /// assert_eq!(v, ["A", "B"]); @@ -1000,7 +1205,10 @@ impl str { /// reverse search, and it will be double ended if a forward/reverse /// search yields the same elements. /// - /// For iterating from the front, `split_terminator()` can be used. + /// For iterating from the front, the [`split_terminator()`] method can be + /// used. 
+ /// + /// [`split_terminator()`]: #method.split_terminator /// /// # Examples /// @@ -1018,23 +1226,26 @@ impl str { core_str::StrExt::rsplit_terminator(self, pat) } - /// An iterator over substrings of `self`, separated by a pattern, - /// restricted to returning - /// at most `count` items. + /// An iterator over substrings of the given string slice, separated by a + /// pattern, restricted to returning at most `count` items. /// /// The last element returned, if any, will contain the remainder of the - /// string. - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// string slice. + /// + /// The pattern can be a `&str`, [`char`], or a closure that determines the + /// split. + /// + /// [`char`]: primitive.char.html /// /// # Iterator behavior /// /// The returned iterator will not be double ended, because it is /// not efficient to support. /// - /// If the pattern allows a reverse search, `rsplitn()` can be used. + /// If the pattern allows a reverse search, the [`rsplitn()`] method can be + /// used. + /// + /// [`rsplitn()`]: #method.rsplitn /// /// # Examples /// @@ -1065,24 +1276,26 @@ impl str { core_str::StrExt::splitn(self, count, pat) } - /// An iterator over substrings of `self`, separated by a pattern, - /// starting from the end of the string, restricted to returning + /// An iterator over substrings of this string slice, separated by a + /// pattern, starting from the end of the string, restricted to returning /// at most `count` items. /// /// The last element returned, if any, will contain the remainder of the - /// string. + /// string slice. /// - /// The pattern can be a simple `&str`, `char`, or a closure that + /// The pattern can be a `&str`, [`char`], or a closure that /// determines the split. - /// Additional libraries might provide more complex patterns like - /// regular expressions. 
+ /// + /// [`char`]: primitive.char.html /// /// # Iterator behavior /// /// The returned iterator will not be double ended, because it is not /// efficient to support. /// - /// `splitn()` can be used for splitting from the front. + /// For splitting from the front, the [`splitn()`] method can be used. + /// + /// [`splitn()`]: #method.splitn /// /// # Examples /// @@ -1112,26 +1325,32 @@ impl str { core_str::StrExt::rsplitn(self, count, pat) } - /// An iterator over the matches of a pattern within `self`. + /// An iterator over the matches of a pattern within the given string + /// slice. /// - /// The pattern can be a simple `&str`, `char`, or a closure that + /// The pattern can be a `&str`, [`char`], or a closure that /// determines if a character matches. - /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// + /// [`char`]: primitive.char.html /// /// # Iterator behavior /// - /// The returned iterator will be double ended if the pattern allows - /// a reverse search - /// and forward/reverse search yields the same elements. This is true - /// for, eg, `char` but not - /// for `&str`. + /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern + /// allows a reverse search and forward/reverse search yields the same + /// elements. This is true for, eg, [`char`] but not for `&str`. + /// + /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html + /// [`char`]: primitive.char.html /// /// If the pattern allows a reverse search but its results might differ - /// from a forward search, `rmatches()` can be used. + /// from a forward search, the [`rmatches()`] method can be used. 
+ /// + /// [`rmatches()`]: #method.rmatches /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect(); /// assert_eq!(v, ["abc", "abc", "abc"]); @@ -1144,25 +1363,30 @@ impl str { core_str::StrExt::matches(self, pat) } - /// An iterator over the matches of a pattern within `self`, yielded in - /// reverse order. + /// An iterator over the matches of a pattern within this string slice, + /// yielded in reverse order. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines if a character matches. - /// Additional libraries might provide more complex patterns like - /// regular expressions. + /// The pattern can be a `&str`, [`char`], or a closure that determines if + /// a character matches. + /// + /// [`char`]: primitive.char.html /// /// # Iterator behavior /// - /// The returned iterator requires that the pattern supports a - /// reverse search, - /// and it will be double ended if a forward/reverse search yields - /// the same elements. + /// The returned iterator requires that the pattern supports a reverse + /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse + /// search yields the same elements. /// - /// For iterating from the front, `matches()` can be used. + /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html + /// + /// For iterating from the front, the [`matches()`] method can be used. + /// + /// [`matches()`]: #method.matches /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect(); /// assert_eq!(v, ["abc", "abc", "abc"]); @@ -1177,27 +1401,34 @@ impl str { core_str::StrExt::rmatches(self, pat) } - /// An iterator over the disjoint matches of a pattern within `self` as well - /// as the index that the match starts at. + /// An iterator over the disjoint matches of a pattern within this string + /// slice as well as the index that the match starts at.
/// /// For matches of `pat` within `self` that overlap, only the indices /// corresponding to the first match are returned. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// if a character matches. Additional libraries might provide more complex - /// patterns like regular expressions. + /// The pattern can be a `&str`, [`char`], or a closure that determines + /// if a character matches. + /// + /// [`char`]: primitive.char.html /// /// # Iterator behavior /// - /// The returned iterator will be double ended if the pattern allows a - /// reverse search and forward/reverse search yields the same elements. This - /// is true for, eg, `char` but not for `&str`. + /// The returned iterator will be a [`DoubleEndedIterator`] if the pattern + /// allows a reverse search and forward/reverse search yields the same + /// elements. This is true for, eg, [`char`] but not for `&str`. + /// + /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html /// /// If the pattern allows a reverse search but its results might differ - /// from a forward search, `rmatch_indices()` can be used. + /// from a forward search, the [`rmatch_indices()`] method can be used. + /// + /// [`rmatch_indices()`]: #method.rmatch_indices /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let v: Vec<_> = "abcXXXabcYYYabc".match_indices("abc").collect(); /// assert_eq!(v, [(0, "abc"), (6, "abc"), (12, "abc")]); @@ -1219,20 +1450,27 @@ impl str { /// For matches of `pat` within `self` that overlap, only the indices /// corresponding to the last match are returned. /// - /// The pattern can be a simple `&str`, `char`, or a closure that determines - /// if a character matches. Additional libraries might provide more complex - /// patterns like regular expressions. + /// The pattern can be a `&str`, [`char`], or a closure that determines if a + /// character matches. 
+ /// + /// [`char`]: primitive.char.html /// /// # Iterator behavior /// /// The returned iterator requires that the pattern supports a reverse - /// search, and it will be double ended if a forward/reverse search yields - /// the same elements. + /// search, and it will be a [`DoubleEndedIterator`] if a forward/reverse + /// search yields the same elements. /// - /// For iterating from the front, `match_indices()` can be used. + /// [`DoubleEndedIterator`]: iter/trait.DoubleEndedIterator.html + /// + /// For iterating from the front, the [`match_indices()`] method can be used. + /// + /// [`match_indices()`]: #method.match_indices /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let v: Vec<_> = "abcXXXabcYYYabc".rmatch_indices("abc").collect(); /// assert_eq!(v, [(12, "abc"), (6, "abc"), (0, "abc")]); @@ -1250,51 +1488,71 @@ impl str { core_str::StrExt::rmatch_indices(self, pat) } - /// Returns a `&str` with leading and trailing whitespace removed. + /// Returns a string slice with leading and trailing whitespace removed. + /// + /// 'Whitespace' is defined according to the terms of the Unicode Derived + /// Core Property `White_Space`. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = " Hello\tworld\t"; - /// assert_eq!(s.trim(), "Hello\tworld"); + /// + /// assert_eq!("Hello\tworld", s.trim()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn trim(&self) -> &str { UnicodeStr::trim(self) } - /// Returns a `&str` with leading whitespace removed. + /// Returns a string slice with leading whitespace removed. + /// + /// 'Whitespace' is defined according to the terms of the Unicode Derived + /// Core Property `White_Space`.
/// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = " Hello\tworld\t"; - /// assert_eq!(s.trim_left(), "Hello\tworld\t"); + /// + /// assert_eq!("Hello\tworld\t", s.trim_left()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn trim_left(&self) -> &str { UnicodeStr::trim_left(self) } - /// Returns a `&str` with trailing whitespace removed. + /// Returns a string slice with trailing whitespace removed. + /// + /// 'Whitespace' is defined according to the terms of the Unicode Derived + /// Core Property `White_Space`. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = " Hello\tworld\t"; - /// assert_eq!(s.trim_right(), " Hello\tworld"); + /// + /// assert_eq!(" Hello\tworld", s.trim_right()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn trim_right(&self) -> &str { UnicodeStr::trim_right(self) } - /// Returns a string with all pre- and suffixes that match a pattern - /// repeatedly removed. + /// Returns a string slice with all prefixes and suffixes that match a + /// pattern repeatedly removed. /// - /// The pattern can be a simple `char`, or a closure that determines + /// The pattern can be a `&str`, [`char`], or a closure that determines /// if a character matches. /// + /// [`char`]: primitive.char.html + /// /// # Examples /// /// Simple patterns: @@ -1319,14 +1577,18 @@ impl str { core_str::StrExt::trim_matches(self, pat) } - /// Returns a string with all prefixes that match a pattern + /// Returns a string slice with all prefixes that match a pattern /// repeatedly removed. /// - /// The pattern can be a simple `&str`, `char`, or a closure that - /// determines if a character matches. + /// The pattern can be a `&str`, [`char`], or a closure that determines if + /// a character matches.
+ /// + /// [`char`]: primitive.char.html /// /// # Examples /// + /// Basic usage: + /// /// ``` /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11"); /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123"); @@ -1339,12 +1601,14 @@ impl str { core_str::StrExt::trim_left_matches(self, pat) } - /// Returns a string with all suffixes that match a pattern + /// Returns a string slice with all suffixes that match a pattern /// repeatedly removed. /// - /// The pattern can be a simple `&str`, `char`, or a closure that + /// The pattern can be a `&str`, [`char`], or a closure that /// determines if a character matches. /// + /// [`char`]: primitive.char.html + /// /// # Examples /// /// Simple patterns: @@ -1369,22 +1633,47 @@ impl str { core_str::StrExt::trim_right_matches(self, pat) } - /// Parses `self` into the specified type. + /// Parses this string slice into another type. + /// + /// Because `parse()` is so general, it can cause problems with type + /// inference. As such, `parse()` is one of the few times you'll see + /// the syntax affectionately known as the 'turbofish': `::<>`. This + /// helps the inference algorithm understand specifically which type + /// you're trying to parse into. + /// + /// `parse()` can parse any type that implements the [`FromStr`] trait. + /// + /// [`FromStr`]: trait.FromStr.html /// /// # Failure /// - /// Will return `Err` if it's not possible to parse `self` into the type. + /// Will return `Err` if it's not possible to parse this string slice into + /// the desired type. 
/// /// # Example /// - /// ``` - /// assert_eq!("4".parse::<u32>(), Ok(4)); - /// ``` - /// - /// Failing: + /// Basic usage: /// /// ``` - /// assert!("j".parse::<u32>().is_err()); + /// let four: u32 = "4".parse().unwrap(); + /// + /// assert_eq!(4, four); + /// ``` + /// + /// Using the 'turbofish' instead of annotating `four`: + /// + /// ``` + /// let four = "4".parse::<u32>(); + /// + /// assert_eq!(Ok(4), four); + /// ``` + /// + /// Failing to parse: + /// + /// ``` + /// let nope = "j".parse::<u32>(); + /// + /// assert!(nope.is_err()); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] @@ -1394,23 +1683,27 @@ impl str { /// Replaces all occurrences of one string with another. /// - /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a - /// second `&str` to - /// replace it with. If the original `&str` isn't found, no change occurs. + /// `replace` creates a new [`String`], and copies the data from this string slice into it. + /// While doing so, it attempts to find a sub-`&str`. If it finds it, it replaces it with + /// the replacement string slice. + /// + /// [`String`]: string/struct.String.html /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = "this is old"; /// - /// assert_eq!(s.replace("old", "new"), "this is new"); + /// assert_eq!("this is new", s.replace("old", "new")); /// ``` /// /// When a `&str` isn't found: /// /// ``` /// let s = "this is old"; - /// assert_eq!(s.replace("cookie monster", "little lamb"), s); + /// assert_eq!(s, s.replace("cookie monster", "little lamb")); /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn replace(&self, from: &str, to: &str) -> String { @@ -1425,13 +1718,40 @@ impl str { result } - /// Returns the lowercase equivalent of this string. + /// Returns the lowercase equivalent of this string slice, as a new `String`. + /// + /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property + /// `Lowercase`.
/// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = "HELLO"; - /// assert_eq!(s.to_lowercase(), "hello"); + /// + /// assert_eq!("hello", s.to_lowercase()); + /// ``` + /// + /// A tricky example, with sigma: + /// + /// ``` + /// let sigma = "Σ"; + /// + /// assert_eq!("σ", sigma.to_lowercase()); + /// + /// // but at the end of a word, it's ς, not σ: + /// let odysseus = "ὈΔΥΣΣΕΎΣ"; + /// + /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase()); + /// ``` + /// + /// Languages without case are not changed: + /// + /// ``` + /// let new_year = "农历新年"; + /// + /// assert_eq!(new_year, new_year.to_lowercase()); /// ``` #[stable(feature = "unicode_case_mapping", since = "1.2.0")] pub fn to_lowercase(&self) -> String { @@ -1472,13 +1792,27 @@ impl str { } } - /// Returns the uppercase equivalent of this string. + /// Returns the uppercase equivalent of this string slice, as a new `String`. + /// + /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property + /// `Uppercase`. /// /// # Examples /// + /// Basic usage: + /// /// ``` /// let s = "hello"; - /// assert_eq!(s.to_uppercase(), "HELLO"); + /// + /// assert_eq!("HELLO", s.to_uppercase()); + /// ``` + /// + /// Scripts without case are not changed: + /// + /// ``` + /// let new_year = "农历新年"; + /// + /// assert_eq!(new_year, new_year.to_uppercase()); /// ``` #[stable(feature = "unicode_case_mapping", since = "1.2.0")] pub fn to_uppercase(&self) -> String { @@ -1503,7 +1837,18 @@ impl str { self.chars().flat_map(|c| c.escape_unicode()).collect() } - /// Converts the `Box<str>` into a `String` without copying or allocating. + /// Converts a `Box<str>` into a `String` without copying or allocating.
+ /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let string = String::from("birthday gift"); + /// let boxed_str = string.clone().into_boxed_str(); + /// + /// assert_eq!(boxed_str.into_string(), string); + /// ``` #[stable(feature = "box_str", since = "1.4.0")] pub fn into_string(self: Box<str>) -> String { unsafe { diff --git a/src/libstd/primitive_docs.rs b/src/libstd/primitive_docs.rs index aec8b6b1b22..e0d2011b293 100644 --- a/src/libstd/primitive_docs.rs +++ b/src/libstd/primitive_docs.rs @@ -293,44 +293,64 @@ mod prim_slice { } #[doc(primitive = "str")] // -/// Unicode string slices. +/// String slices. /// -/// Rust's `str` type is one of the core primitive types of the language. `&str` -/// is the borrowed string type. This type of string can only be created from -/// other strings, unless it is a `&'static str` (see below). It is not possible -/// to move out of borrowed strings because they are owned elsewhere. +/// The `str` type, also called a 'string slice', is the most primitive string +/// type. It is usually seen in its borrowed form, `&str`. It is also the type +/// of string literals, `&'static str`. +/// +/// String slices are always valid UTF-8. +/// +/// This documentation describes a number of methods and trait implementations +/// on the `str` type. For technical reasons, there is additional, separate +/// documentation in [the `std::str` module](str/index.html) as well. /// /// # Examples /// -/// Here's some code that uses a `&str`: +/// String literals are string slices: /// /// ``` -/// let s = "Hello, world."; +/// let hello = "Hello, world!"; +/// +/// // with an explicit type annotation +/// let hello: &'static str = "Hello, world!"; /// ``` /// -/// This `&str` is a `&'static str`, which is the type of string literals. -/// They're `'static` because literals are available for the entire lifetime of -/// the program.
-/// -/// You can get a non-`'static` `&str` by taking a slice of a `String`: -/// -/// ``` -/// let some_string = "Hello, world.".to_string(); -/// let s = &some_string; -/// ``` +/// They are `'static` because they're stored directly in the final binary, and +/// so will be valid for the `'static` duration. /// /// # Representation /// -/// Rust's string type, `str`, is a sequence of Unicode scalar values encoded as -/// a stream of UTF-8 bytes. All [strings](../../reference.html#literals) are -/// guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are -/// not null-terminated and can thus contain null bytes. +/// A `&str` is made up of two components: a pointer to some bytes, and a +/// length. You can look at these with the [`.as_ptr()`] and [`len()`] methods: /// -/// The actual representation of `str`s have direct mappings to slices: `&str` -/// is the same as `&[u8]`. +/// ``` +/// use std::slice; +/// use std::str; /// -/// *[See also the `std::str` module](str/index.html).* +/// let story = "Once upon a time..."; /// +/// let ptr = story.as_ptr(); +/// let len = story.len(); +/// +/// // story has nineteen bytes +/// assert_eq!(19, len); +/// +/// // We can re-build a str out of ptr and len. This is all unsafe because +/// // we are responsible for making sure the two components are valid: +/// let s = unsafe { +/// // First, we build a &[u8]... +/// let slice = slice::from_raw_parts(ptr, len); +/// +/// // ... and then convert that slice into a string slice +/// str::from_utf8(slice) +/// }; +/// +/// assert_eq!(s, Ok(story)); +/// ``` +/// +/// [`.as_ptr()`]: #method.as_ptr +/// [`len()`]: #method.len mod prim_str { } #[doc(primitive = "tuple")]