Changed type of str::from_bytes and added str::from_byte

2012-01-24 23:44:19 -08:00 · 2012-01-24 23:44:19 -08:00 · 2496dccae4
commit 2496dccae4
parent 4d096a8c86
1 changed files with 43 additions and 18 deletions
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@ -13,6 +13,7 @@ export
   // Creating a string
   from_bytes,
   unsafe_from_bytes,
+   from_byte,
   unsafe_from_byte,
   //push_utf8_bytes,
   from_char,
@ -117,14 +118,11 @@ Section: Creating a string
 /*
 Function: from_bytes

-Safely convert a vector of bytes to a UTF-8 string, or error
+Convert a vector of bytes to a UTF-8 string.  Fails if the vector does not contain valid UTF-8.
 */
-fn from_bytes(vv: [u8]) -> result::t<str, str> {
-   if is_utf8(vv) {
-      ret result::ok(unsafe_from_bytes(vv));
-   } else {
-      ret result::err("vector doesn't contain valid UTF-8");
-   }
+fn from_bytes(vv: [u8]) -> str {
+   assert is_utf8(vv);
+   ret unsafe_from_bytes(vv);
 }

 /*
@ -133,7 +131,7 @@ Function: unsafe_from_bytes
 Converts a vector of bytes to a string. Does not verify that the
 vector contains valid UTF-8.

-// FIXME: remove?
+FIXME: don't export?
 */
 fn unsafe_from_bytes(v: [const u8]) -> str unsafe {
    let vcopy: [u8] = v + [0u8];
@ -152,6 +150,16 @@ FIXME: rename to 'from_byte'
 */
 fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) }

+
+/*
+Function: from_byte
+
+Convert a single byte to a UTF-8 string.  Fails if the byte alone is not valid UTF-8 (i.e. any byte >= 0x80).
+*/
+fn from_byte(uu: u8) -> str {
+    from_bytes([uu])
+}
+
 fn push_utf8_bytes(&s: str, ch: char) {
    let code = ch as uint;
    let bytes =
@ -526,7 +534,7 @@ fn split(s: str, sep: u8) -> [str] {
            v += [accum];
            accum = "";
            ends_with_sep = true;
-        } else { accum += unsafe_from_byte(c); ends_with_sep = false; }
+        } else { accum += from_byte(c); ends_with_sep = false; }
    }
    if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
    ret v;
@ -554,7 +562,7 @@ fn splitn(s: str, sep: u8, count: uint) -> [str] {
            v += [accum];
            accum = "";
            ends_with_sep = true;
-        } else { accum += unsafe_from_byte(c); ends_with_sep = false; }
+        } else { accum += from_byte(c); ends_with_sep = false; }
    }
    if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
    ret v;
@ -575,12 +583,12 @@ FIXME: should behave like split and split_char:
 */
 fn split_str(s: str, sep: str) -> [str] {
    assert byte_len(sep) > 0u;
-    let v: [str] = [], accum = "", sep_match = 0u, leading = true;
+    let v: [str] = [], accum = [], sep_match = 0u, leading = true;
    for c: u8 in s {
        // Did we match the entire separator?
        if sep_match == byte_len(sep) {
-            if !leading { v += [accum]; }
-            accum = "";
+            if !leading { vec::push(v, from_bytes(accum)); }
+            accum = [];
            sep_match = 0u;
        }

@ -588,13 +596,13 @@ fn split_str(s: str, sep: str) -> [str] {
            sep_match += 1u;
        } else {
            sep_match = 0u;
-            accum += unsafe_from_byte(c);
+            vec::push(accum, c);
            leading = false;
        }
    }

-    if byte_len(accum) > 0u { v += [accum]; }
-    if sep_match == byte_len(sep) { v += [""]; }
+    if vec::len(accum) > 0u { vec::push(v, from_bytes(accum)); }
+    if sep_match == byte_len(sep) { vec::push(v, ""); }

    ret v;
 }
@ -1783,7 +1791,24 @@ mod tests {
                  0x20_u8, 0x4e_u8, 0x61_u8,
                  0x6d_u8];

-         assert ss == result::get(from_bytes(bb));
+         assert ss == from_bytes(bb);
+    }
+
+    #[test]
+    #[should_fail]
+    fn test_from_bytes_fail() {
+        let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
+                  0xe0_u8, 0xb9_u8, 0x84_u8,
+                  0xe0_u8, 0xb8_u8, 0x97_u8,
+                  0xe0_u8, 0xb8_u8, 0xa2_u8,
+                  0xe4_u8, 0xb8_u8, 0xad_u8,
+                  0xe5_u8, 0x8d_u8, 0x8e_u8,
+                  0x56_u8, 0x69_u8, 0xe1_u8,
+                  0xbb_u8, 0x87_u8, 0x74_u8,
+                  0x20_u8, 0x4e_u8, 0x61_u8,
+                  0x6d_u8];
+
+         let _x = from_bytes(bb);
    }

    #[test]
@ -1821,7 +1846,7 @@ mod tests {
        let s1: str = "All mimsy were the borogoves";

        let v: [u8] = bytes(s1);
-        let s2: str = unsafe_from_bytes(v);
+        let s2: str = from_bytes(v);
        let i: uint = 0u;
        let n1: uint = byte_len(s1);
        let n2: uint = vec::len::<u8>(v);