Changed type of str::from_bytes and added str::from_byte

This commit is contained in:
Kevin Cantu 2012-01-24 23:44:19 -08:00
parent 4d096a8c86
commit 2496dccae4

View file

@ -13,6 +13,7 @@ export
// Creating a string // Creating a string
from_bytes, from_bytes,
unsafe_from_bytes, unsafe_from_bytes,
from_byte,
unsafe_from_byte, unsafe_from_byte,
//push_utf8_bytes, //push_utf8_bytes,
from_char, from_char,
@ -117,14 +118,11 @@ Section: Creating a string
/* /*
Function: from_bytes Function: from_bytes
Safely convert a vector of bytes to a UTF-8 string, or error Convert a vector of bytes to a UTF-8 string. Fails if invalid UTF-8.
*/ */
fn from_bytes(vv: [u8]) -> result::t<str, str> { fn from_bytes(vv: [u8]) -> str {
if is_utf8(vv) { assert is_utf8(vv);
ret result::ok(unsafe_from_bytes(vv)); ret unsafe_from_bytes(vv);
} else {
ret result::err("vector doesn't contain valid UTF-8");
}
} }
/* /*
@ -133,7 +131,7 @@ Function: unsafe_from_bytes
Converts a vector of bytes to a string. Does not verify that the Converts a vector of bytes to a string. Does not verify that the
vector contains valid UTF-8. vector contains valid UTF-8.
// FIXME: remove? FIXME: don't export?
*/ */
fn unsafe_from_bytes(v: [const u8]) -> str unsafe { fn unsafe_from_bytes(v: [const u8]) -> str unsafe {
let vcopy: [u8] = v + [0u8]; let vcopy: [u8] = v + [0u8];
@ -152,6 +150,16 @@ FIXME: rename to 'from_byte'
*/ */
fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) } fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) }
/*
Function: from_byte

Convert a single byte to a string by wrapping it in a one-element
vector and handing it to from_bytes.

Failure:

from_bytes asserts that its input is valid UTF-8, so this fails when
the byte on its own does not pass that check.
NOTE(review): presumably only bytes below 0x80 are accepted, since a
lone lead or continuation byte is not a complete UTF-8 sequence --
confirm against is_utf8 before using this on bytes taken one at a time
from a multi-byte string.
*/
fn from_byte(uu: u8) -> str {
    let single = [uu];
    from_bytes(single)
}
fn push_utf8_bytes(&s: str, ch: char) { fn push_utf8_bytes(&s: str, ch: char) {
let code = ch as uint; let code = ch as uint;
let bytes = let bytes =
@ -526,7 +534,7 @@ fn split(s: str, sep: u8) -> [str] {
v += [accum]; v += [accum];
accum = ""; accum = "";
ends_with_sep = true; ends_with_sep = true;
} else { accum += unsafe_from_byte(c); ends_with_sep = false; } } else { accum += from_byte(c); ends_with_sep = false; }
} }
if byte_len(accum) != 0u || ends_with_sep { v += [accum]; } if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
ret v; ret v;
@ -554,7 +562,7 @@ fn splitn(s: str, sep: u8, count: uint) -> [str] {
v += [accum]; v += [accum];
accum = ""; accum = "";
ends_with_sep = true; ends_with_sep = true;
} else { accum += unsafe_from_byte(c); ends_with_sep = false; } } else { accum += from_byte(c); ends_with_sep = false; }
} }
if byte_len(accum) != 0u || ends_with_sep { v += [accum]; } if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
ret v; ret v;
@ -575,12 +583,12 @@ FIXME: should behave like split and split_char:
*/ */
fn split_str(s: str, sep: str) -> [str] { fn split_str(s: str, sep: str) -> [str] {
assert byte_len(sep) > 0u; assert byte_len(sep) > 0u;
let v: [str] = [], accum = "", sep_match = 0u, leading = true; let v: [str] = [], accum = [], sep_match = 0u, leading = true;
for c: u8 in s { for c: u8 in s {
// Did we match the entire separator? // Did we match the entire separator?
if sep_match == byte_len(sep) { if sep_match == byte_len(sep) {
if !leading { v += [accum]; } if !leading { vec::push(v, from_bytes(accum)); }
accum = ""; accum = [];
sep_match = 0u; sep_match = 0u;
} }
@ -588,13 +596,13 @@ fn split_str(s: str, sep: str) -> [str] {
sep_match += 1u; sep_match += 1u;
} else { } else {
sep_match = 0u; sep_match = 0u;
accum += unsafe_from_byte(c); vec::push(accum, c);
leading = false; leading = false;
} }
} }
if byte_len(accum) > 0u { v += [accum]; } if vec::len(accum) > 0u { vec::push(v, from_bytes(accum)); }
if sep_match == byte_len(sep) { v += [""]; } if sep_match == byte_len(sep) { vec::push(v, ""); }
ret v; ret v;
} }
@ -1783,7 +1791,24 @@ mod tests {
0x20_u8, 0x4e_u8, 0x61_u8, 0x20_u8, 0x4e_u8, 0x61_u8,
0x6d_u8]; 0x6d_u8];
assert ss == result::get(from_bytes(bb)); assert ss == from_bytes(bb);
}
#[test]
#[should_fail]
fn test_from_bytes_fail() {
let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
0xe0_u8, 0xb9_u8, 0x84_u8,
0xe0_u8, 0xb8_u8, 0x97_u8,
0xe0_u8, 0xb8_u8, 0xa2_u8,
0xe4_u8, 0xb8_u8, 0xad_u8,
0xe5_u8, 0x8d_u8, 0x8e_u8,
0x56_u8, 0x69_u8, 0xe1_u8,
0xbb_u8, 0x87_u8, 0x74_u8,
0x20_u8, 0x4e_u8, 0x61_u8,
0x6d_u8];
let _x = from_bytes(bb);
} }
#[test] #[test]
@ -1821,7 +1846,7 @@ mod tests {
let s1: str = "All mimsy were the borogoves"; let s1: str = "All mimsy were the borogoves";
let v: [u8] = bytes(s1); let v: [u8] = bytes(s1);
let s2: str = unsafe_from_bytes(v); let s2: str = from_bytes(v);
let i: uint = 0u; let i: uint = 0u;
let n1: uint = byte_len(s1); let n1: uint = byte_len(s1);
let n2: uint = vec::len::<u8>(v); let n2: uint = vec::len::<u8>(v);