(core::str) rename byte_len_range -> substr_len_bytes and
rename char_len_range -> substr_len_chars
This commit is contained in:
parent
2b4f5136a5
commit
2ba44e24d5
3 changed files with 56 additions and 48 deletions
|
@ -135,7 +135,7 @@ fn lookup_byte_offset(cm: codemap::codemap, chpos: uint)
|
||||||
let {fm,line} = lookup_line(cm,chpos,lookup);
|
let {fm,line} = lookup_line(cm,chpos,lookup);
|
||||||
let line_offset = fm.lines[line].byte - fm.start_pos.byte;
|
let line_offset = fm.lines[line].byte - fm.start_pos.byte;
|
||||||
let col = chpos - fm.lines[line].ch;
|
let col = chpos - fm.lines[line].ch;
|
||||||
let col_offset = str::byte_len_range(*fm.src, line_offset, col);
|
let col_offset = str::substr_len_bytes(*fm.src, line_offset, col);
|
||||||
ret {fm: fm, pos: line_offset + col_offset};
|
ret {fm: fm, pos: line_offset + col_offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,8 +85,8 @@ export
|
||||||
// Misc
|
// Misc
|
||||||
// FIXME: perhaps some more of this section shouldn't be exported?
|
// FIXME: perhaps some more of this section shouldn't be exported?
|
||||||
is_utf8,
|
is_utf8,
|
||||||
char_len_range,
|
substr_len_bytes,
|
||||||
byte_len_range,
|
substr_len_chars,
|
||||||
utf8_char_width,
|
utf8_char_width,
|
||||||
char_range_at,
|
char_range_at,
|
||||||
char_at,
|
char_at,
|
||||||
|
@ -271,7 +271,7 @@ Failure:
|
||||||
If the string does not contain any characters.
|
If the string does not contain any characters.
|
||||||
*/
|
*/
|
||||||
fn pop_char(&s: str) -> char unsafe {
|
fn pop_char(&s: str) -> char unsafe {
|
||||||
let end = byte_len(s);
|
let end = len_bytes(s);
|
||||||
let {ch:ch, prev:end} = char_range_at_reverse(s, end);
|
let {ch:ch, prev:end} = char_range_at_reverse(s, end);
|
||||||
s = unsafe::slice_bytes(s, 0u, end);
|
s = unsafe::slice_bytes(s, 0u, end);
|
||||||
ret ch;
|
ret ch;
|
||||||
|
@ -288,7 +288,7 @@ If the string does not contain any characters.
|
||||||
*/
|
*/
|
||||||
fn shift_char(&s: str) -> char unsafe {
|
fn shift_char(&s: str) -> char unsafe {
|
||||||
let r = char_range_at(s, 0u);
|
let r = char_range_at(s, 0u);
|
||||||
s = unsafe::slice_bytes(s, r.next, byte_len(s));
|
s = unsafe::slice_bytes(s, r.next, len_bytes(s));
|
||||||
ret r.ch;
|
ret r.ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -367,7 +367,7 @@ Convert a string to a vector of characters
|
||||||
fn chars(s: str) -> [char] {
|
fn chars(s: str) -> [char] {
|
||||||
let buf: [char] = [];
|
let buf: [char] = [];
|
||||||
let i = 0u;
|
let i = 0u;
|
||||||
let len = byte_len(s);
|
let len = len_bytes(s);
|
||||||
while i < len {
|
while i < len {
|
||||||
let cur = char_range_at(s, i);
|
let cur = char_range_at(s, i);
|
||||||
buf += [cur.ch];
|
buf += [cur.ch];
|
||||||
|
@ -443,7 +443,7 @@ fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe {
|
||||||
assert u8::is_ascii(sep);
|
assert u8::is_ascii(sep);
|
||||||
|
|
||||||
let vv = [];
|
let vv = [];
|
||||||
let start = 0u, current = 0u, len = byte_len(ss);
|
let start = 0u, current = 0u, len = len_bytes(ss);
|
||||||
let splits_done = 0u;
|
let splits_done = 0u;
|
||||||
|
|
||||||
while splits_done < count && current < len {
|
while splits_done < count && current < len {
|
||||||
|
@ -550,7 +550,7 @@ up to `count` times
|
||||||
fn splitn_char(ss: str, sep: char, count: uint) -> [str] unsafe {
|
fn splitn_char(ss: str, sep: char, count: uint) -> [str] unsafe {
|
||||||
|
|
||||||
let vv = [];
|
let vv = [];
|
||||||
let start = 0u, current = 0u, len = byte_len(ss);
|
let start = 0u, current = 0u, len = len_bytes(ss);
|
||||||
let splits_done = 0u;
|
let splits_done = 0u;
|
||||||
|
|
||||||
while splits_done < count && current < len {
|
while splits_done < count && current < len {
|
||||||
|
@ -656,10 +656,11 @@ Returns:
|
||||||
The original string with all occurrences of `from` replaced with `to`
|
The original string with all occurrences of `from` replaced with `to`
|
||||||
*/
|
*/
|
||||||
fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str unsafe {
|
fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str unsafe {
|
||||||
if byte_len(s) == 0u {
|
if len_bytes(s) == 0u {
|
||||||
ret "";
|
ret "";
|
||||||
} else if starts_with(s, from) {
|
} else if starts_with(s, from) {
|
||||||
ret to + replace(unsafe::slice_bytes(s, byte_len(from), byte_len(s)),
|
ret to + replace(
|
||||||
|
unsafe::slice_bytes(s, len_bytes(from), len_bytes(s)),
|
||||||
from, to);
|
from, to);
|
||||||
} else {
|
} else {
|
||||||
let idx = find(s, from);
|
let idx = find(s, from);
|
||||||
|
@ -726,7 +727,7 @@ Return true if a predicate matches all characters or
|
||||||
if the string contains no characters
|
if the string contains no characters
|
||||||
*/
|
*/
|
||||||
fn all(s: str, it: fn(char) -> bool) -> bool{
|
fn all(s: str, it: fn(char) -> bool) -> bool{
|
||||||
ret substr_all(s, 0u, byte_len(s), it);
|
ret substr_all(s, 0u, len_bytes(s), it);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -746,7 +747,7 @@ Apply a function to each character
|
||||||
*/
|
*/
|
||||||
fn map(ss: str, ff: fn(char) -> char) -> str {
|
fn map(ss: str, ff: fn(char) -> char) -> str {
|
||||||
let result = "";
|
let result = "";
|
||||||
reserve(result, byte_len(ss));
|
reserve(result, len_bytes(ss));
|
||||||
|
|
||||||
chars_iter(ss, {|cc|
|
chars_iter(ss, {|cc|
|
||||||
str::push_char(result, ff(cc));
|
str::push_char(result, ff(cc));
|
||||||
|
@ -762,7 +763,7 @@ Iterate over the bytes in a string
|
||||||
*/
|
*/
|
||||||
fn bytes_iter(ss: str, it: fn(u8)) {
|
fn bytes_iter(ss: str, it: fn(u8)) {
|
||||||
let pos = 0u;
|
let pos = 0u;
|
||||||
let len = byte_len(ss);
|
let len = len_bytes(ss);
|
||||||
|
|
||||||
while (pos < len) {
|
while (pos < len) {
|
||||||
it(ss[pos]);
|
it(ss[pos]);
|
||||||
|
@ -776,7 +777,7 @@ Function: chars_iter
|
||||||
Iterate over the characters in a string
|
Iterate over the characters in a string
|
||||||
*/
|
*/
|
||||||
fn chars_iter(s: str, it: fn(char)) {
|
fn chars_iter(s: str, it: fn(char)) {
|
||||||
let pos = 0u, len = byte_len(s);
|
let pos = 0u, len = len_bytes(s);
|
||||||
while (pos < len) {
|
while (pos < len) {
|
||||||
let {ch, next} = char_range_at(s, pos);
|
let {ch, next} = char_range_at(s, pos);
|
||||||
pos = next;
|
pos = next;
|
||||||
|
@ -833,7 +834,7 @@ Section: Searching
|
||||||
fn index(ss: str, cc: char) -> option<uint> {
|
fn index(ss: str, cc: char) -> option<uint> {
|
||||||
let bii = 0u;
|
let bii = 0u;
|
||||||
let cii = 0u;
|
let cii = 0u;
|
||||||
let len = byte_len(ss);
|
let len = len_bytes(ss);
|
||||||
while bii < len {
|
while bii < len {
|
||||||
let {ch, next} = char_range_at(ss, bii);
|
let {ch, next} = char_range_at(ss, bii);
|
||||||
|
|
||||||
|
@ -855,7 +856,7 @@ fn index(ss: str, cc: char) -> option<uint> {
|
||||||
// Returns the index of the first matching char
|
// Returns the index of the first matching char
|
||||||
// (as option some/none)
|
// (as option some/none)
|
||||||
fn rindex(ss: str, cc: char) -> option<uint> {
|
fn rindex(ss: str, cc: char) -> option<uint> {
|
||||||
let bii = byte_len(ss);
|
let bii = len_bytes(ss);
|
||||||
let cii = len(ss);
|
let cii = len(ss);
|
||||||
while bii > 0u {
|
while bii > 0u {
|
||||||
let {ch, prev} = char_range_at_reverse(ss, bii);
|
let {ch, prev} = char_range_at_reverse(ss, bii);
|
||||||
|
@ -890,8 +891,8 @@ The index of the first occurrence of `needle`, or -1 if not found.
|
||||||
FIXME: UTF-8
|
FIXME: UTF-8
|
||||||
*/
|
*/
|
||||||
fn find(haystack: str, needle: str) -> int {
|
fn find(haystack: str, needle: str) -> int {
|
||||||
let haystack_len: int = byte_len(haystack) as int;
|
let haystack_len: int = len_bytes(haystack) as int;
|
||||||
let needle_len: int = byte_len(needle) as int;
|
let needle_len: int = len_bytes(needle) as int;
|
||||||
if needle_len == 0 { ret 0; }
|
if needle_len == 0 { ret 0; }
|
||||||
fn match_at(haystack: str, needle: str, i: int) -> bool {
|
fn match_at(haystack: str, needle: str, i: int) -> bool {
|
||||||
let j: int = i;
|
let j: int = i;
|
||||||
|
@ -931,8 +932,8 @@ haystack - The string to look in
|
||||||
needle - The string to look for
|
needle - The string to look for
|
||||||
*/
|
*/
|
||||||
fn starts_with(haystack: str, needle: str) -> bool {
|
fn starts_with(haystack: str, needle: str) -> bool {
|
||||||
let haystack_len: uint = byte_len(haystack);
|
let haystack_len: uint = len_bytes(haystack);
|
||||||
let needle_len: uint = byte_len(needle);
|
let needle_len: uint = len_bytes(needle);
|
||||||
if needle_len == 0u { ret true; }
|
if needle_len == 0u { ret true; }
|
||||||
if needle_len > haystack_len { ret false; }
|
if needle_len > haystack_len { ret false; }
|
||||||
ret eq(substr(haystack, 0u, needle_len), needle);
|
ret eq(substr(haystack, 0u, needle_len), needle);
|
||||||
|
@ -969,7 +970,7 @@ Function: is_ascii
|
||||||
Determines if a string contains only ASCII characters
|
Determines if a string contains only ASCII characters
|
||||||
*/
|
*/
|
||||||
fn is_ascii(s: str) -> bool {
|
fn is_ascii(s: str) -> bool {
|
||||||
let i: uint = byte_len(s);
|
let i: uint = len_bytes(s);
|
||||||
while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { ret false; } }
|
while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { ret false; } }
|
||||||
ret true;
|
ret true;
|
||||||
}
|
}
|
||||||
|
@ -1019,7 +1020,7 @@ pure fn byte_len(s: str) -> uint unsafe { len_bytes(s) }
|
||||||
// String length or size in characters.
|
// String length or size in characters.
|
||||||
// (Synonyms: len_chars, char_len)
|
// (Synonyms: len_chars, char_len)
|
||||||
fn len(s: str) -> uint {
|
fn len(s: str) -> uint {
|
||||||
char_len_range(s, 0u, byte_len(s))
|
substr_len_chars(s, 0u, len_bytes(s))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn len_chars(s: str) -> uint { len(s) }
|
fn len_chars(s: str) -> uint { len(s) }
|
||||||
|
@ -1054,7 +1055,7 @@ fn is_utf8(v: [u8]) -> bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: char_len_range
|
Function: substr_len_chars
|
||||||
|
|
||||||
As char_len but for a slice of a string
|
As char_len but for a slice of a string
|
||||||
|
|
||||||
|
@ -1071,10 +1072,8 @@ Safety note:
|
||||||
- This function does not check whether the substring is valid.
|
- This function does not check whether the substring is valid.
|
||||||
- This function fails if `byte_start` or `byte_len` do not
|
- This function fails if `byte_start` or `byte_len` do not
|
||||||
represent valid positions inside `s`
|
represent valid positions inside `s`
|
||||||
|
|
||||||
FIXME: rename to 'substr_len_chars'
|
|
||||||
*/
|
*/
|
||||||
fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
|
fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint {
|
||||||
let i = byte_start;
|
let i = byte_start;
|
||||||
let byte_stop = i + byte_len;
|
let byte_stop = i + byte_len;
|
||||||
let len = 0u;
|
let len = 0u;
|
||||||
|
@ -1088,7 +1087,7 @@ fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint {
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: byte_len_range
|
Function: substr_len_bytes
|
||||||
|
|
||||||
As byte_len but for a substring
|
As byte_len but for a substring
|
||||||
|
|
||||||
|
@ -1108,7 +1107,7 @@ valid positions in `s`
|
||||||
|
|
||||||
FIXME: rename to 'substr_len_bytes'
|
FIXME: rename to 'substr_len_bytes'
|
||||||
*/
|
*/
|
||||||
fn byte_len_range(s: str, byte_offset: uint, char_len: uint) -> uint {
|
fn substr_len_bytes(s: str, byte_offset: uint, char_len: uint) -> uint {
|
||||||
let i = byte_offset;
|
let i = byte_offset;
|
||||||
let chars = 0u;
|
let chars = 0u;
|
||||||
while chars < char_len {
|
while chars < char_len {
|
||||||
|
@ -1149,7 +1148,7 @@ This function can be used to iterate over the unicode characters of a string.
|
||||||
Example:
|
Example:
|
||||||
> let s = "中华Việt Nam";
|
> let s = "中华Việt Nam";
|
||||||
> let i = 0u;
|
> let i = 0u;
|
||||||
> while i < str::byte_len(s) {
|
> while i < str::len_bytes(s) {
|
||||||
> let {ch, next} = str::char_range_at(s, i);
|
> let {ch, next} = str::char_range_at(s, i);
|
||||||
> std::io::println(#fmt("%u: %c",i,ch));
|
> std::io::println(#fmt("%u: %c",i,ch));
|
||||||
> i = next;
|
> i = next;
|
||||||
|
@ -1391,7 +1390,7 @@ mod unsafe {
|
||||||
unsafe fn slice_bytes(s: str, begin: uint, end: uint) -> str unsafe {
|
unsafe fn slice_bytes(s: str, begin: uint, end: uint) -> str unsafe {
|
||||||
// FIXME: Typestate precondition
|
// FIXME: Typestate precondition
|
||||||
assert (begin <= end);
|
assert (begin <= end);
|
||||||
assert (end <= byte_len(s));
|
assert (end <= len_bytes(s));
|
||||||
|
|
||||||
let v = as_bytes(s) { |v| vec::slice(v, begin, end) };
|
let v = as_bytes(s) { |v| vec::slice(v, begin, end) };
|
||||||
v += [0u8];
|
v += [0u8];
|
||||||
|
@ -1408,7 +1407,7 @@ mod unsafe {
|
||||||
unsafe fn slice_bytes_safe_range(s: str, begin: uint, end: uint)
|
unsafe fn slice_bytes_safe_range(s: str, begin: uint, end: uint)
|
||||||
: uint::le(begin, end) -> str {
|
: uint::le(begin, end) -> str {
|
||||||
// would need some magic to make this a precondition
|
// would need some magic to make this a precondition
|
||||||
assert (end <= byte_len(s));
|
assert (end <= len_bytes(s));
|
||||||
ret slice_bytes(s, begin, end);
|
ret slice_bytes(s, begin, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1430,7 +1429,7 @@ mod unsafe {
|
||||||
//
|
//
|
||||||
// Removes the last byte from a string and returns it. (Not UTF-8 safe).
|
// Removes the last byte from a string and returns it. (Not UTF-8 safe).
|
||||||
unsafe fn pop_byte(&s: str) -> u8 unsafe {
|
unsafe fn pop_byte(&s: str) -> u8 unsafe {
|
||||||
let len = byte_len(s);
|
let len = len_bytes(s);
|
||||||
assert (len > 0u);
|
assert (len > 0u);
|
||||||
let b = s[len - 1u];
|
let b = s[len - 1u];
|
||||||
s = unsafe::slice_bytes(s, 0u, len - 1u);
|
s = unsafe::slice_bytes(s, 0u, len - 1u);
|
||||||
|
@ -1441,7 +1440,7 @@ mod unsafe {
|
||||||
//
|
//
|
||||||
// Removes the first byte from a string and returns it. (Not UTF-8 safe).
|
// Removes the first byte from a string and returns it. (Not UTF-8 safe).
|
||||||
unsafe fn shift_byte(&s: str) -> u8 unsafe {
|
unsafe fn shift_byte(&s: str) -> u8 unsafe {
|
||||||
let len = byte_len(s);
|
let len = len_bytes(s);
|
||||||
assert (len > 0u);
|
assert (len > 0u);
|
||||||
let b = s[0];
|
let b = s[0];
|
||||||
s = unsafe::slice_bytes(s, 1u, len);
|
s = unsafe::slice_bytes(s, 1u, len);
|
||||||
|
@ -1470,14 +1469,23 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_bytes_len() {
|
fn test_len() {
|
||||||
assert (byte_len("") == 0u);
|
assert (len_bytes("") == 0u);
|
||||||
assert (byte_len("hello world") == 11u);
|
assert (len_bytes("hello world") == 11u);
|
||||||
assert (byte_len("\x63") == 1u);
|
assert (len_bytes("\x63") == 1u);
|
||||||
assert (byte_len("\xa2") == 2u);
|
assert (len_bytes("\xa2") == 2u);
|
||||||
assert (byte_len("\u03c0") == 2u);
|
assert (len_bytes("\u03c0") == 2u);
|
||||||
assert (byte_len("\u2620") == 3u);
|
assert (len_bytes("\u2620") == 3u);
|
||||||
assert (byte_len("\U0001d11e") == 4u);
|
assert (len_bytes("\U0001d11e") == 4u);
|
||||||
|
|
||||||
|
assert (len("") == 0u);
|
||||||
|
assert (len("hello world") == 11u);
|
||||||
|
assert (len("\x63") == 1u);
|
||||||
|
assert (len("\xa2") == 1u);
|
||||||
|
assert (len("\u03c0") == 1u);
|
||||||
|
assert (len("\u2620") == 1u);
|
||||||
|
assert (len("\U0001d11e") == 1u);
|
||||||
|
assert (len("ประเทศไทย中华Việt Nam") == 19u);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -1712,7 +1720,7 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_substr() {
|
fn test_substr() {
|
||||||
fn t(a: str, b: str, start: int) {
|
fn t(a: str, b: str, start: int) {
|
||||||
assert (eq(substr(a, start as uint, byte_len(b)), b));
|
assert (eq(substr(a, start as uint, len_bytes(b)), b));
|
||||||
}
|
}
|
||||||
t("hello", "llo", 2);
|
t("hello", "llo", 2);
|
||||||
t("hello", "el", 1);
|
t("hello", "el", 1);
|
||||||
|
@ -2002,7 +2010,7 @@ mod tests {
|
||||||
let v: [u8] = bytes(s1);
|
let v: [u8] = bytes(s1);
|
||||||
let s2: str = from_bytes(v);
|
let s2: str = from_bytes(v);
|
||||||
let i: uint = 0u;
|
let i: uint = 0u;
|
||||||
let n1: uint = byte_len(s1);
|
let n1: uint = len_bytes(s1);
|
||||||
let n2: uint = vec::len::<u8>(v);
|
let n2: uint = vec::len::<u8>(v);
|
||||||
assert (n1 == n2);
|
assert (n1 == n2);
|
||||||
while i < n1 {
|
while i < n1 {
|
||||||
|
|
|
@ -743,7 +743,7 @@ mod node {
|
||||||
*/
|
*/
|
||||||
fn of_substr(str: @str, byte_start: uint, byte_len: uint) -> @node {
|
fn of_substr(str: @str, byte_start: uint, byte_len: uint) -> @node {
|
||||||
ret of_substr_unsafer(str, byte_start, byte_len,
|
ret of_substr_unsafer(str, byte_start, byte_len,
|
||||||
str::char_len_range(*str, byte_start, byte_len));
|
str::substr_len_chars(*str, byte_start, byte_len));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -795,7 +795,7 @@ mod node {
|
||||||
if i == 0u { first_leaf_char_len }
|
if i == 0u { first_leaf_char_len }
|
||||||
else { hint_max_leaf_char_len };
|
else { hint_max_leaf_char_len };
|
||||||
let chunk_byte_len =
|
let chunk_byte_len =
|
||||||
str::byte_len_range(*str, offset, chunk_char_len);
|
str::substr_len_bytes(*str, offset, chunk_char_len);
|
||||||
nodes[i] = @leaf({
|
nodes[i] = @leaf({
|
||||||
byte_offset: offset,
|
byte_offset: offset,
|
||||||
byte_len: chunk_byte_len,
|
byte_len: chunk_byte_len,
|
||||||
|
@ -998,7 +998,7 @@ mod node {
|
||||||
alt(*node) {
|
alt(*node) {
|
||||||
node::leaf(x) {
|
node::leaf(x) {
|
||||||
let char_len =
|
let char_len =
|
||||||
str::char_len_range(*x.content, byte_offset, byte_len);
|
str::substr_len_chars(*x.content, byte_offset, byte_len);
|
||||||
ret @leaf({byte_offset: byte_offset,
|
ret @leaf({byte_offset: byte_offset,
|
||||||
byte_len: byte_len,
|
byte_len: byte_len,
|
||||||
char_len: char_len,
|
char_len: char_len,
|
||||||
|
@ -1059,9 +1059,9 @@ mod node {
|
||||||
ret node;
|
ret node;
|
||||||
}
|
}
|
||||||
let byte_offset =
|
let byte_offset =
|
||||||
str::byte_len_range(*x.content, 0u, char_offset);
|
str::substr_len_bytes(*x.content, 0u, char_offset);
|
||||||
let byte_len =
|
let byte_len =
|
||||||
str::byte_len_range(*x.content, byte_offset, char_len);
|
str::substr_len_bytes(*x.content, byte_offset, char_len);
|
||||||
ret @leaf({byte_offset: byte_offset,
|
ret @leaf({byte_offset: byte_offset,
|
||||||
byte_len: byte_len,
|
byte_len: byte_len,
|
||||||
char_len: char_len,
|
char_len: char_len,
|
||||||
|
|
Loading…
Reference in a new issue