From f307688bf44404b371b91b3b2a67048088695fe1 Mon Sep 17 00:00:00 2001 From: Roy Frostig Date: Wed, 11 Aug 2010 16:06:45 -0700 Subject: [PATCH] Add native vec[u8] to str converter. Put in workaround for leak in str to vec[u8] converter. Add testcase exercising both. Drive-by fix a potential array-out-of-bounds write on rust_str buffers. --- src/Makefile | 1 + src/lib/_str.rs | 35 +++++++- src/rt/rust_builtin.cpp | 87 +++++++++++++++----- src/test/run-pass/lib-vec-str-conversions.rs | 41 +++++++++ 4 files changed, 141 insertions(+), 23 deletions(-) create mode 100644 src/test/run-pass/lib-vec-str-conversions.rs diff --git a/src/Makefile b/src/Makefile index 63cc67d0a27..2e8deb7b3f5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -477,6 +477,7 @@ TEST_XFAILS_LLVM := $(TASK_XFAILS) \ lib-deque.rs \ lib-map.rs \ lib-rand.rs \ + lib-vec-str-conversions.rs \ linear-for-loop.rs \ list.rs \ many.rs \ diff --git a/src/lib/_str.rs b/src/lib/_str.rs index a607c7d5df5..807edf314bb 100644 --- a/src/lib/_str.rs +++ b/src/lib/_str.rs @@ -1,10 +1,13 @@ import rustrt.sbuf; +import std._vec.rustrt.vbuf; + native "rust" mod rustrt { type sbuf; fn str_buf(str s) -> sbuf; fn str_byte_len(str s) -> uint; fn str_alloc(uint n_bytes) -> str; + fn str_from_vec(vec[u8] b) -> str; fn refcount[T](str s) -> uint; } @@ -40,9 +43,33 @@ fn buf(str s) -> sbuf { ret rustrt.str_buf(s); } -fn bytes(&str s) -> vec[u8] { - fn ith(str s, uint i) -> u8 { - ret s.(i); +fn bytes(str s) -> vec[u8] { + /* FIXME (issue #58): + * Should be... + * + * fn ith(str s, uint i) -> u8 { + * ret s.(i); + * } + * ret _vec.init_fn[u8](bind ith(s, _), byte_len(s)); + * + * but we do not correctly decrement refcount of s when + * the binding dies, so we have to do this manually. 
+ */ + let uint n = _str.byte_len(s); + let vec[u8] v = _vec.alloc[u8](n); + let uint i = 0u; + while (i < n) { + v += vec(s.(i)); + i += 1u; } - ret _vec.init_fn[u8](bind ith(s, _), _str.byte_len(s)); + ret v; +} + +fn from_bytes(vec[u8] v) : is_utf8(v) -> str { + ret rustrt.str_from_vec(v); +} + +fn refcount(str s) -> uint { + // -1 because calling this function incremented the refcount. + ret rustrt.refcount[u8](s) - 1u; } diff --git a/src/rt/rust_builtin.cpp b/src/rt/rust_builtin.cpp index d8d9b8d6eb4..64b587c0d2d 100644 --- a/src/rt/rust_builtin.cpp +++ b/src/rt/rust_builtin.cpp @@ -2,19 +2,6 @@ #include "rust_internal.h" /* Native builtins. */ -extern "C" CDECL rust_str* -str_alloc(rust_task *task, size_t n_bytes) -{ - rust_dom *dom = task->dom; - size_t alloc = next_power_of_two(sizeof(rust_str) + n_bytes); - void *mem = dom->malloc(alloc); - if (!mem) { - task->fail(2); - return NULL; - } - rust_str *st = new (mem) rust_str(dom, alloc, 1, (uint8_t const *)""); - return st; -} extern "C" CDECL rust_str* last_os_error(rust_task *task) { @@ -109,6 +96,48 @@ vec_alloc(rust_task *task, type_desc *t, type_desc *elem_t, size_t n_elts) return vec; } +extern "C" CDECL void * +vec_buf(rust_task *task, type_desc *ty, rust_vec *v, size_t offset) +{ + return (void *)&v->data[ty->size * offset]; +} + +extern "C" CDECL size_t +vec_len(rust_task *task, type_desc *ty, rust_vec *v) +{ + return v->fill / ty->size; +} + +/* Helper for str_alloc and str_from_vec. Returns NULL as failure. 
*/ +static rust_str * +str_alloc_with_data(rust_task *task, + size_t n_bytes, + size_t fill, + uint8_t const *d) +{ + rust_dom *dom = task->dom; + size_t alloc = next_power_of_two(sizeof(rust_str) + n_bytes); + void *mem = dom->malloc(alloc); + if (!mem) + return NULL; + rust_str *st = new (mem) rust_str(dom, alloc, fill, d); + return st; +} + +extern "C" CDECL rust_str* +str_alloc(rust_task *task, size_t n_bytes) +{ + rust_str *st = str_alloc_with_data(task, + n_bytes + 1, // +1 to fit at least "" + 1, + (uint8_t const *)""); + if (!st) { + task->fail(2); + return NULL; + } + return st; +} + extern "C" CDECL char const * str_buf(rust_task *task, rust_str *s) { @@ -121,17 +150,37 @@ str_byte_len(rust_task *task, rust_str *s) return s->fill - 1; // -1 for the '\0' terminator. } -extern "C" CDECL void * -vec_buf(rust_task *task, type_desc *ty, rust_vec *v, size_t offset) +extern "C" CDECL rust_str * +str_from_vec(rust_task *task, rust_vec *v) { - return (void *)&v->data[ty->size * offset]; + rust_str *st = + str_alloc_with_data(task, + v->fill + 1, // +1 to fit at least '\0' + v->fill, + v->fill ? 
(uint8_t const *)v->data : NULL); + if (!st) { + task->fail(2); + return NULL; + } + st->data[st->fill++] = '\0'; + return st; } -extern "C" CDECL size_t -vec_len(rust_task *task, type_desc *ty, rust_vec *v) +/* +extern "C" CDECL rust_str* +str_alloc(rust_task *task, size_t n_bytes) { - return v->fill / ty->size; + rust_dom *dom = task->dom; + size_t alloc = next_power_of_two(sizeof(rust_str) + n_bytes); + void *mem = dom->malloc(alloc); + if (!mem) { + task->fail(2); + return NULL; + } + rust_str *st = new (mem) rust_str(dom, alloc, 1, (uint8_t const *)""); + return st; } +*/ extern "C" CDECL void * rand_new(rust_task *task) diff --git a/src/test/run-pass/lib-vec-str-conversions.rs b/src/test/run-pass/lib-vec-str-conversions.rs new file mode 100644 index 00000000000..1d6b61a1900 --- /dev/null +++ b/src/test/run-pass/lib-vec-str-conversions.rs @@ -0,0 +1,41 @@ +// -*- rust -*- + +use std; +import std._str; +import std._vec; + +fn test_simple() { + let str s1 = "All mimsy were the borogoves"; + + /* + * FIXME from_bytes(vec[u8] v) has constraint is_utf8(v), which is + * unimplemented and thereby just fails. This doesn't stop us from + * using from_bytes for now since the constraint system isn't fully + * working, but we should implement is_utf8 before that happens. + */ + + let vec[u8] v = _str.bytes(s1); + let str s2 = _str.from_bytes(v); + + let uint i = 0u; + let uint n1 = _str.byte_len(s1); + let uint n2 = _vec.len[u8](v); + + check (n1 == n2); + + while (i < n1) { + let u8 a = s1.(i); + let u8 b = s2.(i); + log a; + log b; + check (a == b); + i += 1u; + } + + log "refcnt is"; + log _str.refcount(s1); +} + +fn main() { + test_simple(); +}