From e712a47a812b4605adeea6ec56ba06550c060c78 Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Fri, 10 Feb 2012 16:38:33 -0800 Subject: [PATCH] create serialization lib and update serializer to use it --- src/comp/metadata/decoder.rs | 6 +- src/comp/middle/ty.rs | 2 +- src/libstd/ebml.rs | 29 ++-- src/libstd/serialization.rs | 252 +++++++++++++++++++++++++++++++++++ src/serializer/serializer.rs | 54 +++++--- 5 files changed, 304 insertions(+), 39 deletions(-) create mode 100644 src/libstd/serialization.rs diff --git a/src/comp/metadata/decoder.rs b/src/comp/metadata/decoder.rs index cd007e22fc4..2b3ea15db61 100644 --- a/src/comp/metadata/decoder.rs +++ b/src/comp/metadata/decoder.rs @@ -43,7 +43,7 @@ fn lookup_hash(d: ebml::doc, eq_fn: fn@([u8]) -> bool, hash: uint) -> let table = ebml::get_doc(index, tag_index_table); let hash_pos = table.start + hash % 256u * 4u; let pos = ebml::be_uint_from_bytes(d.data, hash_pos, 4u); - let bucket = ebml::doc_at(d.data, pos); + let {tag:_, doc:bucket} = ebml::doc_at(d.data, pos); // Awkward logic because we can't ret from foreach yet let result: [ebml::doc] = []; @@ -51,7 +51,7 @@ fn lookup_hash(d: ebml::doc, eq_fn: fn@([u8]) -> bool, hash: uint) -> ebml::tagged_docs(bucket, belt) {|elt| let pos = ebml::be_uint_from_bytes(elt.data, elt.start, 4u); if eq_fn(vec::slice::(*elt.data, elt.start + 4u, elt.end)) { - result += [ebml::doc_at(d.data, pos)]; + result += [ebml::doc_at(d.data, pos).doc]; } }; ret result; @@ -500,7 +500,7 @@ fn iter_crate_items(bytes: @[u8], proc: fn(str, ast::def_id)) { let et = tag_index_buckets_bucket_elt; ebml::tagged_docs(bucket, et) {|elt| let data = read_path(elt); - let def = ebml::doc_at(bytes, data.pos); + let {tag:_, doc:def} = ebml::doc_at(bytes, data.pos); let did_doc = ebml::get_doc(def, tag_def_id); let did = parse_def_id(ebml::doc_data(did_doc)); proc(data.path, did); diff --git a/src/comp/middle/ty.rs b/src/comp/middle/ty.rs index 7976576720f..8d0740b6728 100644 --- a/src/comp/middle/ty.rs +++ b/src/comp/middle/ty.rs @@ -2395,7 +2395,7 @@ fn item_path(cx: ctxt, id: ast::def_id) -> ast_map::path { *path + [ast_map::path_name(nitem.ident)] } - ast_map::node_method(method, path) { + ast_map::node_method(method, _, path) { *path + [ast_map::path_name(method.ident)] } diff --git a/src/libstd/ebml.rs b/src/libstd/ebml.rs index bee73814580..41cec77851d 100644 --- a/src/libstd/ebml.rs +++ b/src/libstd/ebml.rs @@ -17,6 +17,8 @@ type ebml_state = {ebml_tag: ebml_tag, tag_pos: uint, data_pos: uint}; // ebml reading type doc = {data: @[u8], start: uint, end: uint}; +type tagged_doc = {tag: uint, doc: doc}; + fn vint_at(data: [u8], start: uint) -> {val: uint, next: uint} { let a = data[start]; if a & 0x80u8 != 0u8 { @@ -43,11 +45,12 @@ fn new_doc(data: @[u8]) -> doc { ret {data: data, start: 0u, end: vec::len::(*data)}; } -fn doc_at(data: @[u8], start: uint) -> doc { +fn doc_at(data: @[u8], start: uint) -> tagged_doc { let elt_tag = vint_at(*data, start); let elt_size = vint_at(*data, elt_tag.next); let end = elt_size.next + elt_size.val; - ret {data: data, start: elt_size.next, end: end}; + ret {tag: elt_tag.val, + doc: {data: data, start: elt_size.next, end: end}}; } fn maybe_get_doc(d: doc, tg: uint) -> option { @@ -120,18 +123,18 @@ fn doc_as_uint(d: doc) -> uint { // ebml writing type writer = {writer: io::writer, mutable size_positions: [uint]}; -fn write_sized_vint(w: io::writer, n: uint, size: uint) { +fn write_sized_vint(w: io::writer, n: u64, size: uint) { let buf: [u8]; alt size { 1u { buf = [0x80u8 | (n as u8)]; } - 2u { buf = [0x40u8 | ((n >> 8u) as u8), (n & 0xffu) as u8]; } + 2u { buf = [0x40u8 | ((n >> 8_u64) as u8), n as u8]; } 3u { - buf = [0x20u8 | ((n >> 16u) as u8), (n >> 8u & 0xffu) as u8, - (n & 0xffu) as u8]; + buf = [0x20u8 | ((n >> 16_u64) as u8), (n >> 8_u64) as u8, + n as u8]; } 4u { - buf = [0x10u8 | ((n >> 24u) as u8), (n >> 16u & 0xffu) as u8, - (n >> 8u & 0xffu) as u8, (n & 0xffu) as u8]; + buf = [0x10u8 | ((n >> 24_u64) as u8), (n >> 16_u64) as u8, + (n >> 8_u64) as u8, n as u8]; } _ { #error("vint to write too big"); fail; } } @@ -156,7 +159,7 @@ fn create_writer(w: io::writer) -> writer { // TODO: Provide a function to write the standard ebml header. fn start_tag(w: writer, tag_id: uint) { // Write the enum ID: - write_vint(w.writer, tag_id); + write_vint(w.writer, tag_id as u64); // Write a placeholder four-byte size. w.size_positions += [w.writer.tell()]; @@ -168,7 +171,7 @@ fn end_tag(w: writer) { let last_size_pos = vec::pop::(w.size_positions); let cur_pos = w.writer.tell(); w.writer.seek(last_size_pos as int, io::seek_set); - write_sized_vint(w.writer, cur_pos - last_size_pos - 4u, 4u); + write_sized_vint(w.writer, (cur_pos - last_size_pos - 4u) as u64, 4u); w.writer.seek(cur_pos as int, io::seek_set); } @@ -179,12 +182,12 @@ impl writer_util for writer { end_tag(self); } - fn wr_uint(id: u64) { + fn wr_u64(id: u64) { write_vint(self.writer, id); } - fn wr_int(id: uint) { - write_vint(self.writer, id); + fn wr_uint(id: uint) { + self.wr_u64(id as u64); } fn wr_bytes(b: [u8]) { diff --git a/src/libstd/serialization.rs b/src/libstd/serialization.rs new file mode 100644 index 00000000000..b71e057a5ad --- /dev/null +++ b/src/libstd/serialization.rs @@ -0,0 +1,252 @@ +/* +Module: serialization + +Support code for serialization. +*/ + +import list::list; + +iface serializer { + // Primitive types: + fn emit_nil(); + fn emit_u64(v: u64); + fn emit_i64(v: u64); + fn emit_bool(v: bool); + fn emit_f64(v: f64); + fn emit_str(v: str); + + // Compound types: + fn emit_enum(name: str, f: fn()); + fn emit_enum_variant(v_name: str, v_id: uint, sz: uint, f: fn()); + fn emit_enum_variant_arg(idx: uint, f: fn()); + fn emit_vec(len: uint, f: fn()); + fn emit_vec_elt(idx: uint, f: fn()); + fn emit_box(f: fn()); + fn emit_uniq(f: fn()); + fn emit_rec(f: fn()); + fn emit_rec_field(f_name: str, f_idx: uint, f: fn()); + fn emit_tup(sz: uint, f: fn()); + fn emit_tup_elt(idx: uint, f: fn()); +} + +iface deserializer { + // Primitive types: + fn read_nil() -> (); + fn read_u64() -> u64; + fn read_i64() -> i64; + fn read_bool() -> bool; + fn read_f64() -> f64; + fn read_str() -> str; + + // Compound types: + fn read_enum(name: str, f: fn() -> T) -> T; + fn read_enum_variant(f: fn(uint) -> T) -> T; + fn read_enum_variant_arg(idx: uint, f: fn() -> T) -> T; + fn read_vec(f: fn(uint) -> T) -> T; + fn read_vec_elt(idx: uint, f: fn() -> T) -> T; + fn read_box(f: fn() -> T) -> T; + fn read_uniq(f: fn() -> T) -> T; + fn read_rec(f: fn() -> T) -> T; + fn read_rec_field(f_name: str, f_idx: uint, f: fn() -> T) -> T; + fn read_tup(sz: uint, f: fn() -> T) -> T; + fn read_tup_elt(idx: uint, f: fn() -> T) -> T; +} + +/* +type ppserializer = { + writer: io::writer +}; + +impl serializer for ppserializer { + fn emit_nil() { self.writer.write_str("()") } + + fn emit_u64(v: u64) { self.writer.write_str(#fmt["%lu", v]); } + fn emit_i64(v: u64) { ebml::write_vint(self, v as uint) } + fn emit_bool(v: bool) { ebml::write_vint(self, v as uint) } + fn emit_f64(v: f64) { fail "float serialization not impl"; } + fn emit_str(v: str) { + self.wr_tag(es_str as uint) {|| self.wr_str(v) } + } + + fn emit_enum(name: str, f: fn()) { + self.wr_tag(es_enum as uint) {|| f() } + } + fn emit_enum_variant(v_name: str, v_id: uint, f: fn()) { + self.wr_tag(es_enum_vid as uint) {|| self.write_vint(v_id) } + self.wr_tag(es_enum_body as uint) {|| f() } + } + + fn emit_vec(len: uint, f: fn()) { + self.wr_tag(es_vec as uint) {|| + self.wr_tag(es_vec_len as uint) {|| self.write_vint(len) } + f() + } + } + + fn emit_vec_elt(idx: uint, f: fn()) { + self.wr_tag(es_vec_elt as uint) {|| f() } + } + + fn emit_vec_elt(idx: uint, f: fn()) { + self.wr_tag(es_vec_elt as uint) {|| f() } + } + + fn emit_box(f: fn()) { f() } + fn emit_uniq(f: fn()) { f() } + fn emit_rec_field(f_name: str, f_idx: uint, f: fn()) { f() } + fn emit_tup(sz: uint, f: fn()) { f() } + fn emit_tup_elt(idx: uint, f: fn()) { f() } +} +*/ + +enum ebml_serializer_tags { + es_str, + es_enum, es_enum_vid, es_enum_body, + es_vec, es_vec_len, es_vec_elt +} + +impl of serializer for ebml::writer { + fn emit_nil() {} + + fn emit_u64(v: u64) { ebml::write_vint(self, v) } + fn emit_i64(v: u64) { ebml::write_vint(self, v as uint) } + fn emit_bool(v: bool) { ebml::write_vint(self, v as uint) } + fn emit_f64(v: f64) { fail "float serialization not impl"; } + fn emit_str(v: str) { + self.wr_tag(es_str as uint) {|| self.wr_str(v) } + } + + fn emit_enum(name: str, f: fn()) { + self.wr_tag(es_enum as uint) {|| f() } + } + fn emit_enum_variant(v_name: str, v_id: uint, f: fn()) { + self.wr_tag(es_enum_vid as uint) {|| self.write_vint(v_id) } + self.wr_tag(es_enum_body as uint) {|| f() } + } + fn emit_enum_variant_arg(idx: uint, f: fn()) { f() } + + fn emit_vec(len: uint, f: fn()) { + self.wr_tag(es_vec as uint) {|| + self.wr_tag(es_vec_len as uint) {|| self.write_vint(len) } + f() + } + } + + fn emit_vec_elt(idx: uint, f: fn()) { + self.wr_tag(es_vec_elt as uint) {|| f() } + } + + fn emit_vec_elt(idx: uint, f: fn()) { + self.wr_tag(es_vec_elt as uint) {|| f() } + } + + fn emit_box(f: fn()) { f() } + fn emit_uniq(f: fn()) { f() } + fn emit_rec(f: fn()) { f() } + fn emit_rec_field(f_name: str, f_idx: uint, f: fn()) { f() } + fn emit_tup(sz: uint, f: fn()) { f() } + fn emit_tup_elt(idx: uint, f: fn()) { f() } +} + +type ebml_deserializer = {mutable parent: ebml::doc, + mutable pos: uint}; + +fn mk_ebml_deserializer(d: ebml::doc) -> ebml_deserializer { + {mutable parent: d, mutable pos: 0u} +} + +impl of deserializer for ebml_deserializer { + fn next_doc(exp_tag: uint) -> ebml::doc { + if self.pos >= self.parent.end { + fail "no more documents in current node!"; + } + let (r_tag, r_doc) = ebml::doc_at(self.parent.data, self.pos); + if r_tag != exp_tag { + fail #fmt["expected EMBL doc with tag %u but found tag %u", + exp_tag, r_tag]; + } + if r_doc.end >= self.parent.end { + fail #fmt["invalid EBML, child extends to 0x%x, parent to 0x%x", + r_doc.end, self.parent.end]; + } + self.pos = result.end; + ret result; + } + + fn push_doc(d: ebml::doc, f: fn() -> T) -> T{ + let old_parent = self.parent; + let old_pos = self.pos; + self.parent = d; + self.pos = 0u; + let r = f(); + self.parent = old_parent; + self.pos = old_pos; + ret r; + } + + fn next_u64(exp_tag: uint) { + ebml::doc_as_uint(self.next_doc(exp_tag)) + } + + fn read_nil() -> () { () } + fn read_u64() -> u64 { next_u64(es_u64) } + fn read_i64() -> i64 { next_u64(es_u64) as i64 } + fn read_bool() -> bool { next_u64(es_u64) as bool } + fn read_f64() -> f64 { fail "Float"; } + fn read_str() -> str { ebml::doc_str(self.next_doc(es_str)) } + + // Compound types: + fn read_enum(name: str, f: fn() -> T) -> T { + self.push_doc(self.next_doc(es_enum), f) + } + + fn read_enum_variant(f: fn(uint) -> T) -> T { + let idx = self.next_u64(es_enum_vid); + self.push_doc(self.next_doc(es_enum_body)) {|| + f(idx) + } + } + + fn read_enum_variant_arg(_idx: uint, f: fn() -> T) -> T { + f() + } + + fn read_vec(f: fn(uint) -> T) -> T { + self.push_doc(self.next_doc(es_vec)) {|| + let len = self.next_u64(es_vec_len) as uint; + f(len) + } + } + + fn read_vec_elt(idx: uint, f: fn() -> T) -> T { + self.push_doc(self.next_doc(es_vec_elt), f) + } + + fn read_box(f: fn() -> T) -> T { + f() + } + + fn read_uniq(f: fn() -> T) -> T { + f() + } + + fn read_rec(f: fn() -> T) -> T { + f() + } + + fn read_rec_field(f_name: str, f_idx: uint, f: fn() -> T) -> T { + f() + } + + fn read_tup(sz: uint, f: fn() -> T) -> T { + f() + } + + fn read_tup_elt(idx: uint, f: fn() -> T) -> T { + f() + } +} + +// ___________________________________________________________________________ +// Testing + diff --git a/src/serializer/serializer.rs b/src/serializer/serializer.rs index 80b51ba78a7..2c4b558f96f 100644 --- a/src/serializer/serializer.rs +++ b/src/serializer/serializer.rs @@ -166,24 +166,30 @@ impl serialize_ctx for serialize_ctx { let body_node = alt ty::get(ty0).struct { ty::ty_nil | ty::ty_bot { "()" } - ty::ty_int(_) { #fmt["serialize_i64(cx, %s as i64)", v] } - ty::ty_uint(_) { #fmt["serialize_u64(cx, %s as u64)", v] } - ty::ty_float(_) { #fmt["serialize_float(cx, %s as float)", v] } - ty::ty_bool { #fmt["serialize_bool(cx, %s)", v] } - ty::ty_str { #fmt["serialize_str(cx, %s)", v] } + ty::ty_int(_) { #fmt["s.emit_i64(%s as i64)", v] } + ty::ty_uint(_) { #fmt["s.emit_u64(%s as u64)", v] } + ty::ty_float(_) { #fmt["s.emit_f64(%s as f64)", v] } + ty::ty_bool { #fmt["s.emit_bool(%s)", v] } + ty::ty_str { #fmt["s.emit_str(%s)", v] } ty::ty_enum(def_id, tps) { self.serialize_enum(v, def_id, tps) } - ty::ty_box(mt) | ty::ty_uniq(mt) | ty::ty_ptr(mt) { - self.serialize_ty(mt.ty, #fmt["*%s", v]) + ty::ty_box(mt) { + let s = self.serialize_ty(mt.ty, #fmt["*%s", v]); + #fmt["s.emit_box({||%s})", s] + } + ty::ty_uniq(mt) { + let s = self.serialize_ty(mt.ty, #fmt["*%s", v]); + #fmt["s.emit_uniq({||%s})", s] } ty::ty_vec(mt) { let selem = self.serialize_ty(mt.ty, "i"); - #fmt["start_vec(cx); \ - vec::iter(v) {|i| \ - start_vec_item(cx); \ - %s; \ - end_vec_item(cx); \ - } \ - end_vec(cx);", selem] + #fmt["s.emit_vec(vec::len(v), {|| \ + uint::range(0, vec::len(v), {|i| \ + s.emit_vec_elt(i, {||\ + %s;\ + })})})", selem] + } + ty::ty_class(_, _) { + fail "TODO--implement class"; } ty::ty_rec(fields) { let stmts = vec::map(fields) {|field| @@ -191,17 +197,18 @@ impl serialize_ctx for serialize_ctx { let f_ty = field.mt.ty; self.serialize_ty(f_ty, #fmt["%s.%s", v, f_name]) }; - self.blk_expr(stmts) + #fmt["s.emit_rec({||%s})", self.blk_expr(stmts)] } ty::ty_tup(tys) { - let (pat, stmts) = self.serialize_arm("", tys); - #fmt["alt %s { \ + let (pat, stmts) = self.serialize_arm("", "emit_tup_elt", tys); + #fmt["s.emit_tup(%uu, {|| alt %s { \ %s %s \ - }", v, pat, self.blk_expr(stmts)] + }})", vec::len(tys), v, pat, self.blk_expr(stmts)] } ty::ty_constr(t, _) { self.serialize_ty(t, v) } + ty::ty_ptr(_) | ty::ty_fn(_) | ty::ty_iface(_, _) | ty::ty_res(_, _, _) | @@ -212,7 +219,8 @@ impl serialize_ctx for serialize_ctx { } }; - let item = #fmt["fn %s(cx: ctxt, v: %s) {\ + let item = #fmt["fn %s\ + (s: S, v: %s) {\ %s;\ }", name, ty0_str, body_node]; self.add_item(item); @@ -232,7 +240,8 @@ impl serialize_ctx for serialize_ctx { if n_args == 0u { (v_path, []) } else { - self.serialize_arm(v_path, variant.args) + self.serialize_arm(v_path, "emit_enum_variant_arg", + variant.args) } }; @@ -251,7 +260,8 @@ impl serialize_ctx for serialize_ctx { }", v, str::connect(arms, "\n")] } - fn serialize_arm(v_path: str, args: [ty::t]) -> (ast_pat, [ast_stmt]) { + fn serialize_arm(v_path: str, emit_fn: str, args: [ty::t]) + -> (ast_pat, [ast_stmt]) { let n_args = vec::len(args); let arg_nms = vec::init_fn(n_args) {|i| #fmt["v%u", i] }; let v_pat = @@ -260,7 +270,7 @@ impl serialize_ctx for serialize_ctx { let arg_ty = args[i]; let serialize_expr = self.serialize_ty(arg_ty, arg_nms[i]); - #fmt["%s;", serialize_expr] + #fmt["s.%s(%uu, {|| %s })", emit_fn, i, serialize_expr] }; (v_pat, stmts) }