diff --git a/src/comp/rustc.rc b/src/comp/rustc.rc index c9f0d2f067b..aa88d1d4a88 100644 --- a/src/comp/rustc.rc +++ b/src/comp/rustc.rc @@ -63,6 +63,7 @@ mod syntax { mod expand; mod fmt; + mod ifmt; mod env; mod simplext; mod concat_idents; diff --git a/src/comp/syntax/ext/base.rs b/src/comp/syntax/ext/base.rs index 24efb38be86..2dade422c75 100644 --- a/src/comp/syntax/ext/base.rs +++ b/src/comp/syntax/ext/base.rs @@ -23,6 +23,7 @@ tag syntax_extension { fn syntax_expander_table() -> hashmap { let syntax_expanders = new_str_hash::(); syntax_expanders.insert(~"fmt", normal(ext::fmt::expand_syntax_ext)); + syntax_expanders.insert(~"ifmt", normal(ext::ifmt::expand_syntax_ext)); syntax_expanders.insert(~"env", normal(ext::env::expand_syntax_ext)); syntax_expanders.insert(~"macro", macro_defining(ext::simplext::add_new_extension)); diff --git a/src/comp/syntax/ext/ifmt.rs b/src/comp/syntax/ext/ifmt.rs new file mode 100644 index 00000000000..c1bd13ac01e --- /dev/null +++ b/src/comp/syntax/ext/ifmt.rs @@ -0,0 +1,363 @@ + + +/* + * The compiler code necessary to support the #fmt extension. Eventually this + * should all get sucked into either the standard library extfmt module or the + * compiler syntax extension plugin interface. 
+ */
+import std::vec;
+import std::str;
+import std::istr;
+import std::option;
+import std::option::none;
+import std::option::some;
+import std::extifmt::ct::*;
+import base::*;
+import codemap::span;
+export expand_syntax_ext;
+
+// Entry point of the syntax extension (registered under the name "ifmt" in
+// the syntax expander table).
+//
+//   cx    - extension context, used for error reporting and node ids
+//   sp    - span of the whole extension invocation
+//   arg   - the invocation's argument; must be a vector expression `[...]`
+//           whose first element is the format string
+//   _body - unused
+//           NOTE(review): the type argument of `option::t` looks like it
+//           was lost from this signature -- confirm against the real file.
+//
+// Returns the expression built by pieces_to_expr for the parsed format
+// string. Every failure path goes through cx.span_fatal.
+fn expand_syntax_ext(cx: &ext_ctxt, sp: span, arg: @ast::expr,
+                     _body: &option::t) -> @ast::expr {
+    // The argument has to be a vector literal; its elements are the format
+    // string followed by the values to format.
+    let args: [@ast::expr] =
+        alt arg.node {
+          ast::expr_vec(elts, _) { elts }
+          _ {
+            cx.span_fatal(sp, ~"#fmt requires arguments of the form `[...]`.")
+          }
+        };
+    if vec::len::<@ast::expr>(args) == 0u {
+        cx.span_fatal(sp, ~"#fmt requires a format string");
+    }
+    // expr_to_str comes from `base`; presumably it reports the given message
+    // when args[0] is not a string literal -- verify against base.rs.
+    let fmt =
+        expr_to_str(cx, args[0],
+                    ~"first argument to #fmt must be a " +
+                        ~"string literal.");
+    let fmtspan = args[0].span;
+    log "Format string:";
+    log fmt;
+    // Format-string parse errors are reported at the span of the format
+    // string itself (fmtspan), not at the span of the whole invocation.
+    fn parse_fmt_err_(cx: &ext_ctxt, sp: span, msg: &istr) -> ! {
+        cx.span_fatal(sp, msg);
+    }
+    let parse_fmt_err = bind parse_fmt_err_(cx, fmtspan, _);
+    let pieces = parse_fmt_string(fmt, parse_fmt_err);
+    ret pieces_to_expr(cx, sp, pieces, args);
+}
+
+// FIXME: A lot of these functions for producing expressions can probably
+// be factored out in common with other code that builds expressions.
+// FIXME: Cleanup the naming of these functions
+
+// Turns the parsed format-string pieces into a single expression: starting
+// from an empty string literal, every literal piece and every conversion
+// call is appended with a binary `add` expression.
+//
+//   cx     - extension context (error reporting, fresh node ids)
+//   sp     - span of the whole invocation, used for argument-count errors
+//   pieces - output of parse_fmt_string
+//   args   - all invocation arguments; args[0] is the format string and
+//            args[1..] are consumed in order, one per conversion
+//
+// Fatal errors are raised when there are fewer or more arguments than
+// conversions.
+fn pieces_to_expr(cx: &ext_ctxt, sp: span, pieces: &[piece],
+                  args: &[@ast::expr]) -> @ast::expr {
+    // Wraps a literal in an expr_lit node carrying a fresh id.
+    fn make_new_lit(cx: &ext_ctxt, sp: span, lit: ast::lit_) -> @ast::expr {
+        let sp_lit = @{node: lit, span: sp};
+        ret @{id: cx.next_id(), node: ast::expr_lit(sp_lit), span: sp};
+    }
+    // String literal expression (built with ast::sk_unique).
+    fn make_new_str(cx: &ext_ctxt, sp: span, s: &istr) -> @ast::expr {
+        let lit = ast::lit_str(s, ast::sk_unique);
+        ret make_new_lit(cx, sp, lit);
+    }
+    // Signed integer literal expression.
+    fn make_new_int(cx: &ext_ctxt, sp: span, i: int) -> @ast::expr {
+        let lit = ast::lit_int(i);
+        ret make_new_lit(cx, sp, lit);
+    }
+    // Unsigned integer literal expression. Not called anywhere in this
+    // function.
+    fn make_new_uint(cx: &ext_ctxt, sp: span, u: uint) -> @ast::expr {
+        let lit = ast::lit_uint(u);
+        ret make_new_lit(cx, sp, lit);
+    }
+    // Binary expression `lhs + rhs`.
+    fn make_add_expr(cx: &ext_ctxt, sp: span, lhs: @ast::expr,
+                     rhs: @ast::expr) -> @ast::expr {
+        let binexpr = ast::expr_binary(ast::add, lhs, rhs);
+        ret @{id: cx.next_id(), node: binexpr, span: sp};
+    }
+    // Non-global path expression made of `idents`, with no type arguments.
+    fn make_path_expr(cx: &ext_ctxt, sp: span, idents: &[ast::ident]) ->
+       @ast::expr {
+        let path = {global: false, idents: idents, types: []};
+        let sp_path = {node: path, span: sp};
+        let pathexpr = ast::expr_path(sp_path);
+        ret @{id: cx.next_id(), node: pathexpr, span: sp};
+    }
+    // Immutable vector expression holding `exprs`.
+    fn make_vec_expr(cx: &ext_ctxt, sp: span, exprs: &[@ast::expr]) ->
+       @ast::expr {
+        let vecexpr = ast::expr_vec(exprs, ast::imm);
+        ret @{id: cx.next_id(), node: vecexpr, span: sp};
+    }
+    // Call expression `fn_path(args...)`.
+    fn make_call(cx: &ext_ctxt, sp: span, fn_path: &[ast::ident],
+                 args: &[@ast::expr]) -> @ast::expr {
+        let pathexpr = make_path_expr(cx, sp, fn_path);
+        let callexpr = ast::expr_call(pathexpr, args);
+        ret @{id: cx.next_id(), node: callexpr, span: sp};
+    }
+    // Record expression with one immutable field per {ident, ex} pair and
+    // no base record.
+    fn make_rec_expr(cx: &ext_ctxt, sp: span,
+                     fields: &[{ident: ast::ident, ex: @ast::expr}]) ->
+       @ast::expr {
+        let astfields: [ast::field] = [];
+        for field: {ident: ast::ident, ex: @ast::expr} in fields {
+            let ident = field.ident;
+            let val = field.ex;
+            let astfield =
+                {node: {mut: ast::imm, ident: ident, expr: val}, span: sp};
+            astfields += [astfield];
+        }
+        let recexpr = ast::expr_rec(astfields, option::none::<@ast::expr>);
+        ret @{id: cx.next_id(), node: recexpr, span: sp};
+    }
+    // Path to an item of the run-time module: `extifmt::rt::<ident>` when
+    // the crate file name contains "std.rc" (i.e. std itself is being
+    // compiled), otherwise `std::extifmt::rt::<ident>`.
+    fn make_path_vec(cx: &ext_ctxt, ident: &ast::ident) -> [ast::ident] {
+        fn compiling_std(cx: &ext_ctxt) -> bool {
+            ret istr::find(cx.crate_file_name(), ~"std.rc") >= 0;
+        }
+        if compiling_std(cx) {
+            ret [~"extifmt", ~"rt", ident];
+        } else { ret [~"std", ~"extifmt", ~"rt", ident]; }
+    }
+    // Path expression naming an item of the run-time module.
+    fn make_rt_path_expr(cx: &ext_ctxt, sp: span,
+                         ident: &istr) -> @ast::expr {
+        let path = make_path_vec(cx, ident);
+        ret make_path_expr(cx, sp, path);
+    }
+    // Produces an AST expression that represents a RT::conv record,
+    // which tells the RT::conv* functions how to perform the conversion
+
+    fn make_rt_conv_expr(cx: &ext_ctxt, sp: span, cnv: &conv) -> @ast::expr {
+        // Vector of run-time flag paths mirroring the compile-time flags.
+        fn make_flags(cx: &ext_ctxt, sp: span, flags: &[flag]) -> @ast::expr {
+            let flagexprs: [@ast::expr] = [];
+            for f: flag in flags {
+                let fstr;
+                alt f {
+                  flag_left_justify. { fstr = ~"flag_left_justify"; }
+                  flag_left_zero_pad. { fstr = ~"flag_left_zero_pad"; }
+                  flag_space_for_sign. { fstr = ~"flag_space_for_sign"; }
+                  flag_sign_always. { fstr = ~"flag_sign_always"; }
+                  flag_alternate. { fstr = ~"flag_alternate"; }
+                }
+                flagexprs += [make_rt_path_expr(cx, sp, fstr)];
+            }
+            // FIXME: 0-length vectors can't have their type inferred
+            // through the rec that these flags are a member of, so
+            // this is a hack placeholder flag
+
+            if vec::len::<@ast::expr>(flagexprs) == 0u {
+                flagexprs += [make_rt_path_expr(cx, sp, ~"flag_none")];
+            }
+            ret make_vec_expr(cx, sp, flagexprs);
+        }
+        // Run-time count: the `count_implied` path or a `count_is(n)` call.
+        // Parameter-based counts are reported as unimplemented.
+        fn make_count(cx: &ext_ctxt, sp: span, cnt: &count) -> @ast::expr {
+            alt cnt {
+              count_implied. {
+                ret make_rt_path_expr(cx, sp, ~"count_implied");
+              }
+              count_is(c) {
+                let count_lit = make_new_int(cx, sp, c);
+                let count_is_path = make_path_vec(cx, ~"count_is");
+                let count_is_args = [count_lit];
+                ret make_call(cx, sp, count_is_path, count_is_args);
+              }
+              _ { cx.span_unimpl(sp, ~"unimplemented #fmt conversion"); }
+            }
+        }
+        // Run-time type tag; only hex/bits/octal are distinguished, every
+        // other compile-time type maps to `ty_default`.
+        fn make_ty(cx: &ext_ctxt, sp: span, t: &ty) -> @ast::expr {
+            let rt_type;
+            alt t {
+              ty_hex(c) {
+                alt c {
+                  case_upper. { rt_type = ~"ty_hex_upper"; }
+                  case_lower. { rt_type = ~"ty_hex_lower"; }
+                }
+              }
+              ty_bits. { rt_type = ~"ty_bits"; }
+              ty_octal. { rt_type = ~"ty_octal"; }
+              _ { rt_type = ~"ty_default"; }
+            }
+            ret make_rt_path_expr(cx, sp, rt_type);
+        }
+        // Record literal {flags, width, precision, ty}.
+        fn make_conv_rec(cx: &ext_ctxt, sp: span, flags_expr: @ast::expr,
+                         width_expr: @ast::expr, precision_expr: @ast::expr,
+                         ty_expr: @ast::expr) -> @ast::expr {
+            ret make_rec_expr(cx, sp,
+                              [{ident: ~"flags", ex: flags_expr},
+                               {ident: ~"width", ex: width_expr},
+                               {ident: ~"precision", ex: precision_expr},
+                               {ident: ~"ty", ex: ty_expr}]);
+        }
+        let rt_conv_flags = make_flags(cx, sp, cnv.flags);
+        let rt_conv_width = make_count(cx, sp, cnv.width);
+        let rt_conv_precision = make_count(cx, sp, cnv.precision);
+        let rt_conv_ty = make_ty(cx, sp, cnv.ty);
+        ret make_conv_rec(cx, sp, rt_conv_flags, rt_conv_width,
+                          rt_conv_precision, rt_conv_ty);
+    }
+    // Call to the run-time function `conv_<conv_type>(conv_record, arg)`.
+    // The call node carries the argument's span; the conv record is built
+    // with `sp`.
+    fn make_conv_call(cx: &ext_ctxt, sp: span, conv_type: &istr,
+                      cnv: &conv, arg: @ast::expr) -> @ast::expr {
+        let fname = ~"conv_" + conv_type;
+        let path = make_path_vec(cx, fname);
+        let cnv_expr = make_rt_conv_expr(cx, sp, cnv);
+        let args = [cnv_expr, arg];
+        ret make_call(cx, arg.span, path, args);
+    }
+    // Validates one conversion and builds the run-time call for it.
+    // Explicit parameters, the alternate flag and parameter-based
+    // width/precision are reported as unimplemented; `+` and space flags
+    // are fatal errors unless the conversion is a signed integer.
+    fn make_new_conv(cx: &ext_ctxt, sp: span, cnv: conv, arg: @ast::expr) ->
+       @ast::expr {
+        // FIXME: Extract all this validation into extfmt::ct
+
+        fn is_signed_type(cnv: conv) -> bool {
+            alt cnv.ty {
+              ty_int(s) {
+                alt s { signed. { ret true; } unsigned. { ret false; } }
+              }
+              _ { ret false; }
+            }
+        }
+        let unsupported = ~"conversion not supported in #fmt string";
+        alt cnv.param {
+          option::none. { }
+          _ { cx.span_unimpl(sp, unsupported); }
+        }
+        for f: flag in cnv.flags {
+            alt f {
+              flag_left_justify. { }
+              flag_sign_always. {
+                if !is_signed_type(cnv) {
+                    cx.span_fatal(sp,
+                                  ~"+ flag only valid in " +
+                                      ~"signed #fmt conversion");
+                }
+              }
+              flag_space_for_sign. {
+                if !is_signed_type(cnv) {
+                    cx.span_fatal(sp,
+                                  ~"space flag only valid in " +
+                                      ~"signed #fmt conversions");
+                }
+              }
+              flag_left_zero_pad. { }
+              _ { cx.span_unimpl(sp, unsupported); }
+            }
+        }
+        alt cnv.width {
+          count_implied. { }
+          count_is(_) { }
+          _ { cx.span_unimpl(sp, unsupported); }
+        }
+        alt cnv.precision {
+          count_implied. { }
+          count_is(_) { }
+          _ { cx.span_unimpl(sp, unsupported); }
+        }
+        // Hex, bits and octal all go through conv_uint; the run-time `ty`
+        // field of the conv record selects the radix.
+        alt cnv.ty {
+          ty_str. { ret make_conv_call(cx, arg.span, ~"str", cnv, arg); }
+          ty_int(sign) {
+            alt sign {
+              signed. { ret make_conv_call(cx, arg.span, ~"int", cnv, arg); }
+              unsigned. {
+                ret make_conv_call(cx, arg.span, ~"uint", cnv, arg);
+              }
+            }
+          }
+          ty_bool. { ret make_conv_call(cx, arg.span, ~"bool", cnv, arg); }
+          ty_char. { ret make_conv_call(cx, arg.span, ~"char", cnv, arg); }
+          ty_hex(_) { ret make_conv_call(cx, arg.span, ~"uint", cnv, arg); }
+          ty_bits. { ret make_conv_call(cx, arg.span, ~"uint", cnv, arg); }
+          ty_octal. { ret make_conv_call(cx, arg.span, ~"uint", cnv, arg); }
+          _ { cx.span_unimpl(sp, unsupported); }
+        }
+    }
+    // Debug aid: logs every field of a parsed conversion.
+    // NOTE(review): flag_space_for_sign is logged as "left space pad", which
+    // does not match the flag's name -- confirm whether that is intended.
+    fn log_conv(c: conv) {
+        alt c.param {
+          some(p) {
+            log "param: " +
+                    istr::to_estr(std::int::to_str(p, 10u));
+          }
+          _ { log "param: none"; }
+        }
+        for f: flag in c.flags {
+            alt f {
+              flag_left_justify. { log "flag: left justify"; }
+              flag_left_zero_pad. { log "flag: left zero pad"; }
+              flag_space_for_sign. { log "flag: left space pad"; }
+              flag_sign_always. { log "flag: sign always"; }
+              flag_alternate. { log "flag: alternate"; }
+            }
+        }
+        alt c.width {
+          count_is(i) { log "width: count is " +
+                            istr::to_estr(std::int::to_str(i, 10u)); }
+          count_is_param(i) {
+            log "width: count is param " +
+                    istr::to_estr(std::int::to_str(i, 10u));
+          }
+          count_is_next_param. { log "width: count is next param"; }
+          count_implied. { log "width: count is implied"; }
+        }
+        alt c.precision {
+          count_is(i) { log "prec: count is " +
+                            istr::to_estr(std::int::to_str(i, 10u)); }
+          count_is_param(i) {
+            log "prec: count is param " +
+                    istr::to_estr(std::int::to_str(i, 10u));
+          }
+          count_is_next_param. { log "prec: count is next param"; }
+          count_implied. { log "prec: count is implied"; }
+        }
+        alt c.ty {
+          ty_bool. { log "type: bool"; }
+          ty_str. { log "type: str"; }
+          ty_char. { log "type: char"; }
+          ty_int(s) {
+            alt s {
+              signed. { log "type: signed"; }
+              unsigned. { log "type: unsigned"; }
+            }
+          }
+          ty_bits. { log "type: bits"; }
+          ty_hex(cs) {
+            alt cs {
+              case_upper. { log "type: uhex"; }
+              case_lower. { log "type: lhex"; }
+            }
+          }
+          ty_octal. { log "type: octal"; }
+        }
+    }
+    let fmt_sp = args[0].span;
+    // n counts the conversions seen so far. It is incremented before use,
+    // so it is also the index into `args` of the value for the current
+    // conversion (args[0] being the format string).
+    let n = 0u;
+    let tmp_expr = make_new_str(cx, sp, ~"");
+    let nargs = vec::len::<@ast::expr>(args);
+    for pc: piece in pieces {
+        alt pc {
+          piece_string(s) {
+            let s_expr = make_new_str(cx, fmt_sp, s);
+            tmp_expr = make_add_expr(cx, fmt_sp, tmp_expr, s_expr);
+          }
+          piece_conv(conv) {
+            n += 1u;
+            if n >= nargs {
+                cx.span_fatal(sp,
+                              ~"not enough arguments to #fmt " +
+                                  ~"for the given format string");
+            }
+            log "Building conversion:";
+            log_conv(conv);
+            let arg_expr = args[n];
+            let c_expr = make_new_conv(cx, fmt_sp, conv, arg_expr);
+            tmp_expr = make_add_expr(cx, fmt_sp, tmp_expr, c_expr);
+          }
+        }
+    }
+    let expected_nargs = n + 1u; // n conversions + the fmt string
+
+    if expected_nargs < nargs {
+        cx.span_fatal(
+            sp, istr::from_estr(
+                #fmt["too many arguments to #fmt. found %u, expected %u",
+                     nargs, expected_nargs]));
+    }
+    ret tmp_expr;
+}
+//
+// Local Variables:
+// mode: rust
+// fill-column: 78;
+// indent-tabs-mode: nil
+// c-basic-offset: 4
+// buffer-file-coding-system: utf-8-unix
+// compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
+// End:
+//
diff --git a/src/lib/extifmt.rs b/src/lib/extifmt.rs
new file mode 100644
index 00000000000..ea602891868
--- /dev/null
+++ b/src/lib/extifmt.rs
@@ -0,0 +1,429 @@
+
+
+/* The 'fmt' extension is modeled on the posix printf system.
+ *
+ * A posix conversion ostensibly looks like this:
+ *
+ * %[parameter][flags][width][.precision][length]type
+ *
+ * Given the different numeric type bestiary we have, we omit the 'length'
+ * parameter and support slightly different conversions for 'type':
+ *
+ * %[parameter][flags][width][.precision]type
+ *
+ * we also only support translating-to-rust a tiny subset of the possible
+ * combinations at the moment.
+ */
+import option::none;
+import option::some;
+
+
+/*
+ * We have a 'ct' (compile-time) module that parses format strings into a
+ * sequence of conversions. From those conversions AST fragments are built
+ * that call into properly-typed functions in the 'rt' (run-time) module.
+ * Each of those run-time conversion functions accepts another conversion
+ * description that specifies how to format its output.
+ *
+ * The building of the AST is currently done in a module inside the compiler,
+ * but should migrate over here as the plugin interface is defined.
+ */ + +// Functions used by the fmt extension at compile time +mod ct { + tag signedness { signed; unsigned; } + tag caseness { case_upper; case_lower; } + tag ty { + ty_bool; + ty_str; + ty_char; + ty_int(signedness); + ty_bits; + ty_hex(caseness); + ty_octal; + // FIXME: More types + } + tag flag { + flag_left_justify; + flag_left_zero_pad; + flag_space_for_sign; + flag_sign_always; + flag_alternate; + } + tag count { + count_is(int); + count_is_param(int); + count_is_next_param; + count_implied; + } + + // A formatted conversion from an expression to a string + type conv = + {param: option::t, + flags: [flag], + width: count, + precision: count, + ty: ty}; + + + // A fragment of the output sequence + tag piece { piece_string(istr); piece_conv(conv); } + type error_fn = fn(&istr) -> ! ; + + fn parse_fmt_string(s: &istr, error: error_fn) -> [piece] { + let pieces: [piece] = []; + let lim = istr::byte_len(s); + let buf = ~""; + fn flush_buf(buf: &istr, pieces: &mutable [piece]) -> istr { + if istr::byte_len(buf) > 0u { + let piece = piece_string(buf); + pieces += [piece]; + } + ret ~""; + } + let i = 0u; + while i < lim { + let curr = istr::substr(s, i, 1u); + if istr::eq(curr, ~"%") { + i += 1u; + if i >= lim { + error(~"unterminated conversion at end of string"); + } + let curr2 = istr::substr(s, i, 1u); + if istr::eq(curr2, ~"%") { + i += 1u; + } else { + buf = flush_buf(buf, pieces); + let rs = parse_conversion(s, i, lim, error); + pieces += [rs.piece]; + i = rs.next; + } + } else { buf += curr; i += 1u; } + } + buf = flush_buf(buf, pieces); + ret pieces; + } + fn peek_num(s: &istr, i: uint, lim: uint) -> + option::t<{num: uint, next: uint}> { + if i >= lim { ret none; } + let c = s[i]; + if !('0' as u8 <= c && c <= '9' as u8) { ret option::none; } + let n = c - ('0' as u8) as uint; + ret alt peek_num(s, i + 1u, lim) { + none. 
{ some({num: n, next: i + 1u}) } + some(next) { + let m = next.num; + let j = next.next; + some({num: n * 10u + m, next: j}) + } + }; + } + fn parse_conversion(s: &istr, i: uint, lim: uint, error: error_fn) -> + {piece: piece, next: uint} { + let parm = parse_parameter(s, i, lim); + let flags = parse_flags(s, parm.next, lim); + let width = parse_count(s, flags.next, lim); + let prec = parse_precision(s, width.next, lim); + let ty = parse_type(s, prec.next, lim, error); + ret {piece: + piece_conv({param: parm.param, + flags: flags.flags, + width: width.count, + precision: prec.count, + ty: ty.ty}), + next: ty.next}; + } + fn parse_parameter(s: &istr, i: uint, lim: uint) -> + {param: option::t, next: uint} { + if i >= lim { ret {param: none, next: i}; } + let num = peek_num(s, i, lim); + ret alt num { + none. { {param: none, next: i} } + some(t) { + let n = t.num; + let j = t.next; + if j < lim && s[j] == '$' as u8 { + {param: some(n as int), next: j + 1u} + } else { {param: none, next: i} } + } + }; + } + fn parse_flags(s: &istr, i: uint, lim: uint) -> + {flags: [flag], next: uint} { + let noflags: [flag] = []; + if i >= lim { ret {flags: noflags, next: i}; } + + // FIXME: This recursion generates illegal instructions if the return + // value isn't boxed. 
Only started happening after the ivec conversion + fn more_(f: flag, s: &istr, i: uint, lim: uint) -> + @{flags: [flag], next: uint} { + let next = parse_flags(s, i + 1u, lim); + let rest = next.flags; + let j = next.next; + let curr: [flag] = [f]; + ret @{flags: curr + rest, next: j}; + } + let more = bind more_(_, s, i, lim); + let f = s[i]; + ret if f == '-' as u8 { + *more(flag_left_justify) + } else if f == '0' as u8 { + *more(flag_left_zero_pad) + } else if f == ' ' as u8 { + *more(flag_space_for_sign) + } else if f == '+' as u8 { + *more(flag_sign_always) + } else if f == '#' as u8 { + *more(flag_alternate) + } else { {flags: noflags, next: i} }; + } + fn parse_count(s: &istr, i: uint, + lim: uint) -> {count: count, next: uint} { + ret if i >= lim { + {count: count_implied, next: i} + } else if s[i] == '*' as u8 { + let param = parse_parameter(s, i + 1u, lim); + let j = param.next; + alt param.param { + none. { {count: count_is_next_param, next: j} } + some(n) { {count: count_is_param(n), next: j} } + } + } else { + let num = peek_num(s, i, lim); + alt num { + none. { {count: count_implied, next: i} } + some(num) { + {count: count_is(num.num as int), next: num.next} + } + } + }; + } + fn parse_precision(s: &istr, i: uint, lim: uint) -> + {count: count, next: uint} { + ret if i >= lim { + {count: count_implied, next: i} + } else if s[i] == '.' as u8 { + let count = parse_count(s, i + 1u, lim); + + + // If there were no digits specified, i.e. the precision + // was ".", then the precision is 0 + alt count.count { + count_implied. { {count: count_is(0), next: count.next} } + _ { count } + } + } else { {count: count_implied, next: i} }; + } + fn parse_type(s: &istr, i: uint, lim: uint, error: error_fn) -> + {ty: ty, next: uint} { + if i >= lim { error(~"missing type in conversion"); } + let tstr = istr::substr(s, i, 1u); + // TODO: Do we really want two signed types here? + // How important is it to be printf compatible? 
+ let t = + if istr::eq(tstr, ~"b") { + ty_bool + } else if istr::eq(tstr, ~"s") { + ty_str + } else if istr::eq(tstr, ~"c") { + ty_char + } else if istr::eq(tstr, ~"d") || istr::eq(tstr, ~"i") { + ty_int(signed) + } else if istr::eq(tstr, ~"u") { + ty_int(unsigned) + } else if istr::eq(tstr, ~"x") { + ty_hex(case_lower) + } else if istr::eq(tstr, ~"X") { + ty_hex(case_upper) + } else if istr::eq(tstr, ~"t") { + ty_bits + } else if istr::eq(tstr, ~"o") { + ty_octal + } else { error(~"unknown type in conversion: " + tstr) }; + ret {ty: t, next: i + 1u}; + } +} + + +// Functions used by the fmt extension at runtime. For now there are a lot of +// decisions made a runtime. If it proves worthwhile then some of these +// conditions can be evaluated at compile-time. For now though it's cleaner to +// implement it this way, I think. +mod rt { + tag flag { + flag_left_justify; + flag_left_zero_pad; + flag_space_for_sign; + flag_sign_always; + flag_alternate; + + + // FIXME: This is a hack to avoid creating 0-length vec exprs, + // which have some difficulty typechecking currently. See + // comments in front::extfmt::make_flags + flag_none; + } + tag count { count_is(int); count_implied; } + tag ty { ty_default; ty_bits; ty_hex_upper; ty_hex_lower; ty_octal; } + + // FIXME: May not want to use a vector here for flags; + // instead just use a bool per flag + type conv = {flags: [flag], width: count, precision: count, ty: ty}; + + fn conv_int(cv: &conv, i: int) -> istr { + let radix = 10u; + let prec = get_int_precision(cv); + let s = int_to_str_prec(i, radix, prec); + if 0 <= i { + if have_flag(cv.flags, flag_sign_always) { + s = ~"+" + s; + } else if have_flag(cv.flags, flag_space_for_sign) { + s = ~" " + s; + } + } + ret pad(cv, s, pad_signed); + } + fn conv_uint(cv: &conv, u: uint) -> istr { + let prec = get_int_precision(cv); + let rs = + alt cv.ty { + ty_default. { uint_to_str_prec(u, 10u, prec) } + ty_hex_lower. { uint_to_str_prec(u, 16u, prec) } + ty_hex_upper. 
{ istr::to_upper(uint_to_str_prec(u, 16u, prec)) } + ty_bits. { uint_to_str_prec(u, 2u, prec) } + ty_octal. { uint_to_str_prec(u, 8u, prec) } + }; + ret pad(cv, rs, pad_unsigned); + } + fn conv_bool(cv: &conv, b: bool) -> istr { + let s = if b { ~"true" } else { ~"false" }; + // run the boolean conversion through the string conversion logic, + // giving it the same rules for precision, etc. + + ret conv_str(cv, s); + } + fn conv_char(cv: &conv, c: char) -> istr { + ret pad(cv, istr::from_char(c), pad_nozero); + } + fn conv_str(cv: &conv, s: &istr) -> istr { + // For strings, precision is the maximum characters + // displayed + + // FIXME: substr works on bytes, not chars! + let unpadded = + alt cv.precision { + count_implied. { s } + count_is(max) { + if max as uint < istr::char_len(s) { + istr::substr(s, 0u, max as uint) + } else { s } + } + }; + ret pad(cv, unpadded, pad_nozero); + } + + // Convert an int to string with minimum number of digits. If precision is + // 0 and num is 0 then the result is the empty string. + fn int_to_str_prec(num: int, radix: uint, prec: uint) -> istr { + ret if num < 0 { + ~"-" + uint_to_str_prec(-num as uint, radix, prec) + } else { uint_to_str_prec(num as uint, radix, prec) }; + } + + // Convert a uint to string with a minimum number of digits. If precision + // is 0 and num is 0 then the result is the empty string. Could move this + // to uint: but it doesn't seem all that useful. + fn uint_to_str_prec(num: uint, radix: uint, prec: uint) -> istr { + ret if prec == 0u && num == 0u { + ~"" + } else { + let s = uint::to_str(num, radix); + let len = istr::char_len(s); + if len < prec { + let diff = prec - len; + let pad = str_init_elt('0', diff); + pad + s + } else { s } + }; + } + fn get_int_precision(cv: &conv) -> uint { + ret alt cv.precision { + count_is(c) { c as uint } + count_implied. 
{ 1u } + }; + } + + // FIXME: This might be useful in str: but needs to be utf8 safe first + fn str_init_elt(c: char, n_elts: uint) -> istr { + let svec = vec::init_elt::(c as u8, n_elts); + + ret istr::unsafe_from_bytes(svec); + } + tag pad_mode { pad_signed; pad_unsigned; pad_nozero; } + fn pad(cv: &conv, s: &istr, mode: pad_mode) -> istr { + let uwidth; + alt cv.width { + count_implied. { ret s; } + count_is(width) { + // FIXME: Maybe width should be uint + + uwidth = width as uint; + } + } + let strlen = istr::char_len(s); + if uwidth <= strlen { ret s; } + let padchar = ' '; + let diff = uwidth - strlen; + if have_flag(cv.flags, flag_left_justify) { + let padstr = str_init_elt(padchar, diff); + ret s + padstr; + } + let might_zero_pad = false; + let signed = false; + alt mode { + pad_nozero. { + // fallthrough + + } + pad_signed. { might_zero_pad = true; signed = true; } + pad_unsigned. { might_zero_pad = true; } + } + fn have_precision(cv: &conv) -> bool { + ret alt cv.precision { count_implied. { false } _ { true } }; + } + let zero_padding = false; + if might_zero_pad && have_flag(cv.flags, flag_left_zero_pad) && + !have_precision(cv) { + padchar = '0'; + zero_padding = true; + } + let padstr = str_init_elt(padchar, diff); + // This is completely heinous. If we have a signed value then + // potentially rip apart the intermediate result and insert some + // zeros. It may make sense to convert zero padding to a precision + // instead. 
+ + if signed && zero_padding && istr::byte_len(s) > 0u { + let head = s[0]; + if head == '+' as u8 || head == '-' as u8 || head == ' ' as u8 { + let headstr = istr::unsafe_from_bytes([head]); + let bytelen = istr::byte_len(s); + let numpart = istr::substr(s, 1u, bytelen - 1u); + ret headstr + padstr + numpart; + } + } + ret padstr + s; + } + fn have_flag(flags: &[flag], f: flag) -> bool { + for candidate: flag in flags { if candidate == f { ret true; } } + ret false; + } +} +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; +// End: diff --git a/src/lib/std.rc b/src/lib/std.rc index ffbf26b5928..d39f5004af1 100644 --- a/src/lib/std.rc +++ b/src/lib/std.rc @@ -86,6 +86,7 @@ mod sha1; mod ebml; mod ufind; mod extfmt; +mod extifmt; mod box; mod getopts; mod time; diff --git a/src/test/run-pass/syntax-extension-fmt.rs b/src/test/run-pass/syntax-extension-fmt.rs index c241a68e7f1..9e103adcf60 100644 --- a/src/test/run-pass/syntax-extension-fmt.rs +++ b/src/test/run-pass/syntax-extension-fmt.rs @@ -1,17 +1,17 @@ use std; -import std::str; +import std::istr; -fn test(actual: str, expected: str) { +fn test(actual: &istr, expected: &istr) { log actual; log expected; - assert (str::eq(actual, expected)); + assert (istr::eq(actual, expected)); } fn main() { - test(#fmt["hello %d friends and %s things", 10, "formatted"], - "hello 10 friends and formatted things"); + test(#ifmt[~"hello %d friends and %s things", 10, ~"formatted"], + ~"hello 10 friends and formatted things"); - test(#fmt["test"], "test"); + test(#ifmt[~"test"], ~"test"); // a quadratic optimization in LLVM (jump-threading) makes this test a // bit slow to compile unless we break it up @@ -26,192 +26,192 @@ fn main() { fn part1() { // Simple tests for types - test(#fmt["%d", 1], "1"); - test(#fmt["%i", 2], "2"); - test(#fmt["%i", -1], "-1"); 
- test(#fmt["%u", 10u], "10"); - test(#fmt["%s", "test"], "test"); - test(#fmt["%b", true], "true"); - test(#fmt["%b", false], "false"); - test(#fmt["%c", 'A'], "A"); - test(#fmt["%x", 0xff_u], "ff"); - test(#fmt["%X", 0x12ab_u], "12AB"); - test(#fmt["%o", 10u], "12"); - test(#fmt["%t", 0b11010101_u], "11010101"); + test(#ifmt[~"%d", 1], ~"1"); + test(#ifmt[~"%i", 2], ~"2"); + test(#ifmt[~"%i", -1], ~"-1"); + test(#ifmt[~"%u", 10u], ~"10"); + test(#ifmt[~"%s", ~"test"], ~"test"); + test(#ifmt[~"%b", true], ~"true"); + test(#ifmt[~"%b", false], ~"false"); + test(#ifmt[~"%c", 'A'], ~"A"); + test(#ifmt[~"%x", 0xff_u], ~"ff"); + test(#ifmt[~"%X", 0x12ab_u], ~"12AB"); + test(#ifmt[~"%o", 10u], ~"12"); + test(#ifmt[~"%t", 0b11010101_u], ~"11010101"); // 32-bit limits - test(#fmt["%i", -2147483648], "-2147483648"); - test(#fmt["%i", 2147483647], "2147483647"); - test(#fmt["%u", 4294967295u], "4294967295"); - test(#fmt["%x", 0xffffffff_u], "ffffffff"); - test(#fmt["%o", 0xffffffff_u], "37777777777"); - test(#fmt["%t", 0xffffffff_u], "11111111111111111111111111111111"); + test(#ifmt[~"%i", -2147483648], ~"-2147483648"); + test(#ifmt[~"%i", 2147483647], ~"2147483647"); + test(#ifmt[~"%u", 4294967295u], ~"4294967295"); + test(#ifmt[~"%x", 0xffffffff_u], ~"ffffffff"); + test(#ifmt[~"%o", 0xffffffff_u], ~"37777777777"); + test(#ifmt[~"%t", 0xffffffff_u], ~"11111111111111111111111111111111"); } fn part2() { // Widths - test(#fmt["%1d", 500], "500"); - test(#fmt["%10d", 500], " 500"); - test(#fmt["%10d", -500], " -500"); - test(#fmt["%10u", 500u], " 500"); - test(#fmt["%10s", "test"], " test"); - test(#fmt["%10b", true], " true"); - test(#fmt["%10x", 0xff_u], " ff"); - test(#fmt["%10X", 0xff_u], " FF"); - test(#fmt["%10o", 10u], " 12"); - test(#fmt["%10t", 0xff_u], " 11111111"); - test(#fmt["%10c", 'A'], " A"); + test(#ifmt[~"%1d", 500], ~"500"); + test(#ifmt[~"%10d", 500], ~" 500"); + test(#ifmt[~"%10d", -500], ~" -500"); + test(#ifmt[~"%10u", 500u], ~" 500"); + 
test(#ifmt[~"%10s", ~"test"], ~" test"); + test(#ifmt[~"%10b", true], ~" true"); + test(#ifmt[~"%10x", 0xff_u], ~" ff"); + test(#ifmt[~"%10X", 0xff_u], ~" FF"); + test(#ifmt[~"%10o", 10u], ~" 12"); + test(#ifmt[~"%10t", 0xff_u], ~" 11111111"); + test(#ifmt[~"%10c", 'A'], ~" A"); // Left justify - test(#fmt["%-10d", 500], "500 "); - test(#fmt["%-10d", -500], "-500 "); - test(#fmt["%-10u", 500u], "500 "); - test(#fmt["%-10s", "test"], "test "); - test(#fmt["%-10b", true], "true "); - test(#fmt["%-10x", 0xff_u], "ff "); - test(#fmt["%-10X", 0xff_u], "FF "); - test(#fmt["%-10o", 10u], "12 "); - test(#fmt["%-10t", 0xff_u], "11111111 "); - test(#fmt["%-10c", 'A'], "A "); + test(#ifmt[~"%-10d", 500], ~"500 "); + test(#ifmt[~"%-10d", -500], ~"-500 "); + test(#ifmt[~"%-10u", 500u], ~"500 "); + test(#ifmt[~"%-10s", ~"test"], ~"test "); + test(#ifmt[~"%-10b", true], ~"true "); + test(#ifmt[~"%-10x", 0xff_u], ~"ff "); + test(#ifmt[~"%-10X", 0xff_u], ~"FF "); + test(#ifmt[~"%-10o", 10u], ~"12 "); + test(#ifmt[~"%-10t", 0xff_u], ~"11111111 "); + test(#ifmt[~"%-10c", 'A'], ~"A "); } fn part3() { // Precision - test(#fmt["%.d", 0], ""); - test(#fmt["%.u", 0u], ""); - test(#fmt["%.x", 0u], ""); - test(#fmt["%.t", 0u], ""); - test(#fmt["%.d", 10], "10"); - test(#fmt["%.d", -10], "-10"); - test(#fmt["%.u", 10u], "10"); - test(#fmt["%.s", "test"], ""); - test(#fmt["%.x", 127u], "7f"); - test(#fmt["%.o", 10u], "12"); - test(#fmt["%.t", 3u], "11"); - test(#fmt["%.c", 'A'], "A"); - test(#fmt["%.0d", 0], ""); - test(#fmt["%.0u", 0u], ""); - test(#fmt["%.0x", 0u], ""); - test(#fmt["%.0t", 0u], ""); - test(#fmt["%.0d", 10], "10"); - test(#fmt["%.0d", -10], "-10"); - test(#fmt["%.0u", 10u], "10"); - test(#fmt["%.0s", "test"], ""); - test(#fmt["%.0x", 127u], "7f"); - test(#fmt["%.0o", 10u], "12"); - test(#fmt["%.0t", 3u], "11"); - test(#fmt["%.0c", 'A'], "A"); - test(#fmt["%.1d", 0], "0"); - test(#fmt["%.1u", 0u], "0"); - test(#fmt["%.1x", 0u], "0"); - test(#fmt["%.1t", 0u], "0"); - 
test(#fmt["%.1d", 10], "10"); - test(#fmt["%.1d", -10], "-10"); - test(#fmt["%.1u", 10u], "10"); - test(#fmt["%.1s", "test"], "t"); - test(#fmt["%.1x", 127u], "7f"); - test(#fmt["%.1o", 10u], "12"); - test(#fmt["%.1t", 3u], "11"); - test(#fmt["%.1c", 'A'], "A"); + test(#ifmt[~"%.d", 0], ~""); + test(#ifmt[~"%.u", 0u], ~""); + test(#ifmt[~"%.x", 0u], ~""); + test(#ifmt[~"%.t", 0u], ~""); + test(#ifmt[~"%.d", 10], ~"10"); + test(#ifmt[~"%.d", -10], ~"-10"); + test(#ifmt[~"%.u", 10u], ~"10"); + test(#ifmt[~"%.s", ~"test"], ~""); + test(#ifmt[~"%.x", 127u], ~"7f"); + test(#ifmt[~"%.o", 10u], ~"12"); + test(#ifmt[~"%.t", 3u], ~"11"); + test(#ifmt[~"%.c", 'A'], ~"A"); + test(#ifmt[~"%.0d", 0], ~""); + test(#ifmt[~"%.0u", 0u], ~""); + test(#ifmt[~"%.0x", 0u], ~""); + test(#ifmt[~"%.0t", 0u], ~""); + test(#ifmt[~"%.0d", 10], ~"10"); + test(#ifmt[~"%.0d", -10], ~"-10"); + test(#ifmt[~"%.0u", 10u], ~"10"); + test(#ifmt[~"%.0s", ~"test"], ~""); + test(#ifmt[~"%.0x", 127u], ~"7f"); + test(#ifmt[~"%.0o", 10u], ~"12"); + test(#ifmt[~"%.0t", 3u], ~"11"); + test(#ifmt[~"%.0c", 'A'], ~"A"); + test(#ifmt[~"%.1d", 0], ~"0"); + test(#ifmt[~"%.1u", 0u], ~"0"); + test(#ifmt[~"%.1x", 0u], ~"0"); + test(#ifmt[~"%.1t", 0u], ~"0"); + test(#ifmt[~"%.1d", 10], ~"10"); + test(#ifmt[~"%.1d", -10], ~"-10"); + test(#ifmt[~"%.1u", 10u], ~"10"); + test(#ifmt[~"%.1s", ~"test"], ~"t"); + test(#ifmt[~"%.1x", 127u], ~"7f"); + test(#ifmt[~"%.1o", 10u], ~"12"); + test(#ifmt[~"%.1t", 3u], ~"11"); + test(#ifmt[~"%.1c", 'A'], ~"A"); } fn part4() { - test(#fmt["%.5d", 0], "00000"); - test(#fmt["%.5u", 0u], "00000"); - test(#fmt["%.5x", 0u], "00000"); - test(#fmt["%.5t", 0u], "00000"); - test(#fmt["%.5d", 10], "00010"); - test(#fmt["%.5d", -10], "-00010"); - test(#fmt["%.5u", 10u], "00010"); - test(#fmt["%.5s", "test"], "test"); - test(#fmt["%.5x", 127u], "0007f"); - test(#fmt["%.5o", 10u], "00012"); - test(#fmt["%.5t", 3u], "00011"); - test(#fmt["%.5c", 'A'], "A"); + test(#ifmt[~"%.5d", 0], ~"00000"); + 
test(#ifmt[~"%.5u", 0u], ~"00000"); + test(#ifmt[~"%.5x", 0u], ~"00000"); + test(#ifmt[~"%.5t", 0u], ~"00000"); + test(#ifmt[~"%.5d", 10], ~"00010"); + test(#ifmt[~"%.5d", -10], ~"-00010"); + test(#ifmt[~"%.5u", 10u], ~"00010"); + test(#ifmt[~"%.5s", ~"test"], ~"test"); + test(#ifmt[~"%.5x", 127u], ~"0007f"); + test(#ifmt[~"%.5o", 10u], ~"00012"); + test(#ifmt[~"%.5t", 3u], ~"00011"); + test(#ifmt[~"%.5c", 'A'], ~"A"); // Bool precision. I'm not sure if it's good or bad to have bool // conversions support precision - it's not standard printf so we // can do whatever. For now I'm making it behave the same as string // conversions. - test(#fmt["%.b", true], ""); - test(#fmt["%.0b", true], ""); - test(#fmt["%.1b", true], "t"); + test(#ifmt[~"%.b", true], ~""); + test(#ifmt[~"%.0b", true], ~""); + test(#ifmt[~"%.1b", true], ~"t"); } fn part5() { // Explicit + sign. Only for signed conversions - test(#fmt["%+d", 0], "+0"); - test(#fmt["%+d", 1], "+1"); - test(#fmt["%+d", -1], "-1"); + test(#ifmt[~"%+d", 0], ~"+0"); + test(#ifmt[~"%+d", 1], ~"+1"); + test(#ifmt[~"%+d", -1], ~"-1"); // Leave space for sign - test(#fmt["% d", 0], " 0"); - test(#fmt["% d", 1], " 1"); - test(#fmt["% d", -1], "-1"); + test(#ifmt[~"% d", 0], ~" 0"); + test(#ifmt[~"% d", 1], ~" 1"); + test(#ifmt[~"% d", -1], ~"-1"); // Plus overrides space - test(#fmt["% +d", 0], "+0"); - test(#fmt["%+ d", 0], "+0"); + test(#ifmt[~"% +d", 0], ~"+0"); + test(#ifmt[~"%+ d", 0], ~"+0"); // 0-padding - test(#fmt["%05d", 0], "00000"); - test(#fmt["%05d", 1], "00001"); - test(#fmt["%05d", -1], "-0001"); - test(#fmt["%05u", 1u], "00001"); - test(#fmt["%05x", 127u], "0007f"); - test(#fmt["%05X", 127u], "0007F"); - test(#fmt["%05o", 10u], "00012"); - test(#fmt["%05t", 3u], "00011"); + test(#ifmt[~"%05d", 0], ~"00000"); + test(#ifmt[~"%05d", 1], ~"00001"); + test(#ifmt[~"%05d", -1], ~"-0001"); + test(#ifmt[~"%05u", 1u], ~"00001"); + test(#ifmt[~"%05x", 127u], ~"0007f"); + test(#ifmt[~"%05X", 127u], ~"0007F"); + 
test(#ifmt[~"%05o", 10u], ~"00012"); + test(#ifmt[~"%05t", 3u], ~"00011"); // 0-padding a string is undefined but glibc does this: - test(#fmt["%05s", "test"], " test"); - test(#fmt["%05c", 'A'], " A"); - test(#fmt["%05b", true], " true"); + test(#ifmt[~"%05s", ~"test"], ~" test"); + test(#ifmt[~"%05c", 'A'], ~" A"); + test(#ifmt[~"%05b", true], ~" true"); // Left-justify overrides 0-padding - test(#fmt["%-05d", 0], "0 "); - test(#fmt["%-05d", 1], "1 "); - test(#fmt["%-05d", -1], "-1 "); - test(#fmt["%-05u", 1u], "1 "); - test(#fmt["%-05x", 127u], "7f "); - test(#fmt["%-05X", 127u], "7F "); - test(#fmt["%-05o", 10u], "12 "); - test(#fmt["%-05t", 3u], "11 "); - test(#fmt["%-05s", "test"], "test "); - test(#fmt["%-05c", 'A'], "A "); - test(#fmt["%-05b", true], "true "); + test(#ifmt[~"%-05d", 0], ~"0 "); + test(#ifmt[~"%-05d", 1], ~"1 "); + test(#ifmt[~"%-05d", -1], ~"-1 "); + test(#ifmt[~"%-05u", 1u], ~"1 "); + test(#ifmt[~"%-05x", 127u], ~"7f "); + test(#ifmt[~"%-05X", 127u], ~"7F "); + test(#ifmt[~"%-05o", 10u], ~"12 "); + test(#ifmt[~"%-05t", 3u], ~"11 "); + test(#ifmt[~"%-05s", ~"test"], ~"test "); + test(#ifmt[~"%-05c", 'A'], ~"A "); + test(#ifmt[~"%-05b", true], ~"true "); } fn part6() { // Precision overrides 0-padding - test(#fmt["%06.5d", 0], " 00000"); - test(#fmt["%06.5u", 0u], " 00000"); - test(#fmt["%06.5x", 0u], " 00000"); - test(#fmt["%06.5d", 10], " 00010"); - test(#fmt["%06.5d", -10], "-00010"); - test(#fmt["%06.5u", 10u], " 00010"); - test(#fmt["%06.5s", "test"], " test"); - test(#fmt["%06.5c", 'A'], " A"); - test(#fmt["%06.5x", 127u], " 0007f"); - test(#fmt["%06.5X", 127u], " 0007F"); - test(#fmt["%06.5o", 10u], " 00012"); + test(#ifmt[~"%06.5d", 0], ~" 00000"); + test(#ifmt[~"%06.5u", 0u], ~" 00000"); + test(#ifmt[~"%06.5x", 0u], ~" 00000"); + test(#ifmt[~"%06.5d", 10], ~" 00010"); + test(#ifmt[~"%06.5d", -10], ~"-00010"); + test(#ifmt[~"%06.5u", 10u], ~" 00010"); + test(#ifmt[~"%06.5s", ~"test"], ~" test"); + test(#ifmt[~"%06.5c", 'A'], ~" A"); 
+ test(#ifmt[~"%06.5x", 127u], ~" 0007f"); + test(#ifmt[~"%06.5X", 127u], ~" 0007F"); + test(#ifmt[~"%06.5o", 10u], ~" 00012"); // Signed combinations - test(#fmt["% 5d", 1], " 1"); - test(#fmt["% 5d", -1], " -1"); - test(#fmt["%+5d", 1], " +1"); - test(#fmt["%+5d", -1], " -1"); - test(#fmt["% 05d", 1], " 0001"); - test(#fmt["% 05d", -1], "-0001"); - test(#fmt["%+05d", 1], "+0001"); - test(#fmt["%+05d", -1], "-0001"); - test(#fmt["%- 5d", 1], " 1 "); - test(#fmt["%- 5d", -1], "-1 "); - test(#fmt["%-+5d", 1], "+1 "); - test(#fmt["%-+5d", -1], "-1 "); - test(#fmt["%- 05d", 1], " 1 "); - test(#fmt["%- 05d", -1], "-1 "); - test(#fmt["%-+05d", 1], "+1 "); - test(#fmt["%-+05d", -1], "-1 "); + test(#ifmt[~"% 5d", 1], ~" 1"); + test(#ifmt[~"% 5d", -1], ~" -1"); + test(#ifmt[~"%+5d", 1], ~" +1"); + test(#ifmt[~"%+5d", -1], ~" -1"); + test(#ifmt[~"% 05d", 1], ~" 0001"); + test(#ifmt[~"% 05d", -1], ~"-0001"); + test(#ifmt[~"%+05d", 1], ~"+0001"); + test(#ifmt[~"%+05d", -1], ~"-0001"); + test(#ifmt[~"%- 5d", 1], ~" 1 "); + test(#ifmt[~"%- 5d", -1], ~"-1 "); + test(#ifmt[~"%-+5d", 1], ~"+1 "); + test(#ifmt[~"%-+5d", -1], ~"-1 "); + test(#ifmt[~"%- 05d", 1], ~" 1 "); + test(#ifmt[~"%- 05d", -1], ~"-1 "); + test(#ifmt[~"%-+05d", 1], ~"+1 "); + test(#ifmt[~"%-+05d", -1], ~"-1 "); }