rust/src/lib/extfmt.rs



/* The 'fmt' extension is modeled on the posix printf system.
 *
 * A posix conversion ostensibly looks like this:
 *
 * %[parameter][flags][width][.precision][length]type
 *
 * Given the different numeric type bestiary we have, we omit the 'length'
 * parameter and support slightly different conversions for 'type':
 *
 * %[parameter][flags][width][.precision]type
 *
 * we also only support translating-to-rust a tiny subset of the possible
 * combinations at the moment.
 */
import option::none;
import option::some;


/*
 * We have a 'ct' (compile-time) module that parses format strings into a
 * sequence of conversions. From those conversions AST fragments are built
 * that call into properly-typed functions in the 'rt' (run-time) module.
 * Each of those run-time conversion functions accepts another conversion
 * description that specifies how to format its output.
 *
 * The building of the AST is currently done in a module inside the compiler,
 * but should migrate over here as the plugin interface is defined.
 */

// Functions used by the fmt extension at compile time
mod ct {
    tag signedness { signed; unsigned; }
    tag caseness { case_upper; case_lower; }
    tag ty {
        ty_bool;
        ty_str;
        ty_char;
        ty_int(signedness);
        ty_bits;
        ty_hex(caseness);
        ty_octal;
        // FIXME: More types

    }
    tag flag {
        flag_left_justify;
        flag_left_zero_pad;
        flag_space_for_sign;
        flag_sign_always;
        flag_alternate;
    }
    tag count {
        count_is(int);
        count_is_param(int);
        count_is_next_param;
        count_implied;
    }

    // A formatted conversion from an expression to a string
    type conv =
        rec(option::t[int] param,
            vec[flag] flags,
            count width,
            count precision,
            ty ty);


    // A fragment of the output sequence
    tag piece { piece_string(str); piece_conv(conv); }
    type error_fn = fn(str) -> !  ;

    fn parse_fmt_string(str s, error_fn error) -> vec[piece] {
        let vec[piece] pieces = [];
        auto lim = str::byte_len(s);
        auto buf = "";
        fn flush_buf(str buf, &mutable vec[piece] pieces) -> str {
            if (str::byte_len(buf) > 0u) {
                auto piece = piece_string(buf);
                pieces += [piece];
            }
            ret "";
        }
        auto i = 0u;
        while (i < lim) {
            auto curr = str::substr(s, i, 1u);
            if (str::eq(curr, "%")) {
                i += 1u;
                if (i >= lim) {
                    error("unterminated conversion at end of string");
                }
                auto curr2 = str::substr(s, i, 1u);
                if (str::eq(curr2, "%")) {
                    i += 1u;
                } else {
                    buf = flush_buf(buf, pieces);
                    auto rs = parse_conversion(s, i, lim, error);
                    pieces += [rs.piece];
                    i = rs.next;
                }
            } else { buf += curr; i += 1u; }
        }
        buf = flush_buf(buf, pieces);
        ret pieces;
    }
    fn peek_num(str s, uint i, uint lim)
        -> option::t[rec(uint num, uint next)] {
        if (i >= lim) { ret none; }
        auto c = s.(i);
        if (!('0' as u8 <= c && c <= '9' as u8)) {
            ret option::none;
        }
        auto n = c - ('0' as u8) as uint;
        ret alt (peek_num(s, i + 1u, lim)) {
                case (none) { some(rec(num=n, next=i + 1u)) }
                case (some(?next)) {
                    auto m = next.num;
                    auto j = next.next;
                    some(rec(num=n * 10u + m, next=j))
                }
            };
    }
    fn parse_conversion(str s, uint i, uint lim, error_fn error)
        -> rec(piece piece, uint next) {
        auto parm = parse_parameter(s, i, lim);
        auto flags = parse_flags(s, parm.next, lim);
        auto width = parse_count(s, flags.next, lim);
        auto prec = parse_precision(s, width.next, lim);
        auto ty = parse_type(s, prec.next, lim, error);
        ret rec(piece=piece_conv(rec(param=parm.param,
                                     flags=flags.flags,
                                     width=width.count,
                                     precision=prec.count,
                                     ty=ty.ty)),
                next=ty.next);
    }
    fn parse_parameter(str s, uint i, uint lim)
        -> rec(option::t[int] param, uint next) {
        if (i >= lim) { ret rec(param=none, next=i); }
        auto num = peek_num(s, i, lim);
        ret alt (num) {
                case (none) { rec(param=none, next=i) }
                case (some(?t)) {
                    auto n = t.num;
                    auto j = t.next;
                    if (j < lim && s.(j) == '$' as u8) {
                        rec(param=some(n as int), next=j + 1u)
                    } else { rec(param=none, next=i) }
                }
            };
    }
    fn parse_flags(str s, uint i, uint lim)
        -> rec(vec[flag] flags, uint next) {
        let vec[flag] noflags = [];
        if (i >= lim) { ret rec(flags=noflags, next=i); }
        fn more_(flag f, str s, uint i, uint lim)
            -> rec(vec[flag] flags, uint next) {
            auto next = parse_flags(s, i + 1u, lim);
            auto rest = next.flags;
            auto j = next.next;
            let vec[flag] curr = [f];
            ret rec(flags=curr + rest, next=j);
        }
        auto more = bind more_(_, s, i, lim);
        auto f = s.(i);
        ret if (f == '-' as u8) {
                more(flag_left_justify)
            } else if (f == '0' as u8) {
                more(flag_left_zero_pad)
            } else if (f == ' ' as u8) {
                more(flag_space_for_sign)
            } else if (f == '+' as u8) {
                more(flag_sign_always)
            } else if (f == '#' as u8) {
                more(flag_alternate)
            } else { rec(flags=noflags, next=i) };
    }
    fn parse_count(str s, uint i, uint lim)
        -> rec(count count, uint next) {
        ret if (i >= lim) {
                rec(count=count_implied, next=i)
            } else if (s.(i) == '*' as u8) {
                auto param = parse_parameter(s, i + 1u, lim);
                auto j = param.next;
                alt (param.param) {
                    case (none) { rec(count=count_is_next_param, next=j) }
                    case (some(?n)) { rec(count=count_is_param(n), next=j) }
                }
            } else {
                auto num = peek_num(s, i, lim);
                alt (num) {
                    case (none) { rec(count=count_implied, next=i) }
                    case (some(?num)) { rec(count=count_is(num.num as int),
                                            next=num.next) }
                }
            };
    }
    fn parse_precision(str s, uint i, uint lim)
        -> rec(count count, uint next) {
        ret if (i >= lim) {
                rec(count=count_implied, next=i)
            } else if (s.(i) == '.' as u8) {
                auto count = parse_count(s, i + 1u, lim);

                // If there were no digits specified, i.e. the precision
                // was ".", then the precision is 0
                alt (count.count) {
                    case (count_implied) { rec(count=count_is(0),
                                               next=count.next) }
                    case (_) { count }
                }
            } else { rec(count=count_implied, next=i) };
    }
    fn parse_type(str s, uint i, uint lim, error_fn error)
        -> rec(ty ty, uint next) {
        if (i >= lim) { error("missing type in conversion"); }
        auto tstr = str::substr(s, i, 1u);
        auto t =
            if (str::eq(tstr, "b")) {
                ty_bool
            } else if (str::eq(tstr, "s")) {
                ty_str
            } else if (str::eq(tstr, "c")) {
                ty_char
            } else if (str::eq(tstr, "d") || str::eq(tstr, "i")) {

                // TODO: Do we really want two signed types here?
                // How important is it to be printf compatible?
                ty_int(signed)
            } else if (str::eq(tstr, "u")) {
                ty_int(unsigned)
            } else if (str::eq(tstr, "x")) {
                ty_hex(case_lower)
            } else if (str::eq(tstr, "X")) {
                ty_hex(case_upper)
            } else if (str::eq(tstr, "t")) {
                ty_bits
            } else if (str::eq(tstr, "o")) {
                ty_octal
            } else { error("unknown type in conversion: " + tstr) };
        ret rec(ty=t, next=i + 1u);
    }
}


// Functions used by the fmt extension at runtime. For now there are a lot of
// decisions made a runtime. If it proves worthwhile then some of these
// conditions can be evaluated at compile-time. For now though it's cleaner to
// implement it this way, I think.
mod rt {
    tag flag {
        flag_left_justify;
        flag_left_zero_pad;
        flag_space_for_sign;
        flag_sign_always;
        flag_alternate;

        // FIXME: This is a hack to avoid creating 0-length vec exprs,
        // which have some difficulty typechecking currently. See
        // comments in front::extfmt::make_flags
        flag_none;
    }
    tag count { count_is(int); count_implied; }
    tag ty { ty_default; ty_bits; ty_hex_upper; ty_hex_lower; ty_octal; }

    // FIXME: May not want to use a vector here for flags;
    // instead just use a bool per flag
    type conv = rec(vec[flag] flags, count width, count precision, ty ty);

    fn conv_int(&conv cv, int i) -> str {
        auto radix = 10u;
        auto prec = get_int_precision(cv);
        auto s = int_to_str_prec(i, radix, prec);
        if (0 <= i) {
            if (have_flag(cv.flags, flag_sign_always)) {
                s = "+" + s;
            } else if (have_flag(cv.flags, flag_space_for_sign)) {
                s = " " + s;
            }
        }
        ret pad(cv, s, pad_signed);
    }
    fn conv_uint(&conv cv, uint u) -> str {
        auto prec = get_int_precision(cv);
        auto rs =
            alt (cv.ty) {
                case (ty_default) { uint_to_str_prec(u, 10u, prec) }
                case (ty_hex_lower) { uint_to_str_prec(u, 16u, prec) }
                case (ty_hex_upper) {
                    str::to_upper(uint_to_str_prec(u, 16u, prec))
                }
                case (ty_bits) { uint_to_str_prec(u, 2u, prec) }
                case (ty_octal) { uint_to_str_prec(u, 8u, prec) }
            };
        ret pad(cv, rs, pad_unsigned);
    }
    fn conv_bool(&conv cv, bool b) -> str {
        auto s = if (b) { "true" } else { "false" };
        // run the boolean conversion through the string conversion logic,
        // giving it the same rules for precision, etc.

        ret conv_str(cv, s);
    }
    fn conv_char(&conv cv, char c) -> str {
        ret pad(cv, str::from_char(c), pad_nozero);
    }
    fn conv_str(&conv cv, str s) -> str {
        auto unpadded =
            alt (cv.precision) {
                case (count_implied) { s }
                case (count_is(?max)) {

                    // For strings, precision is the maximum characters
                    // displayed
                    if (max as uint < str::char_len(s)) {

                        // FIXME: substr works on bytes, not chars!
                        str::substr(s, 0u, max as uint)
                    } else { s }
                }
            };
        ret pad(cv, unpadded, pad_nozero);
    }

    // Convert an int to string with minimum number of digits. If precision is
    // 0 and num is 0 then the result is the empty string.
    fn int_to_str_prec(int num, uint radix, uint prec) -> str {
        ret if (num < 0) {
                "-" + uint_to_str_prec(-num as uint, radix, prec)
            } else { uint_to_str_prec(num as uint, radix, prec) };
    }

    // Convert a uint to string with a minimum number of digits.  If precision
    // is 0 and num is 0 then the result is the empty string. Could move this
    // to uint: but it doesn't seem all that useful.
    fn uint_to_str_prec(uint num, uint radix, uint prec) -> str {
        ret if (prec == 0u && num == 0u) {
                ""
            } else {
                auto s = uint::to_str(num, radix);
                auto len = str::char_len(s);
                if (len < prec) {
                    auto diff = prec - len;
                    auto pad = str_init_elt('0', diff);
                    pad + s
                } else { s }
            };
    }
    fn get_int_precision(&conv cv) -> uint {
        ret alt (cv.precision) {
                case (count_is(?c)) { c as uint }
                case (count_implied) { 1u }
            };
    }

    // FIXME: This might be useful in str: but needs to be utf8 safe first
    fn str_init_elt(char c, uint n_elts) -> str {
        auto svec = vec::init_elt[u8](c as u8, n_elts);

        ret str::from_bytes(svec);
    }
    tag pad_mode { pad_signed; pad_unsigned; pad_nozero; }
    fn pad(&conv cv, str s, pad_mode mode) -> str {
        auto uwidth;
        alt (cv.width) {
            case (count_implied) { ret s; }
            case (count_is(?width)) {
                // FIXME: Maybe width should be uint

                uwidth = width as uint;
            }
        }
        auto strlen = str::char_len(s);
        if (uwidth <= strlen) { ret s; }
        auto padchar = ' ';
        auto diff = uwidth - strlen;
        if (have_flag(cv.flags, flag_left_justify)) {
            auto padstr = str_init_elt(padchar, diff);
            ret s + padstr;
        }
        auto might_zero_pad = false;
        auto signed = false;
        alt (mode) {
            case (pad_nozero) {
                // fallthrough

            }
            case (pad_signed) { might_zero_pad = true; signed = true; }
            case (pad_unsigned) { might_zero_pad = true; }
        }
        fn have_precision(&conv cv) -> bool {
            ret alt (cv.precision) {
                    case (count_implied) { false }
                    case (_) { true }
                };
        }
        auto zero_padding = false;
        if (might_zero_pad && have_flag(cv.flags, flag_left_zero_pad) &&
                !have_precision(cv)) {
            padchar = '0';
            zero_padding = true;
        }
        auto padstr = str_init_elt(padchar, diff);
        // This is completely heinous. If we have a signed value then
        // potentially rip apart the intermediate result and insert some
        // zeros. It may make sense to convert zero padding to a precision
        // instead.

        if (signed && zero_padding && str::byte_len(s) > 0u) {
            auto head = s.(0);
            if (head == '+' as u8 || head == '-' as u8 || head == ' ' as u8) {
                auto headstr = str::unsafe_from_bytes([head]);
                auto bytelen = str::byte_len(s);
                auto numpart = str::substr(s, 1u, bytelen - 1u);
                ret headstr + padstr + numpart;
            }
        }
        ret padstr + s;
    }
    fn have_flag(vec[flag] flags, flag f) -> bool {
        for (flag candidate in flags) { if (candidate == f) { ret true; } }
        ret false;
    }
}
// Local Variables:
// mode: rust;
// fill-column: 78;
// indent-tabs-mode: nil
// c-basic-offset: 4
// buffer-file-coding-system: utf-8-unix
// compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
// End: