Implement formatting arguments for strings and integers

Closes #1651
This commit is contained in:
Alex Crichton 2013-08-10 00:28:47 -07:00
parent 44675ac6af
commit b820748ff5
6 changed files with 295 additions and 105 deletions

View file

@ -20,6 +20,15 @@ use unicode::{derived_property, general_category};
#[cfg(not(test))] use cmp::{Eq, Ord};
#[cfg(not(test))] use num::Zero;
// UTF-8 ranges and tags for encoding characters
//
// The `TAG_*` values are the fixed high bits or'ed into an encoded byte and
// the `MAX_*` values are the exclusive upper bounds on the code points that
// fit into an encoding of the given length.

// Tag bits (0b1000_0000) or'ed into every byte after the first one
static TAG_CONT: uint = 128u;
// Code points below this value are encoded as a single byte
static MAX_ONE_B: uint = 128u;
// Tag bits (0b1100_0000) or'ed into the first byte of a two byte sequence
static TAG_TWO_B: uint = 192u;
// Code points below this value are encoded in at most two bytes
static MAX_TWO_B: uint = 2048u;
// Tag bits (0b1110_0000) or'ed into the first byte of a three byte sequence
static TAG_THREE_B: uint = 224u;
// Code points below this value are encoded in at most three bytes
static MAX_THREE_B: uint = 65536u;
// Tag bits (0b1111_0000) or'ed into the first byte of a four byte sequence
static TAG_FOUR_B: uint = 240u;
/*
Lu Uppercase_Letter an uppercase letter
Ll Lowercase_Letter a lowercase letter
@ -278,6 +287,12 @@ pub trait Char {
fn escape_unicode(&self, f: &fn(char));
fn escape_default(&self, f: &fn(char));
fn len_utf8_bytes(&self) -> uint;
/// Encodes this character as utf-8 into the provided byte-buffer. The
/// buffer must be at least 4 bytes long or a runtime failure will occur.
///
/// This will then return the number of bytes written to the slice.
fn encode_utf8(&self, dst: &mut [u8]) -> uint;
}
impl Char for char {
@ -308,6 +323,29 @@ impl Char for char {
fn escape_default(&self, f: &fn(char)) { escape_default(*self, f) }
fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
// Writes the utf-8 encoding of this character to the front of `dst` and
// yields how many bytes (between 1 and 4) were written. The length of the
// encoding is selected by comparing the code point against the `MAX_*`
// bounds; the first byte carries the length tag and every following byte
// carries `TAG_CONT` plus six bits of the code point.
fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
    let code = *self as uint;
    if code < MAX_ONE_B {
        // Single byte, stored as-is
        dst[0] = code as u8;
        1
    } else if code < MAX_TWO_B {
        // 5 bits in the leading byte, 6 bits in the continuation byte
        dst[0] = (((code >> 6u) & 31u) | TAG_TWO_B) as u8;
        dst[1] = ((code & 63u) | TAG_CONT) as u8;
        2
    } else if code < MAX_THREE_B {
        // 4 bits in the leading byte, 6 bits in each continuation byte
        dst[0] = (((code >> 12u) & 15u) | TAG_THREE_B) as u8;
        dst[1] = (((code >> 6u) & 63u) | TAG_CONT) as u8;
        dst[2] = ((code & 63u) | TAG_CONT) as u8;
        3
    } else {
        // 3 bits in the leading byte, 6 bits in each continuation byte
        dst[0] = (((code >> 18u) & 7u) | TAG_FOUR_B) as u8;
        dst[1] = (((code >> 12u) & 63u) | TAG_CONT) as u8;
        dst[2] = (((code >> 6u) & 63u) | TAG_CONT) as u8;
        dst[3] = ((code & 63u) | TAG_CONT) as u8;
        4
    }
}
}
#[cfg(not(test))]

View file

@ -11,7 +11,7 @@
use prelude::*;
use cast;
use int;
use char::Char;
use rt::io::Decorator;
use rt::io::mem::MemWriter;
use rt::io;
@ -122,6 +122,11 @@ pub unsafe fn sprintf(fmt: &[rt::Piece], args: &[Argument]) -> ~str {
}
impl<'self> Formatter<'self> {
// First up is the collection of functions used to execute a format string
// at runtime. This consumes all of the compile-time statics generated by
// the ifmt! syntax extension.
fn run(&mut self, piece: &rt::Piece, cur: Option<&str>) {
let setcount = |slot: &mut Option<uint>, cnt: &parse::Count| {
match *cnt {
@ -240,6 +245,118 @@ impl<'self> Formatter<'self> {
}
}
}
// Helper methods used for padding and processing formatting arguments that
// all formatting traits can use.
/// This function takes the already-rendered digits of an integer and emits
/// them to the internal buffer together with a sign, an optional prefix and
/// whatever padding is needed to reach the requested width.
///
/// # Arguments
///
/// * s - the digits of the number, without any sign
/// * alternate_prefix - the text emitted between the sign and the digits
///                      when the alternate flag is set
/// * positive - whether the number is non-negative, a `-` is emitted in
///              front of the number when this is false
///
/// The formatting parameters recognized for integers are:
///
/// * FlagSignPlus - emit a `+` in front of non-negative numbers
/// * FlagAlternate - emit `alternate_prefix` in front of the digits
/// * width - the minimum length of what to emit, if the sign, prefix and
///           digits together are shorter than this then the output is
///           padded through `with_padding`
pub fn pad_integral(&mut self, s: &[u8], alternate_prefix: &str,
                    positive: bool) {
    use fmt::parse::{FlagAlternate, FlagSignPlus};

    let alternate = self.flags & 1 << (FlagAlternate as uint) != 0;
    let sign_plus = self.flags & 1 << (FlagSignPlus as uint) != 0;

    // Figure out how long the unpadded output is going to be
    let mut actual_len = s.len();
    if alternate {
        actual_len += alternate_prefix.len();
    }
    // At most one sign character is ever emitted: a '-' for negative
    // numbers, or a '+' for non-negative ones when it was asked for. It must
    // only be counted once, otherwise a negative number formatted with the
    // plus flag ends up one byte short of the requested width.
    if !positive || sign_plus {
        actual_len += 1;
    }

    let emit = |this: &mut Formatter| {
        if !positive {
            this.buf.write(['-' as u8]);
        } else if sign_plus {
            this.buf.write(['+' as u8]);
        }
        if alternate {
            this.buf.write(alternate_prefix.as_bytes());
        }
        this.buf.write(s);
    };

    match self.width {
        // No minimum width, or the output is already wide enough
        None => { emit(self) }
        Some(min) if actual_len >= min => { emit(self) }
        // Otherwise make up the difference with padding
        Some(min) => {
            do self.with_padding(min - actual_len) |me| {
                emit(me);
            }
        }
    }
}
/// This function takes a string slice and emits it to the internal buffer
/// after applying the relevant formatting flags specified. The flags
/// recognized for generic strings are:
///
/// * width - the minimum width of what to emit
/// * fill/alignleft - what to emit and where to emit it if the string
/// provided needs to be padded
/// * precision - the maximum length to emit, the string is truncated if it
/// is longer than this length
///
/// Notably this function ignored the `flag` parameters
pub fn pad(&mut self, s: &str) {
// Make sure there's a fast path up front
if self.width.is_none() && self.precision.is_none() {
self.buf.write(s.as_bytes());
return
}
// The `precision` field can be interpreted as a `max-width` for the
// string being formatted
match self.precision {
Some(max) => {
// If there's a maximum width and our string is longer than
// that, then we must always have truncation. This is the only
// case where the maximum length will matter.
let char_len = s.char_len();
if char_len >= max {
let nchars = uint::min(max, char_len);
self.buf.write(s.slice_chars(0, nchars).as_bytes());
return
}
}
None => {}
}
// The `width` field is more of a `min-width` parameter at this point.
match self.width {
// If we're under the maximum length, and there's no minimum length
// requirements, then we can just emit the string
None => { self.buf.write(s.as_bytes()) }
// If we're under the maximum width, check if we're over the minimum
// width, if so it's as easy as just emitting the string.
Some(width) if s.char_len() >= width => {
self.buf.write(s.as_bytes())
}
// If we're under both the maximum and the minimum width, then fill
// up the minimum width with the specified string + some alignment.
Some(width) => {
do self.with_padding(width - s.len()) |me| {
me.buf.write(s.as_bytes());
}
}
}
}
fn with_padding(&mut self, padding: uint, f: &fn(&mut Formatter)) {
if self.alignleft {
f(self);
}
let mut fill = [0u8, ..4];
let len = self.fill.encode_utf8(fill);
for _ in range(0, padding) {
self.buf.write(fill.slice_to(len));
}
if !self.alignleft {
f(self);
}
}
}
/// This is a function which calls are emitted to by the compiler itself to
@ -279,60 +396,53 @@ impl Bool for bool {
impl<'self> String for &'self str {
fn fmt(s: & &'self str, f: &mut Formatter) {
// XXX: formatting args
f.buf.write(s.as_bytes())
f.pad(*s);
}
}
impl Char for char {
fn fmt(c: &char, f: &mut Formatter) {
// XXX: formatting args
// XXX: shouldn't require an allocation
let mut s = ~"";
s.push_char(*c);
f.buf.write(s.as_bytes());
let mut utf8 = [0u8, ..4];
let amt = c.encode_utf8(utf8);
let s: &str = unsafe { cast::transmute(utf8.slice_to(amt)) };
String::fmt(&s, f);
}
}
impl Signed for int {
fn fmt(c: &int, f: &mut Formatter) {
// XXX: formatting args
do int::to_str_bytes(*c, 10) |buf| {
f.buf.write(buf);
do uint::to_str_bytes(c.abs() as uint, 10) |buf| {
f.pad_integral(buf, "", *c >= 0);
}
}
}
impl Unsigned for uint {
fn fmt(c: &uint, f: &mut Formatter) {
// XXX: formatting args
do uint::to_str_bytes(*c, 10) |buf| {
f.buf.write(buf);
f.pad_integral(buf, "", true);
}
}
}
impl Octal for uint {
fn fmt(c: &uint, f: &mut Formatter) {
// XXX: formatting args
do uint::to_str_bytes(*c, 8) |buf| {
f.buf.write(buf);
f.pad_integral(buf, "0o", true);
}
}
}
impl LowerHex for uint {
fn fmt(c: &uint, f: &mut Formatter) {
// XXX: formatting args
do uint::to_str_bytes(*c, 16) |buf| {
f.buf.write(buf);
f.pad_integral(buf, "0x", true);
}
}
}
impl UpperHex for uint {
fn fmt(c: &uint, f: &mut Formatter) {
// XXX: formatting args
do uint::to_str_bytes(*c, 16) |buf| {
let mut local = [0u8, ..16];
for (l, &b) in local.mut_iter().zip(buf.iter()) {
@ -341,16 +451,29 @@ impl UpperHex for uint {
_ => b,
};
}
f.buf.write(local.slice_to(buf.len()));
f.pad_integral(local.slice_to(buf.len()), "0x", true);
}
}
}
impl<T> Poly for T {
fn fmt(t: &T, f: &mut Formatter) {
// XXX: formatting args
let s = sys::log_str(t);
f.buf.write(s.as_bytes());
match (f.width, f.precision) {
(None, None) => {
// XXX: sys::log_str should have a variant which takes a stream
// and we should directly call that (avoids unnecessary
// allocations)
let s = sys::log_str(t);
f.buf.write(s.as_bytes());
}
// If we have a specified width for formatting, then we have to make
// this allocation of a new string
_ => {
let s = sys::log_str(t);
f.pad(s);
}
}
}
}

View file

@ -33,6 +33,7 @@ use ptr;
use ptr::RawPtr;
use to_str::ToStr;
use uint;
use unstable::raw::{Repr, Slice};
use vec;
use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
@ -758,15 +759,7 @@ macro_rules! utf8_acc_cont_byte(
($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
)
// UTF-8 tags and ranges
static TAG_CONT_U8: u8 = 128u8;
static TAG_CONT: uint = 128u;
static MAX_ONE_B: uint = 128u;
static TAG_TWO_B: uint = 192u;
static MAX_TWO_B: uint = 2048u;
static TAG_THREE_B: uint = 224u;
static MAX_THREE_B: uint = 65536u;
static TAG_FOUR_B: uint = 240u;
static MAX_UNICODE: uint = 1114112u;
/// Unsafe operations
@ -1988,40 +1981,18 @@ impl OwnedStr for ~str {
#[inline]
fn push_char(&mut self, c: char) {
assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char`
let cur_len = self.len();
self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
// Attempt to not use an intermediate buffer by just pushing bytes
// directly onto this string.
unsafe {
let code = c as uint;
let nb = if code < MAX_ONE_B { 1u }
else if code < MAX_TWO_B { 2u }
else if code < MAX_THREE_B { 3u }
else { 4u };
let len = self.len();
let new_len = len + nb;
self.reserve_at_least(new_len);
let off = len as int;
do self.as_mut_buf |buf, _len| {
match nb {
1u => {
*ptr::mut_offset(buf, off) = code as u8;
}
2u => {
*ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
*ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8;
}
3u => {
*ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
*ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8;
*ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8;
}
4u => {
*ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
*ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8;
*ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8;
*ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8;
}
_ => {}
}
}
raw::set_len(self, new_len);
let v = self.repr();
let len = c.encode_utf8(cast::transmute(Slice {
data: ((&(*v).data) as *u8).offset(cur_len as int),
len: 4,
}));
raw::set_len(self, cur_len + len);
}
}

View file

@ -56,6 +56,7 @@ impl<'self, T> Repr<Slice<T>> for &'self [T] {}
impl<'self> Repr<Slice<u8>> for &'self str {}
impl<T> Repr<*Box<T>> for @T {}
impl<T> Repr<*Box<Vec<T>>> for @[T] {}
impl Repr<*String> for ~str {}
// sure would be nice to have this
// impl<T> Repr<*Vec<T>> for ~[T] {}

View file

@ -127,7 +127,13 @@ impl Context {
}
}
parse::Argument(ref arg) => {
// argument first (it's first in the format string)
// width/precision first, if they have implicit positional
// parameters it makes more sense to consume them first.
self.verify_count(arg.format.width);
self.verify_count(arg.format.precision);
// argument second, if it's an implicit positional parameter
// it's written second, so it should come after width/precision.
let pos = match arg.position {
parse::ArgumentNext => {
let i = self.next_arg;
@ -144,10 +150,6 @@ impl Context {
} else { Known(arg.format.ty.to_managed()) };
self.verify_arg_type(pos, ty);
// width/precision next
self.verify_count(arg.format.width);
self.verify_count(arg.format.precision);
// and finally the method being applied
match arg.method {
None => {}

View file

@ -22,50 +22,105 @@ impl fmt::Signed for B {
}
pub fn main() {
fn t(a: ~str, b: &str) { assert_eq!(a, b.to_owned()); }
macro_rules! t(($a:expr, $b:expr) => { assert_eq!($a, $b.to_owned()) })
// Make sure there's a poly formatter that takes anything
t(ifmt!("{}", 1), "1");
t(ifmt!("{}", A), "{}");
t(ifmt!("{}", ()), "()");
t(ifmt!("{}", @(~1, "foo")), "@(~1, \"foo\")");
t!(ifmt!("{}", 1), "1");
t!(ifmt!("{}", A), "{}");
t!(ifmt!("{}", ()), "()");
t!(ifmt!("{}", @(~1, "foo")), "@(~1, \"foo\")");
// Various edge cases without formats
t(ifmt!(""), "");
t(ifmt!("hello"), "hello");
t(ifmt!("hello \\{"), "hello {");
t!(ifmt!(""), "");
t!(ifmt!("hello"), "hello");
t!(ifmt!("hello \\{"), "hello {");
// At least exercise all the formats
t(ifmt!("{:b}", true), "true");
t(ifmt!("{:c}", '☃'), "");
t(ifmt!("{:d}", 10), "10");
t(ifmt!("{:i}", 10), "10");
t(ifmt!("{:u}", 10u), "10");
t(ifmt!("{:o}", 10u), "12");
t(ifmt!("{:x}", 10u), "a");
t(ifmt!("{:X}", 10u), "A");
t(ifmt!("{:s}", "foo"), "foo");
t(ifmt!("{:p}", 0x1234 as *int), "0x1234");
t(ifmt!("{:p}", 0x1234 as *mut int), "0x1234");
t(ifmt!("{:d}", A), "aloha");
t(ifmt!("{:d}", B), "adios");
t(ifmt!("foo {:s} ☃☃☃☃☃☃", "bar"), "foo bar ☃☃☃☃☃☃");
t(ifmt!("{1} {0}", 0, 1), "1 0");
t(ifmt!("{foo} {bar}", foo=0, bar=1), "0 1");
t(ifmt!("{foo} {1} {bar} {0}", 0, 1, foo=2, bar=3), "2 1 3 0");
t(ifmt!("{} {0:s}", "a"), "a a");
t(ifmt!("{} {0}", "a"), "\"a\" \"a\"");
t!(ifmt!("{:b}", true), "true");
t!(ifmt!("{:c}", '☃'), "");
t!(ifmt!("{:d}", 10), "10");
t!(ifmt!("{:i}", 10), "10");
t!(ifmt!("{:u}", 10u), "10");
t!(ifmt!("{:o}", 10u), "12");
t!(ifmt!("{:x}", 10u), "a");
t!(ifmt!("{:X}", 10u), "A");
t!(ifmt!("{:s}", "foo"), "foo");
t!(ifmt!("{:p}", 0x1234 as *int), "0x1234");
t!(ifmt!("{:p}", 0x1234 as *mut int), "0x1234");
t!(ifmt!("{:d}", A), "aloha");
t!(ifmt!("{:d}", B), "adios");
t!(ifmt!("foo {:s} ☃☃☃☃☃☃", "bar"), "foo bar ☃☃☃☃☃☃");
t!(ifmt!("{1} {0}", 0, 1), "1 0");
t!(ifmt!("{foo} {bar}", foo=0, bar=1), "0 1");
t!(ifmt!("{foo} {1} {bar} {0}", 0, 1, foo=2, bar=3), "2 1 3 0");
t!(ifmt!("{} {0:s}", "a"), "a a");
t!(ifmt!("{} {0}", "a"), "\"a\" \"a\"");
// Methods should probably work
t(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 0u), "c0");
t(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 1u), "a1");
t(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 2u), "b2");
t(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 3u), "d3");
t(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "a"), "aa");
t(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "b"), "bb");
t(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "c"), "cc");
t(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "d"), "dd");
t(ifmt!("{1, select, a{#{0:s}} other{#{1}}}", "b", "a"), "ab");
t(ifmt!("{1, select, a{#{0}} other{#{1}}}", "c", "b"), "bb");
t!(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 0u), "c0");
t!(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 1u), "a1");
t!(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 2u), "b2");
t!(ifmt!("{0, plural, =1{a#} =2{b#} zero{c#} other{d#}}", 3u), "d3");
t!(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "a"), "aa");
t!(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "b"), "bb");
t!(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "c"), "cc");
t!(ifmt!("{0, select, a{a#} b{b#} c{c#} other{d#}}", "d"), "dd");
t!(ifmt!("{1, select, a{#{0:s}} other{#{1}}}", "b", "a"), "ab");
t!(ifmt!("{1, select, a{#{0}} other{#{1}}}", "c", "b"), "bb");
// Formatting strings and their arguments
t!(ifmt!("{:s}", "a"), "a");
t!(ifmt!("{:4s}", "a"), "a ");
t!(ifmt!("{:>4s}", "a"), " a");
t!(ifmt!("{:<4s}", "a"), "a ");
t!(ifmt!("{:.4s}", "a"), "a");
t!(ifmt!("{:4.4s}", "a"), "a ");
t!(ifmt!("{:4.4s}", "aaaaaaaaaaaaaaaaaa"), "aaaa");
t!(ifmt!("{:<4.4s}", "aaaaaaaaaaaaaaaaaa"), "aaaa");
t!(ifmt!("{:>4.4s}", "aaaaaaaaaaaaaaaaaa"), "aaaa");
t!(ifmt!("{:>10.4s}", "aaaaaaaaaaaaaaaaaa"), "aaaa");
t!(ifmt!("{:2.4s}", "aaaaa"), "aaaa");
t!(ifmt!("{:2.4s}", "aaaa"), "aaaa");
t!(ifmt!("{:2.4s}", "aaa"), "aaa");
t!(ifmt!("{:2.4s}", "aa"), "aa");
t!(ifmt!("{:2.4s}", "a"), "a ");
t!(ifmt!("{:0>2s}", "a"), "0a");
t!(ifmt!("{:.*s}", 4, "aaaaaaaaaaaaaaaaaa"), "aaaa");
t!(ifmt!("{:.1$s}", "aaaaaaaaaaaaaaaaaa", 4), "aaaa");
t!(ifmt!("{:1$s}", "a", 4), "a ");
t!(ifmt!("{:-#s}", "a"), "a");
t!(ifmt!("{:+#s}", "a"), "a");
// Precision overrides 0-padding
// FIXME #2481: Recent gcc's report some of these as warnings
/*t!(ifmt!("{:0>6.5d}", 0), ~" 00000");*/
/*t!(ifmt!("{:0>6.5u}", 0u), ~" 00000");*/
/*t!(ifmt!("{:0>6.5x}", 0u), ~" 00000");*/
/*t!(ifmt!("{:0>6.5d}", 10), ~" 00010");*/
/*t!(ifmt!("{:0>6.5d}", -10), ~"-00010");*/
/*t!(ifmt!("{:0>6.5u}", 10u), ~" 00010");*/
/*t!(ifmt!("{:0>6.5s}", ~"t!"), ~" t!");*/
/*t!(ifmt!("{:0>6.5c}", 'A'), ~" A");*/
/*t!(ifmt!("{:0>6.5x}", 127u), ~" 0007f");*/
/*t!(ifmt!("{:0>6.5X}", 127u), ~" 0007F");*/
/*t!(ifmt!("{:0>6.5o}", 10u), ~" 00012");*/
// Signed combinations
/*t!(ifmt!("{:5d}", 1), ~" 1");*/
/*t!(ifmt!("{: >5d}", -1), ~" -1");*/
/*t!(ifmt!("{:+5d}", 1), ~" +1");*/
/*t!(ifmt!("{:+5d}", -1), ~" -1");*/
/*t!(ifmt!("{:0>5d}", 1), ~" 0001");*/
/*t!(ifmt!("{:0>5d}", -1), ~"-0001");*/
/*t!(ifmt!("{:0>+5d}", 1), ~"+0001");*/
/*t!(ifmt!("{:0>+5d}", -1), ~"-0001");*/
/*t!(ifmt!("%- 5d", 1), ~" 1 ");*/
/*t!(ifmt!("%- 5d", -1), ~"-1 ");*/
/*t!(ifmt!("%-+5d", 1), ~"+1 ");*/
/*t!(ifmt!("%-+5d", -1), ~"-1 ");*/
/*t!(ifmt!("%- 05d", 1), ~" 1 ");*/
/*t!(ifmt!("%- 05d", -1), ~"-1 ");*/
/*t!(ifmt!("%-+05d", 1), ~"+1 ");*/
/*t!(ifmt!("%-+05d", -1), ~"-1 ");*/
}