use core::char::from_u32; #[test] fn test_is_ascii() { assert!(b"".is_ascii()); assert!(b"banana\0\x7F".is_ascii()); assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii())); assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii()); assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii())); assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii())); assert!("".is_ascii()); assert!("banana\0\u{7F}".is_ascii()); assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii())); assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii())); assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii())); } #[test] fn test_to_ascii_uppercase() { assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL"); assert_eq!("hıKß".to_ascii_uppercase(), "HıKß"); for i in 0..501 { let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 } else { i }; assert_eq!( (from_u32(i).unwrap()).to_string().to_ascii_uppercase(), (from_u32(upper).unwrap()).to_string() ); } } #[test] fn test_to_ascii_lowercase() { assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl"); // Dotted capital I, Kelvin sign, Sharp S. assert_eq!("HİKß".to_ascii_lowercase(), "hİKß"); for i in 0..501 { let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } else { i }; assert_eq!( (from_u32(i).unwrap()).to_string().to_ascii_lowercase(), (from_u32(lower).unwrap()).to_string() ); } } #[test] fn test_make_ascii_lower_case() { macro_rules! test { ($from: expr, $to: expr) => {{ let mut x = $from; x.make_ascii_lowercase(); assert_eq!(x, $to); }}; } test!(b'A', b'a'); test!(b'a', b'a'); test!(b'!', b'!'); test!('A', 'a'); test!('À', 'À'); test!('a', 'a'); test!('!', '!'); test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89"); test!("HİKß".to_string(), "hİKß"); } #[test] fn test_make_ascii_upper_case() { macro_rules! test { ($from: expr, $to: expr) => {{ let mut x = $from; x.make_ascii_uppercase(); assert_eq!(x, $to); }}; } test!(b'a', b'A'); test!(b'A', b'A'); test!(b'!', b'!'); test!('a', 'A'); test!('à', 'à'); test!('A', 'A'); test!('!', '!'); test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9"); test!("hıKß".to_string(), "HıKß"); let mut x = "Hello".to_string(); x[..3].make_ascii_uppercase(); // Test IndexMut on String. assert_eq!(x, "HELlo") } #[test] fn test_eq_ignore_ascii_case() { assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl")); assert!(!"Ürl".eq_ignore_ascii_case("ürl")); // Dotted capital I, Kelvin sign, Sharp S. assert!("HİKß".eq_ignore_ascii_case("hİKß")); assert!(!"İ".eq_ignore_ascii_case("i")); assert!(!"K".eq_ignore_ascii_case("k")); assert!(!"ß".eq_ignore_ascii_case("s")); for i in 0..501 { let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } else { i }; assert!( (from_u32(i).unwrap()) .to_string() .eq_ignore_ascii_case(&from_u32(lower).unwrap().to_string()) ); } } #[test] fn inference_works() { let x = "a".to_string(); x.eq_ignore_ascii_case("A"); } // Shorthands used by the is_ascii_* tests. macro_rules! assert_all { ($what:ident, $($str:tt),+) => {{ $( for b in $str.chars() { if !b.$what() { panic!("expected {}({}) but it isn't", stringify!($what), b); } } for b in $str.as_bytes().iter() { if !b.$what() { panic!("expected {}(0x{:02x})) but it isn't", stringify!($what), b); } } )+ }}; ($what:ident, $($str:tt),+,) => (assert_all!($what,$($str),+)) } macro_rules! assert_none { ($what:ident, $($str:tt),+) => {{ $( for b in $str.chars() { if b.$what() { panic!("expected not-{}({}) but it is", stringify!($what), b); } } for b in $str.as_bytes().iter() { if b.$what() { panic!("expected not-{}(0x{:02x})) but it is", stringify!($what), b); } } )+ }}; ($what:ident, $($str:tt),+,) => (assert_none!($what,$($str),+)) } #[test] fn test_is_ascii_alphabetic() { assert_all!( is_ascii_alphabetic, "", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOQPRSTUVWXYZ", ); assert_none!( is_ascii_alphabetic, "0123456789", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", " \t\n\x0c\r", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_uppercase() { assert_all!(is_ascii_uppercase, "", "ABCDEFGHIJKLMNOQPRSTUVWXYZ",); assert_none!( is_ascii_uppercase, "abcdefghijklmnopqrstuvwxyz", "0123456789", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", " \t\n\x0c\r", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_lowercase() { assert_all!(is_ascii_lowercase, "abcdefghijklmnopqrstuvwxyz",); assert_none!( is_ascii_lowercase, "ABCDEFGHIJKLMNOQPRSTUVWXYZ", "0123456789", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", " \t\n\x0c\r", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_alphanumeric() { assert_all!( is_ascii_alphanumeric, "", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOQPRSTUVWXYZ", "0123456789", ); assert_none!( is_ascii_alphanumeric, "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", " \t\n\x0c\r", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_digit() { assert_all!(is_ascii_digit, "", "0123456789",); assert_none!( is_ascii_digit, "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOQPRSTUVWXYZ", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", " \t\n\x0c\r", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_hexdigit() { assert_all!(is_ascii_hexdigit, "", "0123456789", "abcdefABCDEF",); assert_none!( is_ascii_hexdigit, "ghijklmnopqrstuvwxyz", "GHIJKLMNOQPRSTUVWXYZ", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", " \t\n\x0c\r", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_punctuation() { assert_all!(is_ascii_punctuation, "", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",); assert_none!( is_ascii_punctuation, "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOQPRSTUVWXYZ", "0123456789", " \t\n\x0c\r", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_graphic() { assert_all!( is_ascii_graphic, "", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOQPRSTUVWXYZ", "0123456789", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", ); assert_none!( is_ascii_graphic, " \t\n\x0c\r", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_whitespace() { assert_all!(is_ascii_whitespace, "", " \t\n\x0c\r",); assert_none!( is_ascii_whitespace, "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOQPRSTUVWXYZ", "0123456789", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x0b\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); } #[test] fn test_is_ascii_control() { assert_all!( is_ascii_control, "", "\x00\x01\x02\x03\x04\x05\x06\x07", "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", "\x10\x11\x12\x13\x14\x15\x16\x17", "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", "\x7f", ); assert_none!( is_ascii_control, "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOQPRSTUVWXYZ", "0123456789", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", " ", ); } // `is_ascii` does a good amount of pointer manipulation and has // alignment-dependent computation. This is all sanity-checked via // `debug_assert!`s, so we test various sizes/alignments thoroughly versus an // "obviously correct" baseline function. #[test] fn test_is_ascii_align_size_thoroughly() { // The "obviously-correct" baseline mentioned above. fn is_ascii_baseline(s: &[u8]) -> bool { s.iter().all(|b| b.is_ascii()) } // Helper to repeat `l` copies of `b0` followed by `l` copies of `b1`. fn repeat_concat(b0: u8, b1: u8, l: usize) -> Vec { use core::iter::repeat; repeat(b0).take(l).chain(repeat(b1).take(l)).collect() } // Miri is too slow for much of this, and in miri `align_offset` always // returns `usize::max_value()` anyway (at the moment), so we just test // lightly. let iter = if cfg!(miri) { 0..5 } else { 0..100 }; for i in iter { #[cfg(not(miri))] let cases = &[ b"a".repeat(i), b"\0".repeat(i), b"\x7f".repeat(i), b"\x80".repeat(i), b"\xff".repeat(i), repeat_concat(b'a', 0x80u8, i), repeat_concat(0x80u8, b'a', i), ]; #[cfg(miri)] let cases = &[repeat_concat(b'a', 0x80u8, i)]; for case in cases { for pos in 0..=case.len() { // Potentially misaligned head let prefix = &case[pos..]; assert_eq!(is_ascii_baseline(prefix), prefix.is_ascii(),); // Potentially misaligned tail let suffix = &case[..case.len() - pos]; assert_eq!(is_ascii_baseline(suffix), suffix.is_ascii(),); // Both head and tail are potentially misaligned let mid = &case[(pos / 2)..(case.len() - (pos / 2))]; assert_eq!(is_ascii_baseline(mid), mid.is_ascii(),); } } } }