unicode: Make statics legal

The tables in libunicode are far too large to be worth inlining into any other
program, so all of these tables remain `static`. For a `static` to be legal, it
cannot refer to another `static` by value, so the tables now refer to one
another by reference instead.

This commit also modifies the src/etc/unicode.py script so that it generates
the tables in this reference-based form.
This commit is contained in:
Alex Crichton 2014-10-06 16:14:38 -07:00
parent 1a433770e3
commit 34d66de52a
3 changed files with 113 additions and 112 deletions

View file

@ -333,14 +333,14 @@ def emit_property_module(f, mod, tbl, emit_fn):
def emit_regex_module(f, cats, w_data):
f.write("pub mod regex {\n")
regex_class = "&'static [(char, char)]"
class_table = "&'static [(&'static str, %s)]" % regex_class
class_table = "&'static [(&'static str, &'static %s)]" % regex_class
emit_table(f, "UNICODE_CLASSES", cats, class_table,
pfun=lambda x: "(\"%s\",super::%s::%s_table)" % (x[0], x[1], x[0]))
pfun=lambda x: "(\"%s\",&super::%s::%s_table)" % (x[0], x[1], x[0]))
f.write(" pub static PERLD: %s = super::general_category::Nd_table;\n\n"
f.write(" pub static PERLD: &'static %s = &super::general_category::Nd_table;\n\n"
% regex_class)
f.write(" pub static PERLS: %s = super::property::White_Space_table;\n\n"
f.write(" pub static PERLS: &'static %s = &super::property::White_Space_table;\n\n"
% regex_class)
emit_table(f, "PERLW", w_data, regex_class)

View file

@ -100,15 +100,15 @@ pub fn compose(a: char, b: char) -> Option<char> {
}
// Constants from Unicode 6.3.0 Section 3.12 Conjoining Jamo Behavior
static S_BASE: u32 = 0xAC00;
static L_BASE: u32 = 0x1100;
static V_BASE: u32 = 0x1161;
static T_BASE: u32 = 0x11A7;
static L_COUNT: u32 = 19;
static V_COUNT: u32 = 21;
static T_COUNT: u32 = 28;
static N_COUNT: u32 = (V_COUNT * T_COUNT);
static S_COUNT: u32 = (L_COUNT * N_COUNT);
const S_BASE: u32 = 0xAC00;
const L_BASE: u32 = 0x1100;
const V_BASE: u32 = 0x1161;
const T_BASE: u32 = 0x11A7;
const L_COUNT: u32 = 19;
const V_COUNT: u32 = 21;
const T_COUNT: u32 = 28;
const N_COUNT: u32 = (V_COUNT * T_COUNT);
const S_COUNT: u32 = (L_COUNT * N_COUNT);
// Decompose a precomposed Hangul syllable
#[inline(always)]

View file

@ -3636,108 +3636,109 @@ pub mod property {
}
pub mod regex {
pub static UNICODE_CLASSES: &'static [(&'static str, &'static [(char, char)])] = &[
("Alphabetic", super::derived_property::Alphabetic_table), ("Arabic",
super::script::Arabic_table), ("Armenian", super::script::Armenian_table), ("Avestan",
super::script::Avestan_table), ("Balinese", super::script::Balinese_table), ("Bamum",
super::script::Bamum_table), ("Bassa_Vah", super::script::Bassa_Vah_table), ("Batak",
super::script::Batak_table), ("Bengali", super::script::Bengali_table), ("Bopomofo",
super::script::Bopomofo_table), ("Brahmi", super::script::Brahmi_table), ("Braille",
super::script::Braille_table), ("Buginese", super::script::Buginese_table), ("Buhid",
super::script::Buhid_table), ("C", super::general_category::C_table),
("Canadian_Aboriginal", super::script::Canadian_Aboriginal_table), ("Carian",
super::script::Carian_table), ("Caucasian_Albanian",
super::script::Caucasian_Albanian_table), ("Cc", super::general_category::Cc_table), ("Cf",
super::general_category::Cf_table), ("Chakma", super::script::Chakma_table), ("Cham",
super::script::Cham_table), ("Cherokee", super::script::Cherokee_table), ("Cn",
super::general_category::Cn_table), ("Co", super::general_category::Co_table), ("Common",
super::script::Common_table), ("Coptic", super::script::Coptic_table), ("Cuneiform",
super::script::Cuneiform_table), ("Cypriot", super::script::Cypriot_table), ("Cyrillic",
super::script::Cyrillic_table), ("Default_Ignorable_Code_Point",
super::derived_property::Default_Ignorable_Code_Point_table), ("Deseret",
super::script::Deseret_table), ("Devanagari", super::script::Devanagari_table), ("Duployan",
super::script::Duployan_table), ("Egyptian_Hieroglyphs",
super::script::Egyptian_Hieroglyphs_table), ("Elbasan", super::script::Elbasan_table),
("Ethiopic", super::script::Ethiopic_table), ("Georgian", super::script::Georgian_table),
("Glagolitic", super::script::Glagolitic_table), ("Gothic", super::script::Gothic_table),
("Grantha", super::script::Grantha_table), ("Greek", super::script::Greek_table),
("Gujarati", super::script::Gujarati_table), ("Gurmukhi", super::script::Gurmukhi_table),
("Han", super::script::Han_table), ("Hangul", super::script::Hangul_table), ("Hanunoo",
super::script::Hanunoo_table), ("Hebrew", super::script::Hebrew_table), ("Hiragana",
super::script::Hiragana_table), ("Imperial_Aramaic", super::script::Imperial_Aramaic_table),
("Inherited", super::script::Inherited_table), ("Inscriptional_Pahlavi",
super::script::Inscriptional_Pahlavi_table), ("Inscriptional_Parthian",
super::script::Inscriptional_Parthian_table), ("Javanese", super::script::Javanese_table),
("Join_Control", super::property::Join_Control_table), ("Kaithi",
super::script::Kaithi_table), ("Kannada", super::script::Kannada_table), ("Katakana",
super::script::Katakana_table), ("Kayah_Li", super::script::Kayah_Li_table), ("Kharoshthi",
super::script::Kharoshthi_table), ("Khmer", super::script::Khmer_table), ("Khojki",
super::script::Khojki_table), ("Khudawadi", super::script::Khudawadi_table), ("L",
super::general_category::L_table), ("LC", super::general_category::LC_table), ("Lao",
super::script::Lao_table), ("Latin", super::script::Latin_table), ("Lepcha",
super::script::Lepcha_table), ("Limbu", super::script::Limbu_table), ("Linear_A",
super::script::Linear_A_table), ("Linear_B", super::script::Linear_B_table), ("Lisu",
super::script::Lisu_table), ("Ll", super::general_category::Ll_table), ("Lm",
super::general_category::Lm_table), ("Lo", super::general_category::Lo_table), ("Lowercase",
super::derived_property::Lowercase_table), ("Lt", super::general_category::Lt_table), ("Lu",
super::general_category::Lu_table), ("Lycian", super::script::Lycian_table), ("Lydian",
super::script::Lydian_table), ("M", super::general_category::M_table), ("Mahajani",
super::script::Mahajani_table), ("Malayalam", super::script::Malayalam_table), ("Mandaic",
super::script::Mandaic_table), ("Manichaean", super::script::Manichaean_table), ("Mc",
super::general_category::Mc_table), ("Me", super::general_category::Me_table),
("Meetei_Mayek", super::script::Meetei_Mayek_table), ("Mende_Kikakui",
super::script::Mende_Kikakui_table), ("Meroitic_Cursive",
super::script::Meroitic_Cursive_table), ("Meroitic_Hieroglyphs",
super::script::Meroitic_Hieroglyphs_table), ("Miao", super::script::Miao_table), ("Mn",
super::general_category::Mn_table), ("Modi", super::script::Modi_table), ("Mongolian",
super::script::Mongolian_table), ("Mro", super::script::Mro_table), ("Myanmar",
super::script::Myanmar_table), ("N", super::general_category::N_table), ("Nabataean",
super::script::Nabataean_table), ("Nd", super::general_category::Nd_table), ("New_Tai_Lue",
super::script::New_Tai_Lue_table), ("Nko", super::script::Nko_table), ("Nl",
super::general_category::Nl_table), ("No", super::general_category::No_table),
("Noncharacter_Code_Point", super::property::Noncharacter_Code_Point_table), ("Ogham",
super::script::Ogham_table), ("Ol_Chiki", super::script::Ol_Chiki_table), ("Old_Italic",
super::script::Old_Italic_table), ("Old_North_Arabian",
super::script::Old_North_Arabian_table), ("Old_Permic", super::script::Old_Permic_table),
("Old_Persian", super::script::Old_Persian_table), ("Old_South_Arabian",
super::script::Old_South_Arabian_table), ("Old_Turkic", super::script::Old_Turkic_table),
("Oriya", super::script::Oriya_table), ("Osmanya", super::script::Osmanya_table), ("P",
super::general_category::P_table), ("Pahawh_Hmong", super::script::Pahawh_Hmong_table),
("Palmyrene", super::script::Palmyrene_table), ("Pau_Cin_Hau",
super::script::Pau_Cin_Hau_table), ("Pc", super::general_category::Pc_table), ("Pd",
super::general_category::Pd_table), ("Pe", super::general_category::Pe_table), ("Pf",
super::general_category::Pf_table), ("Phags_Pa", super::script::Phags_Pa_table),
("Phoenician", super::script::Phoenician_table), ("Pi", super::general_category::Pi_table),
("Po", super::general_category::Po_table), ("Ps", super::general_category::Ps_table),
("Psalter_Pahlavi", super::script::Psalter_Pahlavi_table), ("Rejang",
super::script::Rejang_table), ("Runic", super::script::Runic_table), ("S",
super::general_category::S_table), ("Samaritan", super::script::Samaritan_table),
("Saurashtra", super::script::Saurashtra_table), ("Sc", super::general_category::Sc_table),
("Sharada", super::script::Sharada_table), ("Shavian", super::script::Shavian_table),
("Siddham", super::script::Siddham_table), ("Sinhala", super::script::Sinhala_table), ("Sk",
super::general_category::Sk_table), ("Sm", super::general_category::Sm_table), ("So",
super::general_category::So_table), ("Sora_Sompeng", super::script::Sora_Sompeng_table),
("Sundanese", super::script::Sundanese_table), ("Syloti_Nagri",
super::script::Syloti_Nagri_table), ("Syriac", super::script::Syriac_table), ("Tagalog",
super::script::Tagalog_table), ("Tagbanwa", super::script::Tagbanwa_table), ("Tai_Le",
super::script::Tai_Le_table), ("Tai_Tham", super::script::Tai_Tham_table), ("Tai_Viet",
super::script::Tai_Viet_table), ("Takri", super::script::Takri_table), ("Tamil",
super::script::Tamil_table), ("Telugu", super::script::Telugu_table), ("Thaana",
super::script::Thaana_table), ("Thai", super::script::Thai_table), ("Tibetan",
super::script::Tibetan_table), ("Tifinagh", super::script::Tifinagh_table), ("Tirhuta",
super::script::Tirhuta_table), ("Ugaritic", super::script::Ugaritic_table), ("Uppercase",
super::derived_property::Uppercase_table), ("Vai", super::script::Vai_table),
("Warang_Citi", super::script::Warang_Citi_table), ("White_Space",
super::property::White_Space_table), ("XID_Continue",
super::derived_property::XID_Continue_table), ("XID_Start",
super::derived_property::XID_Start_table), ("Yi", super::script::Yi_table), ("Z",
super::general_category::Z_table), ("Zl", super::general_category::Zl_table), ("Zp",
super::general_category::Zp_table), ("Zs", super::general_category::Zs_table)
pub static UNICODE_CLASSES: &'static [(&'static str, &'static &'static [(char, char)])] = &[
("Alphabetic", &super::derived_property::Alphabetic_table), ("Arabic",
&super::script::Arabic_table), ("Armenian", &super::script::Armenian_table), ("Avestan",
&super::script::Avestan_table), ("Balinese", &super::script::Balinese_table), ("Bamum",
&super::script::Bamum_table), ("Bassa_Vah", &super::script::Bassa_Vah_table), ("Batak",
&super::script::Batak_table), ("Bengali", &super::script::Bengali_table), ("Bopomofo",
&super::script::Bopomofo_table), ("Brahmi", &super::script::Brahmi_table), ("Braille",
&super::script::Braille_table), ("Buginese", &super::script::Buginese_table), ("Buhid",
&super::script::Buhid_table), ("C", &super::general_category::C_table),
("Canadian_Aboriginal", &super::script::Canadian_Aboriginal_table), ("Carian",
&super::script::Carian_table), ("Caucasian_Albanian",
&super::script::Caucasian_Albanian_table), ("Cc", &super::general_category::Cc_table),
("Cf", &super::general_category::Cf_table), ("Chakma", &super::script::Chakma_table),
("Cham", &super::script::Cham_table), ("Cherokee", &super::script::Cherokee_table), ("Cn",
&super::general_category::Cn_table), ("Co", &super::general_category::Co_table), ("Common",
&super::script::Common_table), ("Coptic", &super::script::Coptic_table), ("Cuneiform",
&super::script::Cuneiform_table), ("Cypriot", &super::script::Cypriot_table), ("Cyrillic",
&super::script::Cyrillic_table), ("Default_Ignorable_Code_Point",
&super::derived_property::Default_Ignorable_Code_Point_table), ("Deseret",
&super::script::Deseret_table), ("Devanagari", &super::script::Devanagari_table),
("Duployan", &super::script::Duployan_table), ("Egyptian_Hieroglyphs",
&super::script::Egyptian_Hieroglyphs_table), ("Elbasan", &super::script::Elbasan_table),
("Ethiopic", &super::script::Ethiopic_table), ("Georgian", &super::script::Georgian_table),
("Glagolitic", &super::script::Glagolitic_table), ("Gothic", &super::script::Gothic_table),
("Grantha", &super::script::Grantha_table), ("Greek", &super::script::Greek_table),
("Gujarati", &super::script::Gujarati_table), ("Gurmukhi", &super::script::Gurmukhi_table),
("Han", &super::script::Han_table), ("Hangul", &super::script::Hangul_table), ("Hanunoo",
&super::script::Hanunoo_table), ("Hebrew", &super::script::Hebrew_table), ("Hiragana",
&super::script::Hiragana_table), ("Imperial_Aramaic",
&super::script::Imperial_Aramaic_table), ("Inherited", &super::script::Inherited_table),
("Inscriptional_Pahlavi", &super::script::Inscriptional_Pahlavi_table),
("Inscriptional_Parthian", &super::script::Inscriptional_Parthian_table), ("Javanese",
&super::script::Javanese_table), ("Join_Control", &super::property::Join_Control_table),
("Kaithi", &super::script::Kaithi_table), ("Kannada", &super::script::Kannada_table),
("Katakana", &super::script::Katakana_table), ("Kayah_Li", &super::script::Kayah_Li_table),
("Kharoshthi", &super::script::Kharoshthi_table), ("Khmer", &super::script::Khmer_table),
("Khojki", &super::script::Khojki_table), ("Khudawadi", &super::script::Khudawadi_table),
("L", &super::general_category::L_table), ("LC", &super::general_category::LC_table),
("Lao", &super::script::Lao_table), ("Latin", &super::script::Latin_table), ("Lepcha",
&super::script::Lepcha_table), ("Limbu", &super::script::Limbu_table), ("Linear_A",
&super::script::Linear_A_table), ("Linear_B", &super::script::Linear_B_table), ("Lisu",
&super::script::Lisu_table), ("Ll", &super::general_category::Ll_table), ("Lm",
&super::general_category::Lm_table), ("Lo", &super::general_category::Lo_table),
("Lowercase", &super::derived_property::Lowercase_table), ("Lt",
&super::general_category::Lt_table), ("Lu", &super::general_category::Lu_table), ("Lycian",
&super::script::Lycian_table), ("Lydian", &super::script::Lydian_table), ("M",
&super::general_category::M_table), ("Mahajani", &super::script::Mahajani_table),
("Malayalam", &super::script::Malayalam_table), ("Mandaic", &super::script::Mandaic_table),
("Manichaean", &super::script::Manichaean_table), ("Mc",
&super::general_category::Mc_table), ("Me", &super::general_category::Me_table),
("Meetei_Mayek", &super::script::Meetei_Mayek_table), ("Mende_Kikakui",
&super::script::Mende_Kikakui_table), ("Meroitic_Cursive",
&super::script::Meroitic_Cursive_table), ("Meroitic_Hieroglyphs",
&super::script::Meroitic_Hieroglyphs_table), ("Miao", &super::script::Miao_table), ("Mn",
&super::general_category::Mn_table), ("Modi", &super::script::Modi_table), ("Mongolian",
&super::script::Mongolian_table), ("Mro", &super::script::Mro_table), ("Myanmar",
&super::script::Myanmar_table), ("N", &super::general_category::N_table), ("Nabataean",
&super::script::Nabataean_table), ("Nd", &super::general_category::Nd_table),
("New_Tai_Lue", &super::script::New_Tai_Lue_table), ("Nko", &super::script::Nko_table),
("Nl", &super::general_category::Nl_table), ("No", &super::general_category::No_table),
("Noncharacter_Code_Point", &super::property::Noncharacter_Code_Point_table), ("Ogham",
&super::script::Ogham_table), ("Ol_Chiki", &super::script::Ol_Chiki_table), ("Old_Italic",
&super::script::Old_Italic_table), ("Old_North_Arabian",
&super::script::Old_North_Arabian_table), ("Old_Permic", &super::script::Old_Permic_table),
("Old_Persian", &super::script::Old_Persian_table), ("Old_South_Arabian",
&super::script::Old_South_Arabian_table), ("Old_Turkic", &super::script::Old_Turkic_table),
("Oriya", &super::script::Oriya_table), ("Osmanya", &super::script::Osmanya_table), ("P",
&super::general_category::P_table), ("Pahawh_Hmong", &super::script::Pahawh_Hmong_table),
("Palmyrene", &super::script::Palmyrene_table), ("Pau_Cin_Hau",
&super::script::Pau_Cin_Hau_table), ("Pc", &super::general_category::Pc_table), ("Pd",
&super::general_category::Pd_table), ("Pe", &super::general_category::Pe_table), ("Pf",
&super::general_category::Pf_table), ("Phags_Pa", &super::script::Phags_Pa_table),
("Phoenician", &super::script::Phoenician_table), ("Pi",
&super::general_category::Pi_table), ("Po", &super::general_category::Po_table), ("Ps",
&super::general_category::Ps_table), ("Psalter_Pahlavi",
&super::script::Psalter_Pahlavi_table), ("Rejang", &super::script::Rejang_table), ("Runic",
&super::script::Runic_table), ("S", &super::general_category::S_table), ("Samaritan",
&super::script::Samaritan_table), ("Saurashtra", &super::script::Saurashtra_table), ("Sc",
&super::general_category::Sc_table), ("Sharada", &super::script::Sharada_table), ("Shavian",
&super::script::Shavian_table), ("Siddham", &super::script::Siddham_table), ("Sinhala",
&super::script::Sinhala_table), ("Sk", &super::general_category::Sk_table), ("Sm",
&super::general_category::Sm_table), ("So", &super::general_category::So_table),
("Sora_Sompeng", &super::script::Sora_Sompeng_table), ("Sundanese",
&super::script::Sundanese_table), ("Syloti_Nagri", &super::script::Syloti_Nagri_table),
("Syriac", &super::script::Syriac_table), ("Tagalog", &super::script::Tagalog_table),
("Tagbanwa", &super::script::Tagbanwa_table), ("Tai_Le", &super::script::Tai_Le_table),
("Tai_Tham", &super::script::Tai_Tham_table), ("Tai_Viet", &super::script::Tai_Viet_table),
("Takri", &super::script::Takri_table), ("Tamil", &super::script::Tamil_table), ("Telugu",
&super::script::Telugu_table), ("Thaana", &super::script::Thaana_table), ("Thai",
&super::script::Thai_table), ("Tibetan", &super::script::Tibetan_table), ("Tifinagh",
&super::script::Tifinagh_table), ("Tirhuta", &super::script::Tirhuta_table), ("Ugaritic",
&super::script::Ugaritic_table), ("Uppercase", &super::derived_property::Uppercase_table),
("Vai", &super::script::Vai_table), ("Warang_Citi", &super::script::Warang_Citi_table),
("White_Space", &super::property::White_Space_table), ("XID_Continue",
&super::derived_property::XID_Continue_table), ("XID_Start",
&super::derived_property::XID_Start_table), ("Yi", &super::script::Yi_table), ("Z",
&super::general_category::Z_table), ("Zl", &super::general_category::Zl_table), ("Zp",
&super::general_category::Zp_table), ("Zs", &super::general_category::Zs_table)
];
pub static PERLD: &'static [(char, char)] = super::general_category::Nd_table;
pub static PERLD: &'static &'static [(char, char)] = &super::general_category::Nd_table;
pub static PERLS: &'static [(char, char)] = super::property::White_Space_table;
pub static PERLS: &'static &'static [(char, char)] = &super::property::White_Space_table;
pub static PERLW: &'static [(char, char)] = &[
('\x30', '\x39'), ('\x41', '\x5a'), ('\x5f', '\x5f'), ('\x61', '\x7a'), ('\xaa', '\xaa'),