From 8c465b4efab9dec55434193acacf0a10e83c3ae0 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 May 2019 11:45:56 +1000 Subject: [PATCH 1/5] Add `InternedString::with2`. This lets comparisons occur with a single access to the interner, instead of two. --- src/libsyntax_pos/symbol.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index 20759217b54..107cf8360c4 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -725,6 +725,15 @@ impl InternedString { unsafe { f(&*str) } } + fn with2 R, R>(self, other: &InternedString, f: F) -> R { + let (self_str, other_str) = with_interner(|interner| { + (interner.get(self.symbol) as *const str, + interner.get(other.symbol) as *const str) + }); + // This is safe for the same reason that `with` is safe. + unsafe { f(&*self_str, &*other_str) } + } + pub fn as_symbol(self) -> Symbol { self.symbol } @@ -745,7 +754,7 @@ impl PartialOrd for InternedString { if self.symbol == other.symbol { return Some(Ordering::Equal); } - self.with(|self_str| other.with(|other_str| self_str.partial_cmp(other_str))) + self.with2(other, |self_str, other_str| self_str.partial_cmp(other_str)) } } @@ -754,7 +763,7 @@ impl Ord for InternedString { if self.symbol == other.symbol { return Ordering::Equal; } - self.with(|self_str| other.with(|other_str| self_str.cmp(&other_str))) + self.with2(other, |self_str, other_str| self_str.cmp(other_str)) } } From c2cae7bbc3880ec2874b32756c7c5dc52997a552 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 May 2019 11:54:47 +1000 Subject: [PATCH 2/5] Avoid recursion in de-gensym functions. --- src/libsyntax_pos/symbol.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index 107cf8360c4..e8472e1b9f1 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -492,7 +492,7 @@ impl Interner { if (symbol.0.as_usize()) < self.strings.len() { symbol } else { - self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]) + self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize] } } @@ -513,7 +513,10 @@ impl Interner { pub fn get(&self, symbol: Symbol) -> &str { match self.strings.get(symbol.0.as_usize()) { Some(string) => string, - None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]), + None => { + let symbol = self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]; + self.strings[symbol.0.as_usize()] + } } } } From 0e27c36145594dcf3f00707543f11f591a63ec95 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 May 2019 12:09:57 +1000 Subject: [PATCH 3/5] Add various comments. Lots of details I wish I'd known when I first looked at this code. --- src/libsyntax_pos/symbol.rs | 52 ++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index e8472e1b9f1..ee4a581123f 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -344,9 +344,22 @@ impl Decodable for Ident { } } -/// A symbol is an interned or gensymed string. The use of `newtype_index!` means -/// that `Option` only takes up 4 bytes, because `newtype_index!` reserves -/// the last 256 values for tagging purposes. +/// A symbol is an interned or gensymed string. A gensym is a symbol that is +/// never equal to any other symbol. E.g.: +/// ``` +/// assert_eq!(Symbol::intern("x"), Symbol::intern("x")) +/// assert_ne!(Symbol::gensym("x"), Symbol::intern("x")) +/// assert_ne!(Symbol::gensym("x"), Symbol::gensym("x")) +/// ``` +/// Conceptually, a gensym can be thought of as a normal symbol with an +/// invisible unique suffix. Gensyms are useful when creating new identifiers +/// that must not match any existing identifiers, e.g. during macro expansion +/// and syntax desugaring. +/// +/// Internally, a Symbol is implemented as an index, and all operations +/// (including hashing, equality, and ordering) operate on that index. The use +/// of `newtype_index!` means that `Option` only takes up 4 bytes, +/// because `newtype_index!` reserves the last 256 values for tagging purposes. /// /// Note that `Symbol` cannot directly be a `newtype_index!` because it implements /// `fmt::Debug`, `Encodable`, and `Decodable` in special ways. @@ -380,6 +393,7 @@ impl Symbol { with_interner(|interner| interner.gensymed(self)) } + // WARNING: this function is deprecated and will be removed in the future. pub fn is_gensymed(self) -> bool { with_interner(|interner| interner.is_gensymed(self)) } @@ -510,6 +524,8 @@ impl Interner { symbol.0.as_usize() >= self.strings.len() } + // Get the symbol as a string. `Symbol::as_str()` should be used in + // preference to this function. pub fn get(&self, symbol: Symbol) -> &str { match self.strings.get(symbol.0.as_usize()) { Some(string) => string, @@ -614,11 +630,17 @@ fn with_interner T>(f: F) -> T { GLOBALS.with(|globals| f(&mut *globals.symbol_interner.lock())) } -/// Represents a string stored in the interner. Because the interner outlives any thread -/// which uses this type, we can safely treat `string` which points to interner data, -/// as an immortal string, as long as this type never crosses between threads. -// FIXME: ensure that the interner outlives any thread which uses `LocalInternedString`, -// by creating a new thread right after constructing the interner. +/// An alternative to `Symbol` and `InternedString`, useful when the chars +/// within the symbol need to be accessed. It is best used for temporary +/// values. +/// +/// Because the interner outlives any thread which uses this type, we can +/// safely treat `string` which points to interner data, as an immortal string, +/// as long as this type never crosses between threads. +// +// FIXME: ensure that the interner outlives any thread which uses +// `LocalInternedString`, by creating a new thread right after constructing the +// interner. #[derive(Clone, Copy, Hash, PartialOrd, Eq, Ord)] pub struct LocalInternedString { string: &'static str, @@ -711,7 +733,19 @@ impl Encodable for LocalInternedString { } } -/// Represents a string stored in the string interner. +/// An alternative to `Symbol` that is focused on string contents. It has two +/// main differences to `Symbol`. +/// +/// First, its implementations of `Hash`, `PartialOrd` and `Ord` work with the +/// string chars rather than the symbol integer. This is useful when hash +/// stability is required across compile sessions, or a guaranteed sort +/// ordering is required. +/// +/// Second, gensym-ness is irrelevant. E.g.: +/// ``` +/// assert_ne!(Symbol::gensym("x"), Symbol::gensym("x")) +/// assert_eq!(Symbol::gensym("x").as_interned_str(), Symbol::gensym("x").as_interned_str()) +/// ``` #[derive(Clone, Copy, Eq)] pub struct InternedString { symbol: Symbol, From cb7eacb1d2f856f52f2daf3b58325e667f654eca Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 May 2019 12:14:30 +1000 Subject: [PATCH 4/5] Remove the `From for String` impl. It's not used. --- src/libsyntax_pos/symbol.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index ee4a581123f..231cfb793f8 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -840,12 +840,6 @@ impl<'a> PartialEq for &'a String { } } -impl std::convert::From for String { - fn from(val: InternedString) -> String { - val.as_symbol().to_string() - } -} - impl fmt::Debug for InternedString { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.with(|str| fmt::Debug::fmt(&str, f)) From e53bb1aefb0aba8fcadaf73b2c0f334dd36c2b23 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 May 2019 14:06:10 +1000 Subject: [PATCH 5/5] Reduce `Symbol`'s interface slightly. --- src/libsyntax_pos/symbol.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/libsyntax_pos/symbol.rs b/src/libsyntax_pos/symbol.rs index 231cfb793f8..d0ba09af30b 100644 --- a/src/libsyntax_pos/symbol.rs +++ b/src/libsyntax_pos/symbol.rs @@ -380,10 +380,6 @@ impl Symbol { with_interner(|interner| interner.intern(string)) } - pub fn interned(self) -> Self { - with_interner(|interner| interner.interned(self)) - } - /// Gensyms a new `usize`, using the current interner. pub fn gensym(string: &str) -> Self { with_interner(|interner| interner.gensym(string)) @@ -502,7 +498,7 @@ impl Interner { name } - pub fn interned(&self, symbol: Symbol) -> Symbol { + fn interned(&self, symbol: Symbol) -> Symbol { if (symbol.0.as_usize()) < self.strings.len() { symbol } else {