Miri: replace canonical_alloc_id mechanism by extern_static_alloc_id which is called only when a pointer is 'imported' into the machine

This commit is contained in:
Ralf Jung 2020-07-26 11:11:17 +02:00
parent 2bbfa02b1b
commit 1d9efbbd8f
7 changed files with 78 additions and 97 deletions

View file

@ -502,8 +502,6 @@ impl fmt::Display for UndefinedBehaviorInfo<'_> {
pub enum UnsupportedOpInfo { pub enum UnsupportedOpInfo {
/// Free-form case. Only for errors that are never caught! /// Free-form case. Only for errors that are never caught!
Unsupported(String), Unsupported(String),
/// Accessing an unsupported foreign static.
ReadForeignStatic(DefId),
/// Could not find MIR for a function. /// Could not find MIR for a function.
NoMirFor(DefId), NoMirFor(DefId),
/// Encountered a pointer where we needed raw bytes. /// Encountered a pointer where we needed raw bytes.
@ -515,6 +513,8 @@ pub enum UnsupportedOpInfo {
ReadBytesAsPointer, ReadBytesAsPointer,
/// Accessing thread local statics /// Accessing thread local statics
ThreadLocalStatic(DefId), ThreadLocalStatic(DefId),
/// Accessing an unsupported extern static.
ReadExternStatic(DefId),
} }
impl fmt::Display for UnsupportedOpInfo { impl fmt::Display for UnsupportedOpInfo {
@ -522,8 +522,8 @@ impl fmt::Display for UnsupportedOpInfo {
use UnsupportedOpInfo::*; use UnsupportedOpInfo::*;
match self { match self {
Unsupported(ref msg) => write!(f, "{}", msg), Unsupported(ref msg) => write!(f, "{}", msg),
ReadForeignStatic(did) => { ReadExternStatic(did) => {
write!(f, "cannot read from foreign (extern) static ({:?})", did) write!(f, "cannot read from extern static ({:?})", did)
} }
NoMirFor(did) => write!(f, "no MIR body is available for {:?}", did), NoMirFor(did) => write!(f, "no MIR body is available for {:?}", did),
ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes",), ReadPointerAsBytes => write!(f, "unable to turn pointer into raw bytes",),

View file

@ -323,14 +323,17 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
} }
/// Call this to turn untagged "global" pointers (obtained via `tcx`) into /// Call this to turn untagged "global" pointers (obtained via `tcx`) into
/// the *canonical* machine pointer to the allocation. Must never be used /// the machine pointer to the allocation. Must never be used
/// for any other pointers! /// for any other pointers, nor for TLS statics.
/// ///
/// This represents a *direct* access to that memory, as opposed to access /// Using the resulting pointer represents a *direct* access to that memory
/// through a pointer that was created by the program. /// (e.g. by directly using a `static`),
/// as opposed to access through a pointer that was created by the program.
///
/// This function can fail only if `ptr` points to an `extern static`.
#[inline(always)] #[inline(always)]
pub fn tag_global_base_pointer(&self, ptr: Pointer) -> Pointer<M::PointerTag> { pub fn global_base_pointer(&self, ptr: Pointer) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
self.memory.tag_global_base_pointer(ptr) self.memory.global_base_pointer(ptr)
} }
#[inline(always)] #[inline(always)]

View file

@ -238,45 +238,30 @@ pub trait Machine<'mir, 'tcx>: Sized {
Ok(()) Ok(())
} }
/// Called for *every* memory access to determine the real ID of the given allocation. /// Return the `AllocId` for the given thread-local static in the current thread.
/// This provides a way for the machine to "redirect" certain allocations as it sees fit. fn thread_local_static_alloc_id(
/// _ecx: &mut InterpCx<'mir, 'tcx, Self>,
/// This is used by Miri to redirect extern statics to real allocations. def_id: DefId,
/// ) -> InterpResult<'tcx, AllocId> {
/// This function must be idempotent. throw_unsup!(ThreadLocalStatic(def_id))
#[inline]
fn canonical_alloc_id(_mem: &Memory<'mir, 'tcx, Self>, id: AllocId) -> AllocId {
id
} }
/// Called when converting a `ty::Const` to an operand (in /// Return the `AllocId` backing the given `extern static`.
/// `eval_const_to_op`). fn extern_static_alloc_id(
/// mem: &Memory<'mir, 'tcx, Self>,
/// Miri uses this callback for creating per thread allocations for thread def_id: DefId,
/// locals. In Rust, one way of creating a thread local is by marking a ) -> InterpResult<'tcx, AllocId> {
/// static with `#[thread_local]`. On supported platforms this gets // Use the `AllocId` associated with the `DefId`. Any actual *access* will fail.
/// translated to a LLVM thread local for which LLVM automatically ensures Ok(mem.tcx.create_static_alloc(def_id))
/// that each thread gets its own copy. Since LLVM automatically handles
/// thread locals, the Rust compiler just treats thread local statics as
/// regular statics even though accessing a thread local static should be an
/// effectful computation that depends on the current thread. The long term
/// plan is to change MIR to make accesses to thread locals explicit
/// (https://github.com/rust-lang/rust/issues/70685). While the issue 70685
/// is not fixed, our current workaround in Miri is to use this function to
/// make per-thread copies of thread locals. Please note that we cannot make
/// these copies in `canonical_alloc_id` because that is too late: for
/// example, if one created a pointer in thread `t1` to a thread local and
/// sent it to another thread `t2`, resolving the access in
/// `canonical_alloc_id` would result in pointer pointing to `t2`'s thread
/// local and not `t1` as it should.
#[inline]
fn adjust_global_const(
_ecx: &InterpCx<'mir, 'tcx, Self>,
val: mir::interpret::ConstValue<'tcx>,
) -> InterpResult<'tcx, mir::interpret::ConstValue<'tcx>> {
Ok(val)
} }
/// Return the "base" tag for the given *global* allocation: the one that is used for direct
/// accesses to this static/const/fn allocation. If `id` is not a global allocation,
/// this will return an unusable tag (i.e., accesses will be UB)!
///
/// Called on the id returned by `thread_local_static_alloc_id` and `extern_static_alloc_id`, if needed.
fn tag_global_base_pointer(memory_extra: &Self::MemoryExtra, id: AllocId) -> Self::PointerTag;
/// Called to initialize the "extra" state of an allocation and make the pointers /// Called to initialize the "extra" state of an allocation and make the pointers
/// it contains (in relocations) tagged. The way we construct allocations is /// it contains (in relocations) tagged. The way we construct allocations is
/// to always first construct it without extra and then add the extra. /// to always first construct it without extra and then add the extra.
@ -309,13 +294,6 @@ pub trait Machine<'mir, 'tcx>: Sized {
Ok(()) Ok(())
} }
/// Return the "base" tag for the given *global* allocation: the one that is used for direct
/// accesses to this static/const/fn allocation. If `id` is not a global allocation,
/// this will return an unusable tag (i.e., accesses will be UB)!
///
/// Expects `id` to be already canonical, if needed.
fn tag_global_base_pointer(memory_extra: &Self::MemoryExtra, id: AllocId) -> Self::PointerTag;
/// Executes a retagging operation /// Executes a retagging operation
#[inline] #[inline]
fn retag( fn retag(
@ -375,13 +353,6 @@ pub trait Machine<'mir, 'tcx>: Sized {
_mem: &Memory<'mir, 'tcx, Self>, _mem: &Memory<'mir, 'tcx, Self>,
_ptr: Pointer<Self::PointerTag>, _ptr: Pointer<Self::PointerTag>,
) -> InterpResult<'tcx, u64>; ) -> InterpResult<'tcx, u64>;
fn thread_local_alloc_id(
_ecx: &mut InterpCx<'mir, 'tcx, Self>,
did: DefId,
) -> InterpResult<'tcx, AllocId> {
throw_unsup!(ThreadLocalStatic(did))
}
} }
// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines // A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines

View file

@ -137,15 +137,33 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
} }
/// Call this to turn untagged "global" pointers (obtained via `tcx`) into /// Call this to turn untagged "global" pointers (obtained via `tcx`) into
/// the *canonical* machine pointer to the allocation. Must never be used /// the machine pointer to the allocation. Must never be used
/// for any other pointers! /// for any other pointers, nor for TLS statics.
/// ///
/// This represents a *direct* access to that memory, as opposed to access /// Using the resulting pointer represents a *direct* access to that memory
/// through a pointer that was created by the program. /// (e.g. by directly using a `static`),
/// as opposed to access through a pointer that was created by the program.
///
/// This function can fail only if `ptr` points to an `extern static`.
#[inline] #[inline]
pub fn tag_global_base_pointer(&self, ptr: Pointer) -> Pointer<M::PointerTag> { pub fn global_base_pointer(&self, mut ptr: Pointer) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
let id = M::canonical_alloc_id(self, ptr.alloc_id); // We need to handle `extern static`.
ptr.with_tag(M::tag_global_base_pointer(&self.extra, id)) let ptr = match self.tcx.get_global_alloc(ptr.alloc_id) {
Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => {
bug!("global memory cannot point to thread-local static")
}
Some(GlobalAlloc::Static(def_id)) if self.tcx.is_foreign_item(def_id) => {
ptr.alloc_id = M::extern_static_alloc_id(self, def_id)?;
ptr
}
_ => {
// No need to change the `AllocId`.
ptr
}
};
// And we need to get the tag.
let tag = M::tag_global_base_pointer(&self.extra, ptr.alloc_id);
Ok(ptr.with_tag(tag))
} }
pub fn create_fn_alloc( pub fn create_fn_alloc(
@ -162,7 +180,9 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
id id
} }
}; };
self.tag_global_base_pointer(Pointer::from(id)) // Functions are global allocations, so make sure we get the right base pointer.
// We know this is not an `extern static` so this cannmot fail.
self.global_base_pointer(Pointer::from(id)).unwrap()
} }
pub fn allocate( pub fn allocate(
@ -195,6 +215,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
M::GLOBAL_KIND.map(MemoryKind::Machine), M::GLOBAL_KIND.map(MemoryKind::Machine),
"dynamically allocating global memory" "dynamically allocating global memory"
); );
// This is a new allocation, not a new global one, so no `global_base_ptr`.
let (alloc, tag) = M::init_allocation_extra(&self.extra, id, Cow::Owned(alloc), Some(kind)); let (alloc, tag) = M::init_allocation_extra(&self.extra, id, Cow::Owned(alloc), Some(kind));
self.alloc_map.insert(id, (kind, alloc.into_owned())); self.alloc_map.insert(id, (kind, alloc.into_owned()));
Pointer::from(id).with_tag(tag) Pointer::from(id).with_tag(tag)
@ -437,6 +458,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
Some(GlobalAlloc::Function(..)) => throw_ub!(DerefFunctionPointer(id)), Some(GlobalAlloc::Function(..)) => throw_ub!(DerefFunctionPointer(id)),
None => throw_ub!(PointerUseAfterFree(id)), None => throw_ub!(PointerUseAfterFree(id)),
Some(GlobalAlloc::Static(def_id)) => { Some(GlobalAlloc::Static(def_id)) => {
assert!(tcx.is_static(def_id));
assert!(!tcx.is_thread_local_static(def_id)); assert!(!tcx.is_thread_local_static(def_id));
// Notice that every static has two `AllocId` that will resolve to the same // Notice that every static has two `AllocId` that will resolve to the same
// thing here: one maps to `GlobalAlloc::Static`, this is the "lazy" ID, // thing here: one maps to `GlobalAlloc::Static`, this is the "lazy" ID,
@ -448,24 +470,15 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
// The `GlobalAlloc::Memory` branch here is still reachable though; when a static // The `GlobalAlloc::Memory` branch here is still reachable though; when a static
// contains a reference to memory that was created during its evaluation (i.e., not // contains a reference to memory that was created during its evaluation (i.e., not
// to another static), those inner references only exist in "resolved" form. // to another static), those inner references only exist in "resolved" form.
//
// Assumes `id` is already canonical.
if tcx.is_foreign_item(def_id) { if tcx.is_foreign_item(def_id) {
trace!("get_global_alloc: foreign item {:?}", def_id); throw_unsup!(ReadExternStatic(def_id));
throw_unsup!(ReadForeignStatic(def_id))
} }
trace!("get_global_alloc: Need to compute {:?}", def_id); trace!("get_global_alloc: Need to compute {:?}", def_id);
let instance = Instance::mono(tcx, def_id); let instance = Instance::mono(tcx, def_id);
let gid = GlobalId { instance, promoted: None }; let gid = GlobalId { instance, promoted: None };
// Use the raw query here to break validation cycles. Later uses of the static // Use the raw query here to break validation cycles. Later uses of the static
// will call the full query anyway. // will call the full query anyway.
let raw_const = let raw_const = tcx.const_eval_raw(ty::ParamEnv::reveal_all().and(gid))?;
tcx.const_eval_raw(ty::ParamEnv::reveal_all().and(gid)).map_err(|err| {
// no need to report anything, the const_eval call takes care of that
// for statics
assert!(tcx.is_static(def_id));
err
})?;
// Make sure we use the ID of the resolved memory, not the lazy one! // Make sure we use the ID of the resolved memory, not the lazy one!
let id = raw_const.alloc_id; let id = raw_const.alloc_id;
let allocation = tcx.global_alloc(id).unwrap_memory(); let allocation = tcx.global_alloc(id).unwrap_memory();
@ -482,6 +495,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
alloc, alloc,
M::GLOBAL_KIND.map(MemoryKind::Machine), M::GLOBAL_KIND.map(MemoryKind::Machine),
); );
// Sanity check that this is the same pointer we would have gotten via `global_base_pointer`.
debug_assert_eq!(tag, M::tag_global_base_pointer(memory_extra, id)); debug_assert_eq!(tag, M::tag_global_base_pointer(memory_extra, id));
Ok(alloc) Ok(alloc)
} }
@ -492,7 +506,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
&self, &self,
id: AllocId, id: AllocId,
) -> InterpResult<'tcx, &Allocation<M::PointerTag, M::AllocExtra>> { ) -> InterpResult<'tcx, &Allocation<M::PointerTag, M::AllocExtra>> {
let id = M::canonical_alloc_id(self, id);
// The error type of the inner closure here is somewhat funny. We have two // The error type of the inner closure here is somewhat funny. We have two
// ways of "erroring": An actual error, or because we got a reference from // ways of "erroring": An actual error, or because we got a reference from
// `get_global_alloc` that we can actually use directly without inserting anything anywhere. // `get_global_alloc` that we can actually use directly without inserting anything anywhere.
@ -529,7 +542,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
&mut self, &mut self,
id: AllocId, id: AllocId,
) -> InterpResult<'tcx, &mut Allocation<M::PointerTag, M::AllocExtra>> { ) -> InterpResult<'tcx, &mut Allocation<M::PointerTag, M::AllocExtra>> {
let id = M::canonical_alloc_id(self, id);
let tcx = self.tcx; let tcx = self.tcx;
let memory_extra = &self.extra; let memory_extra = &self.extra;
let a = self.alloc_map.get_mut_or(id, || { let a = self.alloc_map.get_mut_or(id, || {
@ -568,7 +580,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
id: AllocId, id: AllocId,
liveness: AllocCheck, liveness: AllocCheck,
) -> InterpResult<'static, (Size, Align)> { ) -> InterpResult<'static, (Size, Align)> {
let id = M::canonical_alloc_id(self, id);
// # Regular allocations // # Regular allocations
// Don't use `self.get_raw` here as that will // Don't use `self.get_raw` here as that will
// a) cause cycles in case `id` refers to a static // a) cause cycles in case `id` refers to a static
@ -621,7 +632,6 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
} }
} }
/// Assumes `id` is already canonical.
fn get_fn_alloc(&self, id: AllocId) -> Option<FnVal<'tcx, M::ExtraFnVal>> { fn get_fn_alloc(&self, id: AllocId) -> Option<FnVal<'tcx, M::ExtraFnVal>> {
trace!("reading fn ptr: {}", id); trace!("reading fn ptr: {}", id);
if let Some(extra) = self.extra_fn_ptr_map.get(&id) { if let Some(extra) = self.extra_fn_ptr_map.get(&id) {
@ -642,8 +652,7 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
if ptr.offset.bytes() != 0 { if ptr.offset.bytes() != 0 {
throw_ub!(InvalidFunctionPointer(ptr.erase_tag())) throw_ub!(InvalidFunctionPointer(ptr.erase_tag()))
} }
let id = M::canonical_alloc_id(self, ptr.alloc_id); self.get_fn_alloc(ptr.alloc_id).ok_or_else(|| err_ub!(InvalidFunctionPointer(ptr.erase_tag())).into())
self.get_fn_alloc(id).ok_or_else(|| err_ub!(InvalidFunctionPointer(ptr.erase_tag())).into())
} }
pub fn mark_immutable(&mut self, id: AllocId) -> InterpResult<'tcx> { pub fn mark_immutable(&mut self, id: AllocId) -> InterpResult<'tcx> {

View file

@ -541,9 +541,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
val: &ty::Const<'tcx>, val: &ty::Const<'tcx>,
layout: Option<TyAndLayout<'tcx>>, layout: Option<TyAndLayout<'tcx>>,
) -> InterpResult<'tcx, OpTy<'tcx, M::PointerTag>> { ) -> InterpResult<'tcx, OpTy<'tcx, M::PointerTag>> {
let tag_scalar = |scalar| match scalar { let tag_scalar = |scalar| -> InterpResult<'tcx, _> {
Scalar::Ptr(ptr) => Scalar::Ptr(self.tag_global_base_pointer(ptr)), Ok(match scalar {
Scalar::Raw { data, size } => Scalar::Raw { data, size }, Scalar::Ptr(ptr) => Scalar::Ptr(self.global_base_pointer(ptr)?),
Scalar::Raw { data, size } => Scalar::Raw { data, size },
})
}; };
// Early-return cases. // Early-return cases.
let val_val = match val.val { let val_val = match val.val {
@ -570,10 +572,6 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
} }
ty::ConstKind::Value(val_val) => val_val, ty::ConstKind::Value(val_val) => val_val,
}; };
// This call allows the machine to create fresh allocation ids for
// thread-local statics (see the `adjust_global_const` function
// documentation).
let val_val = M::adjust_global_const(self, val_val)?;
// Other cases need layout. // Other cases need layout.
let layout = let layout =
from_known_layout(self.tcx, self.param_env, layout, || self.layout_of(val.ty))?; from_known_layout(self.tcx, self.param_env, layout, || self.layout_of(val.ty))?;
@ -582,10 +580,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let id = self.tcx.create_memory_alloc(alloc); let id = self.tcx.create_memory_alloc(alloc);
// We rely on mutability being set correctly in that allocation to prevent writes // We rely on mutability being set correctly in that allocation to prevent writes
// where none should happen. // where none should happen.
let ptr = self.tag_global_base_pointer(Pointer::new(id, offset)); let ptr = self.global_base_pointer(Pointer::new(id, offset))?;
Operand::Indirect(MemPlace::from_ptr(ptr, layout.align.abi)) Operand::Indirect(MemPlace::from_ptr(ptr, layout.align.abi))
} }
ConstValue::Scalar(x) => Operand::Immediate(tag_scalar(x).into()), ConstValue::Scalar(x) => Operand::Immediate(tag_scalar(x)?.into()),
ConstValue::Slice { data, start, end } => { ConstValue::Slice { data, start, end } => {
// We rely on mutability being set correctly in `data` to prevent writes // We rely on mutability being set correctly in `data` to prevent writes
// where none should happen. // where none should happen.
@ -594,7 +592,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
Size::from_bytes(start), // offset: `start` Size::from_bytes(start), // offset: `start`
); );
Operand::Immediate(Immediate::new_slice( Operand::Immediate(Immediate::new_slice(
self.tag_global_base_pointer(ptr).into(), self.global_base_pointer(ptr)?.into(),
u64::try_from(end.checked_sub(start).unwrap()).unwrap(), // len: `end - start` u64::try_from(end.checked_sub(start).unwrap()).unwrap(), // len: `end - start`
self, self,
)) ))

View file

@ -1126,7 +1126,7 @@ where
) -> InterpResult<'tcx, MPlaceTy<'tcx, M::PointerTag>> { ) -> InterpResult<'tcx, MPlaceTy<'tcx, M::PointerTag>> {
// This must be an allocation in `tcx` // This must be an allocation in `tcx`
let _ = self.tcx.global_alloc(raw.alloc_id); let _ = self.tcx.global_alloc(raw.alloc_id);
let ptr = self.tag_global_base_pointer(Pointer::from(raw.alloc_id)); let ptr = self.global_base_pointer(Pointer::from(raw.alloc_id))?;
let layout = self.layout_of(raw.ty)?; let layout = self.layout_of(raw.ty)?;
Ok(MPlaceTy::from_aligned_ptr(ptr, layout)) Ok(MPlaceTy::from_aligned_ptr(ptr, layout))
} }

View file

@ -141,8 +141,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
use rustc_middle::mir::Rvalue::*; use rustc_middle::mir::Rvalue::*;
match *rvalue { match *rvalue {
ThreadLocalRef(did) => { ThreadLocalRef(did) => {
let id = M::thread_local_alloc_id(self, did)?; let id = M::thread_local_static_alloc_id(self, did)?;
let val = Scalar::Ptr(self.tag_global_base_pointer(id.into())); let val = self.global_base_pointer(id.into())?;
self.write_scalar(val, dest)?; self.write_scalar(val, dest)?;
} }