rt: Add lots of documentation to __morestack

This commit is contained in:
Brian Anderson 2011-12-18 02:02:35 -08:00
parent 7359fa422b
commit 1a1fdf34b1
2 changed files with 138 additions and 41 deletions

View file

@ -1,9 +1,70 @@
.text
/*
__morestack
// __morestack
//
// LLVM generates a call to this to allocate more stack space in a function
// prolog when we run out.
This function implements stack growth using the mechanism
devised by Ian Lance Taylor for gccgo, described here:
http://gcc.gnu.org/wiki/SplitStacks
The Rust stack is composed of a linked list of stack segments,
and each stack segment contains two parts: the work area,
where Rust functions are allowed to execute; and the red zone,
where no Rust code can execute, but where short runtime
functions (including __morestack), the dynamic linker, signal
handlers, and the unwinder can run.
Each Rust function contains an LLVM-generated prologue that
compares the stack space required for the current function to
the space remaining in the current stack segment,
maintained in a platform-specific TLS slot. The stack limit
is strategically maintained by the Rust runtime so that it is
always in place whenever a Rust function is running.
When there is not enough room to run the function, the function
prologue makes a call to __morestack to allocate a new stack
segment, copy any stack-based arguments to it, switch stacks,
then resume execution of the original function.
-- The __morestack calling convention --
For reasons of efficiency the __morestack calling convention
is bizarre. The calling function does not attempt to align the
stack for the call, and on x86_64 the arguments to __morestack
are passed in scratch registers in order to preserve the
original function's arguments.
Once __morestack has switched to the new stack, instead of
returning, it then calls into the original function, resuming
execution at the instruction following the call to
__morestack. Thus, when the original function returns it
actually returns to __morestack, which then deallocates the
stack and returns again to the original function's caller.
-- Unwinding --
All this trickery causes hell when it comes time for the
unwinder to navigate its way through this function. What
will happen is the original function will be unwound first
without any special effort, then the unwinder encounters
the __morestack frame, which is sitting just above a
tiny fraction of a frame (containing just a return pointer
and, on 32-bit, the arguments to __morestack).
We deal with this by claiming that that little bit of stack
is actually part of the __morestack frame, encoded as
DWARF call frame instructions (CFI) by .cfi assembler
pseudo-ops.
One final complication (that took me a week to figure out)
is that OS X 10.6+ uses its own 'compact unwind info',
an undocumented format generated by the linker from
the DWARF CFI. This compact unwind info doesn't correctly
capture the nuance of the __morestack frame, and as a
result all of our linking on OS X uses the -no_compact_unwind
flag.
*/
.text
#if defined(__APPLE__)
#define RUST_GET_TASK L_rust_get_task$stub
@ -51,13 +112,31 @@ MORESTACK:
.cfi_startproc
#endif
// This base pointer setup differs from most in that we are
// telling the unwinder to consider the Canonical Frame
// Address (CFA) for this frame to be the value of the stack
// pointer prior to entry to the original function, whereas
// the CFA would typically be the value of the stack
// pointer prior to entry to this function. This will allow
// the unwinder to understand how to skip the tiny partial
// frame that the original function created by calling
// __morestack.
// In practical terms, our CFA is 12 bytes greater than it
// would normally be, accounting for the two arguments to
// __morestack, and an extra return address.
pushl %ebp
#if defined(__linux__) || defined(__APPLE__)
// The CFA is 20 bytes above the register that it is
// associated with for this frame (which will be %ebp)
.cfi_def_cfa_offset 20
// %ebp is -20 bytes from the CFA
.cfi_offset %ebp, -20
#endif
movl %esp, %ebp
#if defined(__linux__) || defined(__APPLE__)
// Calculate the CFA as an offset from %ebp
.cfi_def_cfa_register %ebp
#endif
@ -81,17 +160,25 @@ MORESTACK:
// Save the correct %esp value for our grandparent frame,
// for the unwinder
// FIXME: This isn't used
leal 20(%ebp), %eax
movl %eax, -4(%ebp)
// The arguments to rust_new_stack2
movl 56(%esp),%eax // Size of stack arguments
// The arguments to upcall_new_stack
// The size of the stack arguments to copy to the new stack,
// one of the arguments to __morestack
movl 56(%esp),%eax
movl %eax,20(%esp)
leal 64(%esp),%eax // Address of stack arguments
// The address of the stack arguments to the original function
leal 64(%esp),%eax
movl %eax,16(%esp)
// The amount of stack needed for the original function,
// the other argument to __morestack
movl 52(%esp),%eax // The amount of stack needed
movl %eax,12(%esp)
movl $0, 8(%esp) // Out pointer
// Out pointer to the new stack
movl $0, 8(%esp)
#ifdef __APPLE__
call 1f
@ -106,18 +193,22 @@ MORESTACK:
movl %eax,(%esp)
call UPCALL_CALL_C
movl 48(%esp),%eax // Grab the return pointer.
inc %eax // Skip past the ret instruction in the parent fn
// Grab the __morestack return pointer
movl 48(%esp),%eax
// Skip past the ret instruction in the parent fn
inc %eax
// Restore fastcc arguments
// Restore the fastcc arguments to the original function
movl 28(%esp), %ecx
movl 24(%esp), %edx
movl 8(%esp),%esp // Switch stacks.
call *%eax // Re-enter the function that called us.
// Switch stacks
movl 8(%esp),%esp
// Re-enter the function that called us
call *%eax
// Now the function that called us has returned, so we need to delete the
// old stack space.
// Now the function that called us has returned, so we need to
// delete the old stack space
// Switch back to the rust stack
movl %ebp, %esp
@ -127,8 +218,8 @@ MORESTACK:
subl $4, %esp
// Now that we're on the return path we want to avoid
// stomping on %eax. FIXME: Need to save and restore
// eax to actually preserve it across the call to delete the stack
// stomping on %eax. FIXME: Need to save and restore %eax to
// actually preserve it across the call to delete the stack
#ifdef __APPLE__
call 1f
1: popl %ecx
@ -144,8 +235,14 @@ MORESTACK:
addl $12,%esp
popl %ebp
// FIXME: I don't think these rules are necessary
// since the unwinder should never encounter an instruction
// pointer pointing here.
#if defined(__linux__) || defined(__APPLE__)
// Restore the rule for how to find %ebp
.cfi_restore %ebp
// Tell the unwinder how to find the CFA in terms of %esp
.cfi_def_cfa %esp, 16
#endif
retl $8

View file

@ -1,9 +1,10 @@
.text
/*
__morestack
// __morestack
//
// LLVM generates a call to this to allocate more stack space in a function
// prolog when we run out.
See i386/morestack.S for the lengthy, general explanation.
*/
.text
#if defined(__APPLE__) || defined(_WIN32)
#define UPCALL_NEW_STACK _upcall_new_stack
@ -15,19 +16,6 @@
#define UPCALL_DEL_STACK upcall_del_stack
#define UPCALL_CALL_C upcall_call_shim_on_c_stack
#define MORESTACK __morestack
#endif
// Naturally, nobody can agree as to
// which arguments should go in which
// registers:
#if defined(_WIN32)
# define ARG0 %rcx
# define ARG1 %rdx
# define ARG2 %r8
#else
# define ARG0 %rdi
# define ARG1 %rsi
# define ARG2 %rdx
#endif
.globl UPCALL_NEW_STACK
@ -35,7 +23,6 @@
.globl UPCALL_CALL_C
.globl MORESTACK
// FIXME: What about _WIN32?
#if defined(__linux__)
.hidden MORESTACK
#else
@ -48,24 +35,31 @@
.type MORESTACK,@function
#endif
#if defined(__linux__) || defined(__APPLE__)
MORESTACK:
.cfi_startproc
// Set up a normal backtrace
pushq %rbp
// The CFA is 24 bytes above the register that it will
// be associated with for this frame (%rbp). That is 8
// bytes greater than a normal frame, to allow the unwinder
// to skip the partial frame of the original function.
.cfi_def_cfa_offset 24
// %rbp is -24 bytes from the CFA
.cfi_offset %rbp, -24
movq %rsp, %rbp
// Calculate the CFA as an offset from %rbp
.cfi_def_cfa_register %rbp
// Save the grandparent stack pointer for the unwinder
// FIXME: This isn't used
leaq 24(%rbp), %rax
pushq %rax
// FIXME: libgcc also saves rax. not sure if we need to
// Save argument registers
// Save argument registers of the original function
pushq %rdi
pushq %rsi
pushq %rdx
@ -79,6 +73,8 @@ MORESTACK:
movq %rbp, %rcx
addq $24, %rcx // Base pointer, return address x2
// The arguments to __morestack are passed in %r10 & %r11
pushq %r11 // Size of stack arguments
pushq %rcx // Address of stack arguments
pushq %r10 // The amount of stack needed
@ -119,7 +115,8 @@ MORESTACK:
// Align the stack again
pushq $0
// FIXME: Should preserve %rax here
movq UPCALL_DEL_STACK@GOTPCREL(%rip), %rsi
movq $0, %rdi
#ifdef __APPLE__
@ -131,6 +128,9 @@ MORESTACK:
addq $8, %rsp
popq %rbp
// FIXME: I don't think these rules are necessary
// since the unwinder should never encounter an instruction
// pointer pointing here.
.cfi_restore %rbp
.cfi_def_cfa %rsp, 16
ret