Kernel: Implement and use the syscall/sysret instruction pair on x86_64

This commit is contained in:
Owen Smith 2021-07-23 21:52:25 +01:00 committed by Andreas Kling
parent d36c84c331
commit e6df1c9988
Notes: sideshowbarker 2024-07-17 22:02:32 +09:00
6 changed files with 169 additions and 1 deletions

View file

@ -483,10 +483,17 @@ int sync();
// Invokes a zero-argument system call; the syscall number goes in eax/rax and
// the kernel's return value comes back in the same register.
inline uintptr_t invoke(Function function)
{
uintptr_t result;
# if ARCH(I386)
// 32-bit: enter the kernel via software interrupt 0x82.
asm volatile("int $0x82"
: "=a"(result)
: "a"(function)
: "memory");
# else
// 64-bit: use the syscall instruction. The CPU saves the return rip in rcx and
// rflags in r11, so both must be declared as clobbers.
asm volatile("syscall"
: "=a"(result)
: "a"(function)
: "rcx", "r11", "memory");
# endif
return result;
}
@ -494,10 +501,17 @@ template<typename T1>
// Invokes a one-argument system call: number in eax/rax, arg1 in edx/rdx,
// result returned in eax/rax.
inline uintptr_t invoke(Function function, T1 arg1)
{
uintptr_t result;
# if ARCH(I386)
// 32-bit: enter the kernel via software interrupt 0x82.
asm volatile("int $0x82"
: "=a"(result)
: "a"(function), "d"((uintptr_t)arg1)
: "memory");
# else
// 64-bit: syscall overwrites rcx (saved rip) and r11 (saved rflags), hence the clobbers.
asm volatile("syscall"
: "=a"(result)
: "a"(function), "d"((uintptr_t)arg1)
: "rcx", "r11", "memory");
# endif
return result;
}
@ -505,10 +519,17 @@ template<typename T1, typename T2>
// Invokes a two-argument system call. Note the calling-convention difference:
// on i386 arg2 travels in ecx, but on x86_64 it travels in rdi, because the
// syscall instruction itself clobbers rcx (it holds the saved rip).
inline uintptr_t invoke(Function function, T1 arg1, T2 arg2)
{
uintptr_t result;
# if ARCH(I386)
asm volatile("int $0x82"
: "=a"(result)
: "a"(function), "d"((uintptr_t)arg1), "c"((uintptr_t)arg2)
: "memory");
# else
// arg2 in rdi ("D") instead of rcx; rcx/r11 are clobbered by syscall.
asm volatile("syscall"
: "=a"(result)
: "a"(function), "d"((uintptr_t)arg1), "D"((uintptr_t)arg2)
: "rcx", "r11", "memory");
# endif
return result;
}
@ -516,10 +537,17 @@ template<typename T1, typename T2, typename T3>
// Invokes a three-argument system call: args in edx/ecx/ebx on i386, and in
// rdx/rdi/rbx on x86_64 (rdi replaces rcx, which syscall clobbers).
inline uintptr_t invoke(Function function, T1 arg1, T2 arg2, T3 arg3)
{
uintptr_t result;
# if ARCH(I386)
asm volatile("int $0x82"
: "=a"(result)
: "a"(function), "d"((uintptr_t)arg1), "c"((uintptr_t)arg2), "b"((uintptr_t)arg3)
: "memory");
# else
// rcx/r11 are overwritten by the syscall instruction and must be listed as clobbers.
asm volatile("syscall"
: "=a"(result)
: "a"(function), "d"((uintptr_t)arg1), "D"((uintptr_t)arg2), "b"((uintptr_t)arg3)
: "rcx", "r11", "memory");
# endif
return result;
}
@ -527,10 +555,17 @@ template<typename T1, typename T2, typename T3, typename T4>
// Invokes a four-argument system call: args in edx/ecx/ebx/esi on i386, and in
// rdx/rdi/rbx/rsi on x86_64 (rdi replaces rcx, which syscall clobbers).
inline uintptr_t invoke(Function function, T1 arg1, T2 arg2, T3 arg3, T4 arg4)
{
uintptr_t result;
# if ARCH(I386)
asm volatile("int $0x82"
: "=a"(result)
: "a"(function), "d"((uintptr_t)arg1), "c"((uintptr_t)arg2), "b"((uintptr_t)arg3), "S"((uintptr_t)arg4)
: "memory");
# else
// The syscall instruction overwrites rcx (saved rip) and r11 (saved rflags), so they
// must be declared as clobbers here — matching the other 64-bit invoke() overloads.
// Omitting them would let the compiler keep live values in rcx/r11 across the syscall.
asm volatile("syscall"
: "=a"(result)
: "a"(function), "d"((uintptr_t)arg1), "D"((uintptr_t)arg2), "b"((uintptr_t)arg3), "S"((uintptr_t)arg4)
: "rcx", "r11", "memory");
# endif
return result;
}
# endif

View file

@ -30,6 +30,10 @@ struct ProcessorMessage;
struct ProcessorMessageEntry;
#if ARCH(X86_64)
# define MSR_EFER 0xc0000080
# define MSR_STAR 0xc0000081
# define MSR_LSTAR 0xc0000082
# define MSR_SFMASK 0xc0000084
# define MSR_FS_BASE 0xc0000100
# define MSR_GS_BASE 0xc0000101
#endif
@ -58,6 +62,11 @@ class Processor {
Processor* m_self;
#if ARCH(X86_64)
// Saved user stack for the syscall instruction.
void* m_user_stack;
#endif
DescriptorTablePointer m_gdtr;
Descriptor m_gdt[256];
u32 m_gdt_length;
@ -205,6 +214,17 @@ public:
static bool is_smp_enabled();
#if ARCH(X86_64)
// Byte offset of m_user_stack within Processor, for use from assembly:
// the syscall entry stub saves/loads the user rsp via %gs:user_stack_offset().
static constexpr u64 user_stack_offset()
{
return __builtin_offsetof(Processor, m_user_stack);
}
// Byte offset within Processor of the TSS rsp0 field (rsp0l presumably being the
// low dword — the 64-bit load at syscall entry reads the full rsp0 from here;
// TODO confirm against the TSS definition). Used from assembly via %gs:offset
// to fetch the kernel stack pointer.
static constexpr u64 kernel_stack_offset()
{
return __builtin_offsetof(Processor, m_tss) + __builtin_offsetof(TSS, rsp0l);
}
#endif
ALWAYS_INLINE static Processor& current()
{
return *(Processor*)read_gs_ptr(__builtin_offsetof(Processor, m_self));

View file

@ -110,9 +110,10 @@ struct [[gnu::packed]] RegisterState {
arg3 = ebx;
arg4 = esi;
#else
// The syscall instruction clobbers rcx, so we must use a different calling convention to 32-bit.
function = rax;
arg1 = rdx;
arg2 = rcx;
arg2 = rdi;
arg3 = rbx;
arg4 = rsi;
#endif

View file

@ -45,6 +45,7 @@ Atomic<u32> Processor::s_idle_cpu_mask { 0 };
extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
extern "C" FlatPtr do_init_context(Thread* thread, u32 flags) __attribute__((used));
extern "C" void syscall_entry();
bool Processor::is_smp_enabled()
{
@ -220,6 +221,28 @@ UNMAP_AFTER_INIT void Processor::cpu_setup()
write_xcr0(read_xcr0() | 0x7);
}
}
#if ARCH(X86_64)
// x86_64 processors must have the syscall feature.
VERIFY(has_feature(CPUFeature::SYSCALL));
MSR efer_msr(MSR_EFER);
efer_msr.set(efer_msr.get() | 1u);
// Write code and stack selectors to the STAR MSR. The first value stored in bits 63:48 controls the sysret CS (value + 0x10) and SS (value + 0x8),
// and the value stored in bits 47:32 controls the syscall CS (value) and SS (value + 0x8).
u64 star = 0;
star |= 0x13ul << 48u;
star |= 0x08ul << 32u;
MSR star_msr(MSR_STAR);
star_msr.set(star);
// Write the syscall entry point to the LSTAR MSR, and write the SFMASK MSR to clear rflags upon entry.
// The userspace rflags will be preserved in r11.
MSR lstar_msr(MSR_LSTAR);
MSR sfmask_msr(MSR_SFMASK);
lstar_msr.set(reinterpret_cast<u64>(&syscall_entry));
sfmask_msr.set(~0x2);
#endif
}
String Processor::features_string() const

View file

@ -0,0 +1,82 @@
/*
* Copyright (c) 2021, Owen Smith <yeeetari@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <Kernel/Arch/x86/DescriptorTable.h>
#include <Kernel/Arch/x86/Processor.h>
#include <Kernel/Arch/x86/TrapFrame.h>
extern "C" void syscall_entry();
// Entry point installed in the LSTAR MSR: the CPU jumps here on every `syscall`
// from userspace, with the return rip in rcx and the saved rflags in r11.
// [[gnu::naked]]: no compiler prologue/epilogue — we are still on the user stack
// when we arrive, so nothing may touch the stack before we switch it ourselves.
extern "C" [[gnu::naked]] void syscall_entry()
{
// clang-format off
asm(
// Store the user stack, then switch to the kernel stack.
// (Both slots are per-CPU, addressed through gs — interrupts must stay off
// until after the gs-relative accesses below.)
" movq %%rsp, %%gs:%c[user_stack] \n"
" movq %%gs:%c[kernel_stack], %%rsp \n"
// Build RegisterState.
" pushq $0x1b \n" // User ss
" pushq %%gs:%c[user_stack] \n" // User rsp
" sti \n" // It's now safe to enable interrupts, but we can't index into gs after this point
" pushq %%r11 \n" // The CPU preserves the user rflags in r11
" pushq $0x23 \n" // User cs
" pushq %%rcx \n" // The CPU preserves the user IP in rcx
" pushq $0 \n"
// Push the general-purpose registers in RegisterState order.
" pushq %%r15 \n"
" pushq %%r14 \n"
" pushq %%r13 \n"
" pushq %%r12 \n"
" pushq %%r11 \n"
" pushq %%r10 \n"
" pushq %%r9 \n"
" pushq %%r8 \n"
" pushq %%rax \n"
" pushq %%rcx \n"
" pushq %%rdx \n"
" pushq %%rbx \n"
" pushq %%rsp \n"
" pushq %%rbp \n"
" pushq %%rsi \n"
" pushq %%rdi \n"
// Build the TrapFrame on top: the regs pointer plus the remaining fields.
" pushq %%rsp \n" // TrapFrame::regs
" subq $" __STRINGIFY(TRAP_FRAME_SIZE - 8) ", %%rsp \n"
// Each helper takes a TrapFrame* in rdi (SysV first argument).
" movq %%rsp, %%rdi \n"
" call enter_trap_no_irq \n"
" movq %%rsp, %%rdi \n"
" call syscall_handler \n"
" movq %%rsp, %%rdi \n"
" call exit_trap \n"
" addq $" __STRINGIFY(TRAP_FRAME_SIZE) ", %%rsp \n" // Pop TrapFrame
// Restore registers from RegisterState (reverse of the pushes above).
" popq %%rdi \n"
" popq %%rsi \n"
" popq %%rbp \n"
" addq $8, %%rsp \n" // Skip restoring kernel rsp
" popq %%rbx \n"
" popq %%rdx \n"
" popq %%rcx \n"
" popq %%rax \n"
" popq %%r8 \n"
" popq %%r9 \n"
" popq %%r10 \n"
" popq %%r11 \n" // r11 gets the saved user rflags back; sysretq restores rflags from it
" popq %%r12 \n"
" popq %%r13 \n"
" popq %%r14 \n"
" popq %%r15 \n"
// Skip the zero slot pushed after the user rip (presumably an error-code/
// padding slot in RegisterState — TODO confirm against the struct layout).
" addq $8, %%rsp \n"
" popq %%rcx \n" // sysretq restores the user rip from rcx
" addq $16, %%rsp \n" // Skip the saved user cs and the duplicate rflags slot
// Disable interrupts before we restore the user stack pointer. sysret will re-enable interrupts when it restores
// rflags.
" cli \n"
" popq %%rsp \n"
" sysretq \n"
:: [user_stack] "i"(Kernel::Processor::user_stack_offset()), [kernel_stack] "i"(Kernel::Processor::kernel_stack_offset()));
// clang-format on
}

View file

@ -311,6 +311,13 @@ if ("${SERENITY_ARCH}" STREQUAL "i686" OR "${SERENITY_ARCH}" STREQUAL "x86_64")
${CMAKE_CURRENT_SOURCE_DIR}/Arch/x86/common/SafeMem.cpp
${CMAKE_CURRENT_SOURCE_DIR}/Arch/x86/common/TrapFrame.cpp
)
# The syscall entry stub is x86_64-only (it relies on the syscall/sysret
# instruction pair), so only add it when building for that architecture.
if("${SERENITY_ARCH}" STREQUAL "x86_64")
set(KERNEL_SOURCES
${KERNEL_SOURCES}
${CMAKE_CURRENT_SOURCE_DIR}/Arch/x86/${KERNEL_ARCH}/SyscallEntry.cpp
)
endif()
endif()
set(AK_SOURCES