updating to mainline 4.14.11

This commit is contained in:
Jake Day 2018-01-05 19:48:46 -05:00
parent 1190c7714e
commit 8b09af0093
173 changed files with 2571 additions and 673 deletions

3
config
View file

@ -1,6 +1,6 @@
#
# Automatically generated file; DO NOT EDIT.
# Linux/x86_64 4.14.10-jakeday Kernel Configuration
# Linux/x86_64 4.14.11-jakeday Kernel Configuration
#
CONFIG_64BIT=y
CONFIG_X86_64=y
@ -8945,6 +8945,7 @@ CONFIG_SECURITY=y
# CONFIG_SECURITY_WRITABLE_HOOKS is not set
CONFIG_SECURITYFS=y
CONFIG_SECURITY_NETWORK=y
CONFIG_PAGE_TABLE_ISOLATION=y
# CONFIG_SECURITY_INFINIBAND is not set
CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_PATH=y

View file

@ -2685,6 +2685,8 @@
steal time is computed, but won't influence scheduler
behaviour
nopti [X86-64] Disable kernel page table isolation
nolapic [X86-32,APIC] Do not enable or use the local APIC.
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
@ -3253,6 +3255,12 @@
pt. [PARIDE]
See Documentation/blockdev/paride.txt.
pti= [X86_64]
Control user/kernel address space isolation:
on - enable
off - disable
auto - default setting
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.

View file

@ -12,6 +12,7 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
... unused hole ...
fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
... unused hole ...
@ -29,8 +30,8 @@ Virtual memory map with 5 level page tables:
hole caused by [56:63] sign extension
ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
ff90000000000000 - ff91ffffffffffff (=49 bits) hole
ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
... unused hole ...

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 14
SUBLEVEL = 10
SUBLEVEL = 11
EXTRAVERSION =
NAME = Petit Gorille
@ -802,6 +802,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
# Make sure -fstack-check isn't enabled (like gentoo apparently did)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)
# conserve stack if available
KBUILD_CFLAGS += $(call cc-option,-fconserve-stack)

View file

@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
.previous
ENTRY(__arch_hweight64)
sethi %hi(__sw_hweight16), %g1
jmpl %g1 + %lo(__sw_hweight16), %g0
sethi %hi(__sw_hweight64), %g1
jmpl %g1 + %lo(__sw_hweight64), %g0
nop
ENDPROC(__arch_hweight64)
EXPORT_SYMBOL(__arch_hweight64)

View file

@ -23,6 +23,9 @@
*/
#undef CONFIG_AMD_MEM_ENCRYPT
/* No PAGE_TABLE_ISOLATION support needed either: */
#undef CONFIG_PAGE_TABLE_ISOLATION
#include "misc.h"
/* These actually do the work of building the kernel identity maps. */

View file

@ -1,6 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
/*
@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
#endif
.endm
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
* halves:
*/
#define PTI_SWITCH_PGTABLES_MASK (1<<PAGE_SHIFT)
#define PTI_SWITCH_MASK (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
.macro SET_NOFLUSH_BIT reg:req
bts $X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm
.macro ADJUST_KERNEL_CR3 reg:req
ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
andq $(~PTI_SWITCH_MASK), \reg
.endm
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
mov %cr3, \scratch_reg
ADJUST_KERNEL_CR3 \scratch_reg
mov \scratch_reg, %cr3
.Lend_\@:
.endm
#define THIS_CPU_user_pcid_flush_mask \
PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
mov %cr3, \scratch_reg
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/*
* Test if the ASID needs a flush.
*/
movq \scratch_reg, \scratch_reg2
andq $(0x7FF), \scratch_reg /* mask ASID */
bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
jnc .Lnoflush_\@
/* Flush needed, clear the bit */
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
movq \scratch_reg2, \scratch_reg
jmp .Lwrcr3_\@
.Lnoflush_\@:
movq \scratch_reg2, \scratch_reg
SET_NOFLUSH_BIT \scratch_reg
.Lwrcr3_\@:
/* Flip the PGD and ASID to the user version */
orq $(PTI_SWITCH_MASK), \scratch_reg
mov \scratch_reg, %cr3
.Lend_\@:
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
pushq %rax
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
popq %rax
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
movq %cr3, \scratch_reg
movq \scratch_reg, \save_reg
/*
* Is the "switch mask" all zero? That means that both of
* these are zero:
*
* 1. The user/kernel PCID bit, and
* 2. The user/kernel "bit" that points CR3 to the
* bottom half of the 8k PGD
*
* That indicates a kernel CR3 value, not a user CR3.
*/
testq $(PTI_SWITCH_MASK), \scratch_reg
jz .Ldone_\@
ADJUST_KERNEL_CR3 \scratch_reg
movq \scratch_reg, %cr3
.Ldone_\@:
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
/*
* KERNEL pages can always resume with NOFLUSH as we do
* explicit flushes.
*/
bt $X86_CR3_PTI_SWITCH_BIT, \save_reg
jnc .Lnoflush_\@
/*
* Check if there's a pending flush for the user ASID we're
* about to set.
*/
movq \save_reg, \scratch_reg
andq $(0x7FF), \scratch_reg
bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
jnc .Lnoflush_\@
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
jmp .Lwrcr3_\@
.Lnoflush_\@:
SET_NOFLUSH_BIT \save_reg
.Lwrcr3_\@:
/*
* The CR3 write could be avoided when not changing its value,
* but would require a CR3 read *and* a scratch register.
*/
movq \save_reg, %cr3
.Lend_\@:
.endm
#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm
#endif
#endif /* CONFIG_X86_64 */
/*

View file

@ -23,7 +23,6 @@
#include <asm/segment.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/msr.h>
#include <asm/unistd.h>
@ -40,6 +39,8 @@
#include <asm/frame.h>
#include <linux/err.h>
#include "calling.h"
.code64
.section .entry.text, "ax"
@ -164,6 +165,9 @@ ENTRY(entry_SYSCALL_64_trampoline)
/* Stash the user RSP. */
movq %rsp, RSP_SCRATCH
/* Note: using %rsp as a scratch reg. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
/* Load the top of the task stack into RSP */
movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
@ -203,6 +207,10 @@ ENTRY(entry_SYSCALL_64)
*/
swapgs
/*
* This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
* is not required to switch CR3.
*/
movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@ -399,6 +407,7 @@ syscall_return_via_sysret:
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
*/
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
popq %rdi
popq %rsp
@ -736,6 +745,8 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
* We can do future final exit work right here.
*/
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
/* Restore RDI. */
popq %rdi
SWAPGS
@ -818,7 +829,9 @@ native_irq_return_ldt:
*/
pushq %rdi /* Stash user RDI */
SWAPGS
SWAPGS /* to kernel GS */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* user RAX */
movq (1*8)(%rsp), %rax /* user RIP */
@ -834,7 +847,6 @@ native_irq_return_ldt:
/* Now RAX == RSP. */
andl $0xffff0000, %eax /* RAX = (RSP & 0xffff0000) */
popq %rdi /* Restore user RDI */
/*
* espfix_stack[31:16] == 0. The page tables are set up such that
@ -845,7 +857,11 @@ native_irq_return_ldt:
* still points to an RO alias of the ESPFIX stack.
*/
orq PER_CPU_VAR(espfix_stack), %rax
SWAPGS
SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
SWAPGS /* to user GS */
popq %rdi /* Restore user RDI */
movq %rax, %rsp
UNWIND_HINT_IRET_REGS offset=8
@ -945,6 +961,8 @@ ENTRY(switch_to_thread_stack)
UNWIND_HINT_FUNC
pushq %rdi
/* Need to switch before accessing the thread stack. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
@ -1244,7 +1262,11 @@ ENTRY(paranoid_entry)
js 1f /* negative -> in kernel */
SWAPGS
xorl %ebx, %ebx
1: ret
1:
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
ret
END(paranoid_entry)
/*
@ -1266,6 +1288,7 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz .Lparanoid_exit_no_swapgs
TRACE_IRQS_IRETQ
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
jmp .Lparanoid_exit_restore
.Lparanoid_exit_no_swapgs:
@ -1293,6 +1316,8 @@ ENTRY(error_entry)
* from user mode due to an IRET fault.
*/
SWAPGS
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
.Lerror_entry_from_usermode_after_swapgs:
/* Put us onto the real thread stack. */
@ -1339,6 +1364,7 @@ ENTRY(error_entry)
* .Lgs_change's error handler with kernel gsbase.
*/
SWAPGS
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
jmp .Lerror_entry_done
.Lbstep_iret:
@ -1348,10 +1374,11 @@ ENTRY(error_entry)
.Lerror_bad_iret:
/*
* We came from an IRET to user mode, so we have user gsbase.
* Switch to kernel gsbase:
* We came from an IRET to user mode, so we have user
* gsbase and CR3. Switch to kernel gsbase and CR3:
*/
SWAPGS
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
/*
* Pretend that the exception came from user mode: set up pt_regs
@ -1383,6 +1410,10 @@ END(error_exit)
/*
* Runs on exception stack. Xen PV does not go through this path at all,
* so we can use real assembly here.
*
* Registers:
* %r14: Used to save/restore the CR3 of the interrupted context
* when PAGE_TABLE_ISOLATION is in use. Do not clobber.
*/
ENTRY(nmi)
UNWIND_HINT_IRET_REGS
@ -1446,6 +1477,7 @@ ENTRY(nmi)
swapgs
cld
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
UNWIND_HINT_IRET_REGS base=%rdx offset=8
@ -1698,6 +1730,8 @@ end_repeat_nmi:
movq $-1, %rsi
call do_nmi
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
testl %ebx, %ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:

View file

@ -49,6 +49,10 @@
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
SWAPGS
/* We are about to clobber %rsp anyway, clobbering here is OK */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
@ -215,6 +219,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq $0 /* pt_regs->r14 = 0 */
pushq $0 /* pt_regs->r15 = 0 */
/*
* We just saved %rdi so it is safe to clobber. It is not
* preserved during the C calls inside TRACE_IRQS_OFF anyway.
*/
SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
/*
* User mode is traced as though IRQs are on, and SYSENTER
* turned them off.
@ -256,10 +266,22 @@ sysret32_from_system_call:
* when the system call started, which is already known to user
* code. We zero R8-R10 to avoid info leaks.
*/
movq RSP-ORIG_RAX(%rsp), %rsp
/*
* The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
* on the process stack which is not mapped to userspace and
* not readable after we SWITCH_TO_USER_CR3. Delay the CR3
* switch until after after the last reference to the process
* stack.
*
* %r8/%r9 are zeroed before the sysret, thus safe to clobber.
*/
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
xorq %r8, %r8
xorq %r9, %r9
xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp
swapgs
sysretl
END(entry_SYSCALL_compat)

View file

@ -344,14 +344,14 @@ int in_gate_area_no_mm(unsigned long addr)
* vsyscalls but leave the page not present. If so, we skip calling
* this.
*/
static void __init set_vsyscall_pgtable_user_bits(void)
void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
{
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset_k(VSYSCALL_ADDR);
pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
#if CONFIG_PGTABLE_LEVELS >= 5
@ -373,7 +373,7 @@ void __init map_vsyscall(void)
vsyscall_mode == NATIVE
? PAGE_KERNEL_VSYSCALL
: PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits();
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=

View file

@ -3,16 +3,18 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/insn.h>
#include "../perf_event.h"
/* Waste a full page so it can be mapped into the cpu_entry_area */
DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_FIXUP_SIZE PAGE_SIZE
/*
@ -279,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
static DEFINE_PER_CPU(void *, insn_buffer);
static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
phys_addr_t pa;
size_t msz = 0;
pa = virt_to_phys(addr);
for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, pa, prot);
}
static void ds_clear_cea(void *cea, size_t size)
{
size_t msz = 0;
for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
cea_set_pte(cea, 0, PAGE_NONE);
}
static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
{
unsigned int order = get_order(size);
int node = cpu_to_node(cpu);
struct page *page;
page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
return page ? page_address(page) : NULL;
}
static void dsfree_pages(const void *buffer, size_t size)
{
if (buffer)
free_pages((unsigned long)buffer, get_order(size));
}
static int alloc_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max;
void *buffer, *ibuffer;
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
size_t bsiz = x86_pmu.pebs_buffer_size;
int max, node = cpu_to_node(cpu);
void *buffer, *ibuffer, *cea;
if (!x86_pmu.pebs)
return 0;
buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
if (unlikely(!buffer))
return -ENOMEM;
@ -300,25 +337,27 @@ static int alloc_pebs_buffer(int cpu)
if (x86_pmu.intel_cap.pebs_format < 2) {
ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
if (!ibuffer) {
kfree(buffer);
dsfree_pages(buffer, bsiz);
return -ENOMEM;
}
per_cpu(insn_buffer, cpu) = ibuffer;
}
max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
hwev->ds_pebs_vaddr = buffer;
/* Update the cpu entry area mapping */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
ds->pebs_buffer_base = (unsigned long) cea;
ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
ds->pebs_index = ds->pebs_buffer_base;
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
max * x86_pmu.pebs_record_size;
max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
return 0;
}
static void release_pebs_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
void *cea;
if (!ds || !x86_pmu.pebs)
return;
@ -326,73 +365,70 @@ static void release_pebs_buffer(int cpu)
kfree(per_cpu(insn_buffer, cpu));
per_cpu(insn_buffer, cpu) = NULL;
kfree((void *)(unsigned long)ds->pebs_buffer_base);
/* Clear the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
ds->pebs_buffer_base = 0;
dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
hwev->ds_pebs_vaddr = NULL;
}
static int alloc_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
int node = cpu_to_node(cpu);
int max, thresh;
void *buffer;
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
void *buffer, *cea;
int max;
if (!x86_pmu.bts)
return 0;
buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
if (unlikely(!buffer)) {
WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
return -ENOMEM;
}
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
thresh = max / 16;
ds->bts_buffer_base = (u64)(unsigned long)buffer;
hwev->ds_bts_vaddr = buffer;
/* Update the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
ds->bts_buffer_base = (unsigned long) cea;
ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
ds->bts_index = ds->bts_buffer_base;
ds->bts_absolute_maximum = ds->bts_buffer_base +
max * BTS_RECORD_SIZE;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
thresh * BTS_RECORD_SIZE;
max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
ds->bts_absolute_maximum = ds->bts_buffer_base + max;
ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
return 0;
}
static void release_bts_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
struct debug_store *ds = hwev->ds;
void *cea;
if (!ds || !x86_pmu.bts)
return;
kfree((void *)(unsigned long)ds->bts_buffer_base);
/* Clear the fixmap */
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
ds_clear_cea(cea, BTS_BUFFER_SIZE);
ds->bts_buffer_base = 0;
dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
hwev->ds_bts_vaddr = NULL;
}
static int alloc_ds_buffer(int cpu)
{
int node = cpu_to_node(cpu);
struct debug_store *ds;
ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
if (unlikely(!ds))
return -ENOMEM;
struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
memset(ds, 0, sizeof(*ds));
per_cpu(cpu_hw_events, cpu).ds = ds;
return 0;
}
static void release_ds_buffer(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds)
return;
per_cpu(cpu_hw_events, cpu).ds = NULL;
kfree(ds);
}
void release_ds_buffers(void)

View file

@ -14,6 +14,8 @@
#include <linux/perf_event.h>
#include <asm/intel_ds.h>
/* To enable MSR tracing please use the generic trace points. */
/*
@ -77,8 +79,6 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
#define PEBS_COUNTER_MASK ((1ULL << MAX_PEBS_EVENTS) - 1)
/*
@ -95,23 +95,6 @@ struct amd_nb {
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
/*
* A debug store configuration.
*
* We only support architectures that use 64bit fields.
*/
struct debug_store {
u64 bts_buffer_base;
u64 bts_index;
u64 bts_absolute_maximum;
u64 bts_interrupt_threshold;
u64 pebs_buffer_base;
u64 pebs_index;
u64 pebs_absolute_maximum;
u64 pebs_interrupt_threshold;
u64 pebs_event_reset[MAX_PEBS_EVENTS];
};
#define PEBS_REGS \
(PERF_REG_X86_AX | \
PERF_REG_X86_BX | \
@ -216,6 +199,8 @@ struct cpu_hw_events {
* Intel DebugStore bits
*/
struct debug_store *ds;
void *ds_pebs_vaddr;
void *ds_bts_vaddr;
u64 pebs_enabled;
int n_pebs;
int n_large_pebs;

View file

@ -5,6 +5,7 @@
#include <linux/percpu-defs.h>
#include <asm/processor.h>
#include <asm/intel_ds.h>
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
@ -40,6 +41,18 @@ struct cpu_entry_area {
*/
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
#endif
#ifdef CONFIG_CPU_SUP_INTEL
/*
* Per CPU debug store for Intel performance monitoring. Wastes a
* full page at the moment.
*/
struct debug_store cpu_debug_store;
/*
* The actual PEBS/BTS buffers must be mapped to user space
* Reserve enough fixmap PTEs.
*/
struct debug_store_buffers cpu_debug_buffers;
#endif
};
#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))

View file

@ -197,11 +197,12 @@
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
@ -340,5 +341,6 @@
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#define X86_BUG_CPU_INSECURE X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
#endif /* _ASM_X86_CPUFEATURES_H */

View file

@ -21,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
desc->type = (info->read_exec_only ^ 1) << 1;
desc->type |= info->contents << 2;
/* Set the ACCESS bit so it can be mapped RO */
desc->type |= 1;
desc->s = 1;
desc->dpl = 0x3;

View file

@ -44,6 +44,12 @@
# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
#endif
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define DISABLE_PTI 0
#else
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
/*
* Make sure to add features to the correct mask
*/
@ -54,7 +60,7 @@
#define DISABLED_MASK4 (DISABLE_PCID)
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 0
#define DISABLED_MASK7 (DISABLE_PTI)
#define DISABLED_MASK8 0
#define DISABLED_MASK9 (DISABLE_MPX)
#define DISABLED_MASK10 0

View file

@ -0,0 +1,36 @@
#ifndef _ASM_INTEL_DS_H
#define _ASM_INTEL_DS_H
#include <linux/percpu-defs.h>
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
#define PEBS_BUFFER_SIZE (PAGE_SIZE << 4)
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
/*
* A debug store configuration.
*
* We only support architectures that use 64bit fields.
*/
struct debug_store {
u64 bts_buffer_base;
u64 bts_index;
u64 bts_absolute_maximum;
u64 bts_interrupt_threshold;
u64 pebs_buffer_base;
u64 pebs_index;
u64 pebs_absolute_maximum;
u64 pebs_interrupt_threshold;
u64 pebs_event_reset[MAX_PEBS_EVENTS];
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
struct debug_store_buffers {
char bts_buffer[BTS_BUFFER_SIZE];
char pebs_buffer[PEBS_BUFFER_SIZE];
};
#endif

View file

@ -50,10 +50,33 @@ struct ldt_struct {
* call gates. On native, we could merge the ldt_struct and LDT
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
unsigned int nr_entries;
struct desc_struct *entries;
unsigned int nr_entries;
/*
* If PTI is in use, then the entries array is not mapped while we're
* in user mode. The whole array will be aliased at the addressed
* given by ldt_slot_va(slot). We use two slots so that we can allocate
* and map, and enable a new LDT without invalidating the mapping
* of an older, still-in-use LDT.
*
* slot will be -1 if this LDT doesn't have an alias mapping.
*/
int slot;
};
/* This is a multiple of PAGE_SIZE. */
#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
static inline void *ldt_slot_va(int slot)
{
#ifdef CONFIG_X86_64
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
#else
BUG();
#endif
}
/*
* Used for LDT copy/destruction.
*/
@ -64,6 +87,7 @@ static inline void init_new_context_ldt(struct mm_struct *mm)
}
int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
void ldt_arch_exit_mmap(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
static inline void init_new_context_ldt(struct mm_struct *mm) { }
static inline int ldt_dup_context(struct mm_struct *oldmm,
@ -71,7 +95,8 @@ static inline int ldt_dup_context(struct mm_struct *oldmm,
{
return 0;
}
static inline void destroy_context_ldt(struct mm_struct *mm) {}
static inline void destroy_context_ldt(struct mm_struct *mm) { }
static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
#endif
static inline void load_mm_ldt(struct mm_struct *mm)
@ -96,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
* that we can see.
*/
if (unlikely(ldt))
set_ldt(ldt->entries, ldt->nr_entries);
else
if (unlikely(ldt)) {
if (static_cpu_has(X86_FEATURE_PTI)) {
if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
/*
* Whoops -- either the new LDT isn't mapped
* (if slot == -1) or is mapped into a bogus
* slot (if slot > 1).
*/
clear_LDT();
return;
}
/*
* If page table isolation is enabled, ldt->entries
* will not be mapped in the userspace pagetables.
* Tell the CPU to access the LDT through the alias
* at ldt_slot_va(ldt->slot).
*/
set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
} else {
set_ldt(ldt->entries, ldt->nr_entries);
}
} else {
clear_LDT();
}
#else
clear_LDT();
#endif
@ -194,6 +240,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
static inline void arch_exit_mmap(struct mm_struct *mm)
{
paravirt_arch_exit_mmap(mm);
ldt_arch_exit_mmap(mm);
}
#ifdef CONFIG_X86_64

View file

@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
*/
extern gfp_t __userpte_alloc_gfp;
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* Instead of one PGD, we acquire two PGDs. Being order-1, it is
* both 8k in size and 8k-aligned. That lets us just flip bit 12
* in a pointer to swap between the two 4k halves.
*/
#define PGD_ALLOCATION_ORDER 1
#else
#define PGD_ALLOCATION_ORDER 0
#endif
/*
* Allocate and free page tables.
*/

View file

@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
void ptdump_walk_pgd_level_checkwx(void);
#ifdef CONFIG_DEBUG_WX
@ -846,7 +847,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
static inline int p4d_bad(p4d_t p4d)
{
return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (p4d_flags(p4d) & ~ignore_flags) != 0;
}
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
@ -880,7 +886,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
static inline int pgd_bad(pgd_t pgd)
{
return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
unsigned long ignore_flags = _PAGE_USER;
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
}
static inline int pgd_none(pgd_t pgd)
@ -909,7 +920,11 @@ static inline int pgd_none(pgd_t pgd)
* pgd_offset() returns a (pgd_t *)
* pgd_index() is used get the offset into the pgd page's array of pgd_t's;
*/
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
/*
* a shortcut to get a pgd_t in a given mm
*/
#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
/*
* a shortcut which implies the use of the kernel's pgd, instead
* of a process's
@ -1111,7 +1126,14 @@ static inline int pud_write(pud_t pud)
*/
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
memcpy(dst, src, count * sizeof(pgd_t));
memcpy(dst, src, count * sizeof(pgd_t));
#ifdef CONFIG_PAGE_TABLE_ISOLATION
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/* Clone the user space pgd as well */
memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
count * sizeof(pgd_t));
#endif
}
#define PTE_SHIFT ilog2(PTRS_PER_PTE)

View file

@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
#endif
}
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
* (8k-aligned and 8k in size). The kernel one is at the beginning 4k and
* the user one is in the last 4k. To switch between them, you
* just need to flip the 12th bit in their addresses.
*/
#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
/*
* This generates better code than the inline assembly in
* __set_bit().
*/
static inline void *ptr_set_bit(void *ptr, int bit)
{
unsigned long __ptr = (unsigned long)ptr;
__ptr |= BIT(bit);
return (void *)__ptr;
}
static inline void *ptr_clear_bit(void *ptr, int bit)
{
unsigned long __ptr = (unsigned long)ptr;
__ptr &= ~BIT(bit);
return (void *)__ptr;
}
static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
{
return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
}
static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
{
return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
}
static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
{
return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}
static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
{
return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
}
#endif /* CONFIG_PAGE_TABLE_ISOLATION */
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
*
* Returns true for parts of the PGD that map userspace and
* false for the parts that map the kernel.
*/
static inline bool pgdp_maps_userspace(void *__ptr)
{
unsigned long ptr = (unsigned long)__ptr;
return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
}
#ifdef CONFIG_PAGE_TABLE_ISOLATION
pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
/*
* Take a PGD location (pgdp) and a pgd value that needs to be set there.
* Populates the user and returns the resulting PGD that must be set in
* the kernel copy of the page tables.
*/
static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
{
if (!static_cpu_has(X86_FEATURE_PTI))
return pgd;
return __pti_set_user_pgd(pgdp, pgd);
}
#else
static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
{
return pgd;
}
#endif
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
#else
*p4dp = p4d;
#endif
}
static inline void native_p4d_clear(p4d_t *p4d)
@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
*pgdp = pti_set_user_pgd(pgdp, pgd);
#else
*pgdp = pgd;
#endif
}
static inline void native_pgd_clear(pgd_t *pgd)

View file

@ -79,13 +79,17 @@ typedef struct { pteval_t pte; } pte_t;
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
#ifdef CONFIG_X86_5LEVEL
# define VMALLOC_SIZE_TB _AC(16384, UL)
# define __VMALLOC_BASE _AC(0xff92000000000000, UL)
# define VMALLOC_SIZE_TB _AC(12800, UL)
# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
# define LDT_PGD_ENTRY _AC(-112, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#else
# define VMALLOC_SIZE_TB _AC(32, UL)
# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
# define LDT_PGD_ENTRY _AC(-4, UL)
# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#endif
#ifdef CONFIG_RANDOMIZE_MEMORY

View file

@ -38,6 +38,11 @@
#define CR3_ADDR_MASK __sme_clr(0x7FFFFFFFFFFFF000ull)
#define CR3_PCID_MASK 0xFFFull
#define CR3_NOFLUSH BIT_ULL(63)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define X86_CR3_PTI_SWITCH_BIT 11
#endif
#else
/*
* CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save

View file

@ -851,13 +851,22 @@ static inline void spin_lock_prefetch(const void *x)
#else
/*
* User space process size. 47bits minus one guard page. The guard
* page is necessary on Intel CPUs: if a SYSCALL instruction is at
* the highest possible canonical userspace address, then that
* syscall will enter the kernel with a non-canonical return
* address, and SYSRET will explode dangerously. We avoid this
* particular problem by preventing anything from being mapped
* at the maximum canonical address.
* User space process size. This is the first address outside the user range.
* There are a few constraints that determine this:
*
* On Intel CPUs, if a SYSCALL instruction is at the highest canonical
* address, then that syscall will enter the kernel with a
* non-canonical return address, and SYSRET will explode dangerously.
* We avoid this particular problem by preventing anything executable
* from being mapped at the maximum canonical address.
*
* On AMD CPUs in the Ryzen family, there's a nasty bug in which the
* CPUs malfunction if they execute code from the highest canonical page.
* They'll speculate right off the end of the canonical space, and
* bad things happen. This is worked around in the same way as the
* Intel problem.
*
* With page table isolation enabled, we map the LDT in ... [stay tuned]
*/
#define TASK_SIZE_MAX ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)

View file

@ -0,0 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef _ASM_X86_PTI_H
#define _ASM_X86_PTI_H
#ifndef __ASSEMBLY__
#ifdef CONFIG_PAGE_TABLE_ISOLATION
extern void pti_init(void);
extern void pti_check_boottime_disable(void);
#else
static inline void pti_check_boottime_disable(void) { }
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PTI_H */

View file

@ -10,38 +10,90 @@
#include <asm/special_insns.h>
#include <asm/smp.h>
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
/*
* Bump the generation count. This also serves as a full barrier
* that synchronizes with switch_mm(): callers are required to order
* their read of mm_cpumask after their writes to the paging
* structures.
*/
return atomic64_inc_return(&mm->context.tlb_gen);
}
/*
* The x86 feature is called PCID (Process Context IDentifier). It is similar
* to what is traditionally called ASID on the RISC processors.
*
* We don't use the traditional ASID implementation, where each process/mm gets
* its own ASID and flush/restart when we run out of ASID space.
*
* Instead we have a small per-cpu array of ASIDs and cache the last few mm's
* that came by on this CPU, allowing cheaper switch_mm between processes on
* this CPU.
*
* We end up with different spaces for different things. To avoid confusion we
* use different names for each of them:
*
* ASID - [0, TLB_NR_DYN_ASIDS-1]
* the canonical identifier for an mm
*
* kPCID - [1, TLB_NR_DYN_ASIDS]
* the value we write into the PCID part of CR3; corresponds to the
* ASID+1, because PCID 0 is special.
*
* uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
* for KPTI each mm has two address spaces and thus needs two
* PCID values, but we can still do with a single ASID denomination
* for each mm. Corresponds to kPCID + 2048.
*
*/
/* There are 12 bits of space for ASIDS in CR3 */
#define CR3_HW_ASID_BITS 12
/*
* When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
* user/kernel switches
*/
#define PTI_CONSUMED_ASID_BITS 0
#ifdef CONFIG_PAGE_TABLE_ISOLATION
# define PTI_CONSUMED_PCID_BITS 1
#else
# define PTI_CONSUMED_PCID_BITS 0
#endif
#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
#define CR3_AVAIL_ASID_BITS (CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
/*
* ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
* for them being zero-based. Another -1 is because ASID 0 is reserved for
* for them being zero-based. Another -1 is because PCID 0 is reserved for
* use by non-PCID-aware users.
*/
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
/*
* 6 because 6 should be plenty and struct tlb_state will fit in two cache
* lines.
*/
#define TLB_NR_DYN_ASIDS 6
/*
* Given @asid, compute kPCID
*/
static inline u16 kern_pcid(u16 asid)
{
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* Make sure that the dynamic ASID space does not confict with the
* bit we are using to switch between user and kernel ASIDs.
*/
BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
/*
* The ASID being passed in here should have respected the
* MAX_ASID_AVAILABLE and thus never have the switch bit set.
*/
VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
#endif
/*
* The dynamically-assigned ASIDs that get passed in are small
* (<TLB_NR_DYN_ASIDS). They never have the high switch bit set,
* so do not bother to clear it.
*
* If PCID is on, ASID-aware code paths put the ASID+1 into the
* PCID bits. This serves two purposes. It prevents a nasty
* situation in which PCID-unaware code saves CR3, loads some other
@ -53,6 +105,18 @@ static inline u16 kern_pcid(u16 asid)
return asid + 1;
}
/*
* Given @asid, compute uPCID
*/
static inline u16 user_pcid(u16 asid)
{
u16 ret = kern_pcid(asid);
#ifdef CONFIG_PAGE_TABLE_ISOLATION
ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
#endif
return ret;
}
struct pgd_t;
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
{
@ -95,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
return !static_cpu_has(X86_FEATURE_PCID);
}
/*
* 6 because 6 should be plenty and struct tlb_state will fit in
* two cache lines.
*/
#define TLB_NR_DYN_ASIDS 6
struct tlb_context {
u64 ctx_id;
u64 tlb_gen;
@ -134,6 +192,24 @@ struct tlb_state {
*/
bool is_lazy;
/*
* If set we changed the page tables in such a way that we
* needed an invalidation of all contexts (aka. PCIDs / ASIDs).
* This tells us to go invalidate all the non-loaded ctxs[]
* on the next context switch.
*
* The current ctx was kept up-to-date as it ran and does not
* need to be invalidated.
*/
bool invalidate_other;
/*
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
* the corresponding user PCID needs a flush next time we
* switch to it; see SWITCH_TO_USER_CR3.
*/
unsigned short user_pcid_flush_mask;
/*
* Access to this CR4 shadow and to H/W CR4 is protected by
* disabling interrupts when modifying either one.
@ -211,6 +287,14 @@ static inline unsigned long cr4_read_shadow(void)
return this_cpu_read(cpu_tlbstate.cr4);
}
/*
* Mark all other ASIDs as invalid, preserves the current.
*/
static inline void invalidate_other_asid(void)
{
this_cpu_write(cpu_tlbstate.invalidate_other, true);
}
/*
* Save some of cr4 feature set we're using (e.g. Pentium 4MB
* enable and PPro Global page enable), so that any CPU's that boot
@ -230,19 +314,48 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
extern void initialize_tlbstate_and_flush(void);
/*
* Given an ASID, flush the corresponding user ASID. We can delay this
* until the next time we switch to it.
*
* See SWITCH_TO_USER_CR3.
*/
static inline void invalidate_user_asid(u16 asid)
{
/* There is no user ASID if address space separation is off */
if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
return;
/*
* We only have a single ASID if PCID is off and the CR3
* write will have flushed it.
*/
if (!cpu_feature_enabled(X86_FEATURE_PCID))
return;
if (!static_cpu_has(X86_FEATURE_PTI))
return;
__set_bit(kern_pcid(asid),
(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}
/*
* flush the entire current user mapping
*/
static inline void __native_flush_tlb(void)
{
/*
* If current->mm == NULL then we borrow a mm which may change during a
* task switch and therefore we must not be preempted while we write CR3
* back:
* Preemption or interrupts must be disabled to protect the access
* to the per CPU variable and to prevent being preempted between
* read_cr3() and write_cr3().
*/
preempt_disable();
WARN_ON_ONCE(preemptible());
invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
/* If current->mm == NULL then the read_cr3() "borrows" an mm */
native_write_cr3(__native_read_cr3());
preempt_enable();
}
/*
@ -256,6 +369,8 @@ static inline void __native_flush_tlb_global(void)
/*
* Using INVPCID is considerably faster than a pair of writes
* to CR4 sandwiched inside an IRQ flag save/restore.
*
* Note, this works with CR4.PCIDE=0 or 1.
*/
invpcid_flush_all();
return;
@ -282,7 +397,21 @@ static inline void __native_flush_tlb_global(void)
*/
static inline void __native_flush_tlb_single(unsigned long addr)
{
u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/*
* Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
* Just use invalidate_user_asid() in case we are called early.
*/
if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
invalidate_user_asid(loaded_mm_asid);
else
invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}
/*
@ -298,14 +427,6 @@ static inline void __flush_tlb_all(void)
*/
__flush_tlb();
}
/*
* Note: if we somehow had PCID but not PGE, then this wouldn't work --
* we'd end up flushing kernel translations for the current ASID but
* we might fail to flush kernel translations for other cached ASIDs.
*
* To avoid this issue, we force PCID off if PGE is off.
*/
}
/*
@ -315,6 +436,16 @@ static inline void __flush_tlb_one(unsigned long addr)
{
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
__flush_tlb_single(addr);
if (!static_cpu_has(X86_FEATURE_PTI))
return;
/*
* __flush_tlb_single() will have cleared the TLB entry for this ASID,
* but since kernel space is replicated across all, we must also
* invalidate all others.
*/
invalidate_other_asid();
}
#define TLB_FLUSH_ALL -1UL
@ -375,6 +506,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
void native_flush_tlb_others(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
/*
* Bump the generation count. This also serves as a full barrier
* that synchronizes with switch_mm(): callers are required to order
* their read of mm_cpumask after their writes to the paging
* structures.
*/
return atomic64_inc_return(&mm->context.tlb_gen);
}
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
{

View file

@ -7,6 +7,7 @@
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
/*
* Called on instruction fetch fault in vsyscall page.

View file

@ -78,7 +78,12 @@
#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
#define X86_CR3_PCID_BITS 12
#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
/*
* Intel CPU features in CR4

View file

@ -17,6 +17,7 @@
#include <asm/sigframe.h>
#include <asm/bootparam.h>
#include <asm/suspend.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
@ -94,6 +95,9 @@ void common(void) {
BLANK();
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
/* TLB state for the entry code */
OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
/* Layout info for cpu_entry_area */
OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);

View file

@ -898,6 +898,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
/* Assume for now that ALL x86 CPUs are insecure */
setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
fpu__init_system(c);
#ifdef CONFIG_X86_32
@ -1335,7 +1339,10 @@ void syscall_init(void)
(entry_SYSCALL_64_trampoline - _entry_trampoline);
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
if (static_cpu_has(X86_FEATURE_PTI))
wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
else
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
#ifdef CONFIG_IA32_EMULATION
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);

View file

@ -297,11 +297,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
unsigned long sp;
#endif
printk(KERN_DEFAULT
"%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
"%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "");
IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "",
IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
(boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)

View file

@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
.balign PAGE_SIZE; \
GLOBAL(name)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* Each PGD needs to be 8k long and 8k aligned. We do not
* ever go out to userspace with these, so we do not
* strictly *need* the second page, but this allows us to
* have a single set_pgd() implementation that does not
* need to worry about whether it has 4k or 8k to work
* with.
*
* This ensures PGDs are 8k long:
*/
#define PTI_USER_PGD_FILL 512
/* This ensures they are 8k-aligned: */
#define NEXT_PGD_PAGE(name) \
.balign 2 * PAGE_SIZE; \
GLOBAL(name)
#else
#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
#define PTI_USER_PGD_FILL 0
#endif
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
i = 0 ; \
@ -350,13 +371,14 @@ GLOBAL(name)
.endr
__INITDATA
NEXT_PAGE(early_top_pgt)
NEXT_PGD_PAGE(early_top_pgt)
.fill 511,8,0
#ifdef CONFIG_X86_5LEVEL
.quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#else
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#endif
.fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
.data
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PAGE(init_top_pgt)
NEXT_PGD_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#else
NEXT_PAGE(init_top_pgt)
NEXT_PGD_PAGE(init_top_pgt)
.fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
#endif
#ifdef CONFIG_X86_5LEVEL

View file

@ -24,6 +24,7 @@
#include <linux/uaccess.h>
#include <asm/ldt.h>
#include <asm/tlb.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
@ -51,13 +52,11 @@ static void refresh_ldt_segments(void)
static void flush_ldt(void *__mm)
{
struct mm_struct *mm = __mm;
mm_context_t *pc;
if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
return;
pc = &mm->context;
set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
load_mm_ldt(mm);
refresh_ldt_segments();
}
@ -94,10 +93,126 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
return NULL;
}
/* The new LDT isn't aliased for PTI yet. */
new_ldt->slot = -1;
new_ldt->nr_entries = num_entries;
return new_ldt;
}
/*
* If PTI is enabled, this maps the LDT into the kernelmode and
* usermode tables for the given mm.
*
* There is no corresponding unmap function. Even if the LDT is freed, we
* leave the PTEs around until the slot is reused or the mm is destroyed.
* This is harmless: the LDT is always in ordinary memory, and no one will
* access the freed slot.
*
* If we wanted to unmap freed LDTs, we'd also need to do a flush to make
* it useful, and the flush would slow down modify_ldt().
*/
static int
map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
bool is_vmalloc, had_top_level_entry;
unsigned long va;
spinlock_t *ptl;
pgd_t *pgd;
int i;
if (!static_cpu_has(X86_FEATURE_PTI))
return 0;
/*
* Any given ldt_struct should have map_ldt_struct() called at most
* once.
*/
WARN_ON(ldt->slot != -1);
/*
* Did we already have the top level entry allocated? We can't
* use pgd_none() for this because it doens't do anything on
* 4-level page table kernels.
*/
pgd = pgd_offset(mm, LDT_BASE_ADDR);
had_top_level_entry = (pgd->pgd != 0);
is_vmalloc = is_vmalloc_addr(ldt->entries);
for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
unsigned long offset = i << PAGE_SHIFT;
const void *src = (char *)ldt->entries + offset;
unsigned long pfn;
pte_t pte, *ptep;
va = (unsigned long)ldt_slot_va(slot) + offset;
pfn = is_vmalloc ? vmalloc_to_pfn(src) :
page_to_pfn(virt_to_page(src));
/*
* Treat the PTI LDT range as a *userspace* range.
* get_locked_pte() will allocate all needed pagetables
* and account for them in this mm.
*/
ptep = get_locked_pte(mm, va, &ptl);
if (!ptep)
return -ENOMEM;
/*
* Map it RO so the easy to find address is not a primary
* target via some kernel interface which misses a
* permission check.
*/
pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
set_pte_at(mm, va, ptep, pte);
pte_unmap_unlock(ptep, ptl);
}
if (mm->context.ldt) {
/*
* We already had an LDT. The top-level entry should already
* have been allocated and synchronized with the usermode
* tables.
*/
WARN_ON(!had_top_level_entry);
if (static_cpu_has(X86_FEATURE_PTI))
WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
} else {
/*
* This is the first time we're mapping an LDT for this process.
* Sync the pgd to the usermode tables.
*/
WARN_ON(had_top_level_entry);
if (static_cpu_has(X86_FEATURE_PTI)) {
WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
set_pgd(kernel_to_user_pgdp(pgd), *pgd);
}
}
va = (unsigned long)ldt_slot_va(slot);
flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
ldt->slot = slot;
#endif
return 0;
}
static void free_ldt_pgtables(struct mm_struct *mm)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
struct mmu_gather tlb;
unsigned long start = LDT_BASE_ADDR;
unsigned long end = start + (1UL << PGDIR_SHIFT);
if (!static_cpu_has(X86_FEATURE_PTI))
return;
tlb_gather_mmu(&tlb, mm, start, end);
free_pgd_range(&tlb, start, end, start, end);
tlb_finish_mmu(&tlb, start, end);
#endif
}
/* After calling this, the LDT is immutable. */
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
@ -156,6 +271,12 @@ int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
new_ldt->nr_entries * LDT_ENTRY_SIZE);
finalize_ldt_struct(new_ldt);
retval = map_ldt_struct(mm, new_ldt, 0);
if (retval) {
free_ldt_pgtables(mm);
free_ldt_struct(new_ldt);
goto out_unlock;
}
mm->context.ldt = new_ldt;
out_unlock:
@ -174,6 +295,11 @@ void destroy_context_ldt(struct mm_struct *mm)
mm->context.ldt = NULL;
}
void ldt_arch_exit_mmap(struct mm_struct *mm)
{
free_ldt_pgtables(mm);
}
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
struct mm_struct *mm = current->mm;
@ -287,6 +413,25 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
new_ldt->entries[ldt_info.entry_number] = ldt;
finalize_ldt_struct(new_ldt);
/*
* If we are using PTI, map the new LDT into the userspace pagetables.
* If there is already an LDT, use the other slot so that other CPUs
* will continue to use the old LDT until install_ldt() switches
* them over to the new LDT.
*/
error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
if (error) {
/*
* This only can fail for the first LDT setup. If an LDT is
* already installed then the PTE page is already
* populated. Mop up a half populated page table.
*/
if (!WARN_ON_ONCE(old_ldt))
free_ldt_pgtables(mm);
free_ldt_struct(new_ldt);
goto out_unlock;
}
install_ldt(mm, new_ldt);
free_ldt_struct(old_ldt);
error = 0;

View file

@ -48,8 +48,6 @@ static void load_segments(void)
"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
"\tmovl %%eax,%%ds\n"
"\tmovl %%eax,%%es\n"
"\tmovl %%eax,%%fs\n"
"\tmovl %%eax,%%gs\n"
"\tmovl %%eax,%%ss\n"
: : : "eax", "memory");
#undef STR
@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
* The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
set_gdt(phys_to_virt(0), 0);
idt_invalidate(phys_to_virt(0));
set_gdt(phys_to_virt(0), 0);
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,

View file

@ -128,25 +128,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0xa, 0xf);
spin_unlock_irqrestore(&rtc_lock, flags);
local_flush_tlb();
pr_debug("1.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
start_eip >> 4;
pr_debug("2.\n");
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
start_eip & 0xf;
pr_debug("3.\n");
}
static inline void smpboot_restore_warm_reset_vector(void)
{
unsigned long flags;
/*
* Install writable page 0 entry to set BIOS data area.
*/
local_flush_tlb();
/*
* Paranoid: Set warm reset code and vector here back
* to default values.

View file

@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
cpu = get_cpu();
while (n-- > 0) {
if (LDT_empty(info) || LDT_zero(info)) {
if (LDT_empty(info) || LDT_zero(info))
memset(desc, 0, sizeof(*desc));
} else {
else
fill_ldt(desc, info);
/*
* Always set the accessed bit so that the CPU
* doesn't try to write to the (read-only) GDT.
*/
desc->type |= 1;
}
++info;
++desc;
}

View file

@ -361,7 +361,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
*
* No need for ist_enter here because we don't use RCU.
*/
if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
regs->cs == __KERNEL_CS &&
regs->ip == (unsigned long)native_irq_return_iret)
{

View file

@ -61,11 +61,17 @@ jiffies_64 = jiffies;
. = ALIGN(HPAGE_SIZE); \
__end_rodata_hpage_align = .;
#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
#else
#define X64_ALIGN_RODATA_BEGIN
#define X64_ALIGN_RODATA_END
#define ALIGN_ENTRY_TEXT_BEGIN
#define ALIGN_ENTRY_TEXT_END
#endif
PHDRS {
@ -102,8 +108,10 @@ SECTIONS
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
ALIGN_ENTRY_TEXT_BEGIN
ENTRY_TEXT
IRQENTRY_TEXT
ALIGN_ENTRY_TEXT_END
SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)

View file

@ -43,9 +43,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o

View file

@ -38,6 +38,32 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}
static void percpu_setup_debug_store(int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
int npages;
void *cea;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return;
cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
npages = sizeof(struct debug_store) / PAGE_SIZE;
BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
PAGE_KERNEL);
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
/*
* Force the population of PMDs for not yet allocated per cpu
* memory like debug store buffers.
*/
npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
for (; npages; npages--, cea += PAGE_SIZE)
cea_set_pte(cea, 0, PAGE_NONE);
#endif
}
/* Setup the fixmap mappings only once per-processor */
static void __init setup_cpu_entry_area(int cpu)
{
@ -109,6 +135,7 @@ static void __init setup_cpu_entry_area(int cpu)
cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
__pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
#endif
percpu_setup_debug_store(cpu);
}
static __init void setup_cpu_entry_area_ptes(void)

View file

@ -5,7 +5,7 @@
static int ptdump_show(struct seq_file *m, void *v)
{
ptdump_walk_pgd_level(m, NULL);
ptdump_walk_pgd_level_debugfs(m, NULL, false);
return 0;
}
@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
.release = single_release,
};
static struct dentry *pe;
static int ptdump_show_curknl(struct seq_file *m, void *v)
{
if (current->mm->pgd) {
down_read(&current->mm->mmap_sem);
ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
up_read(&current->mm->mmap_sem);
}
return 0;
}
static int ptdump_open_curknl(struct inode *inode, struct file *filp)
{
return single_open(filp, ptdump_show_curknl, NULL);
}
static const struct file_operations ptdump_curknl_fops = {
.owner = THIS_MODULE,
.open = ptdump_open_curknl,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#ifdef CONFIG_PAGE_TABLE_ISOLATION
static struct dentry *pe_curusr;
static int ptdump_show_curusr(struct seq_file *m, void *v)
{
if (current->mm->pgd) {
down_read(&current->mm->mmap_sem);
ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
up_read(&current->mm->mmap_sem);
}
return 0;
}
static int ptdump_open_curusr(struct inode *inode, struct file *filp)
{
return single_open(filp, ptdump_show_curusr, NULL);
}
static const struct file_operations ptdump_curusr_fops = {
.owner = THIS_MODULE,
.open = ptdump_open_curusr,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif
static struct dentry *dir, *pe_knl, *pe_curknl;
static int __init pt_dump_debug_init(void)
{
pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
&ptdump_fops);
if (!pe)
dir = debugfs_create_dir("page_tables", NULL);
if (!dir)
return -ENOMEM;
pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
&ptdump_fops);
if (!pe_knl)
goto err;
pe_curknl = debugfs_create_file("current_kernel", 0400,
dir, NULL, &ptdump_curknl_fops);
if (!pe_curknl)
goto err;
#ifdef CONFIG_PAGE_TABLE_ISOLATION
pe_curusr = debugfs_create_file("current_user", 0400,
dir, NULL, &ptdump_curusr_fops);
if (!pe_curusr)
goto err;
#endif
return 0;
err:
debugfs_remove_recursive(dir);
return -ENOMEM;
}
static void __exit pt_dump_debug_exit(void)
{
debugfs_remove_recursive(pe);
debugfs_remove_recursive(dir);
}
module_init(pt_dump_debug_init);

View file

@ -52,11 +52,17 @@ enum address_markers_idx {
USER_SPACE_NR = 0,
KERNEL_SPACE_NR,
LOW_KERNEL_NR,
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
LDT_NR,
#endif
VMALLOC_START_NR,
VMEMMAP_START_NR,
#ifdef CONFIG_KASAN
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
LDT_NR,
#endif
CPU_ENTRY_AREA_NR,
#ifdef CONFIG_X86_ESPFIX64
@ -81,6 +87,9 @@ static struct addr_marker address_markers[] = {
#ifdef CONFIG_KASAN
[KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
[KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
[LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
#endif
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
@ -467,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
}
static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
bool checkwx)
bool checkwx, bool dmesg)
{
#ifdef CONFIG_X86_64
pgd_t *start = (pgd_t *) &init_top_pgt;
@ -480,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
if (pgd) {
start = pgd;
st.to_dmesg = true;
st.to_dmesg = dmesg;
}
st.check_wx = checkwx;
@ -518,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
{
ptdump_walk_pgd_level_core(m, pgd, false);
ptdump_walk_pgd_level_core(m, pgd, false, true);
}
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
if (user && static_cpu_has(X86_FEATURE_PTI))
pgd = kernel_to_user_pgdp(pgd);
#endif
ptdump_walk_pgd_level_core(m, pgd, false, false);
}
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
static void ptdump_walk_user_pgd_level_checkwx(void)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
pgd_t *pgd = (pgd_t *) &init_top_pgt;
if (!static_cpu_has(X86_FEATURE_PTI))
return;
pr_info("x86/mm: Checking user space page tables\n");
pgd = kernel_to_user_pgdp(pgd);
ptdump_walk_pgd_level_core(NULL, pgd, true, false);
#endif
}
EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
void ptdump_walk_pgd_level_checkwx(void)
{
ptdump_walk_pgd_level_core(NULL, NULL, true);
ptdump_walk_pgd_level_core(NULL, NULL, true, false);
ptdump_walk_user_pgd_level_checkwx();
}
static int __init pt_dump_init(void)

View file

@ -20,6 +20,7 @@
#include <asm/kaslr.h>
#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
#include <asm/pti.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@ -161,6 +162,12 @@ struct map_range {
static int page_size_mask;
static void enable_global_pages(void)
{
if (!static_cpu_has(X86_FEATURE_PTI))
__supported_pte_mask |= _PAGE_GLOBAL;
}
static void __init probe_page_size_mask(void)
{
/*
@ -179,11 +186,11 @@ static void __init probe_page_size_mask(void)
cr4_set_bits_and_update_boot(X86_CR4_PSE);
/* Enable PGE if available */
__supported_pte_mask &= ~_PAGE_GLOBAL;
if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
} else
__supported_pte_mask &= ~_PAGE_GLOBAL;
enable_global_pages();
}
/* Enable 1 GB linear kernel mappings if available: */
if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@ -196,34 +203,44 @@ static void __init probe_page_size_mask(void)
static void setup_pcid(void)
{
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_PCID)) {
if (boot_cpu_has(X86_FEATURE_PGE)) {
/*
* This can't be cr4_set_bits_and_update_boot() --
* the trampoline code can't handle CR4.PCIDE and
* it wouldn't do any good anyway. Despite the name,
* cr4_set_bits_and_update_boot() doesn't actually
* cause the bits in question to remain set all the
* way through the secondary boot asm.
*
* Instead, we brute-force it and set CR4.PCIDE
* manually in start_secondary().
*/
cr4_set_bits(X86_CR4_PCIDE);
} else {
/*
* flush_tlb_all(), as currently implemented, won't
* work if PCID is on but PGE is not. Since that
* combination doesn't exist on real hardware, there's
* no reason to try to fully support it, but it's
* polite to avoid corrupting data if we're on
* an improperly configured VM.
*/
setup_clear_cpu_cap(X86_FEATURE_PCID);
}
if (!IS_ENABLED(CONFIG_X86_64))
return;
if (!boot_cpu_has(X86_FEATURE_PCID))
return;
if (boot_cpu_has(X86_FEATURE_PGE)) {
/*
* This can't be cr4_set_bits_and_update_boot() -- the
* trampoline code can't handle CR4.PCIDE and it wouldn't
* do any good anyway. Despite the name,
* cr4_set_bits_and_update_boot() doesn't actually cause
* the bits in question to remain set all the way through
* the secondary boot asm.
*
* Instead, we brute-force it and set CR4.PCIDE manually in
* start_secondary().
*/
cr4_set_bits(X86_CR4_PCIDE);
/*
* INVPCID's single-context modes (2/3) only work if we set
* X86_CR4_PCIDE, *and* we INVPCID support. It's unusable
* on systems that have X86_CR4_PCIDE clear, or that have
* no INVPCID support at all.
*/
if (boot_cpu_has(X86_FEATURE_INVPCID))
setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
} else {
/*
* flush_tlb_all(), as currently implemented, won't work if
* PCID is on but PGE is not. Since that combination
* doesn't exist on real hardware, there's no reason to try
* to fully support it, but it's polite to avoid corrupting
* data if we're on an improperly configured VM.
*/
setup_clear_cpu_cap(X86_FEATURE_PCID);
}
#endif
}
#ifdef CONFIG_X86_32
@ -624,6 +641,7 @@ void __init init_mem_mapping(void)
{
unsigned long end;
pti_check_boottime_disable();
probe_page_size_mask();
setup_pcid();
@ -847,7 +865,7 @@ void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.loaded_mm = &init_mm,
.next_asid = 1,
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */

View file

@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
kmem_cache_free(pgd_cache, pgd);
}
#else
static inline pgd_t *_pgd_alloc(void)
{
return (pgd_t *)__get_free_page(PGALLOC_GFP);
return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
}
static inline void _pgd_free(pgd_t *pgd)
{
free_page((unsigned long)pgd);
free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif /* CONFIG_X86_PAE */

387
kernel/arch/x86/mm/pti.c Normal file
View file

@ -0,0 +1,387 @@
/*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* This code is based in part on work published here:
*
* https://github.com/IAIK/KAISER
*
* The original work was written by and and signed off by for the Linux
* kernel by:
*
* Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
* Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
* Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
* Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
*
* Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
* Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
* Andy Lutomirsky <luto@amacapital.net>
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
#include <asm/vsyscall.h>
#include <asm/cmdline.h>
#include <asm/pti.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
/* Backporting helper */
#ifndef __GFP_NOTRACK
#define __GFP_NOTRACK 0
#endif
static void __init pti_print_if_insecure(const char *reason)
{
if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
pr_info("%s\n", reason);
}
static void __init pti_print_if_secure(const char *reason)
{
if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
pr_info("%s\n", reason);
}
void __init pti_check_boottime_disable(void)
{
char arg[5];
int ret;
if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
pti_print_if_insecure("disabled on XEN PV.");
return;
}
ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
if (ret > 0) {
if (ret == 3 && !strncmp(arg, "off", 3)) {
pti_print_if_insecure("disabled on command line.");
return;
}
if (ret == 2 && !strncmp(arg, "on", 2)) {
pti_print_if_secure("force enabled on command line.");
goto enable;
}
if (ret == 4 && !strncmp(arg, "auto", 4))
goto autosel;
}
if (cmdline_find_option_bool(boot_command_line, "nopti")) {
pti_print_if_insecure("disabled on command line.");
return;
}
autosel:
if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
return;
enable:
setup_force_cpu_cap(X86_FEATURE_PTI);
}
pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
{
/*
* Changes to the high (kernel) portion of the kernelmode page
* tables are not automatically propagated to the usermode tables.
*
* Users should keep in mind that, unlike the kernelmode tables,
* there is no vmalloc_fault equivalent for the usermode tables.
* Top-level entries added to init_mm's usermode pgd after boot
* will not be automatically propagated to other mms.
*/
if (!pgdp_maps_userspace(pgdp))
return pgd;
/*
* The user page tables get the full PGD, accessible from
* userspace:
*/
kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
/*
* If this is normal user memory, make it NX in the kernel
* pagetables so that, if we somehow screw up and return to
* usermode with the kernel CR3 loaded, we'll get a page fault
* instead of allowing user code to execute with the wrong CR3.
*
* As exceptions, we don't set NX if:
* - _PAGE_USER is not set. This could be an executable
* EFI runtime mapping or something similar, and the kernel
* may execute from it
* - we don't have NX support
* - we're clearing the PGD (i.e. the new pgd is not present).
*/
if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
(__supported_pte_mask & _PAGE_NX))
pgd.pgd |= _PAGE_NX;
/* return the copy of the PGD we want the kernel to use: */
return pgd;
}
/*
* Walk the user copy of the page tables (optionally) trying to allocate
* page table pages on the way down.
*
* Returns a pointer to a P4D on success, or NULL on failure.
*/
static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
if (address < PAGE_OFFSET) {
WARN_ONCE(1, "attempt to walk user address\n");
return NULL;
}
if (pgd_none(*pgd)) {
unsigned long new_p4d_page = __get_free_page(gfp);
if (!new_p4d_page)
return NULL;
if (pgd_none(*pgd)) {
set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
new_p4d_page = 0;
}
if (new_p4d_page)
free_page(new_p4d_page);
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);
return p4d_offset(pgd, address);
}
/*
* Walk the user copy of the page tables (optionally) trying to allocate
* page table pages on the way down.
*
* Returns a pointer to a PMD on success, or NULL on failure.
*/
static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
pud_t *pud;
BUILD_BUG_ON(p4d_large(*p4d) != 0);
if (p4d_none(*p4d)) {
unsigned long new_pud_page = __get_free_page(gfp);
if (!new_pud_page)
return NULL;
if (p4d_none(*p4d)) {
set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
new_pud_page = 0;
}
if (new_pud_page)
free_page(new_pud_page);
}
pud = pud_offset(p4d, address);
/* The user page tables do not use large mappings: */
if (pud_large(*pud)) {
WARN_ON(1);
return NULL;
}
if (pud_none(*pud)) {
unsigned long new_pmd_page = __get_free_page(gfp);
if (!new_pmd_page)
return NULL;
if (pud_none(*pud)) {
set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
new_pmd_page = 0;
}
if (new_pmd_page)
free_page(new_pmd_page);
}
return pmd_offset(pud, address);
}
#ifdef CONFIG_X86_VSYSCALL_EMULATION
/*
* Walk the shadow copy of the page tables (optionally) trying to allocate
* page table pages on the way down. Does not support large pages.
*
* Note: this is only used when mapping *new* kernel data into the
* user/shadow page tables. It is never used for userspace data.
*
* Returns a pointer to a PTE on success, or NULL on failure.
*/
static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
pte_t *pte;
/* We can't do anything sensible if we hit a large mapping. */
if (pmd_large(*pmd)) {
WARN_ON(1);
return NULL;
}
if (pmd_none(*pmd)) {
unsigned long new_pte_page = __get_free_page(gfp);
if (!new_pte_page)
return NULL;
if (pmd_none(*pmd)) {
set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
new_pte_page = 0;
}
if (new_pte_page)
free_page(new_pte_page);
}
pte = pte_offset_kernel(pmd, address);
if (pte_flags(*pte) & _PAGE_USER) {
WARN_ONCE(1, "attempt to walk to user pte\n");
return NULL;
}
return pte;
}
static void __init pti_setup_vsyscall(void)
{
pte_t *pte, *target_pte;
unsigned int level;
pte = lookup_address(VSYSCALL_ADDR, &level);
if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
return;
target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
if (WARN_ON(!target_pte))
return;
*target_pte = *pte;
set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
}
#else
static void __init pti_setup_vsyscall(void) { }
#endif
static void __init
pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
{
unsigned long addr;
/*
* Clone the populated PMDs which cover start to end. These PMD areas
* can have holes.
*/
for (addr = start; addr < end; addr += PMD_SIZE) {
pmd_t *pmd, *target_pmd;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pgd = pgd_offset_k(addr);
if (WARN_ON(pgd_none(*pgd)))
return;
p4d = p4d_offset(pgd, addr);
if (WARN_ON(p4d_none(*p4d)))
return;
pud = pud_offset(p4d, addr);
if (pud_none(*pud))
continue;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
continue;
target_pmd = pti_user_pagetable_walk_pmd(addr);
if (WARN_ON(!target_pmd))
return;
/*
* Copy the PMD. That is, the kernelmode and usermode
* tables will share the last-level page tables of this
* address range
*/
*target_pmd = pmd_clear_flags(*pmd, clear);
}
}
/*
* Clone a single p4d (i.e. a top-level entry on 4-level systems and a
* next-level entry on 5-level systems.
*/
static void __init pti_clone_p4d(unsigned long addr)
{
p4d_t *kernel_p4d, *user_p4d;
pgd_t *kernel_pgd;
user_p4d = pti_user_pagetable_walk_p4d(addr);
kernel_pgd = pgd_offset_k(addr);
kernel_p4d = p4d_offset(kernel_pgd, addr);
*user_p4d = *kernel_p4d;
}
/*
* Clone the CPU_ENTRY_AREA into the user space visible page table.
*/
static void __init pti_clone_user_shared(void)
{
pti_clone_p4d(CPU_ENTRY_AREA_BASE);
}
/*
* Clone the ESPFIX P4D into the user space visinble page table
*/
static void __init pti_setup_espfix64(void)
{
#ifdef CONFIG_X86_ESPFIX64
pti_clone_p4d(ESPFIX_BASE_ADDR);
#endif
}
/*
* Clone the populated PMDs of the entry and irqentry text and force it RO.
*/
static void __init pti_clone_entry_text(void)
{
pti_clone_pmds((unsigned long) __entry_text_start,
(unsigned long) __irqentry_text_end, _PAGE_RW);
}
/*
* Initialize kernel page table isolation
*/
void __init pti_init(void)
{
if (!static_cpu_has(X86_FEATURE_PTI))
return;
pr_info("enabled\n");
pti_clone_user_shared();
pti_clone_entry_text();
pti_setup_espfix64();
pti_setup_vsyscall();
}

View file

@ -28,6 +28,38 @@
* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
*/
/*
* We get here when we do something requiring a TLB invalidation
* but could not go invalidate all of the contexts. We do the
* necessary invalidation by clearing out the 'ctx_id' which
* forces a TLB flush when the context is loaded.
*/
void clear_asid_other(void)
{
u16 asid;
/*
* This is only expected to be set if we have disabled
* kernel _PAGE_GLOBAL pages.
*/
if (!static_cpu_has(X86_FEATURE_PTI)) {
WARN_ON_ONCE(1);
return;
}
for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
/* Do not need to flush the current asid */
if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
continue;
/*
* Make sure the next time we go to switch to
* this asid, we do a flush:
*/
this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
}
this_cpu_write(cpu_tlbstate.invalidate_other, false);
}
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
return;
}
if (this_cpu_read(cpu_tlbstate.invalidate_other))
clear_asid_other();
for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
next->context.ctx_id)
@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
*need_flush = true;
}
static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
{
unsigned long new_mm_cr3;
if (need_flush) {
invalidate_user_asid(new_asid);
new_mm_cr3 = build_cr3(pgdir, new_asid);
} else {
new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
}
/*
* Caution: many callers of this function expect
* that load_cr3() is serializing and orders TLB
* fills with respect to the mm_cpumask writes.
*/
write_cr3(new_mm_cr3);
}
void leave_mm(int cpu)
{
struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
write_cr3(build_cr3(next->pgd, new_asid));
load_new_mm_cr3(next->pgd, new_asid, true);
/*
* NB: This gets called via leave_mm() in the idle path
@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
write_cr3(build_cr3_noflush(next->pgd, new_asid));
load_new_mm_cr3(next->pgd, new_asid, false);
/* See above wrt _rcuidle. */
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);

View file

@ -195,6 +195,9 @@ static pgd_t *efi_pgd;
* because we want to avoid inserting EFI region mappings (EFI_VA_END
* to EFI_VA_START) into the standard kernel page tables. Everything
* else can be shared, see efi_sync_low_kernel_mappings().
*
* We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
* allocation.
*/
int __init efi_alloc_page_tables(void)
{
@ -207,7 +210,7 @@ int __init efi_alloc_page_tables(void)
return 0;
gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
if (!efi_pgd)
return -ENOMEM;

View file

@ -12,22 +12,29 @@
#include "blk.h"
/*
* Append a bio to a passthrough request. Only works can be merged into
* the request based on the driver constraints.
* Append a bio to a passthrough request. Only works if the bio can be merged
* into the request based on the driver constraints.
*/
int blk_rq_append_bio(struct request *rq, struct bio *bio)
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
blk_queue_bounce(rq->q, &bio);
struct bio *orig_bio = *bio;
blk_queue_bounce(rq->q, bio);
if (!rq->bio) {
blk_rq_bio_prep(rq->q, rq, bio);
blk_rq_bio_prep(rq->q, rq, *bio);
} else {
if (!ll_back_merge_fn(rq->q, rq, bio))
if (!ll_back_merge_fn(rq->q, rq, *bio)) {
if (orig_bio != *bio) {
bio_put(*bio);
*bio = orig_bio;
}
return -EINVAL;
}
rq->biotail->bi_next = bio;
rq->biotail = bio;
rq->__data_len += bio->bi_iter.bi_size;
rq->biotail->bi_next = *bio;
rq->biotail = *bio;
rq->__data_len += (*bio)->bi_iter.bi_size;
}
return 0;
@ -80,14 +87,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
* We link the bounce buffer in and could have to traverse it
* later so we have to get a ref to prevent it from being freed
*/
ret = blk_rq_append_bio(rq, bio);
bio_get(bio);
ret = blk_rq_append_bio(rq, &bio);
if (ret) {
bio_endio(bio);
__blk_rq_unmap_user(orig_bio);
bio_put(bio);
return ret;
}
bio_get(bio);
return 0;
}
@ -220,7 +225,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
int reading = rq_data_dir(rq) == READ;
unsigned long addr = (unsigned long) kbuf;
int do_copy = 0;
struct bio *bio;
struct bio *bio, *orig_bio;
int ret;
if (len > (queue_max_hw_sectors(q) << 9))
@ -243,10 +248,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
if (do_copy)
rq->rq_flags |= RQF_COPY_USER;
ret = blk_rq_append_bio(rq, bio);
orig_bio = bio;
ret = blk_rq_append_bio(rq, &bio);
if (unlikely(ret)) {
/* request is too big */
bio_put(bio);
bio_put(orig_bio);
return ret;
}

View file

@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
unsigned i = 0;
bool bounce = false;
int sectors = 0;
bool passthrough = bio_is_passthrough(*bio_orig);
bio_for_each_segment(from, *bio_orig, iter) {
if (i++ < BIO_MAX_PAGES)
@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
if (!bounce)
return;
if (sectors < bio_sectors(*bio_orig)) {
if (!passthrough && sectors < bio_sectors(*bio_orig)) {
bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
bio_chain(bio, *bio_orig);
generic_make_request(*bio_orig);
*bio_orig = bio;
}
bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
bounce_bio_set);
bio_for_each_segment_all(to, bio, i) {
struct page *page = to->bv_page;

View file

@ -482,7 +482,8 @@ enum binder_deferred_state {
* @tsk task_struct for group_leader of process
* (invariant after initialized)
* @files files_struct for process
* (invariant after initialized)
* (protected by @files_lock)
* @files_lock mutex to protect @files
* @deferred_work_node: element for binder_deferred_list
* (protected by binder_deferred_lock)
* @deferred_work: bitmap of deferred work to perform
@ -530,6 +531,7 @@ struct binder_proc {
int pid;
struct task_struct *tsk;
struct files_struct *files;
struct mutex files_lock;
struct hlist_node deferred_work_node;
int deferred_work;
bool is_dead;
@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
{
struct files_struct *files = proc->files;
unsigned long rlim_cur;
unsigned long irqs;
int ret;
if (files == NULL)
return -ESRCH;
if (!lock_task_sighand(proc->tsk, &irqs))
return -EMFILE;
mutex_lock(&proc->files_lock);
if (proc->files == NULL) {
ret = -ESRCH;
goto err;
}
if (!lock_task_sighand(proc->tsk, &irqs)) {
ret = -EMFILE;
goto err;
}
rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
unlock_task_sighand(proc->tsk, &irqs);
return __alloc_fd(files, 0, rlim_cur, flags);
ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
err:
mutex_unlock(&proc->files_lock);
return ret;
}
/*
@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
static void task_fd_install(
struct binder_proc *proc, unsigned int fd, struct file *file)
{
mutex_lock(&proc->files_lock);
if (proc->files)
__fd_install(proc->files, fd, file);
mutex_unlock(&proc->files_lock);
}
/*
@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
{
int retval;
if (proc->files == NULL)
return -ESRCH;
mutex_lock(&proc->files_lock);
if (proc->files == NULL) {
retval = -ESRCH;
goto err;
}
retval = __close_fd(proc->files, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
retval == -ERESTARTNOHAND ||
retval == -ERESTART_RESTARTBLOCK))
retval = -EINTR;
err:
mutex_unlock(&proc->files_lock);
return retval;
}
@ -4627,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
ret = binder_alloc_mmap_handler(&proc->alloc, vma);
if (ret)
return ret;
mutex_lock(&proc->files_lock);
proc->files = get_files_struct(current);
mutex_unlock(&proc->files_lock);
return 0;
err_bad_arg:
@ -4651,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
spin_lock_init(&proc->outer_lock);
get_task_struct(current->group_leader);
proc->tsk = current->group_leader;
mutex_init(&proc->files_lock);
INIT_LIST_HEAD(&proc->todo);
proc->default_priority = task_nice(current);
binder_dev = container_of(filp->private_data, struct binder_device,
@ -4903,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work)
files = NULL;
if (defer & BINDER_DEFERRED_PUT_FILES) {
mutex_lock(&proc->files_lock);
files = proc->files;
if (files)
proc->files = NULL;
mutex_unlock(&proc->files_lock);
}
if (defer & BINDER_DEFERRED_FLUSH)

View file

@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf)
this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
}
static bool cache_node_is_unified(struct cacheinfo *this_leaf)
{
return of_property_read_bool(this_leaf->of_node, "cache-unified");
}
static void cache_of_override_properties(unsigned int cpu)
{
int index;
@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu)
for (index = 0; index < cache_leaves(cpu); index++) {
this_leaf = this_cpu_ci->info_list + index;
/*
* init_cache_level must setup the cache level correctly
* overriding the architecturally specified levels, so
* if type is NONE at this stage, it should be unified
*/
if (this_leaf->type == CACHE_TYPE_NOCACHE &&
cache_node_is_unified(this_leaf))
this_leaf->type = CACHE_TYPE_UNIFIED;
cache_size(this_leaf);
cache_get_line_size(this_leaf);
cache_nr_sets(this_leaf);

View file

@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
}
if (!chip->names)
devprop_gpiochip_set_names(chip);
devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));
acpi_gpiochip_request_regions(acpi_gpio);
acpi_gpiochip_scan_gpios(acpi_gpio);

View file

@ -19,30 +19,27 @@
/**
* devprop_gpiochip_set_names - Set GPIO line names using device properties
* @chip: GPIO chip whose lines should be named, if possible
* @fwnode: Property Node containing the gpio-line-names property
*
* Looks for device property "gpio-line-names" and if it exists assigns
* GPIO line names for the chip. The memory allocated for the assigned
* names belong to the underlying firmware node and should not be released
* by the caller.
*/
void devprop_gpiochip_set_names(struct gpio_chip *chip)
void devprop_gpiochip_set_names(struct gpio_chip *chip,
const struct fwnode_handle *fwnode)
{
struct gpio_device *gdev = chip->gpiodev;
const char **names;
int ret, i;
if (!chip->parent) {
dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
return;
}
ret = device_property_read_string_array(chip->parent, "gpio-line-names",
ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
NULL, 0);
if (ret < 0)
return;
if (ret != gdev->ngpio) {
dev_warn(chip->parent,
dev_warn(&gdev->dev,
"names %d do not match number of GPIOs %d\n", ret,
gdev->ngpio);
return;
@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
if (!names)
return;
ret = device_property_read_string_array(chip->parent, "gpio-line-names",
ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
names, gdev->ngpio);
if (ret < 0) {
dev_warn(chip->parent, "failed to read GPIO line names\n");
dev_warn(&gdev->dev, "failed to read GPIO line names\n");
kfree(names);
return;
}

View file

@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)
/* If the chip defines names itself, these take precedence */
if (!chip->names)
devprop_gpiochip_set_names(chip);
devprop_gpiochip_set_names(chip,
of_fwnode_handle(chip->of_node));
of_node_get(chip->of_node);

View file

@ -224,7 +224,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
return desc - &desc->gdev->descs[0];
}
void devprop_gpiochip_set_names(struct gpio_chip *chip);
void devprop_gpiochip_set_names(struct gpio_chip *chip,
const struct fwnode_handle *fwnode);
/* With descriptor prefix */

View file

@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
if (ret)
return ret;
if (!qp->qp_sec)
return 0;
mutex_lock(&real_qp->qp_sec->mutex);
ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
qp->qp_sec);

View file

@ -2085,8 +2085,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
if (ucore->inlen > sizeof(cmd)) {
if (ib_is_udata_cleared(ucore, sizeof(cmd),
ucore->inlen - sizeof(cmd)))
if (!ib_is_udata_cleared(ucore, sizeof(cmd),
ucore->inlen - sizeof(cmd)))
return -EOPNOTSUPP;
}

View file

@ -1400,7 +1400,8 @@ int ib_close_qp(struct ib_qp *qp)
spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
atomic_dec(&real_qp->usecnt);
ib_close_shared_qp_security(qp->qp_sec);
if (qp->qp_sec)
ib_close_shared_qp_security(qp->qp_sec);
kfree(qp);
return 0;

View file

@ -586,10 +586,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
ret = -EAGAIN;
goto skip_cqe;
}
if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
if (unlikely(!CQE_STATUS(hw_cqe) &&
CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
t4_set_wq_in_error(wq);
hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
goto proc_cqe;
hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
}
goto proc_cqe;
}

View file

@ -1129,7 +1129,6 @@ struct hfi1_devdata {
u16 pcie_lnkctl;
u16 pcie_devctl2;
u32 pci_msix0;
u32 pci_lnkctl3;
u32 pci_tph2;
/*

View file

@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd)
if (ret)
goto error;
ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
dd->pci_lnkctl3);
if (ret)
goto error;
ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
if (ret)
goto error;
if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
dd->pci_tph2);
if (ret)
goto error;
}
return 0;
error:
@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd)
if (ret)
goto error;
ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
&dd->pci_lnkctl3);
if (ret)
goto error;
ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
if (ret)
goto error;
if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
&dd->pci_tph2);
if (ret)
goto error;
}
return 0;
error:

View file

@ -1415,6 +1415,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
}
INIT_LIST_HEAD(&context->vma_private_list);
mutex_init(&context->vma_private_list_mutex);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
@ -1576,7 +1577,9 @@ static void mlx5_ib_vma_close(struct vm_area_struct *area)
* mlx5_ib_disassociate_ucontext().
*/
mlx5_ib_vma_priv_data->vma = NULL;
mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
list_del(&mlx5_ib_vma_priv_data->list);
mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
kfree(mlx5_ib_vma_priv_data);
}
@ -1596,10 +1599,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
return -ENOMEM;
vma_prv->vma = vma;
vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
vma->vm_private_data = vma_prv;
vma->vm_ops = &mlx5_ib_vm_ops;
mutex_lock(&ctx->vma_private_list_mutex);
list_add(&vma_prv->list, vma_head);
mutex_unlock(&ctx->vma_private_list_mutex);
return 0;
}
@ -1642,6 +1648,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
* mlx5_ib_vma_close.
*/
down_write(&owning_mm->mmap_sem);
mutex_lock(&context->vma_private_list_mutex);
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
vma = vma_private->vma;
@ -1656,6 +1663,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
list_del(&vma_private->list);
kfree(vma_private);
}
mutex_unlock(&context->vma_private_list_mutex);
up_write(&owning_mm->mmap_sem);
mmput(owning_mm);
put_task_struct(owning_process);

View file

@ -115,6 +115,8 @@ enum {
struct mlx5_ib_vma_private_data {
struct list_head list;
struct vm_area_struct *vma;
/* protect vma_private_list add/del */
struct mutex *vma_private_list_mutex;
};
struct mlx5_ib_ucontext {
@ -129,6 +131,8 @@ struct mlx5_ib_ucontext {
/* Transport Domain number */
u32 tdn;
struct list_head vma_private_list;
/* protect vma_private_list add/del */
struct mutex vma_private_list_mutex;
unsigned long upd_xlt_page;
/* protect ODP/KSM */

View file

@ -167,7 +167,7 @@ static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
reg = reg_readl(priv, REG_SPHY_CNTRL);
if (enable) {
reg |= PHY_RESET;
reg &= ~(EXT_PWR_DOWN | IDDQ_BIAS | CK25_DIS);
reg &= ~(EXT_PWR_DOWN | IDDQ_BIAS | IDDQ_GLOBAL_PWR | CK25_DIS);
reg_writel(priv, reg, REG_SPHY_CNTRL);
udelay(21);
reg = reg_readl(priv, REG_SPHY_CNTRL);

View file

@ -1875,7 +1875,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
* here forever if we consistently cannot allocate
* buffers.
*/
else if (rc == -ENOMEM)
else if (rc == -ENOMEM && budget)
rx_pkts++;
else if (rc == -EBUSY) /* partial completion */
break;
@ -1961,7 +1961,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);
rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
if (likely(rc == -EIO))
if (likely(rc == -EIO) && budget)
rx_pkts++;
else if (rc == -EBUSY) /* partial completion */
break;

View file

@ -14227,7 +14227,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
/* Reset PHY, otherwise the read DMA engine will be in a mode that
* breaks all requests to 256 bytes.
*/
if (tg3_asic_rev(tp) == ASIC_REV_57766)
if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
tg3_asic_rev(tp) == ASIC_REV_5717 ||
tg3_asic_rev(tp) == ASIC_REV_5719)
reset_phy = true;
err = tg3_restart_hw(tp, reset_phy);

View file

@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev)
for (i = 0; i < txq->bd.ring_size; i++) {
/* Initialize the BD for every fragment in the page. */
bdp->cbd_sc = cpu_to_fec16(0);
if (bdp->cbd_bufaddr &&
!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
dma_unmap_single(&fep->pdev->dev,
fec32_to_cpu(bdp->cbd_bufaddr),
fec16_to_cpu(bdp->cbd_datlen),
DMA_TO_DEVICE);
if (txq->tx_skbuff[i]) {
dev_kfree_skb_any(txq->tx_skbuff[i]);
txq->tx_skbuff[i] = NULL;

View file

@ -344,7 +344,8 @@ static int orion_mdio_probe(struct platform_device *pdev)
dev->regs + MVMDIO_ERR_INT_MASK);
} else if (dev->err_interrupt == -EPROBE_DEFER) {
return -EPROBE_DEFER;
ret = -EPROBE_DEFER;
goto out_mdio;
}
if (pdev->dev.of_node)

View file

@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
case MLX5_CMD_OP_SET_RATE_LIMIT:
case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
case MLX5_CMD_OP_QUERY_RATE_LIMIT:
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);

View file

@ -590,6 +590,7 @@ struct mlx5e_channel {
struct mlx5_core_dev *mdev;
struct mlx5e_tstamp *tstamp;
int ix;
int cpu;
};
struct mlx5e_channels {

View file

@ -71,11 +71,6 @@ struct mlx5e_channel_param {
struct mlx5e_cq_param icosq_cq;
};
static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
{
return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
}
static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
return MLX5_CAP_GEN(mdev, striding_rq) &&
@ -452,17 +447,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
int mtt_sz = mlx5e_get_wqe_mtt_sz();
int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
int node = mlx5e_get_node(c->priv, c->ix);
int i;
rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
GFP_KERNEL, node);
GFP_KERNEL, cpu_to_node(c->cpu));
if (!rq->mpwqe.info)
goto err_out;
/* We allocate more than mtt_sz as we will align the pointer */
rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
GFP_KERNEL, node);
rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
cpu_to_node(c->cpu));
if (unlikely(!rq->mpwqe.mtt_no_align))
goto err_free_wqe_info;
@ -570,7 +564,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
int err;
int i;
rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
rqp->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
&rq->wq_ctrl);
@ -636,8 +630,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
default: /* MLX5_WQ_TYPE_LINKED_LIST */
rq->wqe.frag_info =
kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
GFP_KERNEL,
mlx5e_get_node(c->priv, c->ix));
GFP_KERNEL, cpu_to_node(c->cpu));
if (!rq->wqe.frag_info) {
err = -ENOMEM;
goto err_rq_wq_destroy;
@ -1007,13 +1000,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;
@ -1060,13 +1053,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
sq->channel = c;
sq->uar_map = mdev->mlx5e_res.bfreg.map;
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;
@ -1132,13 +1125,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;
@ -1510,8 +1503,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->priv->mdev;
int err;
param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.buf_numa_node = cpu_to_node(c->cpu);
param->wq.db_numa_node = cpu_to_node(c->cpu);
param->eq_ix = c->ix;
err = mlx5e_alloc_cq_common(mdev, param, cq);
@ -1610,6 +1603,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}
static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
{
return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
}
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)
@ -1758,12 +1756,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
{
struct mlx5e_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
unsigned int irq;
int err;
int eqn;
c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
if (!c)
return -ENOMEM;
@ -1771,6 +1770,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->mdev = priv->mdev;
c->tstamp = &priv->tstamp;
c->ix = ix;
c->cpu = cpu;
c->pdev = &priv->mdev->pdev->dev;
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@ -1859,8 +1859,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_rq(&c->rq);
netif_set_xps_queue(c->netdev,
mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
}
static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@ -3554,6 +3553,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
struct sk_buff *skb,
netdev_features_t features)
{
unsigned int offset = 0;
struct udphdr *udph;
u8 proto;
u16 port;
@ -3563,7 +3563,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
proto = ip_hdr(skb)->protocol;
break;
case htons(ETH_P_IPV6):
proto = ipv6_hdr(skb)->nexthdr;
proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
break;
default:
goto out;

View file

@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
u8 actual_size;
int err;
if (!size)
return -EINVAL;
if (!fdev->mdev)
return -ENOTCONN;
@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
u8 actual_size;
int err;
if (!size)
return -EINVAL;
if (!fdev->mdev)
return -ENOTCONN;

View file

@ -316,9 +316,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_eq_table *table = &priv->eq_table;
struct irq_affinity irqdesc = {
.pre_vectors = MLX5_EQ_VEC_COMP_BASE,
};
int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
int nvec;
@ -332,10 +329,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
if (!priv->irq_info)
goto err_free_msix;
nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
nvec = pci_alloc_irq_vectors(dev->pdev,
MLX5_EQ_VEC_COMP_BASE + 1, nvec,
PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
&irqdesc);
PCI_IRQ_MSIX);
if (nvec < 0)
return nvec;
@ -621,6 +617,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
return (u64)timer_l | (u64)timer_h1 << 32;
}
static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
struct mlx5_priv *priv = &mdev->priv;
int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
return -ENOMEM;
}
cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
priv->irq_info[i].mask);
if (IS_ENABLED(CONFIG_SMP) &&
irq_set_affinity_hint(irq, priv->irq_info[i].mask))
mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
return 0;
}
static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
struct mlx5_priv *priv = &mdev->priv;
int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
irq_set_affinity_hint(irq, NULL);
free_cpumask_var(priv->irq_info[i].mask);
}
static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
{
int err;
int i;
for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
err = mlx5_irq_set_affinity_hint(mdev, i);
if (err)
goto err_out;
}
return 0;
err_out:
for (i--; i >= 0; i--)
mlx5_irq_clear_affinity_hint(mdev, i);
return err;
}
static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
{
int i;
for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
mlx5_irq_clear_affinity_hint(mdev, i);
}
int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn)
{
@ -1093,6 +1146,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_stop_eqs;
}
err = mlx5_irq_set_affinity_hints(dev);
if (err) {
dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
goto err_affinity_hints;
}
err = mlx5_init_fs(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init flow steering\n");
@ -1150,6 +1209,9 @@ err_sriov:
mlx5_cleanup_fs(dev);
err_fs:
mlx5_irq_clear_affinity_hints(dev);
err_affinity_hints:
free_comp_eqs(dev);
err_stop_eqs:
@ -1218,6 +1280,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_sriov_detach(dev);
mlx5_cleanup_fs(dev);
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
mlx5_put_uars_page(dev, priv->uar);

View file

@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
return err;
}

View file

@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
return ret_entry;
}
static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
u32 rate, u16 index)
{
u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0};
u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {0};
u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
MLX5_SET(set_rate_limit_in, in, opcode,
MLX5_CMD_OP_SET_RATE_LIMIT);
MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
MLX5_SET(set_pp_rate_limit_in, in, opcode,
MLX5_CMD_OP_SET_PP_RATE_LIMIT);
MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
entry->refcount++;
} else {
/* new rate limit */
err = mlx5_set_rate_limit_cmd(dev, rate, entry->index);
err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
if (err) {
mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
rate, err);
@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
entry->refcount--;
if (!entry->refcount) {
/* need to remove rate */
mlx5_set_rate_limit_cmd(dev, 0, entry->index);
mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
entry->rate = 0;
}
@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
/* Clear all configured rates */
for (i = 0; i < table->max_size; i++)
if (table->rl_entry[i].rate)
mlx5_set_rate_limit_cmd(dev, 0,
table->rl_entry[i].index);
mlx5_set_pp_rate_limit_cmd(dev, 0,
table->rl_entry[i].index);
kfree(dev->priv.rl_table.rl_entry);
}

View file

@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
struct mlx5e_vxlan *vxlan;
spin_lock(&vxlan_db->lock);
spin_lock_bh(&vxlan_db->lock);
vxlan = radix_tree_lookup(&vxlan_db->tree, port);
spin_unlock(&vxlan_db->lock);
spin_unlock_bh(&vxlan_db->lock);
return vxlan;
}
@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
struct mlx5e_vxlan *vxlan;
int err;
if (mlx5e_vxlan_lookup_port(priv, port))
mutex_lock(&priv->state_lock);
vxlan = mlx5e_vxlan_lookup_port(priv, port);
if (vxlan) {
atomic_inc(&vxlan->refcount);
goto free_work;
}
if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
goto free_work;
@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
goto err_delete_port;
vxlan->udp_port = port;
atomic_set(&vxlan->refcount, 1);
spin_lock_irq(&vxlan_db->lock);
spin_lock_bh(&vxlan_db->lock);
err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
spin_unlock_irq(&vxlan_db->lock);
spin_unlock_bh(&vxlan_db->lock);
if (err)
goto err_free;
@ -113,35 +118,39 @@ err_free:
err_delete_port:
mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
free_work:
mutex_unlock(&priv->state_lock);
kfree(vxlan_work);
}
static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
{
struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
struct mlx5e_vxlan *vxlan;
spin_lock_irq(&vxlan_db->lock);
vxlan = radix_tree_delete(&vxlan_db->tree, port);
spin_unlock_irq(&vxlan_db->lock);
if (!vxlan)
return;
mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
kfree(vxlan);
}
static void mlx5e_vxlan_del_port(struct work_struct *work)
{
struct mlx5e_vxlan_work *vxlan_work =
container_of(work, struct mlx5e_vxlan_work, work);
struct mlx5e_priv *priv = vxlan_work->priv;
struct mlx5e_priv *priv = vxlan_work->priv;
struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
u16 port = vxlan_work->port;
struct mlx5e_vxlan *vxlan;
bool remove = false;
__mlx5e_vxlan_core_del_port(priv, port);
mutex_lock(&priv->state_lock);
spin_lock_bh(&vxlan_db->lock);
vxlan = radix_tree_lookup(&vxlan_db->tree, port);
if (!vxlan)
goto out_unlock;
if (atomic_dec_and_test(&vxlan->refcount)) {
radix_tree_delete(&vxlan_db->tree, port);
remove = true;
}
out_unlock:
spin_unlock_bh(&vxlan_db->lock);
if (remove) {
mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
kfree(vxlan);
}
mutex_unlock(&priv->state_lock);
kfree(vxlan_work);
}
@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
struct mlx5e_vxlan *vxlan;
unsigned int port = 0;
spin_lock_irq(&vxlan_db->lock);
/* Lockless since we are the only radix-tree consumers, wq is disabled */
while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
port = vxlan->udp_port;
spin_unlock_irq(&vxlan_db->lock);
__mlx5e_vxlan_core_del_port(priv, (u16)port);
spin_lock_irq(&vxlan_db->lock);
radix_tree_delete(&vxlan_db->tree, port);
mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
kfree(vxlan);
}
spin_unlock_irq(&vxlan_db->lock);
}

View file

@ -36,6 +36,7 @@
#include "en.h"
struct mlx5e_vxlan {
atomic_t refcount;
u16 udp_port;
};

View file

@ -4164,6 +4164,7 @@ static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
{
u16 vid = 1;
int err;
err = mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, true);
@ -4176,8 +4177,19 @@ static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port)
true, false);
if (err)
goto err_port_vlan_set;
for (; vid <= VLAN_N_VID - 1; vid++) {
err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
vid, false);
if (err)
goto err_vid_learning_set;
}
return 0;
err_vid_learning_set:
for (vid--; vid >= 1; vid--)
mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
err_port_vlan_set:
mlxsw_sp_port_stp_set(mlxsw_sp_port, false);
err_port_stp_set:
@ -4187,6 +4199,12 @@ err_port_stp_set:
static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
{
u16 vid;
for (vid = VLAN_N_VID - 1; vid >= 1; vid--)
mlxsw_sp_port_vid_learning_set(mlxsw_sp_port,
vid, true);
mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1,
false, false);
mlxsw_sp_port_stp_set(mlxsw_sp_port, false);

View file

@ -77,6 +77,7 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
}
if (buffer->flags & EFX_TX_BUF_SKB) {
EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
(*pkts_compl)++;
(*bytes_compl) += buffer->skb->len;
dev_consume_skb_any((struct sk_buff *)buffer->skb);
@ -426,12 +427,14 @@ static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
struct efx_tx_buffer *buffer;
unsigned int bytes_compl = 0;
unsigned int pkts_compl = 0;
/* Work backwards until we hit the original insert pointer value */
while (tx_queue->insert_count != tx_queue->write_count) {
--tx_queue->insert_count;
buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
}
}

View file

@ -2069,7 +2069,7 @@ static struct phy_driver marvell_drivers[] = {
.flags = PHY_HAS_INTERRUPT,
.probe = marvell_probe,
.config_init = &m88e1145_config_init,
.config_aneg = &marvell_config_aneg,
.config_aneg = &m88e1101_config_aneg,
.read_status = &genphy_read_status,
.ack_interrupt = &marvell_ack_interrupt,
.config_intr = &marvell_config_intr,

View file

@ -622,6 +622,7 @@ static int ksz9031_read_status(struct phy_device *phydev)
phydev->link = 0;
if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
phydev->drv->config_intr(phydev);
return genphy_config_aneg(phydev);
}
return 0;

View file

@ -525,6 +525,7 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
pl->link_config.pause = MLO_PAUSE_AN;
pl->link_config.speed = SPEED_UNKNOWN;
pl->link_config.duplex = DUPLEX_UNKNOWN;
pl->link_config.an_enabled = true;
pl->ops = ops;
__set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
@ -948,6 +949,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
mutex_lock(&pl->state_mutex);
/* Configure the MAC to match the new settings */
linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
pl->link_config.interface = config.interface;
pl->link_config.speed = our_kset.base.speed;
pl->link_config.duplex = our_kset.base.duplex;
pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;

View file

@ -1204,6 +1204,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x907b, 10)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */
{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */

View file

@ -3105,6 +3105,11 @@ static void vxlan_config_apply(struct net_device *dev,
max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
VXLAN_HEADROOM);
if (max_mtu < ETH_MIN_MTU)
max_mtu = ETH_MIN_MTU;
if (!changelink && !conf->mtu)
dev->mtu = max_mtu;
}
if (dev->mtu > max_mtu)

View file

@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match);
static struct device_node *
tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
{
/*
* of_find_node_by_name() drops a reference, so make sure to grab one.
*/
struct device_node *np = of_node_get(padctl->dev->of_node);
struct device_node *pads, *np;
np = of_find_node_by_name(np, "pads");
if (np)
np = of_find_node_by_name(np, name);
pads = of_get_child_by_name(padctl->dev->of_node, "pads");
if (!pads)
return NULL;
np = of_get_child_by_name(pads, name);
of_node_put(pads);
return np;
}
@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
static struct device_node *
tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index)
{
/*
* of_find_node_by_name() drops a reference, so make sure to grab one.
*/
struct device_node *np = of_node_get(pad->dev.of_node);
struct device_node *np, *lanes;
np = of_find_node_by_name(np, "lanes");
if (!np)
lanes = of_get_child_by_name(pad->dev.of_node, "lanes");
if (!lanes)
return NULL;
return of_find_node_by_name(np, pad->soc->lanes[index].name);
np = of_get_child_by_name(lanes, pad->soc->lanes[index].name);
of_node_put(lanes);
return np;
}
static int
@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad,
unsigned int i;
int err;
children = of_find_node_by_name(pad->dev.of_node, "lanes");
children = of_get_child_by_name(pad->dev.of_node, "lanes");
if (!children)
return -ENODEV;
@ -444,21 +444,21 @@ static struct device_node *
tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type,
unsigned int index)
{
/*
* of_find_node_by_name() drops a reference, so make sure to grab one.
*/
struct device_node *np = of_node_get(padctl->dev->of_node);
struct device_node *ports, *np;
char *name;
np = of_find_node_by_name(np, "ports");
if (np) {
char *name;
ports = of_get_child_by_name(padctl->dev->of_node, "ports");
if (!ports)
return NULL;
name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
if (!name)
return ERR_PTR(-ENOMEM);
np = of_find_node_by_name(np, name);
kfree(name);
name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
if (!name) {
of_node_put(ports);
return ERR_PTR(-ENOMEM);
}
np = of_get_child_by_name(ports, name);
kfree(name);
of_node_put(ports);
return np;
}
@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl)
static int tegra_xusb_padctl_probe(struct platform_device *pdev)
{
struct device_node *np = of_node_get(pdev->dev.of_node);
struct device_node *np = pdev->dev.of_node;
const struct tegra_xusb_padctl_soc *soc;
struct tegra_xusb_padctl *padctl;
const struct of_device_id *match;
@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev)
int err;
/* for backwards compatibility with old device trees */
np = of_find_node_by_name(np, "pads");
np = of_get_child_by_name(np, "pads");
if (!np) {
dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n");
return tegra_xusb_padctl_legacy_probe(pdev);

View file

@ -564,9 +564,9 @@ enum qeth_cq {
};
struct qeth_ipato {
int enabled;
int invert4;
int invert6;
bool enabled;
bool invert4;
bool invert6;
struct list_head entries;
};

View file

@ -1479,9 +1479,9 @@ static int qeth_setup_card(struct qeth_card *card)
qeth_set_intial_options(card);
/* IP address takeover */
INIT_LIST_HEAD(&card->ipato.entries);
card->ipato.enabled = 0;
card->ipato.invert4 = 0;
card->ipato.invert6 = 0;
card->ipato.enabled = false;
card->ipato.invert4 = false;
card->ipato.invert6 = false;
/* init QDIO stuff */
qeth_init_qdio_info(card);
INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work);
@ -5445,6 +5445,13 @@ out:
}
EXPORT_SYMBOL_GPL(qeth_poll);
static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
{
if (!cmd->hdr.return_code)
cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
return cmd->hdr.return_code;
}
int qeth_setassparms_cb(struct qeth_card *card,
struct qeth_reply *reply, unsigned long data)
{
@ -6304,7 +6311,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
(struct qeth_checksum_cmd *)reply->param;
QETH_CARD_TEXT(card, 4, "chkdoccb");
if (cmd->hdr.return_code)
if (qeth_setassparms_inspect_rc(cmd))
return 0;
memset(chksum_cb, 0, sizeof(*chksum_cb));

View file

@ -82,7 +82,7 @@ void qeth_l3_del_vipa(struct qeth_card *, enum qeth_prot_versions, const u8 *);
int qeth_l3_add_rxip(struct qeth_card *, enum qeth_prot_versions, const u8 *);
void qeth_l3_del_rxip(struct qeth_card *card, enum qeth_prot_versions,
const u8 *);
int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *, struct qeth_ipaddr *);
void qeth_l3_update_ipato(struct qeth_card *card);
struct qeth_ipaddr *qeth_l3_get_addr_buffer(enum qeth_prot_versions);
int qeth_l3_add_ip(struct qeth_card *, struct qeth_ipaddr *);
int qeth_l3_delete_ip(struct qeth_card *, struct qeth_ipaddr *);

View file

@ -163,8 +163,8 @@ static void qeth_l3_convert_addr_to_bits(u8 *addr, u8 *bits, int len)
}
}
int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
struct qeth_ipaddr *addr)
static bool qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
struct qeth_ipaddr *addr)
{
struct qeth_ipato_entry *ipatoe;
u8 addr_bits[128] = {0, };
@ -173,6 +173,8 @@ int qeth_l3_is_addr_covered_by_ipato(struct qeth_card *card,
if (!card->ipato.enabled)
return 0;
if (addr->type != QETH_IP_TYPE_NORMAL)
return 0;
qeth_l3_convert_addr_to_bits((u8 *) &addr->u, addr_bits,
(addr->proto == QETH_PROT_IPV4)? 4:16);
@ -289,8 +291,7 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
memcpy(addr, tmp_addr, sizeof(struct qeth_ipaddr));
addr->ref_counter = 1;
if (addr->type == QETH_IP_TYPE_NORMAL &&
qeth_l3_is_addr_covered_by_ipato(card, addr)) {
if (qeth_l3_is_addr_covered_by_ipato(card, addr)) {
QETH_CARD_TEXT(card, 2, "tkovaddr");
addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
}
@ -604,6 +605,27 @@ int qeth_l3_setrouting_v6(struct qeth_card *card)
/*
* IP address takeover related functions
*/
/**
* qeth_l3_update_ipato() - Update 'takeover' property, for all NORMAL IPs.
*
* Caller must hold ip_lock.
*/
void qeth_l3_update_ipato(struct qeth_card *card)
{
struct qeth_ipaddr *addr;
unsigned int i;
hash_for_each(card->ip_htable, i, addr, hnode) {
if (addr->type != QETH_IP_TYPE_NORMAL)
continue;
if (qeth_l3_is_addr_covered_by_ipato(card, addr))
addr->set_flags |= QETH_IPA_SETIP_TAKEOVER_FLAG;
else
addr->set_flags &= ~QETH_IPA_SETIP_TAKEOVER_FLAG;
}
}
static void qeth_l3_clear_ipato_list(struct qeth_card *card)
{
struct qeth_ipato_entry *ipatoe, *tmp;
@ -615,6 +637,7 @@ static void qeth_l3_clear_ipato_list(struct qeth_card *card)
kfree(ipatoe);
}
qeth_l3_update_ipato(card);
spin_unlock_bh(&card->ip_lock);
}
@ -639,8 +662,10 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card,
}
}
if (!rc)
if (!rc) {
list_add_tail(&new->entry, &card->ipato.entries);
qeth_l3_update_ipato(card);
}
spin_unlock_bh(&card->ip_lock);
@ -663,6 +688,7 @@ void qeth_l3_del_ipato_entry(struct qeth_card *card,
(proto == QETH_PROT_IPV4)? 4:16) &&
(ipatoe->mask_bits == mask_bits)) {
list_del(&ipatoe->entry);
qeth_l3_update_ipato(card);
kfree(ipatoe);
}
}

View file

@ -370,8 +370,8 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
struct qeth_ipaddr *addr;
int i, rc = 0;
bool enable;
int rc = 0;
if (!card)
return -EINVAL;
@ -384,25 +384,18 @@ static ssize_t qeth_l3_dev_ipato_enable_store(struct device *dev,
}
if (sysfs_streq(buf, "toggle")) {
card->ipato.enabled = (card->ipato.enabled)? 0 : 1;
} else if (sysfs_streq(buf, "1")) {
card->ipato.enabled = 1;
hash_for_each(card->ip_htable, i, addr, hnode) {
if ((addr->type == QETH_IP_TYPE_NORMAL) &&
qeth_l3_is_addr_covered_by_ipato(card, addr))
addr->set_flags |=
QETH_IPA_SETIP_TAKEOVER_FLAG;
}
} else if (sysfs_streq(buf, "0")) {
card->ipato.enabled = 0;
hash_for_each(card->ip_htable, i, addr, hnode) {
if (addr->set_flags &
QETH_IPA_SETIP_TAKEOVER_FLAG)
addr->set_flags &=
~QETH_IPA_SETIP_TAKEOVER_FLAG;
}
} else
enable = !card->ipato.enabled;
} else if (kstrtobool(buf, &enable)) {
rc = -EINVAL;
goto out;
}
if (card->ipato.enabled != enable) {
card->ipato.enabled = enable;
spin_lock_bh(&card->ip_lock);
qeth_l3_update_ipato(card);
spin_unlock_bh(&card->ip_lock);
}
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
@ -428,20 +421,27 @@ static ssize_t qeth_l3_dev_ipato_invert4_store(struct device *dev,
const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
bool invert;
int rc = 0;
if (!card)
return -EINVAL;
mutex_lock(&card->conf_mutex);
if (sysfs_streq(buf, "toggle"))
card->ipato.invert4 = (card->ipato.invert4)? 0 : 1;
else if (sysfs_streq(buf, "1"))
card->ipato.invert4 = 1;
else if (sysfs_streq(buf, "0"))
card->ipato.invert4 = 0;
else
if (sysfs_streq(buf, "toggle")) {
invert = !card->ipato.invert4;
} else if (kstrtobool(buf, &invert)) {
rc = -EINVAL;
goto out;
}
if (card->ipato.invert4 != invert) {
card->ipato.invert4 = invert;
spin_lock_bh(&card->ip_lock);
qeth_l3_update_ipato(card);
spin_unlock_bh(&card->ip_lock);
}
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@ -607,20 +607,27 @@ static ssize_t qeth_l3_dev_ipato_invert6_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
bool invert;
int rc = 0;
if (!card)
return -EINVAL;
mutex_lock(&card->conf_mutex);
if (sysfs_streq(buf, "toggle"))
card->ipato.invert6 = (card->ipato.invert6)? 0 : 1;
else if (sysfs_streq(buf, "1"))
card->ipato.invert6 = 1;
else if (sysfs_streq(buf, "0"))
card->ipato.invert6 = 0;
else
if (sysfs_streq(buf, "toggle")) {
invert = !card->ipato.invert6;
} else if (kstrtobool(buf, &invert)) {
rc = -EINVAL;
goto out;
}
if (card->ipato.invert6 != invert) {
card->ipato.invert6 = invert;
spin_lock_bh(&card->ip_lock);
qeth_l3_update_ipato(card);
spin_unlock_bh(&card->ip_lock);
}
out:
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}

View file

@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
return req;
for_each_bio(bio) {
ret = blk_rq_append_bio(req, bio);
struct bio *bounce_bio = bio;
ret = blk_rq_append_bio(req, &bounce_bio);
if (ret)
return ERR_PTR(ret);
}

View file

@ -348,7 +348,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
mutex_lock(&buffer->lock);
list_for_each_entry(a, &buffer->attachments, list) {
dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
DMA_BIDIRECTIONAL);
direction);
}
mutex_unlock(&buffer->lock);
@ -370,7 +370,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
mutex_lock(&buffer->lock);
list_for_each_entry(a, &buffer->attachments, list) {
dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
DMA_BIDIRECTIONAL);
direction);
}
mutex_unlock(&buffer->lock);

View file

@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
" %d i: %d bio: %p, allocating another"
" bio\n", bio->bi_vcnt, i, bio);
rc = blk_rq_append_bio(req, bio);
rc = blk_rq_append_bio(req, &bio);
if (rc) {
pr_err("pSCSI: failed to append bio\n");
goto fail;
@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
}
if (bio) {
rc = blk_rq_append_bio(req, bio);
rc = blk_rq_append_bio(req, &bio);
if (rc) {
pr_err("pSCSI: failed to append bio\n");
goto fail;

View file

@ -1764,7 +1764,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
{
struct n_tty_data *ldata = tty->disc_data;
if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) {
if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) {
bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
ldata->line_start = ldata->read_tail;
if (!L_ICANON(tty) || !read_cnt(ldata)) {
@ -2427,7 +2427,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
case TIOCINQ:
down_write(&tty->termios_rwsem);
if (L_ICANON(tty))
if (L_ICANON(tty) && !L_EXTPROC(tty))
retval = inq_canon(ldata);
else
retval = read_cnt(ldata);

View file

@ -446,7 +446,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_string);
* Callers other than flush_to_ldisc() need to exclude the kworker
* from concurrent use of the line discipline, see paste_selection().
*
* Returns the number of bytes not processed
* Returns the number of bytes processed
*/
int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p,
char *f, int count)

View file

@ -251,7 +251,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev)
if (ret)
goto err_mux;
ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi");
ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi");
if (ulpi_node) {
phy_node = of_get_next_available_child(ulpi_node, NULL);
ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy");

View file

@ -1007,7 +1007,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
case USB_SSP_CAP_TYPE:
ssp_cap = (struct usb_ssp_cap_descriptor *)buffer;
ssac = (le32_to_cpu(ssp_cap->bmAttributes) &
USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1;
USB_SSP_SUBLINK_SPEED_ATTRIBS);
if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac))
dev->bos->ssp_cap = ssp_cap;
break;

View file

@ -57,10 +57,11 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Microsoft LifeCam-VX700 v2.0 */
{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
/* Logitech HD Pro Webcams C920, C920-C and C930e */
/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
{ USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT },
/* Logitech ConferenceCam CC3000e */
{ USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT },
@ -154,6 +155,9 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
{ USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
/* ELSA MicroLink 56K */
{ USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
/* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
{ USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },

View file

@ -189,6 +189,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
xhci->quirks |= XHCI_TRUST_TX_LENGTH;
xhci->quirks |= XHCI_BROKEN_STREAMS;
}
if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
pdev->device == 0x0014)
xhci->quirks |= XHCI_TRUST_TX_LENGTH;
if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
pdev->device == 0x0015)
xhci->quirks |= XHCI_RESET_ON_RESUME;

View file

@ -1017,6 +1017,7 @@ static const struct usb_device_id id_table_combined[] = {
.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) },
{ USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) },
{ USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) },
{ } /* Terminating entry */
};

Some files were not shown because too many files have changed in this diff Show more