Commit a092869e authored by Dan Williams's avatar Dan Williams Committed by Greg Kroah-Hartman
Browse files

x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()



commit ec6347bb43395cb92126788a1a5b25302543f815 upstream.

In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.

Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:

  On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
  >
  > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
  > >
  > > However now I see that copy_user_generic() works for the wrong reason.
  > > It works because the exception on the source address due to poison
  > > looks no different than a write fault on the user address to the
  > > caller, it's still just a short copy. So it makes copy_to_user() work
  > > for the wrong reason relative to the name.
  >
  > Right.
  >
  > And it won't work that way on other architectures. On x86, we have a
  > generic function that can take faults on either side, and we use it
  > for both cases (and for the "in_user" case too), but that's an
  > artifact of the architecture oddity.
  >
  > In fact, it's probably wrong even on x86 - because it can hide bugs -
  > but writing those things is painful enough that everybody prefers
  > having just one function.

Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().

Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.

One side-effect of this reorganization is that separating copy_mc_64.S
to its own file means that perf no longer needs to track dependencies
for its memcpy_64.S benchmarks.

 [ bp: Massage a bit. ]
Signed-off-by: default avatarDan Williams <dan.j.williams@intel.com>
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
Reviewed-by: default avatarTony Luck <tony.luck@intel.com>
Acked-by: default avatarMichael Ellerman <mpe@ellerman.id.au>
Cc: <stable@vger.kernel.org>
Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com

Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 18703f74
......@@ -135,7 +135,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UACCESS_MCSAFE if PPC64
select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
......
......@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
#define __HAVE_ARCH_MEMCPY_MCSAFE
extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
......
......@@ -436,6 +436,32 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_generic(void *to, const void *from, unsigned long size);
static inline unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned long size)
{
return copy_mc_generic(to, from, size);
}
#define copy_mc_to_kernel copy_mc_to_kernel
static inline unsigned long __must_check
copy_mc_to_user(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = copy_mc_generic((void *)to, from, n);
prevent_write_to_user(to, n);
}
}
return n;
}
#endif
#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
......@@ -524,20 +550,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
return ret;
}
static __always_inline unsigned long __must_check
copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = memcpy_mcsafe((void *)to, from, n);
prevent_write_to_user(to, n);
}
}
return n;
}
unsigned long __arch_clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
......
......@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o memcpy_mcsafe_64.o
memcpy_64.o copy_mc_64.o
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
......
......@@ -50,7 +50,7 @@ err3; stb r0,0(r3)
blr
_GLOBAL(memcpy_mcsafe)
_GLOBAL(copy_mc_generic)
mr r7,r5
cmpldi r5,16
blt .Lshort_copy
......@@ -239,4 +239,4 @@ err1; stb r0,0(r3)
15: li r3,0
blr
EXPORT_SYMBOL_GPL(memcpy_mcsafe);
EXPORT_SYMBOL_GPL(copy_mc_generic);
......@@ -75,7 +75,7 @@ config X86
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_COPY_MC if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
......
......@@ -59,7 +59,7 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.
config MCSAFE_TEST
config COPY_MC_TEST
def_bool n
config EFI_PGT_DUMP
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _COPY_MC_TEST_H_
#define _COPY_MC_TEST_H_
#ifndef __ASSEMBLY__
#ifdef CONFIG_COPY_MC_TEST
extern unsigned long copy_mc_test_src;
extern unsigned long copy_mc_test_dst;
static inline void copy_mc_inject_src(void *addr)
{
if (addr)
copy_mc_test_src = (unsigned long) addr;
else
copy_mc_test_src = ~0UL;
}
static inline void copy_mc_inject_dst(void *addr)
{
if (addr)
copy_mc_test_dst = (unsigned long) addr;
else
copy_mc_test_dst = ~0UL;
}
#else /* CONFIG_COPY_MC_TEST */
static inline void copy_mc_inject_src(void *addr)
{
}
static inline void copy_mc_inject_dst(void *addr)
{
}
#endif /* CONFIG_COPY_MC_TEST */
#else /* __ASSEMBLY__ */
#include <asm/export.h>
#ifdef CONFIG_COPY_MC_TEST
.macro COPY_MC_TEST_CTL
.pushsection .data
.align 8
.globl copy_mc_test_src
copy_mc_test_src:
.quad 0
EXPORT_SYMBOL_GPL(copy_mc_test_src)
.globl copy_mc_test_dst
copy_mc_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(copy_mc_test_dst)
.popsection
.endm
.macro COPY_MC_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp copy_mc_test_src, %r9
ja \target
.endm
.macro COPY_MC_TEST_DST reg count target
leaq \count(\reg), %r9
cmp copy_mc_test_dst, %r9
ja \target
.endm
#else
.macro COPY_MC_TEST_CTL
.endm
.macro COPY_MC_TEST_SRC reg count target
.endm
.macro COPY_MC_TEST_DST reg count target
.endm
#endif /* CONFIG_COPY_MC_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _COPY_MC_TEST_H_ */
......@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
extern int mce_p5_enabled;
#ifdef CONFIG_ARCH_HAS_COPY_MC
extern void enable_copy_mc_fragile(void);
unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
#else
static inline void enable_copy_mc_fragile(void)
{
}
#endif
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MCSAFE_TEST_H_
#define _MCSAFE_TEST_H_
#ifndef __ASSEMBLY__
#ifdef CONFIG_MCSAFE_TEST
extern unsigned long mcsafe_test_src;
extern unsigned long mcsafe_test_dst;
static inline void mcsafe_inject_src(void *addr)
{
if (addr)
mcsafe_test_src = (unsigned long) addr;
else
mcsafe_test_src = ~0UL;
}
static inline void mcsafe_inject_dst(void *addr)
{
if (addr)
mcsafe_test_dst = (unsigned long) addr;
else
mcsafe_test_dst = ~0UL;
}
#else /* CONFIG_MCSAFE_TEST */
static inline void mcsafe_inject_src(void *addr)
{
}
static inline void mcsafe_inject_dst(void *addr)
{
}
#endif /* CONFIG_MCSAFE_TEST */
#else /* __ASSEMBLY__ */
#include <asm/export.h>
#ifdef CONFIG_MCSAFE_TEST
.macro MCSAFE_TEST_CTL
.pushsection .data
.align 8
.globl mcsafe_test_src
mcsafe_test_src:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_src)
.globl mcsafe_test_dst
mcsafe_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_dst)
.popsection
.endm
.macro MCSAFE_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_src, %r9
ja \target
.endm
.macro MCSAFE_TEST_DST reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_dst, %r9
ja \target
.endm
#else
.macro MCSAFE_TEST_CTL
.endm
.macro MCSAFE_TEST_SRC reg count target
.endm
.macro MCSAFE_TEST_DST reg count target
.endm
#endif /* CONFIG_MCSAFE_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _MCSAFE_TEST_H_ */
......@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);
#endif
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
size_t cnt);
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
/**
* memcpy_mcsafe - copy memory with indication if a machine check happened
*
* @dst: destination address
* @src: source address
* @cnt: number of bytes to copy
*
* Low level memory copy function that catches machine checks
* We only call into the "safe" function on systems that can
* actually do machine check recovery. Everyone else can just
* use memcpy().
*
* Return 0 for success, or number of bytes not copied if there was an
* exception.
*/
static __always_inline __must_check unsigned long
memcpy_mcsafe(void *dst, const void *src, size_t cnt)
{
#ifdef CONFIG_X86_MCE
if (static_branch_unlikely(&mcsafe_key))
return __memcpy_mcsafe(dst, src, cnt);
else
#endif
memcpy(dst, src, cnt);
return 0;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
......
......@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned len);
#define copy_mc_to_kernel copy_mc_to_kernel
unsigned long __must_check
copy_mc_to_user(void *to, const void *from, unsigned len);
#endif
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
......
......@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
return ret;
}
static __always_inline __must_check unsigned long
copy_to_user_mcsafe(void *to, const void *from, unsigned len)
{
unsigned long ret;
__uaccess_begin();
/*
* Note, __memcpy_mcsafe() is explicitly used since it can
* handle exceptions / faults. memcpy_mcsafe() may fall back to
* memcpy() which lacks this handling.
*/
ret = __memcpy_mcsafe(to, from, len);
__uaccess_end();
return ret;
}
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
......@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */
......@@ -40,7 +40,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/task_work.h>
#include <linux/hardirq.h>
......@@ -2122,7 +2121,7 @@ void mce_disable_bank(int bank)
and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
* mce=recovery force enable memcpy_mcsafe()
* mce=recovery force enable copy_mc_fragile()
*/
static int __init mcheck_enable(char *str)
{
......@@ -2730,13 +2729,10 @@ static void __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void) { }
#endif
DEFINE_STATIC_KEY_FALSE(mcsafe_key);
EXPORT_SYMBOL_GPL(mcsafe_key);
static int __init mcheck_late_init(void)
{
if (mca_cfg.recovery)
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
mcheck_debugfs_init();
......
......@@ -8,6 +8,7 @@
#include <asm/hpet.h>
#include <asm/setup.h>
#include <asm/mce.h>
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
......@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
amd_disable_seq_and_redirect_scrub);
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
#include <linux/jump_label.h>
#include <asm/string_64.h>
/* Ivy Bridge, Haswell, Broadwell */
static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
{
......@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
pci_read_config_dword(pdev, 0x84, &capid0);
if (capid0 & 0x10)
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
}
/* Skylake */
......@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
* enabled, so memory machine check recovery is also enabled.
*/
if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
}
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
......@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
#endif
#endif
bool x86_apple_machine;
EXPORT_SYMBOL(x86_apple_machine);
......
......@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
#include <linux/jump_label.h>
#include <linux/uaccess.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/types.h>
#include <asm/mce.h>
#ifdef CONFIG_X86_MCE
/*
* See COPY_MC_TEST for self-test of the copy_mc_fragile()
* implementation.
*/
static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
void enable_copy_mc_fragile(void)
{
static_branch_inc(&copy_mc_fragile_key);
}
#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
/*
* Similar to copy_user_handle_tail, probe for the write fault point, or
* source exception point.
*/
__visible notrace unsigned long
copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
{
for (; len; --len, to++, from++)
if (copy_mc_fragile(to, from, 1))
break;
return len;
}
#else
/*
* No point in doing careful copying, or consulting a static key when
* there is no #MC handler in the CONFIG_X86_MCE=n case.
*/
void enable_copy_mc_fragile(void)
{
}
#define copy_mc_fragile_enabled (0)
#endif
/**
* copy_mc_to_kernel - memory copy that handles source exceptions
*
* @dst: destination address
* @src: source address
* @len: number of bytes to copy
*
* Call into the 'fragile' version on systems that have trouble
* actually do machine check recovery. Everyone else can just
* use memcpy().
*
* Return 0 for success, or number of bytes not copied if there was an
* exception.
*/
unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
{
if (copy_mc_fragile_enabled)
return copy_mc_fragile(dst, src, len);
memcpy(dst, src, len);
return 0;
}
EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
{
unsigned long ret;
if (!copy_mc_fragile_enabled)
return copy_user_generic(dst, src, len);
__uaccess_begin();
ret = copy_mc_fragile(dst, src, len);
__uaccess_end();
return ret;
}
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
#include <linux/linkage.h>
#include <asm/copy_mc_test.h>
#include <asm/export.h>
#include <asm/asm.h>
#ifndef CONFIG_UML
#ifdef CONFIG_X86_MCE
COPY_MC_TEST_CTL
/*
* copy_mc_fragile - copy memory with indication if an exception / fault happened
*
* The 'fragile' version is opted into by platform quirks and takes
* pains to avoid unrecoverable corner cases like 'fast-string'
* instruction sequences, and consuming poison across a cacheline
* boundary. The non-fragile version is equivalent to memcpy()
* regardless of CPU machine-check-recovery capability.
*/
SYM_FUNC_START(copy_mc_fragile)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
/* Check for bad alignment of source */
testl $7, %esi
/* Already aligned */
jz .L_8byte_aligned
/* Copy one byte at a time until source is 8-byte aligned */
movl %esi, %ecx
andl $7, %ecx
subl $8, %ecx
negl %ecx
subl %ecx, %edx
.L_read_leading_bytes:
movb (%rsi), %al
COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
.L_write_leading_bytes:
movb %al, (%rdi)
incq %rsi
incq %rdi
decl %ecx
jnz .L_read_leading_bytes
.L_8byte_aligned:
movl %edx, %ecx
andl $7, %edx
shrl $3, %ecx
jz .L_no_whole_words
.L_read_words:
movq (%rsi), %r8
COPY_MC_TEST_SRC %rsi 8 .E_read_words
COPY_MC_TEST_DST %rdi 8 .E_write_words
.L_write_words:
movq %r8, (%rdi)
addq $8, %rsi
addq $8, %rdi
decl %ecx
jnz .L_read_words
/* Any trailing bytes? */
.L_no_whole_words:
andl %edx, %edx
jz .L_done_memcpy_trap