diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5b773b34768d12..906f568b9a9e1a 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -74,7 +74,7 @@ export BITS
 #
 #    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383
 #
-KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
+KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-avx2 -O3 -fno-tree-vectorize -march=westmere -mpopcnt -fivopts -fmodulo-sched
 
 KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json
 KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 61e991507353eb..d931f3f5258931 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -544,7 +544,7 @@
 #define HWP_MAX_PERF(x)			((x & 0xff) << 8)
 #define HWP_DESIRED_PERF(x)		((x & 0xff) << 16)
 #define HWP_ENERGY_PERF_PREFERENCE(x)	(((unsigned long long) x & 0xff) << 24)
-#define HWP_EPP_PERFORMANCE		0x00
+#define HWP_EPP_PERFORMANCE		0x01
 #define HWP_EPP_BALANCE_PERFORMANCE	0x80
 #define HWP_EPP_BALANCE_POWERSAVE	0xC0
 #define HWP_EPP_POWERSAVE		0xFF
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 92f3664dd933b1..a9bbadb21d9ed5 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -245,6 +245,7 @@ extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
 
 /* Interface to set priority of a cpu */
 void sched_set_itmt_core_prio(int prio, int core_cpu);
+void sched_set_itmt_power_ratio(int power_ratio, int core_cpu);
 
 /* Interface to notify scheduler that system supports ITMT */
 int sched_set_itmt_support(void);
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
index 30b1d63b97f3a1..fe0f6e2bb29765 100644
--- a/arch/x86/kernel/cpu/intel_epb.c
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -166,6 +166,10 @@ static ssize_t energy_perf_bias_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 
+	/* update the ITMT scheduler logic to use the power policy data */
+	/* scale val (0..15) by 2 so the ratio covers 226 - 256 */
+	sched_set_itmt_power_ratio(256 - val * 2, cpu);
+
 	return count;
 }
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 51b805c727fc8f..81e7cf72e83d7d 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -25,6 +25,7 @@ static DEFINE_MUTEX(itmt_update_mutex);
 
 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio);
 
 /* Boolean to track if system has ITMT capabilities */
 static bool __read_mostly sched_itmt_capable;
@@ -159,7 +160,12 @@ void sched_clear_itmt_support(void)
 
 int arch_asym_cpu_priority(int cpu)
 {
-	return per_cpu(sched_core_priority, cpu);
+	int power_ratio = per_cpu(sched_power_ratio, cpu);
+
+	/* a power ratio of 0 (uninitialized) is assumed to be maximum */
+	if (power_ratio == 0)
+		power_ratio = 256 - 2 * 6;
+	return per_cpu(sched_core_priority, cpu) * power_ratio / 256;
 }
 
 /**
@@ -180,3 +186,24 @@ void sched_set_itmt_core_prio(int prio, int cpu)
 {
 	per_cpu(sched_core_priority, cpu) = prio;
 }
+
+/**
+ * sched_set_itmt_power_ratio() - Set a power scaling ratio for a core
+ * @power_ratio: The power scaling ratio [1..256] for the core
+ * @core_cpu: The cpu number associated with the core
+ *
+ * Set a scaling of the cpu performance based on long-term power
+ * settings (like EPB).
+ *
+ * Note this sets the policy, not the actual dynamic frequency;
+ * the frequency will rise on its own as workloads run on a core.
+ */
+void sched_set_itmt_power_ratio(int power_ratio, int core_cpu)
+{
+	int cpu;
+
+	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+		per_cpu(sched_power_ratio, cpu) = power_ratio;
+	}
+}
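The two itmt.c hunks above combine to: effective priority = core_prio * (256 - 2 * EPB) / 256, with EPB 0..15 giving ratios 256 down to 226, and the 256 - 2 * 6 fallback matching the default EPB of 6. A minimal userspace sketch of that arithmetic (effective_priority() and the sample core priority of 255 are invented for illustration):

#include <stdio.h>

/*
 * Mirror of the scaling above: EPB 0 (performance) .. 15 (powersave)
 * maps to a power ratio of 256 .. 226, which then scales the
 * CPPC-derived core priority; 256 - 2 * 6 = 244 corresponds to the
 * default EPB of 6 assumed for uninitialized CPUs.
 */
static int effective_priority(int core_prio, int epb)
{
	int power_ratio = 256 - epb * 2;

	return core_prio * power_ratio / 256;
}

int main(void)
{
	int epb;

	for (epb = 0; epb <= 15; epb++)
		printf("EPB %2d -> effective priority %d of 255\n",
		       epb, effective_priority(255, epb));
	return 0;
}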
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index dfe6847fd99e5e..8bc41b2ac6c3a3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1592,6 +1592,9 @@ unsigned long calibrate_delay_is_known(void)
 	if (!constant_tsc || !mask)
 		return 0;
 
+	if (cpu != 0)
+		return cpu_data(0).loops_per_jiffy;
+
 	sibling = cpumask_any_but(mask, cpu);
 	if (sibling < nr_cpu_ids)
 		return cpu_data(sibling).loops_per_jiffy;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1af30e3472cdd9..bea10effa18f3a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8675,4 +8675,4 @@ static int __init vmx_init(void)
 	kvm_x86_vendor_exit();
 	return r;
 }
-module_init(vmx_init);
+late_initcall(vmx_init);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ac52255fab01f4..eae6d65b901464 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -756,9 +756,9 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 	if (!printk_ratelimit())
 		return;
 
-	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx cpu %i",
 		loglvl, tsk->comm, task_pid_nr(tsk), address,
-		(void *)regs->ip, (void *)regs->sp, error_code);
+		(void *)regs->ip, (void *)regs->sp, error_code, raw_smp_processor_id());
 
 	print_vma_addr(KERN_CONT " in ", regs->ip);
diff --git a/block/early-lookup.c b/block/early-lookup.c
index 3fb57f7d2b1276..243ad0ca102121 100644
--- a/block/early-lookup.c
+++ b/block/early-lookup.c
@@ -5,6 +5,7 @@
  */
 #include <linux/blkdev.h>
 #include <linux/ctype.h>
+#include <linux/delay.h>
 
 struct uuidcmp {
 	const char *uuid;
@@ -243,8 +244,18 @@ static int __init devt_from_devnum(const char *name, dev_t *devt)
  */
 int __init early_lookup_bdev(const char *name, dev_t *devt)
 {
-	if (strncmp(name, "PARTUUID=", 9) == 0)
-		return devt_from_partuuid(name + 9, devt);
+	if (strncmp(name, "PARTUUID=", 9) == 0) {
+		int res;
+		int needtowait = 40 << 1;
+
+		res = devt_from_partuuid(name + 9, devt);
+		if (!res)
+			return res;
+		while (res && needtowait) {
+			msleep(500);
+			res = devt_from_partuuid(name + 9, devt);
+			needtowait--;
+		}
+		return res;
+	}
 	if (strncmp(name, "PARTLABEL=", 10) == 0)
 		return devt_from_partlabel(name + 10, devt);
 	if (strncmp(name, "/dev/", 5) == 0)
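The PARTUUID branch above now retries the lookup 40<<1 = 80 times at 500 ms intervals, giving a slow-probing root device up to roughly 40 seconds to appear instead of failing on the first miss. A standalone sketch of the same bounded-retry shape, assuming a hypothetical try_lookup() in place of devt_from_partuuid():

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* try_lookup() stands in for devt_from_partuuid(); here the "device"
 * appears on the fifth attempt. */
static bool try_lookup(void)
{
	static int attempts;

	return ++attempts >= 5;
}

int main(void)
{
	struct timespec half_sec = { 0, 500 * 1000 * 1000 };
	int needtowait = 40 << 1;	/* 80 rounds x 500 ms ~= 40 s cap */
	bool found = try_lookup();

	while (!found && needtowait) {
		nanosleep(&half_sec, NULL);	/* msleep(500) equivalent */
		found = try_lookup();
		needtowait--;
	}
	printf("device %s, %d rounds unused\n",
	       found ? "found" : "still missing", needtowait);
	return 0;
}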
diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c
index c3f9938e1ad27f..e77478e064d8f6 100644
--- a/crypto/kdf_sp800108.c
+++ b/crypto/kdf_sp800108.c
@@ -149,7 +149,7 @@ static int __init crypto_kdf108_init(void)
 static void __exit crypto_kdf108_exit(void)
 {
 }
 
-module_init(crypto_kdf108_init);
+late_initcall(crypto_kdf108_init);
 module_exit(crypto_kdf108_exit);
 
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index 70af3fbbebe54c..dab3d50896351a 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -1549,7 +1549,7 @@ void acpi_os_release_lock(acpi_spinlock lockp, acpi_cpu_flags not_used)
 acpi_status
 acpi_os_create_cache(char *name, u16 size, u16 depth, acpi_cache_t **cache)
 {
-	*cache = kmem_cache_create(name, size, 0, 0, NULL);
+	*cache = kmem_cache_create(name, size, 0, SLAB_HWCACHE_ALIGN, NULL);
 	if (*cache == NULL)
 		return AE_ERROR;
 	else
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index fdfa7b2662180b..13229e9341081e 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -34,14 +34,14 @@
 #include "libata.h"
 
 static int ahci_skip_host_reset;
-int ahci_ignore_sss;
+int ahci_ignore_sss = 1;
 EXPORT_SYMBOL_GPL(ahci_ignore_sss);
 
 module_param_named(skip_host_reset, ahci_skip_host_reset, int, 0444);
 MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip)");
 
 module_param_named(ignore_sss, ahci_ignore_sss, int, 0444);
-MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore)");
+MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore [default])");
 
 static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
 			unsigned hints);
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index c6664a78796979..413b0aa5c19d52 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -471,6 +471,8 @@ static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv,
 static char fw_path_para[256];
 static const char * const fw_path[] = {
 	fw_path_para,
+	"/etc/firmware/" UTS_RELEASE,
+	"/etc/firmware",
 	"/lib/firmware/updates/" UTS_RELEASE,
 	"/lib/firmware/updates",
 	"/lib/firmware/" UTS_RELEASE,
diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c
index b4330a01a566bf..e41cd5c6885530 100644
--- a/drivers/clocksource/acpi_pm.c
+++ b/drivers/clocksource/acpi_pm.c
@@ -208,13 +208,16 @@ static int verify_pmtmr_rate(void)
 static int __init init_acpi_pm_clocksource(void)
 {
 	u64 value1, value2;
-	unsigned int i, j = 0;
+	unsigned int i, j = 0, checks = 1;
 
 	if (!pmtmr_ioport)
 		return -ENODEV;
 
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		checks = ACPI_PM_MONOTONICITY_CHECKS;
+
 	/* "verify" this timing source: */
-	for (j = 0; j < ACPI_PM_MONOTONICITY_CHECKS; j++) {
+	for (j = 0; j < checks; j++) {
 		udelay(100 * j);
 		value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm);
 		for (i = 0; i < ACPI_PM_READ_CHECKS; i++) {
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 400337f3b572da..d158197677c908 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -376,6 +376,13 @@ static void intel_pstate_set_itmt_prio(int cpu)
 	 * update them at any time after it has been called.
 	 */
 	sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
+	/*
+	 * On some systems with overclocking enabled, CPPC.highest_perf is
+	 * hardcoded to 0xff. In that case CPPC.highest_perf cannot be used
+	 * to enable ITMT, so use MSR_HWP_CAPABILITIES bits [8:0] instead.
+	 */
+	if (cppc_perf.highest_perf == 0xff)
+		cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached));
 
 	if (max_highest_perf <= min_highest_perf) {
 		if (cppc_perf.highest_perf > max_highest_perf)
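For reference, HWP_HIGHEST_PERF() extracts the low byte of MSR_HWP_CAPABILITIES, which is what the 0xff fallback above substitutes for the CPPC value. A decode sketch of that fallback; the MSR sample value is made up and the macros are simplified mirrors of the kernel's:

#include <stdint.h>
#include <stdio.h>

/* Simplified mirrors of the msr-index.h accessors for MSR_HWP_CAPABILITIES. */
#define HWP_HIGHEST_PERF(cap)		((cap) & 0xff)
#define HWP_GUARANTEED_PERF(cap)	(((cap) >> 8) & 0xff)

int main(void)
{
	uint64_t msr = 0x00001628;	/* invented sample MSR contents */
	unsigned int cppc_highest = 0xff;	/* overclocked sentinel */

	/* Same shape as the patch: distrust 0xff, fall back to the MSR. */
	if (cppc_highest == 0xff)
		cppc_highest = HWP_HIGHEST_PERF(msr);
	printf("guaranteed 0x%x, highest used for ITMT 0x%x\n",
	       (unsigned int)HWP_GUARANTEED_PERF(msr), cppc_highest);
	return 0;
}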
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 524ed143f875d3..fb0e8d5db68982 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -575,7 +575,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -583,7 +583,7 @@
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 33,
-		.target_residency = 100,
+		.target_residency = 900,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -591,7 +591,7 @@
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 133,
-		.target_residency = 400,
+		.target_residency = 1000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -599,7 +599,7 @@
 		.desc = "MWAIT 0x32",
 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 166,
-		.target_residency = 500,
+		.target_residency = 1500,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -607,7 +607,7 @@
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 300,
-		.target_residency = 900,
+		.target_residency = 2000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -615,7 +615,7 @@
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 600,
-		.target_residency = 1800,
+		.target_residency = 5000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -623,7 +623,7 @@
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 2600,
-		.target_residency = 7700,
+		.target_residency = 9000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -643,7 +643,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -651,7 +651,7 @@
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 40,
-		.target_residency = 100,
+		.target_residency = 1000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -659,7 +659,7 @@
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 133,
-		.target_residency = 400,
+		.target_residency = 1000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -667,7 +667,7 @@
 		.desc = "MWAIT 0x32",
 		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 166,
-		.target_residency = 500,
+		.target_residency = 2000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -675,7 +675,7 @@
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 300,
-		.target_residency = 900,
+		.target_residency = 4000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -683,7 +683,7 @@
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 600,
-		.target_residency = 1800,
+		.target_residency = 7000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -691,7 +691,7 @@
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 2600,
-		.target_residency = 7700,
+		.target_residency = 9000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -712,7 +712,7 @@ static struct cpuidle_state skl_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 120,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -720,7 +720,7 @@
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 70,
-		.target_residency = 100,
+		.target_residency = 1000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -728,7 +728,7 @@
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 85,
-		.target_residency = 200,
+		.target_residency = 600,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -736,7 +736,7 @@
 		.desc = "MWAIT 0x33",
 		.flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 124,
-		.target_residency = 800,
+		.target_residency = 3000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -744,7 +744,7 @@
 		.desc = "MWAIT 0x40",
 		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 200,
-		.target_residency = 800,
+		.target_residency = 3200,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -752,7 +752,7 @@
 		.desc = "MWAIT 0x50",
 		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 480,
-		.target_residency = 5000,
+		.target_residency = 9000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -760,7 +760,7 @@
 		.desc = "MWAIT 0x60",
 		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
 		.exit_latency = 890,
-		.target_residency = 5000,
+		.target_residency = 9000,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -781,7 +781,7 @@ static struct cpuidle_state skx_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 10,
-		.target_residency = 20,
+		.target_residency = 300,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -810,7 +810,7 @@ static struct cpuidle_state icx_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 4,
-		.target_residency = 4,
+		.target_residency = 40,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -818,7 +818,7 @@
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 170,
-		.target_residency = 600,
+		.target_residency = 900,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
@@ -968,7 +968,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = {
 		.desc = "MWAIT 0x01",
 		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
 		.exit_latency = 2,
-		.target_residency = 4,
+		.target_residency = 40,
 		.enter = &intel_idle,
 		.enter_s2idle = intel_idle_s2idle, },
 	{
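These raised target_residency values matter because a cpuidle governor only picks a state when the predicted idle interval is at least that long, so the change keeps cores in shallower states unless sleeps are genuinely long. A toy selector over the new hsw numbers illustrates the mechanism (real governors such as menu also weigh latency constraints and prediction history):

#include <stdio.h>

/* State numbers mirror the updated hsw table above; the selection loop
 * is a deliberate oversimplification of a cpuidle governor. */
struct cstate {
	const char *name;
	unsigned int exit_latency_us;
	unsigned int target_residency_us;
};

static const struct cstate hsw[] = {
	{ "C1",  2,   2 },
	{ "C1E", 10,  120 },
	{ "C3",  33,  900 },
	{ "C6",  133, 1000 },
};

static const struct cstate *pick(unsigned int predicted_us)
{
	const struct cstate *best = &hsw[0];

	for (unsigned int i = 0; i < sizeof(hsw) / sizeof(hsw[0]); i++)
		if (predicted_us >= hsw[i].target_residency_us)
			best = &hsw[i];
	return best;
}

int main(void)
{
	unsigned int samples[] = { 50, 150, 950, 5000 };

	for (unsigned int i = 0; i < 4; i++)
		printf("predicted %5u us -> %s\n", samples[i],
		       pick(samples[i])->name);
	return 0;
}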
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index 8ec4872b447145..baf7a0d9c332f4 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -625,7 +625,7 @@ static int i8042_enable_kbd_port(void)
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
 		i8042_ctr &= ~I8042_CTR_KBDINT;
 		i8042_ctr |= I8042_CTR_KBDDIS;
-		pr_err("Failed to enable KBD port\n");
+		pr_info("Failed to enable KBD port\n");
 		return -EIO;
 	}
 
@@ -644,7 +644,7 @@ static int i8042_enable_aux_port(void)
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
 		i8042_ctr &= ~I8042_CTR_AUXINT;
 		i8042_ctr |= I8042_CTR_AUXDIS;
-		pr_err("Failed to enable AUX port\n");
+		pr_info("Failed to enable AUX port\n");
 		return -EIO;
 	}
 
@@ -736,7 +736,7 @@ static int i8042_check_mux(void)
 	i8042_ctr &= ~I8042_CTR_AUXINT;
 
 	if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) {
-		pr_err("Failed to disable AUX port, can't use MUX\n");
+		pr_info("Failed to disable AUX port, can't use MUX\n");
 		return -EIO;
 	}
 
@@ -959,7 +959,7 @@ static int i8042_controller_selftest(void)
 
 	do {
 		if (i8042_command(&param, I8042_CMD_CTL_TEST)) {
-			pr_err("i8042 controller selftest timeout\n");
+			pr_info("i8042 controller selftest timeout\n");
 			return -ENODEV;
 		}
 
@@ -981,7 +981,7 @@ static int i8042_controller_selftest(void)
 	pr_info("giving up on controller selftest, continuing anyway...\n");
 	return 0;
 #else
-	pr_err("i8042 controller selftest failed\n");
+	pr_info("i8042 controller selftest failed\n");
 	return -EIO;
 #endif
 }
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index e9c5e1e11fa02d..d912cdbd26a4d2 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -43,7 +43,7 @@
 
 #define DRV_NAME	"dummy"
 
-static int numdummies = 1;
+static int numdummies = 0;
 
 /* fake multicast ability */
 static void set_multicast_list(struct net_device *dev)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8da50df56b0795..81c80b5fc542ab 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -62,7 +62,7 @@ static u8 nvme_max_retries = 5;
 module_param_named(max_retries, nvme_max_retries, byte, 0644);
 MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 
-static unsigned long default_ps_max_latency_us = 100000;
+static unsigned long default_ps_max_latency_us = 200;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
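Cutting default_ps_max_latency_us from 100000 to 200 means APST will only arm NVMe power states whose combined entry and exit latencies fit a 200 us budget, which in practice excludes deep sleep states. An illustrative sketch; the power-state table here is invented, real values come from the controller's identify data:

#include <stdio.h>

/* Invented sample states: the latency sum is what the budget gates. */
struct ps {
	const char *name;
	unsigned long entry_lat_us;
	unsigned long exit_lat_us;
};

static const struct ps states[] = {
	{ "PS3", 50,   100 },
	{ "PS4", 1000, 50000 },	/* deep sleep: cheap to hold, slow to leave */
};

int main(void)
{
	unsigned long budgets[] = { 200, 100000 };	/* new vs old default */

	for (int b = 0; b < 2; b++)
		for (int s = 0; s < 2; s++) {
			unsigned long total = states[s].entry_lat_us +
					      states[s].exit_lat_us;
			printf("budget %6lu us: %s %s\n", budgets[b],
			       states[s].name,
			       total <= budgets[b] ? "armed" : "skipped");
		}
	return 0;
}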
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1aa5d6f98ebda2..9f006c30eca554 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -61,7 +61,7 @@ struct pci_pme_device {
 	struct pci_dev *dev;
 };
 
-#define PME_TIMEOUT 1000 /* How long between PME checks */
+#define PME_TIMEOUT 4000 /* How long between PME checks */
 
 /*
 * Following exit from Conventional Reset, devices must be ready within 1 sec
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 4ba649370aa1a7..a3a03061b41d42 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -710,6 +710,11 @@ static const struct thermal_cooling_device_ops powerclamp_cooling_ops = {
 	.set_cur_state = powerclamp_set_cur_state,
 };
 
+static const struct x86_cpu_id amd_cpu[] = {
+	{ X86_VENDOR_AMD },
+	{},
+};
+
 static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
 	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
 	{}
@@ -718,9 +723,13 @@ MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
 
 static int __init powerclamp_probe(void)
 {
 	if (!x86_match_cpu(intel_powerclamp_ids)) {
-		pr_err("CPU does not support MWAIT\n");
+		pr_info("CPU does not support MWAIT\n");
+		return -ENODEV;
+	}
+
+	if (x86_match_cpu(amd_cpu)) {
+		pr_info("Intel PowerClamp does not support AMD CPUs\n");
 		return -ENODEV;
 	}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0a216a078c3155..e0b51138141e3b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1282,6 +1282,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	mm = current->mm;
 	mm->end_code = end_code;
 	mm->start_code = start_code;
+	if (start_code >= ELF_ET_DYN_BASE)
+		mm->mmap_base = start_code;
 	mm->start_data = start_data;
 	mm->end_data = end_data;
 	mm->start_stack = bprm->p;
diff --git a/fs/xattr.c b/fs/xattr.c
index 05ec7e7d9e87e2..09723a7857feaa 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -139,16 +139,17 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
 	}
 
 	/*
-	 * In the user.* namespace, only regular files and directories can have
-	 * extended attributes. For sticky directories, only the owner and
-	 * privileged users can write attributes.
+	 * In the user.* namespace, only regular files, symbolic links, and
+	 * directories can have extended attributes. For symbolic links and
+	 * sticky directories, only the owner and privileged users can write
+	 * attributes.
 	 */
 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
-		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode))
 			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
-		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
-		    (mask & MAY_WRITE) &&
-		    !inode_owner_or_capable(idmap, inode))
+		if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX))
+		     || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE)
+		    && !inode_owner_or_capable(idmap, inode))
 			return -EPERM;
 	}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad5735..ff2eb4ab1842b2 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -45,7 +45,7 @@
 /*
 * The default maximum commit age, in seconds.
 */
-#define JBD2_DEFAULT_MAX_COMMIT_AGE 5
+#define JBD2_DEFAULT_MAX_COMMIT_AGE 30
 
 #ifdef CONFIG_JBD2_DEBUG
 /*
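The xattr change extends the user.* namespace to symbolic links while reserving writes to the owner or a privileged caller, mirroring the existing sticky-directory rule. The predicate restated standalone, with an owner flag standing in for inode_owner_or_capable():

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

/* Same logic as the patched xattr_permission() for a MAY_WRITE request. */
static bool may_write_user_xattr(mode_t mode, bool owner)
{
	if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode))
		return false;
	if (((S_ISDIR(mode) && (mode & S_ISVTX)) || S_ISLNK(mode)) && !owner)
		return false;
	return true;
}

int main(void)
{
	printf("symlink, owner:        %d\n", may_write_user_xattr(S_IFLNK | 0777, true));
	printf("symlink, non-owner:    %d\n", may_write_user_xattr(S_IFLNK | 0777, false));
	printf("sticky dir, non-owner: %d\n",
	       may_write_user_xattr(S_IFDIR | S_ISVTX | 0777, false));
	printf("fifo, owner:           %d\n", may_write_user_xattr(S_IFIFO | 0666, true));
	return 0;
}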
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2b322a9b88a2bd..6f977c21fd81d5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -163,6 +163,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head)
 
 extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
+extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 
@@ -1192,6 +1193,7 @@ do {										\
 */
 void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state);
 void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry);
 long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout);
diff --git a/include/net/sock.h b/include/net/sock.h
index fa055cf1785efd..da0a3e5bc99f5b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1579,10 +1579,17 @@ static inline void sk_mem_charge(struct sock *sk, int size)
 
 static inline void sk_mem_uncharge(struct sock *sk, int size)
 {
+	int reclaimable, reclaim_threshold;
+
+	reclaim_threshold = 64 * 1024;
 	if (!sk_has_account(sk))
 		return;
 	sk_forward_alloc_add(sk, size);
-	sk_mem_reclaim(sk);
+	reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
+	if (reclaimable > reclaim_threshold) {
+		reclaimable -= reclaim_threshold;
+		__sk_mem_reclaim(sk, reclaimable);
+	}
 }
 
 /*
@@ -2813,7 +2820,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *meminfo);
 * platforms. This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
-#define _SK_MEM_PACKETS		256
+#define _SK_MEM_PACKETS		1024
 #define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
 #define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
 #define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
index d174914a837dbf..bf8e2af101a3c8 100644
--- a/include/uapi/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
@@ -82,7 +82,7 @@
 #define BOND_STATE_ACTIVE	0   /* link is active */
 #define BOND_STATE_BACKUP	1   /* link is backup */
 
-#define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
+#define BOND_DEFAULT_MAX_BONDS  0   /* Default maximum number of devices to support */
 
 #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 6af29da8889ebf..a83c82ad7150a2 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -476,7 +476,9 @@ void __init prepare_namespace(void)
 	 * For example, it is not atypical to wait 5 seconds here
 	 * for the touchpad of a laptop to initialize.
 	 */
+	async_synchronize_full();
 	wait_for_device_probe();
+	async_synchronize_full();
 
 	md_run_setup();
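With the sk_mem_uncharge() rewrite, freed socket memory is only returned to the global accounting once more than 64 KiB has accumulated, and only the excess above the threshold is reclaimed. A toy model of that policy, with reclaim() standing in for __sk_mem_reclaim():

#include <stdio.h>

#define RECLAIM_THRESHOLD	(64 * 1024)

static long forward_alloc;	/* stands in for sk->sk_forward_alloc */

static void reclaim(long amount)
{
	forward_alloc -= amount;
	printf("reclaimed %ld, %ld left cached\n", amount, forward_alloc);
}

/* Same shape as the patched sk_mem_uncharge(): cache up to 64 KiB and
 * hand back only the excess. */
static void mem_uncharge(int size)
{
	forward_alloc += size;
	if (forward_alloc > RECLAIM_THRESHOLD)
		reclaim(forward_alloc - RECLAIM_THRESHOLD);
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		mem_uncharge(16 * 1024);	/* six 16 KiB uncharges */
	return 0;
}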
diff --git a/init/init_task.c b/init/init_task.c
index 136a8231355ab7..15d7392020cae9 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -139,7 +139,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
 	.journal_info	= NULL,
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
-	.timer_slack_ns = 50000, /* 50 usec default slack */
+	.timer_slack_ns = 50, /* 50 nsec default slack */
 	.thread_pid	= &init_struct_pid,
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
 #ifdef CONFIG_AUDIT
diff --git a/init/main.c b/init/main.c
index c4778edae7972f..f5128cc7b34bf1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1220,10 +1220,13 @@ static __init_or_module void
 trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
 {
 	ktime_t rettime, *calltime = data;
+	long long delta;
 
 	rettime = ktime_get();
-	printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
-		 fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime));
+	delta = ktime_us_delta(rettime, *calltime);
+	if (ret || delta)
+		printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
+			 fn, ret, delta);
 }
 
 static ktime_t initcall_calltime;
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 2bbb6eca51445b..125cdf85741c5e 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -747,6 +747,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
 	struct task_struct *new, *owner;
 	unsigned long flags, new_flags;
 	enum owner_state state;
+	int i = 0;
 
 	lockdep_assert_preemption_disabled();
 
@@ -783,7 +784,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
 			break;
 		}
 
-		cpu_relax();
+		if (i++ > 1000)
+			cpu_relax();
 	}
 
 	return state;
diff --git a/kernel/module/signing.c b/kernel/module/signing.c
index a2ff4242e623d5..8cd7c6f5d8bbfe 100644
--- a/kernel/module/signing.c
+++ b/kernel/module/signing.c
@@ -14,6 +14,8 @@
 #include <linux/security.h>
 #include <crypto/public_key.h>
 #include <uapi/linux/module.h>
+#include <asm/setup.h>
+
 #include "internal.h"
 
 #undef MODULE_PARAM_PREFIX
@@ -21,6 +23,11 @@
 static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
 module_param(sig_enforce, bool_enable_only, 0644);
 
+/* Allow disabling module signature requirement by adding boot param */
+static bool sig_unenforce;
+module_param(sig_unenforce, bool_enable_only, 0644);
+
+extern struct boot_params boot_params;
 
 /*
 * Export sig_enforce kernel cmdline parameter to allow other subsystems rely
@@ -28,6 +35,8 @@ module_param(sig_enforce, bool_enable_only, 0644);
 */
 bool is_module_sig_enforced(void)
 {
+	if (sig_unenforce)
+		return false;
 	return sig_enforce;
 }
 EXPORT_SYMBOL(is_module_sig_enforced);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9803f10a082a7b..04c73988e7370c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -594,7 +594,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
 
 	/* Matches synchronize_rcu() in __sched_core_enable() */
 	preempt_disable();
-	if (sched_core_disabled()) {
+	if (likely(sched_core_disabled())) {
 		raw_spin_lock_nested(&rq->__lock, subclass);
 		/* preempt_count *MUST* be > 1 */
 		preempt_enable_no_resched();
@@ -804,7 +804,7 @@ void update_rq_clock(struct rq *rq)
 #endif
 
 	delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
-	if (delta < 0)
+	if (unlikely(delta < 0))
 		return;
 	rq->clock += delta;
 	update_rq_clock_task(rq, delta);
@@ -6055,7 +6055,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	struct rq *rq_i;
 	bool need_sync;
 
-	if (!sched_core_enabled(rq))
+	if (likely(!sched_core_enabled(rq)))
 		return __pick_next_task(rq, prev, rf);
 
 	cpu = cpu_of(rq);
@@ -7229,7 +7229,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
 #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
 int __sched __cond_resched(void)
 {
-	if (should_resched(0) && !irqs_disabled()) {
+	if (unlikely(should_resched(0) && !irqs_disabled())) {
 		preempt_schedule_common();
 		return 1;
 	}
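The rwsem hunk spins hot for the first thousand iterations and only then starts issuing PAUSE, trading a little power for faster lock handoff on short critical sections. An x86-only userspace sketch of the same shape, assuming GCC's PAUSE builtin and an atomic flag in place of the owner field:

#include <stdatomic.h>
#include <stdio.h>

#define cpu_relax()	__builtin_ia32_pause()

static _Atomic int owner_done;

/* Spin without PAUSE for ~1000 iterations, then back off, as in the
 * patched rwsem_spin_on_owner(). */
static void spin_on_owner(void)
{
	int i = 0;

	while (!atomic_load_explicit(&owner_done, memory_order_acquire)) {
		if (i++ > 1000)
			cpu_relax();
	}
}

int main(void)
{
	atomic_store(&owner_done, 1);	/* pretend the owner just released */
	spin_on_owner();
	puts("lock handed off");
	return 0;
}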
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ddc096d6b0c203..e25af5af5434e7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -190,7 +190,7 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w)
 */
 static unsigned int get_update_sysctl_factor(void)
 {
-	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
+	unsigned int cpus = num_online_cpus();
 	unsigned int factor;
 
 	switch (sysctl_sched_tunable_scaling) {
@@ -12866,7 +12866,7 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
 
 		update_next_balance(sd, &next_balance);
 
-		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost)
+		if (this_rq->avg_idle / 2 < curr_cost + sd->max_newidle_lb_cost)
 			break;
 
 		if (sd->flags & SD_BALANCE_NEWIDLE) {
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index f9cb7896c1b966..45723ca32d1d8b 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -1390,10 +1390,22 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
 	return ret;
 }
 
+static DEFINE_PER_CPU(unsigned long, last_yield);
+
 static void do_sched_yield(void)
 {
 	struct rq_flags rf;
 	struct rq *rq;
+	int cpu = raw_smp_processor_id();
+
+	cond_resched();
+
+	/* rate limit yielding to something sensible */
+	if (!time_after(jiffies, per_cpu(last_yield, cpu)))
+		return;
+
+	per_cpu(last_yield, cpu) = jiffies;
 
 	rq = this_rq_lock_irq(&rf);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 51e38f5f47018c..c5cc616484badd 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 }
 EXPORT_SYMBOL_GPL(add_wait_queue_priority);
 
+void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
+{
+	unsigned long flags;
+
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	__add_wait_queue(wq_head, wq_entry);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(add_wait_queue_exclusive_lifo);
+
 void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
 {
 	unsigned long flags;
@@ -258,6 +269,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
+void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
+{
+	unsigned long flags;
+
+	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
+	spin_lock_irqsave(&wq_head->lock, flags);
+	if (list_empty(&wq_entry->entry))
+		__add_wait_queue(wq_head, wq_entry);
+	set_current_state(state);
+	spin_unlock_irqrestore(&wq_head->lock, flags);
+}
+EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo);
+
 void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
 {
 	wq_entry->flags = flags;
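The do_sched_yield() change honours at most one yield per jiffy per CPU and turns the rest into a bare cond_resched(). A userspace approximation, with a 1 ms slot standing in for the jiffy and a single variable for the per-CPU state:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static unsigned long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000UL + ts.tv_nsec / 1000000UL;
}

static unsigned long last_yield;

/* Mirrors !time_after(jiffies, last_yield): swallow yields inside the
 * current 1 ms slot, honour the first one in each new slot. */
static bool should_yield(void)
{
	unsigned long now = now_ms();

	if (now <= last_yield)
		return false;
	last_yield = now;
	return true;
}

int main(void)
{
	int honoured = 0;

	for (int i = 0; i < 1000000; i++)
		if (should_yield())
			honoured++;
	printf("%d of 1000000 yields honoured\n", honoured);
	return 0;
}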
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index cd2e88ee1f148c..4d86cfe17c3fde 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -18,6 +18,8 @@
 #else
 #include <linux/module.h>
 #include <linux/gfp.h>
+#include <linux/sched/clock.h>
+
 /* In .bss so it's zeroed */
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
 EXPORT_SYMBOL(raid6_empty_zero_page);
@@ -138,8 +140,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 
 	for (best = NULL, algo = raid6_recov_algos; *algo; algo++)
 		if (!best || (*algo)->priority > best->priority)
-			if (!(*algo)->valid || (*algo)->valid())
+			if (!(*algo)->valid || (*algo)->valid()) {
 				best = *algo;
+				break;
+			}
 
 	if (best) {
 		raid6_2data_recov = best->data2;
@@ -155,12 +159,15 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 static inline const struct raid6_calls *raid6_choose_gen(
 	void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
 {
-	unsigned long perf, bestgenperf, j0, j1;
+	unsigned long perf;
+	const unsigned long max_perf = 2500;
 	int start = (disks>>1)-1, stop = disks-3;	/* work on the second half of the disks */
 	const struct raid6_calls *const *algo;
 	const struct raid6_calls *best;
+	const u64 ns_per_mb = 1000000000 >> 20;
+	u64 n, ns, t, ns_best = ~0ULL;
 
-	for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+	for (best = NULL, algo = raid6_algos; *algo; algo++) {
 		if (!best || (*algo)->priority >= best->priority) {
 			if ((*algo)->valid && !(*algo)->valid())
 				continue;
@@ -170,26 +177,20 @@ static inline const struct raid6_calls *raid6_choose_gen(
 			break;
 		}
 
-		perf = 0;
-
 		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-				    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		if (perf > bestgenperf) {
-			bestgenperf = perf;
+		if (ns < ns_best) {
+			ns_best = ns;
 			best = *algo;
 		}
-		pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
-			(perf * HZ * (disks-2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n",
+			(*algo)->name, (ns > 0) ? n / ns : 0, ns);
 	}
 }
@@ -206,31 +207,23 @@ static inline const struct raid6_calls *raid6_choose_gen(
 		goto out;
 	}
 
-	pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
-		best->name,
-		(bestgenperf * HZ * (disks - 2)) >>
-		(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+	n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+	pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n",
+		best->name, (ns_best > 0) ? n / ns_best : 0, ns_best);
 
 	if (best->xor_syndrome) {
-		perf = 0;
-
 		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-				   j1 + (1 << RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			best->xor_syndrome(disks, start, stop,
 					   PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-			(perf * HZ * (disks - 2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n",
+			(ns > 0) ? n / ns : 0, ns);
 	}
-
 out:
 	return best;
 }
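The benchmark now runs a fixed 2500 syndrome passes timed with local_clock(), so MB/s is recovered from elapsed nanoseconds as bytes * (10^9 >> 20) / ns rather than from jiffy counting. A quick standalone check of that arithmetic (the elapsed-time figure is invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t max_perf = 2500, page_size = 4096, disks = 8;
	const uint64_t ns_per_mb = 1000000000 >> 20;	/* 953 */
	uint64_t ns = 3200000;	/* pretend 2500 passes took 3.2 ms */
	uint64_t n = max_perf * page_size * ns_per_mb * (disks - 2);

	/* Same expression as the patched pr_info() lines. */
	printf("raid6 gen() %llu MB/s\n",
	       (unsigned long long)(ns ? n / ns : 0));
	return 0;
}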
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index de65e8b4f75f21..5af8600f9726e8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5529,11 +5529,11 @@ static int zone_batchsize(struct zone *zone)
 
 	/*
 	 * The number of pages to batch allocate is either ~0.1%
-	 * of the zone or 1MB, whichever is smaller. The batch
+	 * of the zone or 4MB, whichever is smaller. The batch
 	 * size is striking a balance between allocation latency
 	 * and zone lock contention.
 	 */
-	batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE);
+	batch = min(zone_managed_pages(zone) >> 10, 4 * SZ_1M / PAGE_SIZE);
 	batch /= 4;		/* We effectively *= 4 below */
 	if (batch < 1)
 		batch = 1;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index fe7947f7740623..99e138cfdd9522 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -634,7 +634,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 	 * having to remove and re-insert us on the wait queue.
 	 */
 	for (;;) {
-		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
+		prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait,
 					  TASK_INTERRUPTIBLE);
 		release_sock(sk);
 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b731a4a8f2b0d5..654eb58ecd2006 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5144,8 +5144,8 @@ void __init tcp_init(void)
 	tcp_init_mem();
 	/* Set per-socket limits to no more than 1/128 the pressure threshold */
 	limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
-	max_wshare = min(4UL*1024*1024, limit);
-	max_rshare = min(6UL*1024*1024, limit);
+	max_wshare = min(16UL*1024*1024, limit);
+	max_rshare = min(16UL*1024*1024, limit);
 
 	init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE;
 	init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;