diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b37abb55e948b7..23c7dfa24cda91 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -490,7 +490,7 @@ #define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) #define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) -#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_PERFORMANCE 0x01 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define HWP_EPP_POWERSAVE 0xFF diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 3235ba1e5b06d7..80babac99d46a8 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -183,6 +183,7 @@ extern unsigned int __read_mostly sysctl_sched_itmt_enabled; /* Interface to set priority of a cpu */ void sched_set_itmt_core_prio(int prio, int core_cpu); +void sched_set_itmt_power_ratio(int power_ratio, int core_cpu); /* Interface to notify scheduler that system supports ITMT */ int sched_set_itmt_support(void); diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 73be3931e4f060..959a3a6279cc1a 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1622,7 +1622,9 @@ void __init alternative_instructions(void) * Then patch alternatives, such that those paravirt calls that are in * alternatives can be overwritten by their immediate fragments. */ + printk("clr: Applying alternatives\n"); apply_alternatives(__alt_instructions, __alt_instructions_end); + printk("clr: Applying alternatives done\n"); /* * Now all calls are established. Apply the call thunks if diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index e4c3ba91321c95..fcac72edede5b8 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -166,6 +166,10 @@ static ssize_t energy_perf_bias_store(struct device *dev, if (ret < 0) return ret; + /* update the ITMT scheduler logic to use the power policy data */ + /* scale the val up by 2 so the range is 224 - 256 */ + sched_set_itmt_power_ratio(256 - val * 2, cpu); + return count; } diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index ee4fe8cdb85767..476928553e52ed 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -25,6 +25,7 @@ static DEFINE_MUTEX(itmt_update_mutex); DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); +DEFINE_PER_CPU_READ_MOSTLY(int, sched_power_ratio); /* Boolean to track if system has ITMT capabilities */ static bool __read_mostly sched_itmt_capable; @@ -160,7 +161,12 @@ void sched_clear_itmt_support(void) int arch_asym_cpu_priority(int cpu) { - return per_cpu(sched_core_priority, cpu); + int power_ratio = per_cpu(sched_power_ratio, cpu); + + /* a power ratio of 0 (uninitialized) is assumed to be maximum */ + if (power_ratio == 0) + power_ratio = 256 - 2 * 6; + return per_cpu(sched_core_priority, cpu) * power_ratio / 256; } /** @@ -181,3 +187,24 @@ void sched_set_itmt_core_prio(int prio, int cpu) { per_cpu(sched_core_priority, cpu) = prio; } + +/** + * sched_set_itmt_power_ratio() - Set CPU priority based on ITMT + * @power_ratio: The power scaling ratio [1..256] for the core + * @core_cpu: The cpu number associated with the core + * + * Set a scaling to the cpu performance based on long term power + * settings (like EPB). 
+ *
+ * Note this is for the policy not for the actual dynamic frequency;
+ * the frequency will increase itself as workloads run on a core.
+ */
+
+void sched_set_itmt_power_ratio(int power_ratio, int core_cpu)
+{
+	int cpu;
+
+	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+		per_cpu(sched_power_ratio, cpu) = power_ratio;
+	}
+}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 15f97c0abc9d09..ec0f9951abc6d1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1644,6 +1644,9 @@ unsigned long calibrate_delay_is_known(void)
 	if (!constant_tsc || !mask)
 		return 0;
 
+	if (cpu != 0)
+		return cpu_data(0).loops_per_jiffy;
+
 	sibling = cpumask_any_but(mask, cpu);
 	if (sibling < nr_cpu_ids)
 		return cpu_data(sibling).loops_per_jiffy;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ab778eac195205..a702a8f9716823 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -779,9 +779,9 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 	if (!printk_ratelimit())
 		return;
 
-	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
+	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx cpu %i",
 		loglvl, tsk->comm, task_pid_nr(tsk), address,
-		(void *)regs->ip, (void *)regs->sp, error_code);
+		(void *)regs->ip, (void *)regs->sp, error_code, raw_smp_processor_id());
 
 	print_vma_addr(KERN_CONT " in ", regs->ip);
diff --git a/block/early-lookup.c b/block/early-lookup.c
index 3effbd0d35e9fa..503a8c65ee9863 100644
--- a/block/early-lookup.c
+++ b/block/early-lookup.c
@@ -5,6 +5,7 @@
  */
 #include
 #include
+#include <linux/delay.h>
 
 struct uuidcmp {
 	const char *uuid;
@@ -243,8 +244,18 @@ static int __init devt_from_devnum(const char *name, dev_t *devt)
  */
 int __init early_lookup_bdev(const char *name, dev_t *devt)
 {
-	if (strncmp(name, "PARTUUID=", 9) == 0)
-		return devt_from_partuuid(name + 9, devt);
+	if (strncmp(name, "PARTUUID=", 9) == 0) {
+		int res;
+		int needtowait = 40<<1;
+
+		res = devt_from_partuuid(name + 9, devt);
+		if (!res)
+			return res;
+		while (res && needtowait) {
+			msleep(500);
+			res = devt_from_partuuid(name + 9, devt);
+			needtowait--;
+		}
+		return res;
+	}
 	if (strncmp(name, "PARTLABEL=", 10) == 0)
 		return devt_from_partlabel(name + 10, devt);
 	if (strncmp(name, "/dev/", 5) == 0)
diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c
index c3f9938e1ad27f..e77478e064d8f6 100644
--- a/crypto/kdf_sp800108.c
+++ b/crypto/kdf_sp800108.c
@@ -149,7 +149,7 @@ static int __init crypto_kdf108_init(void)
 
 static void __exit crypto_kdf108_exit(void) { }
 
-module_init(crypto_kdf108_init);
+late_initcall(crypto_kdf108_init);
 module_exit(crypto_kdf108_exit);
 
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/Makefile b/drivers/Makefile
index 1bec7819a837ab..dcdb0ddb7b66d8 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -66,15 +66,8 @@ obj-y += char/
 # iommu/ comes before gpu as gpu are using iommu controllers
 obj-y += iommu/
 
-# gpu/ comes after char for AGP vs DRM startup and after iommu
-obj-y += gpu/
-
 obj-$(CONFIG_CONNECTOR) += connector/
 
-# i810fb and intelfb depend on char/agp/
-obj-$(CONFIG_FB_I810) += video/fbdev/i810/
-obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
-
 obj-$(CONFIG_PARPORT) += parport/
 obj-y += base/ block/ misc/ mfd/ nfc/
 obj-$(CONFIG_LIBNVDIMM) += nvdimm/
@@ -86,6 +79,14 @@ obj-y += macintosh/
 obj-y += scsi/
 obj-y += nvme/
 obj-$(CONFIG_ATA) += ata/
+
+# gpu/ comes after char for AGP vs DRM startup and after iommu
+obj-y += gpu/
+
+# i810fb and intelfb depend on char/agp/
+obj-$(CONFIG_FB_I810) += video/fbdev/i810/
+obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/ + obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index f725813d0cce6a..dc1eb49db49e51 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1556,7 +1556,7 @@ void acpi_os_release_lock(acpi_spinlock lockp, acpi_cpu_flags flags) acpi_status acpi_os_create_cache(char *name, u16 size, u16 depth, acpi_cache_t ** cache) { - *cache = kmem_cache_create(name, size, 0, 0, NULL); + *cache = kmem_cache_create(name, size, 0, SLAB_HWCACHE_ALIGN, NULL); if (*cache == NULL) return AE_ERROR; else diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index f1263364fa97fa..efcae19412e596 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -34,14 +34,14 @@ #include "libata.h" static int ahci_skip_host_reset; -int ahci_ignore_sss; +int ahci_ignore_sss=1; EXPORT_SYMBOL_GPL(ahci_ignore_sss); module_param_named(skip_host_reset, ahci_skip_host_reset, int, 0444); MODULE_PARM_DESC(skip_host_reset, "skip global host reset (0=don't skip, 1=skip)"); module_param_named(ignore_sss, ahci_ignore_sss, int, 0444); -MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore)"); +MODULE_PARM_DESC(ignore_sss, "Ignore staggered spinup flag (0=don't ignore, 1=ignore [default])"); static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, unsigned hints); diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index b58c42f1b1ce65..817c81bc9107ab 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -470,6 +470,8 @@ static int fw_decompress_xz(struct device *dev, struct fw_priv *fw_priv, static char fw_path_para[256]; static const char * const fw_path[] = { fw_path_para, + "/etc/firmware/" UTS_RELEASE, + "/etc/firmware", "/lib/firmware/updates/" UTS_RELEASE, "/lib/firmware/updates", "/lib/firmware/" UTS_RELEASE, diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index 82338773602cae..d84f0e29452ecf 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -176,13 +176,16 @@ static int verify_pmtmr_rate(void) static int __init init_acpi_pm_clocksource(void) { u64 value1, value2; - unsigned int i, j = 0; + unsigned int i, j = 0, checks = 1; if (!pmtmr_ioport) return -ENODEV; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + checks = ACPI_PM_MONOTONICITY_CHECKS; + /* "verify" this timing source: */ - for (j = 0; j < ACPI_PM_MONOTONICITY_CHECKS; j++) { + for (j = 0; j < checks; j++) { udelay(100 * j); value1 = clocksource_acpi_pm.read(&clocksource_acpi_pm); for (i = 0; i < ACPI_PM_READ_CHECKS; i++) { diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index dc50c9fb488dfc..c54e6455ec7b46 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -373,6 +373,13 @@ static void intel_pstate_set_itmt_prio(int cpu) * update them at any time after it has been called. */ sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu); + /* + * On some systems with overclocking enabled, CPPC.highest_perf is hardcoded to 0xff. + * In this case we can't use CPPC.highest_perf to enable ITMT. + * In this case we can look at MSR_HWP_CAPABILITIES bits [8:0] to decide. 
+ */ + if (cppc_perf.highest_perf == 0xff) + cppc_perf.highest_perf = HWP_HIGHEST_PERF(READ_ONCE(all_cpu_data[cpu]->hwp_cap_cached)); if (max_highest_perf <= min_highest_perf) { if (cppc_perf.highest_perf > max_highest_perf) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index ea5a6a14c5537a..4d64eff6234abf 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -573,7 +573,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -581,7 +581,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 33, - .target_residency = 100, + .target_residency = 900, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -589,7 +589,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, - .target_residency = 400, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -597,7 +597,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, - .target_residency = 500, + .target_residency = 1500, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -605,7 +605,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, - .target_residency = 900, + .target_residency = 2000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -613,7 +613,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, - .target_residency = 1800, + .target_residency = 5000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -621,7 +621,7 @@ static struct cpuidle_state hsw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, - .target_residency = 7700, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -641,7 +641,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -649,7 +649,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 40, - .target_residency = 100, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -657,7 +657,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 133, - .target_residency = 400, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -665,7 +665,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x32", .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 166, - .target_residency = 500, + .target_residency = 2000, .enter = &intel_idle, 
.enter_s2idle = intel_idle_s2idle, }, { @@ -673,7 +673,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 300, - .target_residency = 900, + .target_residency = 4000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -681,7 +681,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 600, - .target_residency = 1800, + .target_residency = 7000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -689,7 +689,7 @@ static struct cpuidle_state bdw_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 2600, - .target_residency = 7700, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -710,7 +710,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 120, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -718,7 +718,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x10", .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 70, - .target_residency = 100, + .target_residency = 1000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -726,7 +726,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 85, - .target_residency = 200, + .target_residency = 600, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -734,7 +734,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x33", .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 124, - .target_residency = 800, + .target_residency = 3000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -742,7 +742,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 200, - .target_residency = 800, + .target_residency = 3200, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -750,7 +750,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x50", .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 480, - .target_residency = 5000, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -758,7 +758,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { .desc = "MWAIT 0x60", .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, .exit_latency = 890, - .target_residency = 5000, + .target_residency = 9000, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -779,7 +779,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 10, - .target_residency = 20, + .target_residency = 300, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -808,7 +808,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 4, - .target_residency = 4, + .target_residency = 40, 
.enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -816,7 +816,7 @@ static struct cpuidle_state icx_cstates[] __initdata = { .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 170, - .target_residency = 600, + .target_residency = 900, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -937,7 +937,7 @@ static struct cpuidle_state gmt_cstates[] __initdata = { .desc = "MWAIT 0x01", .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, - .target_residency = 4, + .target_residency = 40, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 6dac7c1853a541..fab04cd8a7a095 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -621,7 +621,7 @@ static int i8042_enable_kbd_port(void) if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_KBDINT; i8042_ctr |= I8042_CTR_KBDDIS; - pr_err("Failed to enable KBD port\n"); + pr_info("Failed to enable KBD port\n"); return -EIO; } @@ -640,7 +640,7 @@ static int i8042_enable_aux_port(void) if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { i8042_ctr &= ~I8042_CTR_AUXINT; i8042_ctr |= I8042_CTR_AUXDIS; - pr_err("Failed to enable AUX port\n"); + pr_info("Failed to enable AUX port\n"); return -EIO; } @@ -732,7 +732,7 @@ static int i8042_check_mux(void) i8042_ctr &= ~I8042_CTR_AUXINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { - pr_err("Failed to disable AUX port, can't use MUX\n"); + pr_info("Failed to disable AUX port, can't use MUX\n"); return -EIO; } @@ -955,7 +955,7 @@ static int i8042_controller_selftest(void) do { if (i8042_command(¶m, I8042_CMD_CTL_TEST)) { - pr_err("i8042 controller selftest timeout\n"); + pr_info("i8042 controller selftest timeout\n"); return -ENODEV; } @@ -977,7 +977,7 @@ static int i8042_controller_selftest(void) pr_info("giving up on controller selftest, continuing anyway...\n"); return 0; #else - pr_err("i8042 controller selftest failed\n"); + pr_info("i8042 controller selftest failed\n"); return -EIO; #endif } diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index c4b1b0aa438ac5..06b00f7a8eab9d 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -43,7 +43,7 @@ #define DRV_NAME "dummy" -static int numdummies = 1; +static int numdummies = 0; /* fake multicast ability */ static void set_multicast_list(struct net_device *dev) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 21783aa2ee8e18..a140e737357bce 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -59,7 +59,7 @@ static u8 nvme_max_retries = 5; module_param_named(max_retries, nvme_max_retries, byte, 0644); MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); -static unsigned long default_ps_max_latency_us = 100000; +static unsigned long default_ps_max_latency_us = 200; module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device"); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 59c01d68c6d5ed..bfdcc03c57b7ed 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -62,7 +62,7 @@ struct pci_pme_device { struct pci_dev *dev; }; -#define PME_TIMEOUT 1000 /* How long between PME checks */ +#define PME_TIMEOUT 4000 /* How long between PME checks */ /* * Following exit from Conventional Reset, devices must be ready within 1 sec diff --git 
a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 36243a3972fd7f..c787c1d2390f34 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -739,6 +739,11 @@ static const struct thermal_cooling_device_ops powerclamp_cooling_ops = {
 	.set_cur_state = powerclamp_set_cur_state,
 };
 
+static const struct x86_cpu_id amd_cpu[] = {
+	{ X86_VENDOR_AMD },
+	{},
+};
+
 static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
 	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
 	{}
@@ -747,9 +752,13 @@ MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
 
 static int __init powerclamp_probe(void)
 {
 	if (!x86_match_cpu(intel_powerclamp_ids)) {
-		pr_err("CPU does not support MWAIT\n");
+		pr_info("CPU does not support MWAIT\n");
+		return -ENODEV;
+	}
+
+	if (x86_match_cpu(amd_cpu)) {
+		pr_info("Intel PowerClamp does not support AMD CPUs\n");
 		return -ENODEV;
 	}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7b3d2d4914073f..e1b8673960e062 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1296,6 +1296,8 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	mm = current->mm;
 	mm->end_code = end_code;
 	mm->start_code = start_code;
+	if (start_code >= ELF_ET_DYN_BASE)
+		mm->mmap_base = start_code;
 	mm->start_data = start_data;
 	mm->end_data = end_data;
 	mm->start_stack = bprm->p;
diff --git a/fs/xattr.c b/fs/xattr.c
index efd4736bc94b09..663befc923ca68 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -140,16 +140,17 @@ xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
 	}
 
 	/*
-	 * In the user.* namespace, only regular files and directories can have
-	 * extended attributes. For sticky directories, only the owner and
-	 * privileged users can write attributes.
+	 * In the user.* namespace, only regular files, symbolic links, and
+	 * directories can have extended attributes. For symbolic links and
+	 * sticky directories, only the owner and privileged users can write
+	 * attributes.
 	 */
 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
-		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode))
 			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
-		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
-		    (mask & MAY_WRITE) &&
-		    !inode_owner_or_capable(idmap, inode))
+		if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX))
+		     || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE)
+		    && !inode_owner_or_capable(idmap, inode))
 			return -EPERM;
 	}
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 52772c826c8682..062adf94afa499 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -45,7 +45,7 @@
 /*
  * The default maximum commit age, in seconds.
*/ -#define JBD2_DEFAULT_MAX_COMMIT_AGE 5 +#define JBD2_DEFAULT_MAX_COMMIT_AGE 30 #ifdef CONFIG_JBD2_DEBUG /* diff --git a/include/linux/wait.h b/include/linux/wait.h index 5ec7739400f4d7..d1df939187fbf1 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -165,6 +165,7 @@ static inline bool wq_has_sleeper(struct wait_queue_head *wq_head) extern void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); +extern void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); extern void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); @@ -1195,6 +1196,7 @@ do { \ */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout); diff --git a/include/net/sock.h b/include/net/sock.h index 92f7ea62a9159c..54bdc875fda9fc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1682,10 +1682,17 @@ static inline void sk_mem_charge(struct sock *sk, int size) static inline void sk_mem_uncharge(struct sock *sk, int size) { + int reclaimable, reclaim_threshold; + + reclaim_threshold = 64 * 1024; if (!sk_has_account(sk)) return; sk_forward_alloc_add(sk, size); - sk_mem_reclaim(sk); + reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk); + if (reclaimable > reclaim_threshold) { + reclaimable -= reclaim_threshold; + __sk_mem_reclaim(sk, reclaimable); + } } /* diff --git a/include/net/tcp.h b/include/net/tcp.h index 4b03ca7cb8a5eb..d48a96e2d23332 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -130,10 +130,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */ -#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */ +#define TCP_DELACK_MAX ((unsigned)(HZ/10)) /* maximal time to delay before sending an ACK */ #if HZ >= 100 -#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */ -#define TCP_ATO_MIN ((unsigned)(HZ/25)) +#define TCP_DELACK_MIN ((unsigned)(HZ/100)) /* minimal time to delay before sending an ACK */ +#define TCP_ATO_MIN ((unsigned)(HZ/100)) #else #define TCP_DELACK_MIN 4U #define TCP_ATO_MIN 4U diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h index d174914a837dbf..bf8e2af101a3c8 100644 --- a/include/uapi/linux/if_bonding.h +++ b/include/uapi/linux/if_bonding.h @@ -82,7 +82,7 @@ #define BOND_STATE_ACTIVE 0 /* link is active */ #define BOND_STATE_BACKUP 1 /* link is backup */ -#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */ +#define BOND_DEFAULT_MAX_BONDS 0 /* Default maximum number of devices to support */ #define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */ diff 
--git a/init/do_mounts.c b/init/do_mounts.c index 5dfd30b13f4857..bc2835722b7ea9 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -474,7 +474,9 @@ void __init prepare_namespace(void) * For example, it is not atypical to wait 5 seconds here * for the touchpad of a laptop to initialize. */ + async_synchronize_full(); wait_for_device_probe(); + async_synchronize_full(); md_run_setup(); diff --git a/init/init_task.c b/init/init_task.c index ff6c4b9bfe6b1c..ba451478532da1 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -130,7 +130,7 @@ struct task_struct init_task .journal_info = NULL, INIT_CPU_TIMERS(init_task) .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), - .timer_slack_ns = 50000, /* 50 usec default slack */ + .timer_slack_ns = 50, /* 50 nsec default slack */ .thread_pid = &init_struct_pid, .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), diff --git a/init/main.c b/init/main.c index 436d73261810bd..4ace622f2e9025 100644 --- a/init/main.c +++ b/init/main.c @@ -1183,10 +1183,13 @@ static __init_or_module void trace_initcall_finish_cb(void *data, initcall_t fn, int ret) { ktime_t rettime, *calltime = data; + long long delta; rettime = ktime_get(); - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", - fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); + delta = ktime_us_delta(rettime, *calltime); + if (ret || delta) + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", + fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime)); } static ktime_t initcall_calltime; diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 9eabd585ce7afa..56229464d3d152 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -749,6 +749,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) struct task_struct *new, *owner; unsigned long flags, new_flags; enum owner_state state; + int i = 0; lockdep_assert_preemption_disabled(); @@ -785,7 +786,8 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) break; } - cpu_relax(); + if (i++ > 1000) + cpu_relax(); } return state; diff --git a/kernel/module/signing.c b/kernel/module/signing.c index a2ff4242e623d5..8cd7c6f5d8bbfe 100644 --- a/kernel/module/signing.c +++ b/kernel/module/signing.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include "internal.h" #undef MODULE_PARAM_PREFIX @@ -21,6 +23,11 @@ static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); +/* Allow disabling module signature requirement by adding boot param */ +static bool sig_unenforce = false; +module_param(sig_unenforce, bool_enable_only, 0644); + +extern struct boot_params boot_params; /* * Export sig_enforce kernel cmdline parameter to allow other subsystems rely @@ -28,6 +35,8 @@ module_param(sig_enforce, bool_enable_only, 0644); */ bool is_module_sig_enforced(void) { + if (sig_unenforce) + return false; return sig_enforce; } EXPORT_SYMBOL(is_module_sig_enforced); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 802551e0009bf1..1868e28d7ad204 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -554,7 +554,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass) /* Matches synchronize_rcu() in __sched_core_enable() */ preempt_disable(); - if (sched_core_disabled()) { + if (likely(sched_core_disabled())) { raw_spin_lock_nested(&rq->__lock, subclass); /* preempt_count *MUST* be > 1 */ preempt_enable_no_resched(); @@ -763,7 +763,7 @@ void 
update_rq_clock(struct rq *rq) #endif delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; - if (delta < 0) + if (unlikely(delta < 0)) return; rq->clock += delta; update_rq_clock_task(rq, delta); @@ -6075,7 +6075,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct rq *rq_i; bool need_sync; - if (!sched_core_enabled(rq)) + if (likely(!sched_core_enabled(rq))) return __pick_next_task(rq, prev, rf); cpu = cpu_of(rq); @@ -8525,10 +8525,22 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, return ret; } +static DEFINE_PER_CPU(unsigned long, last_yield); + static void do_sched_yield(void) { struct rq_flags rf; struct rq *rq; + int cpu = raw_smp_processor_id(); + + cond_resched(); + + /* rate limit yielding to something sensible */ + + if (!time_after(jiffies, per_cpu(last_yield, cpu))) + return; + + per_cpu(last_yield, cpu) = jiffies; rq = this_rq_lock_irq(&rf); @@ -8559,7 +8571,7 @@ SYSCALL_DEFINE0(sched_yield) #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) int __sched __cond_resched(void) { - if (should_resched(0)) { + if (unlikely(should_resched(0))) { preempt_schedule_common(); return 1; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index df348aa55d3c7e..210b5d65276de0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -212,7 +212,7 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) */ static unsigned int get_update_sysctl_factor(void) { - unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); + unsigned int cpus = num_online_cpus(); unsigned int factor; switch (sysctl_sched_tunable_scaling) { @@ -3942,7 +3942,8 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) */ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq) { - long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib; + long delta; + u64 now; /* * No need to update load_avg for root_task_group as it is not used. @@ -3950,9 +3951,19 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq) if (cfs_rq->tg == &root_task_group) return; + /* + * For migration heavy workloads, access to tg->load_avg can be + * unbound. Limit the update rate to at most once per ms. 
+ */ + now = sched_clock_cpu(cpu_of(rq_of(cfs_rq))); + if (now - cfs_rq->last_update_tg_load_avg < NSEC_PER_MSEC) + return; + + delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib; if (abs(delta) > cfs_rq->tg_load_avg_contrib / 64) { atomic_long_add(delta, &cfs_rq->tg->load_avg); cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg; + cfs_rq->last_update_tg_load_avg = now; } } @@ -12128,7 +12139,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) update_next_balance(sd, &next_balance); - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) + if (this_rq->avg_idle/2 < curr_cost + sd->max_newidle_lb_cost) break; if (sd->flags & SD_BALANCE_NEWIDLE) { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 04846272409cc0..2ff809fc35f764 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -594,6 +594,7 @@ struct cfs_rq { } removed; #ifdef CONFIG_FAIR_GROUP_SCHED + u64 last_update_tg_load_avg; unsigned long tg_load_avg_contrib; long propagate; long prop_runnable_sum; diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 802d98cf2de317..9e5a15389c30ef 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -47,6 +47,17 @@ void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_ } EXPORT_SYMBOL_GPL(add_wait_queue_priority); +void add_wait_queue_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + __add_wait_queue(wq_head, wq_entry); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(add_wait_queue_exclusive_lifo); + void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; @@ -298,6 +309,19 @@ prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_ent } EXPORT_SYMBOL(prepare_to_wait_exclusive); +void prepare_to_wait_exclusive_lifo(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) +{ + unsigned long flags; + + wq_entry->flags |= WQ_FLAG_EXCLUSIVE; + spin_lock_irqsave(&wq_head->lock, flags); + if (list_empty(&wq_entry->entry)) + __add_wait_queue(wq_head, wq_entry); + set_current_state(state); + spin_unlock_irqrestore(&wq_head->lock, flags); +} +EXPORT_SYMBOL(prepare_to_wait_exclusive_lifo); + void init_wait_entry(struct wait_queue_entry *wq_entry, int flags) { wq_entry->flags = flags; diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 0ec534faf019bc..84fb8ac3a36691 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -18,6 +18,8 @@ #else #include #include +#include + /* In .bss so it's zeroed */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); EXPORT_SYMBOL(raid6_empty_zero_page); @@ -142,8 +144,10 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) for (best = NULL, algo = raid6_recov_algos; *algo; algo++) if (!best || (*algo)->priority > best->priority) - if (!(*algo)->valid || (*algo)->valid()) + if (!(*algo)->valid || (*algo)->valid()) { best = *algo; + break; + } if (best) { raid6_2data_recov = best->data2; @@ -159,12 +163,15 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[RAID6_TEST_DISKS], const int disks) { - unsigned long perf, bestgenperf, j0, j1; + unsigned long perf; + const unsigned long max_perf = 2500; int start = (disks>>1)-1, stop = disks-3; /* work on the second half of 
the disks */
 	const struct raid6_calls *const *algo;
 	const struct raid6_calls *best;
+	const u64 ns_per_mb = 1000000000 >> 20;
+	u64 n, ns, t, ns_best = ~0ULL;
 
-	for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+	for (best = NULL, algo = raid6_algos; *algo; algo++) {
 		if (!best || (*algo)->priority >= best->priority) {
 			if ((*algo)->valid && !(*algo)->valid())
 				continue;
@@ -174,26 +181,20 @@ static inline const struct raid6_calls *raid6_choose_gen(
 				break;
 			}
 
-			perf = 0;
-			preempt_disable();
-			j0 = jiffies;
-			while ((j1 = jiffies) == j0)
-				cpu_relax();
-			while (time_before(jiffies,
-					    j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+			t = local_clock();
+			for (perf = 0; perf < max_perf; perf++) {
 				(*algo)->gen_syndrome(disks, PAGE_SIZE, *dptrs);
-				perf++;
 			}
+			ns = local_clock() - t;
 			preempt_enable();
 
-			if (perf > bestgenperf) {
-				bestgenperf = perf;
+			if (ns < ns_best) {
+				ns_best = ns;
 				best = *algo;
 			}
-			pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
-				(perf * HZ * (disks-2)) >>
-				(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+			n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+			pr_info("raid6: %-8s gen() %5llu MB/s (%llu ns)\n", (*algo)->name, (ns > 0) ? n / ns : 0, ns);
 		}
 	}
@@ -210,31 +211,23 @@ static inline const struct raid6_calls *raid6_choose_gen(
 		goto out;
 	}
 
-	pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
-		best->name,
-		(bestgenperf * HZ * (disks - 2)) >>
-		(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
+	n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+	pr_info("raid6: using algorithm %s gen() %llu MB/s (%llu ns)\n",
+		best->name, (ns_best > 0) ? n / ns_best : 0, ns_best);
 
 	if (best->xor_syndrome) {
-		perf = 0;
-		preempt_disable();
-		j0 = jiffies;
-		while ((j1 = jiffies) == j0)
-			cpu_relax();
-		while (time_before(jiffies,
-				    j1 + (1 << RAID6_TIME_JIFFIES_LG2))) {
+		t = local_clock();
+		for (perf = 0; perf < max_perf; perf++) {
 			best->xor_syndrome(disks, start, stop, PAGE_SIZE, *dptrs);
-			perf++;
 		}
+		ns = local_clock() - t;
 		preempt_enable();
 
-		pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
-			(perf * HZ * (disks - 2)) >>
-			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
+		n = max_perf * PAGE_SIZE * ns_per_mb * (disks - 2);
+		pr_info("raid6: .... xor() %llu MB/s, rmw enabled (%llu ns)\n", (ns > 0) ?
n / ns : 0, ns); } - out: return best; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5b009b233ab892..d2f2b67dff5c9f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -626,7 +626,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val) cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id()); x = __this_cpu_add_return(stats_updates, abs(val)); - if (x > MEMCG_CHARGE_BATCH) { + if (unlikely(x > MEMCG_CHARGE_BATCH * 128)) { /* * If stats_flush_threshold exceeds the threshold * (>num_online_cpus()), cgroup stats update will be triggered @@ -845,7 +845,7 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); /* Update memcg and lruvec */ - if (!mem_cgroup_disabled()) + if (likely(!mem_cgroup_disabled())) __mod_memcg_lruvec_state(lruvec, idx, val); } @@ -2151,7 +2151,7 @@ void folio_memcg_lock(struct folio *folio) static void __folio_memcg_unlock(struct mem_cgroup *memcg) { - if (memcg && memcg->move_lock_task == current) { + if (likely(memcg && memcg->move_lock_task == current)) { unsigned long flags = memcg->move_lock_flags; memcg->move_lock_task = NULL; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 85741403948f55..efd9bf746ac057 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5213,11 +5213,11 @@ static int zone_batchsize(struct zone *zone) /* * The number of pages to batch allocate is either ~0.1% - * of the zone or 1MB, whichever is smaller. The batch + * of the zone or 4MB, whichever is smaller. The batch * size is striking a balance between allocation latency * and zone lock contention. */ - batch = min(zone_managed_pages(zone) >> 10, SZ_1M / PAGE_SIZE); + batch = min(zone_managed_pages(zone) >> 10, 4 * SZ_1M / PAGE_SIZE); batch /= 4; /* We effectively *= 4 below */ if (batch < 1) batch = 1; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 394a498c282322..c7ecb4b596a0b7 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -627,7 +627,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) * having to remove and re-insert us on the wait queue. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, + prepare_to_wait_exclusive_lifo(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3d3a24f795734e..9d9b8d1c3ac9cc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4689,8 +4689,8 @@ void __init tcp_init(void) tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); - max_wshare = min(4UL*1024*1024, limit); - max_rshare = min(6UL*1024*1024, limit); + max_wshare = min(16UL*1024*1024, limit); + max_rshare = min(16UL*1024*1024, limit); init_net.ipv4.sysctl_tcp_wmem[0] = PAGE_SIZE; init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024;