diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 274cc7546efc2a..9c1b15cd89ab0d 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -63,6 +63,7 @@ Everything else vduse futex2 perf_ring_buffer + ntsync .. only:: subproject and html diff --git a/Documentation/userspace-api/ntsync.rst b/Documentation/userspace-api/ntsync.rst new file mode 100644 index 00000000000000..25e7c4aef96813 --- /dev/null +++ b/Documentation/userspace-api/ntsync.rst @@ -0,0 +1,385 @@ +=================================== +NT synchronization primitive driver +=================================== + +This page documents the user-space API for the ntsync driver. + +ntsync is a support driver for emulation of NT synchronization +primitives by user-space NT emulators. It exists because implementation +in user-space, using existing tools, cannot match Windows performance +while offering accurate semantics. It is implemented entirely in +software, and does not drive any hardware device. + +This interface is meant as a compatibility tool only, and should not +be used for general synchronization. Instead use generic, versatile +interfaces such as futex(2) and poll(2). + +Synchronization primitives +========================== + +The ntsync driver exposes three types of synchronization primitives: +semaphores, mutexes, and events. + +A semaphore holds a single volatile 32-bit counter, and a static 32-bit +integer denoting the maximum value. It is considered signaled (that is, +can be acquired without contention, or will wake up a waiting thread) +when the counter is nonzero. The counter is decremented by one when a +wait is satisfied. Both the initial and maximum count are established +when the semaphore is created. + +A mutex holds a volatile 32-bit recursion count, and a volatile 32-bit +identifier denoting its owner. A mutex is considered signaled when its +owner is zero (indicating that it is not owned). The recursion count is +incremented when a wait is satisfied, and ownership is set to the given +identifier. + +A mutex also holds an internal flag denoting whether its previous owner +has died; such a mutex is said to be abandoned. Owner death is not +tracked automatically based on thread death, but rather must be +communicated using ``NTSYNC_IOC_MUTEX_KILL``. An abandoned mutex is +inherently considered unowned. + +Except for the "unowned" semantics of zero, the actual value of the +owner identifier is not interpreted by the ntsync driver at all. The +intended use is to store a thread identifier; however, the ntsync +driver does not actually validate that a calling thread provides +consistent or unique identifiers. + +An event is similar to a semaphore with a maximum count of one. It holds +a volatile boolean state denoting whether it is signaled or not. There +are two types of events, auto-reset and manual-reset. An auto-reset +event is designaled when a wait is satisfied; a manual-reset event is +not. The event type is specified when the event is created. + +Unless specified otherwise, all operations on an object are atomic and +totally ordered with respect to other operations on the same object. + +Objects are represented by files. When all file descriptors to an +object are closed, that object is deleted. + +Char device +=========== + +The ntsync driver creates a single char device /dev/ntsync. Each file +description opened on the device represents a unique instance intended +to back an individual NT virtual machine. 
Objects created by one ntsync +instance may only be used with other objects created by the same +instance. + +ioctl reference +=============== + +All operations on the device are done through ioctls. There are four +structures used in ioctl calls:: + + struct ntsync_sem_args { + __u32 count; + __u32 max; + }; + + struct ntsync_mutex_args { + __u32 owner; + __u32 count; + }; + + struct ntsync_event_args { + __u32 signaled; + __u32 manual; + }; + + struct ntsync_wait_args { + __u64 timeout; + __u64 objs; + __u32 count; + __u32 owner; + __u32 index; + __u32 alert; + __u32 flags; + __u32 pad; + }; + +Depending on the ioctl, members of the structure may be used as input, +output, or not at all. + +The ioctls on the device file are as follows: + +.. c:macro:: NTSYNC_IOC_CREATE_SEM + + Create a semaphore object. Takes a pointer to struct + :c:type:`ntsync_sem_args`, which is used as follows: + + .. list-table:: + + * - ``count`` + - Initial count of the semaphore. + * - ``max`` + - Maximum count of the semaphore. + + Fails with ``EINVAL`` if ``count`` is greater than ``max``. + On success, returns a file descriptor for the created semaphore. + +.. c:macro:: NTSYNC_IOC_CREATE_MUTEX + + Create a mutex object. Takes a pointer to struct + :c:type:`ntsync_mutex_args`, which is used as follows: + + .. list-table:: + + * - ``count`` + - Initial recursion count of the mutex. + * - ``owner`` + - Initial owner of the mutex. + + If ``owner`` is nonzero and ``count`` is zero, or if ``owner`` is + zero and ``count`` is nonzero, the function fails with ``EINVAL``. + On success, returns a file descriptor for the created mutex. + +.. c:macro:: NTSYNC_IOC_CREATE_EVENT + + Create an event object. Takes a pointer to struct + :c:type:`ntsync_event_args`, which is used as follows: + + .. list-table:: + + * - ``signaled`` + - If nonzero, the event is initially signaled, otherwise + nonsignaled. + * - ``manual`` + - If nonzero, the event is a manual-reset event, otherwise + auto-reset. + + On success, returns a file descriptor for the created event. + +The ioctls on the individual objects are as follows: + +.. c:macro:: NTSYNC_IOC_SEM_RELEASE + + Post to a semaphore object. Takes a pointer to a 32-bit integer, + which on input holds the count to be added to the semaphore, and on + output contains its previous count. + + If adding to the semaphore's current count would raise the latter + past the semaphore's maximum count, the ioctl fails with + ``EOVERFLOW`` and the semaphore is not affected. If raising the + semaphore's count causes it to become signaled, eligible threads + waiting on this semaphore will be woken and the semaphore's count + decremented appropriately. + +.. c:macro:: NTSYNC_IOC_MUTEX_UNLOCK + + Release a mutex object. Takes a pointer to struct + :c:type:`ntsync_mutex_args`, which is used as follows: + + .. list-table:: + + * - ``owner`` + - Specifies the owner trying to release this mutex. + * - ``count`` + - On output, contains the previous recursion count. + + If ``owner`` is zero, the ioctl fails with ``EINVAL``. If ``owner`` + is not the current owner of the mutex, the ioctl fails with + ``EPERM``. + + The mutex's count will be decremented by one. If decrementing the + mutex's count causes it to become zero, the mutex is marked as + unowned and signaled, and eligible threads waiting on it will be + woken as appropriate. + +.. c:macro:: NTSYNC_IOC_EVENT_SET + + Signal an event object. Takes a pointer to a 32-bit integer, which on + output contains the previous state of the event. 
+ + Eligible threads will be woken, and auto-reset events will be + designaled appropriately. + +.. c:macro:: NTSYNC_IOC_EVENT_RESET + + Designal an event object. Takes a pointer to a 32-bit integer, which + on output contains the previous state of the event. + +.. c:macro:: NTSYNC_IOC_EVENT_PULSE + + Wake threads waiting on an event object while leaving it in an + unsignaled state. Takes a pointer to a 32-bit integer, which on + output contains the previous state of the event. + + A pulse operation can be thought of as a set followed by a reset, + performed as a single atomic operation. If two threads are waiting on + an auto-reset event which is pulsed, only one will be woken. If two + threads are waiting on a manual-reset event which is pulsed, both will + be woken. However, in both cases, the event will be unsignaled + afterwards, and a simultaneous read operation will always report the + event as unsignaled. + +.. c:macro:: NTSYNC_IOC_SEM_READ + + Read the current state of a semaphore object. Takes a pointer to + struct :c:type:`ntsync_sem_args`, which is used as follows: + + .. list-table:: + + * - ``count`` + - On output, contains the current count of the semaphore. + * - ``max`` + - On output, contains the maximum count of the semaphore. + +.. c:macro:: NTSYNC_IOC_MUTEX_READ + + Read the current state of a mutex object. Takes a pointer to struct + :c:type:`ntsync_mutex_args`, which is used as follows: + + .. list-table:: + + * - ``owner`` + - On output, contains the current owner of the mutex, or zero + if the mutex is not currently owned. + * - ``count`` + - On output, contains the current recursion count of the mutex. + + If the mutex is marked as abandoned, the function fails with + ``EOWNERDEAD``. In this case, ``count`` and ``owner`` are set to + zero. + +.. c:macro:: NTSYNC_IOC_EVENT_READ + + Read the current state of an event object. Takes a pointer to struct + :c:type:`ntsync_event_args`, which is used as follows: + + .. list-table:: + + * - ``signaled`` + - On output, contains the current state of the event. + * - ``manual`` + - On output, contains 1 if the event is a manual-reset event, + and 0 otherwise. + +.. c:macro:: NTSYNC_IOC_MUTEX_KILL + + Mark a mutex as unowned and abandoned if it is owned by the given + owner. Takes an input-only pointer to a 32-bit integer denoting the + owner. If the owner is zero, the ioctl fails with ``EINVAL``. If the + owner does not own the mutex, the function fails with ``EPERM``. + + Eligible threads waiting on the mutex will be woken as appropriate + (and such waits will fail with ``EOWNERDEAD``, as described below). + +.. c:macro:: NTSYNC_IOC_WAIT_ANY + + Poll on any of a list of objects, atomically acquiring at most one. + Takes a pointer to struct :c:type:`ntsync_wait_args`, which is + used as follows: + + .. list-table:: + + * - ``timeout`` + - Absolute timeout in nanoseconds. If ``NTSYNC_WAIT_REALTIME`` + is set, the timeout is measured against the REALTIME clock; + otherwise it is measured against the MONOTONIC clock. If the + timeout is equal to or earlier than the current time, the + function returns immediately without sleeping. If ``timeout`` + is U64_MAX, the function will sleep until an object is + signaled, and will not fail with ``ETIMEDOUT``. + * - ``objs`` + - Pointer to an array of ``count`` file descriptors + (specified as an integer so that the structure has the same + size regardless of architecture). If any object is + invalid, the function fails with ``EINVAL``. 
+ * - ``count`` + - Number of objects specified in the ``objs`` array. + If greater than ``NTSYNC_MAX_WAIT_COUNT``, the function fails + with ``EINVAL``. + * - ``owner`` + - Mutex owner identifier. If any object in ``objs`` is a mutex, + the ioctl will attempt to acquire that mutex on behalf of + ``owner``. If ``owner`` is zero, the ioctl fails with + ``EINVAL``. + * - ``index`` + - On success, contains the index (into ``objs``) of the object + which was signaled. If ``alert`` was signaled instead, + this contains ``count``. + * - ``alert`` + - Optional event object file descriptor. If nonzero, this + specifies an "alert" event object which, if signaled, will + terminate the wait. If nonzero, the identifier must point to a + valid event. + * - ``flags`` + - Zero or more flags. Currently the only flag is + ``NTSYNC_WAIT_REALTIME``, which causes the timeout to be + measured against the REALTIME clock instead of MONOTONIC. + * - ``pad`` + - Unused, must be set to zero. + + This function attempts to acquire one of the given objects. If unable + to do so, it sleeps until an object becomes signaled, subsequently + acquiring it, or the timeout expires. In the latter case the ioctl + fails with ``ETIMEDOUT``. The function only acquires one object, even + if multiple objects are signaled. + + A semaphore is considered to be signaled if its count is nonzero, and + is acquired by decrementing its count by one. A mutex is considered + to be signaled if it is unowned or if its owner matches the ``owner`` + argument, and is acquired by incrementing its recursion count by one + and setting its owner to the ``owner`` argument. An auto-reset event + is acquired by designaling it; a manual-reset event is not affected + by acquisition. + + Acquisition is atomic and totally ordered with respect to other + operations on the same object. If two wait operations (with different + ``owner`` identifiers) are queued on the same mutex, only one is + signaled. If two wait operations are queued on the same semaphore, + and a value of one is posted to it, only one is signaled. + + If an abandoned mutex is acquired, the ioctl fails with + ``EOWNERDEAD``. Although this is a failure return, the function may + otherwise be considered successful. The mutex is marked as owned by + the given owner (with a recursion count of 1) and as no longer + abandoned, and ``index`` is still set to the index of the mutex. + + The ``alert`` argument is an "extra" event which can terminate the + wait, independently of all other objects. + + It is valid to pass the same object more than once, including by + passing the same event in the ``objs`` array and in ``alert``. If a + wakeup occurs due to that object being signaled, ``index`` is set to + the lowest index corresponding to that object. + + The function may fail with ``EINTR`` if a signal is received. + +.. c:macro:: NTSYNC_IOC_WAIT_ALL + + Poll on a list of objects, atomically acquiring all of them. Takes a + pointer to struct :c:type:`ntsync_wait_args`, which is used + identically to ``NTSYNC_IOC_WAIT_ANY``, except that ``index`` is + always filled with zero on success if not woken via alert. + + This function attempts to simultaneously acquire all of the given + objects. If unable to do so, it sleeps until all objects become + simultaneously signaled, subsequently acquiring them, or the timeout + expires. In the latter case the ioctl fails with ``ETIMEDOUT`` and no + objects are modified. 
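  As an illustration only, the sketch below shows how a user-space program
  might create two objects on an ntsync instance and perform an "all" wait
  on them. It assumes the UAPI header is available as <linux/ntsync.h>,
  uses an arbitrary nonzero owner identifier, and elides error handling::

      #include <fcntl.h>
      #include <stdint.h>
      #include <stdio.h>
      #include <sys/ioctl.h>
      #include <time.h>
      #include <unistd.h>
      #include <linux/ntsync.h>

      int main(void)
      {
              struct ntsync_sem_args sem_args = { .count = 1, .max = 2 };
              struct ntsync_mutex_args mutex_args = { .owner = 0, .count = 0 };
              struct ntsync_wait_args wait_args = { 0 };
              struct timespec now;
              int objs[2];
              int dev, ret;

              dev = open("/dev/ntsync", O_RDWR);

              /* Object creation returns a file descriptor for the new object. */
              objs[0] = ioctl(dev, NTSYNC_IOC_CREATE_SEM, &sem_args);
              objs[1] = ioctl(dev, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);

              /* Absolute MONOTONIC deadline one second from now. */
              clock_gettime(CLOCK_MONOTONIC, &now);
              wait_args.timeout = now.tv_sec * 1000000000ULL + now.tv_nsec + 1000000000;
              wait_args.objs = (uintptr_t)objs;
              wait_args.count = 2;
              wait_args.owner = 123;  /* opaque nonzero identifier, e.g. a thread id */

              ret = ioctl(dev, NTSYNC_IOC_WAIT_ALL, &wait_args);
              if (ret == 0)
                      printf("acquired all objects, index = %u\n", wait_args.index);
              else
                      perror("NTSYNC_IOC_WAIT_ALL");
              return 0;
      }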
+ + Objects may become signaled and subsequently designaled (through + acquisition by other threads) while this thread is sleeping. Only + once all objects are simultaneously signaled does the ioctl acquire + them and return. The entire acquisition is atomic and totally ordered + with respect to other operations on any of the given objects. + + If an abandoned mutex is acquired, the ioctl fails with + ``EOWNERDEAD``. Similarly to ``NTSYNC_IOC_WAIT_ANY``, all objects are + nevertheless marked as acquired. Note that if multiple mutex objects + are specified, there is no way to know which were marked as + abandoned. + + As with "any" waits, the ``alert`` argument is an "extra" event which + can terminate the wait. Critically, however, an "all" wait will + succeed if all members in ``objs`` are signaled, *or* if ``alert`` is + signaled. In the latter case ``index`` will be set to ``count``. As + with "any" waits, if both conditions are filled, the former takes + priority, and objects in ``objs`` will be acquired. + + Unlike ``NTSYNC_IOC_WAIT_ANY``, it is not valid to pass the same + object more than once, nor is it valid to pass the same object in + ``objs`` and in ``alert``. If this is attempted, the function fails + with ``EINVAL``. diff --git a/MAINTAINERS b/MAINTAINERS index d0f18fdba068b0..13fb284b2feae8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16486,6 +16486,15 @@ T: git https://github.com/Paragon-Software-Group/linux-ntfs3.git F: Documentation/filesystems/ntfs3.rst F: fs/ntfs3/ +NTSYNC SYNCHRONIZATION PRIMITIVE DRIVER +M: Elizabeth Figura +L: wine-devel@winehq.org +S: Supported +F: Documentation/userspace-api/ntsync.rst +F: drivers/misc/ntsync.c +F: include/uapi/linux/ntsync.h +F: tools/testing/selftests/drivers/ntsync/ + NUBUS SUBSYSTEM M: Finn Thain L: linux-m68k@lists.linux-m68k.org diff --git a/Makefile b/Makefile index 18c2a7cf9e9134..44bdb0c03c1737 100644 --- a/Makefile +++ b/Makefile @@ -1003,11 +1003,6 @@ KBUILD_CFLAGS += -fno-strict-overflow # Make sure -fstack-check isn't enabled (like gentoo apparently did) KBUILD_CFLAGS += -fno-stack-check -# conserve stack if available -ifdef CONFIG_CC_IS_GCC -KBUILD_CFLAGS += -fconserve-stack -endif - # Ensure compilers do not transform certain loops into calls to wcslen() KBUILD_CFLAGS += -fno-builtin-wcslen diff --git a/block/elevator.c b/block/elevator.c index 43ba4ab1ada7fd..3e9e41359d84aa 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -566,9 +566,19 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) if (q->nr_hw_queues != 1 && !blk_mq_is_shared_tags(q->tag_set->flags)) +#if defined(CONFIG_ZEN_INTERACTIVE) && defined(CONFIG_MQ_IOSCHED_KYBER) + return elevator_find_get("kyber"); +#elif defined(CONFIG_ZEN_INTERACTIVE) + return elevator_find_get("mq-deadline"); +#else return NULL; +#endif +#if defined(CONFIG_ZEN_INTERACTIVE) && defined(CONFIG_IOSCHED_BFQ) + return elevator_find_get("bfq"); +#else return elevator_find_get("mq-deadline"); +#endif } /* diff --git a/drivers/Makefile b/drivers/Makefile index 45d1c3e630f754..4f5ab2429a7f07 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -64,14 +64,8 @@ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers obj-y += iommu/ -# gpu/ comes after char for AGP vs DRM startup and after iommu -obj-y += gpu/ - obj-$(CONFIG_CONNECTOR) += connector/ -# i810fb depends on char/agp/ -obj-$(CONFIG_FB_I810) += video/fbdev/i810/ - obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ @@ 
-83,6 +77,13 @@ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ obj-$(CONFIG_ATA) += ata/ + +# gpu/ comes after char for AGP vs DRM startup and after iommu +obj-y += gpu/ + +# i810fb depends on char/agp/ +obj-$(CONFIG_FB_I810) += video/fbdev/i810/ + obj-$(CONFIG_TARGET_CORE) += target/ obj-$(CONFIG_MTD) += mtd/ obj-$(CONFIG_SPI) += spi/ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 0e65d37c923113..be91dfe6b3f044 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -18,10 +18,16 @@ #include "cpufreq_ondemand.h" /* On-demand governor macros */ +#if defined(CONFIG_ZEN_INTERACTIVE) +#define DEF_FREQUENCY_UP_THRESHOLD (55) +#define MICRO_FREQUENCY_UP_THRESHOLD (60) +#define DEF_SAMPLING_DOWN_FACTOR (5) +#else #define DEF_FREQUENCY_UP_THRESHOLD (80) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) #define DEF_SAMPLING_DOWN_FACTOR (1) +#endif #define MAX_SAMPLING_DOWN_FACTOR (100000) -#define MICRO_FREQUENCY_UP_THRESHOLD (95) #define MIN_FREQUENCY_UP_THRESHOLD (1) #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index b5cbb57ee5f600..a0f7fa1518c660 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -46,6 +46,7 @@ struct evdev_client { struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; + struct rcu_head rcu; enum input_clock_type clk_type; bool revoked; unsigned long *evmasks[EV_CNT]; @@ -368,13 +369,22 @@ static void evdev_attach_client(struct evdev *evdev, spin_unlock(&evdev->client_lock); } +static void evdev_reclaim_client(struct rcu_head *rp) +{ + struct evdev_client *client = container_of(rp, struct evdev_client, rcu); + unsigned int i; + for (i = 0; i < EV_CNT; ++i) + bitmap_free(client->evmasks[i]); + kvfree(client); +} + static void evdev_detach_client(struct evdev *evdev, struct evdev_client *client) { spin_lock(&evdev->client_lock); list_del_rcu(&client->node); spin_unlock(&evdev->client_lock); - synchronize_rcu(); + call_rcu(&client->rcu, evdev_reclaim_client); } static int evdev_open_device(struct evdev *evdev) @@ -427,7 +437,6 @@ static int evdev_release(struct inode *inode, struct file *file) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - unsigned int i; mutex_lock(&evdev->mutex); @@ -439,11 +448,6 @@ static int evdev_release(struct inode *inode, struct file *file) evdev_detach_client(evdev, client); - for (i = 0; i < EV_CNT; ++i) - bitmap_free(client->evmasks[i]); - - kvfree(client); - evdev_close_device(evdev); return 0; @@ -486,7 +490,6 @@ static int evdev_open(struct inode *inode, struct file *file) err_free_client: evdev_detach_client(evdev, client); - kvfree(client); return error; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 78c975d7cd5f42..a123ad7b471f95 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3308,6 +3308,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } +#ifdef CONFIG_ZEN_INTERACTIVE + set_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); + set_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); +#endif + ret = crypt_ctr_cipher(ti, argv[0], argv[1]); if (ret < 0) goto bad; diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 3fe7e2a9bd294d..6c8b999a5e0853 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -517,7 +517,6 @@ config OPEN_DICE config NTSYNC tristate "NT synchronization primitive emulation" - depends on BROKEN help This module provides kernel support 
for emulation of Windows NT synchronization primitives. It is not a hardware driver. diff --git a/drivers/misc/ntsync.c b/drivers/misc/ntsync.c index 4954553b7baa6f..457ff28b789f75 100644 --- a/drivers/misc/ntsync.c +++ b/drivers/misc/ntsync.c @@ -6,11 +6,17 @@ */ #include +#include #include #include +#include +#include #include #include +#include #include +#include +#include #include #include #include @@ -19,6 +25,8 @@ enum ntsync_type { NTSYNC_TYPE_SEM, + NTSYNC_TYPE_MUTEX, + NTSYNC_TYPE_EVENT, }; /* @@ -30,10 +38,13 @@ enum ntsync_type { * * Both rely on struct file for reference counting. Individual * ntsync_obj objects take a reference to the device when created. + * Wait operations take a reference to each object being waited on for + * the duration of the wait. */ struct ntsync_obj { spinlock_t lock; + int dev_locked; enum ntsync_type type; @@ -46,22 +57,344 @@ struct ntsync_obj { __u32 count; __u32 max; } sem; + struct { + __u32 count; + pid_t owner; + bool ownerdead; + } mutex; + struct { + bool manual; + bool signaled; + } event; } u; + + /* + * any_waiters is protected by the object lock, but all_waiters is + * protected by the device wait_all_lock. + */ + struct list_head any_waiters; + struct list_head all_waiters; + + /* + * Hint describing how many tasks are queued on this object in a + * wait-all operation. + * + * Any time we do a wake, we may need to wake "all" waiters as well as + * "any" waiters. In order to atomically wake "all" waiters, we must + * lock all of the objects, and that means grabbing the wait_all_lock + * below (and, due to lock ordering rules, before locking this object). + * However, wait-all is a rare operation, and grabbing the wait-all + * lock for every wake would create unnecessary contention. + * Therefore we first check whether all_hint is zero, and, if it is, + * we skip trying to wake "all" waiters. + * + * Since wait requests must originate from user-space threads, we're + * limited here by PID_MAX_LIMIT, so there's no risk of overflow. + */ + atomic_t all_hint; +}; + +struct ntsync_q_entry { + struct list_head node; + struct ntsync_q *q; + struct ntsync_obj *obj; + __u32 index; +}; + +struct ntsync_q { + struct task_struct *task; + __u32 owner; + + /* + * Protected via atomic_try_cmpxchg(). Only the thread that wins the + * compare-and-swap may actually change object states and wake this + * task. + */ + atomic_t signaled; + + bool all; + bool ownerdead; + __u32 count; + struct ntsync_q_entry entries[]; }; struct ntsync_device { + /* + * Wait-all operations must atomically grab all objects, and be totally + * ordered with respect to each other and wait-any operations. + * If one thread is trying to acquire several objects, another thread + * cannot touch the object at the same time. + * + * This device-wide lock is used to serialize wait-for-all + * operations, and operations on an object that is involved in a + * wait-for-all. + */ + struct mutex wait_all_lock; + struct file *file; }; +/* + * Single objects are locked using obj->lock. + * + * Multiple objects are 'locked' while holding dev->wait_all_lock. + * In this case however, individual objects are not locked by holding + * obj->lock, but by setting obj->dev_locked. + * + * This means that in order to lock a single object, the sequence is slightly + * more complicated than usual. Specifically it needs to check obj->dev_locked + * after acquiring obj->lock, if set, it needs to drop the lock and acquire + * dev->wait_all_lock in order to serialize against the multi-object operation. 
+ */ + +static void dev_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) +{ + lockdep_assert_held(&dev->wait_all_lock); + lockdep_assert(obj->dev == dev); + spin_lock(&obj->lock); + /* + * By setting obj->dev_locked inside obj->lock, it is ensured that + * anyone holding obj->lock must see the value. + */ + obj->dev_locked = 1; + spin_unlock(&obj->lock); +} + +static void dev_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) +{ + lockdep_assert_held(&dev->wait_all_lock); + lockdep_assert(obj->dev == dev); + spin_lock(&obj->lock); + obj->dev_locked = 0; + spin_unlock(&obj->lock); +} + +static void obj_lock(struct ntsync_obj *obj) +{ + struct ntsync_device *dev = obj->dev; + + for (;;) { + spin_lock(&obj->lock); + if (likely(!obj->dev_locked)) + break; + + spin_unlock(&obj->lock); + mutex_lock(&dev->wait_all_lock); + spin_lock(&obj->lock); + /* + * obj->dev_locked should be set and released under the same + * wait_all_lock section, since we now own this lock, it should + * be clear. + */ + lockdep_assert(!obj->dev_locked); + spin_unlock(&obj->lock); + mutex_unlock(&dev->wait_all_lock); + } +} + +static void obj_unlock(struct ntsync_obj *obj) +{ + spin_unlock(&obj->lock); +} + +static bool ntsync_lock_obj(struct ntsync_device *dev, struct ntsync_obj *obj) +{ + bool all; + + obj_lock(obj); + all = atomic_read(&obj->all_hint); + if (unlikely(all)) { + obj_unlock(obj); + mutex_lock(&dev->wait_all_lock); + dev_lock_obj(dev, obj); + } + + return all; +} + +static void ntsync_unlock_obj(struct ntsync_device *dev, struct ntsync_obj *obj, bool all) +{ + if (all) { + dev_unlock_obj(dev, obj); + mutex_unlock(&dev->wait_all_lock); + } else { + obj_unlock(obj); + } +} + +#define ntsync_assert_held(obj) \ + lockdep_assert((lockdep_is_held(&(obj)->lock) != LOCK_STATE_NOT_HELD) || \ + ((lockdep_is_held(&(obj)->dev->wait_all_lock) != LOCK_STATE_NOT_HELD) && \ + (obj)->dev_locked)) + +static bool is_signaled(struct ntsync_obj *obj, __u32 owner) +{ + ntsync_assert_held(obj); + + switch (obj->type) { + case NTSYNC_TYPE_SEM: + return !!obj->u.sem.count; + case NTSYNC_TYPE_MUTEX: + if (obj->u.mutex.owner && obj->u.mutex.owner != owner) + return false; + return obj->u.mutex.count < UINT_MAX; + case NTSYNC_TYPE_EVENT: + return obj->u.event.signaled; + } + + WARN(1, "bad object type %#x\n", obj->type); + return false; +} + +/* + * "locked_obj" is an optional pointer to an object which is already locked and + * should not be locked again. This is necessary so that changing an object's + * state and waking it can be a single atomic operation. 
+ */ +static void try_wake_all(struct ntsync_device *dev, struct ntsync_q *q, + struct ntsync_obj *locked_obj) +{ + __u32 count = q->count; + bool can_wake = true; + int signaled = -1; + __u32 i; + + lockdep_assert_held(&dev->wait_all_lock); + if (locked_obj) + lockdep_assert(locked_obj->dev_locked); + + for (i = 0; i < count; i++) { + if (q->entries[i].obj != locked_obj) + dev_lock_obj(dev, q->entries[i].obj); + } + + for (i = 0; i < count; i++) { + if (!is_signaled(q->entries[i].obj, q->owner)) { + can_wake = false; + break; + } + } + + if (can_wake && atomic_try_cmpxchg(&q->signaled, &signaled, 0)) { + for (i = 0; i < count; i++) { + struct ntsync_obj *obj = q->entries[i].obj; + + switch (obj->type) { + case NTSYNC_TYPE_SEM: + obj->u.sem.count--; + break; + case NTSYNC_TYPE_MUTEX: + if (obj->u.mutex.ownerdead) + q->ownerdead = true; + obj->u.mutex.ownerdead = false; + obj->u.mutex.count++; + obj->u.mutex.owner = q->owner; + break; + case NTSYNC_TYPE_EVENT: + if (!obj->u.event.manual) + obj->u.event.signaled = false; + break; + } + } + wake_up_process(q->task); + } + + for (i = 0; i < count; i++) { + if (q->entries[i].obj != locked_obj) + dev_unlock_obj(dev, q->entries[i].obj); + } +} + +static void try_wake_all_obj(struct ntsync_device *dev, struct ntsync_obj *obj) +{ + struct ntsync_q_entry *entry; + + lockdep_assert_held(&dev->wait_all_lock); + lockdep_assert(obj->dev_locked); + + list_for_each_entry(entry, &obj->all_waiters, node) + try_wake_all(dev, entry->q, obj); +} + +static void try_wake_any_sem(struct ntsync_obj *sem) +{ + struct ntsync_q_entry *entry; + + ntsync_assert_held(sem); + lockdep_assert(sem->type == NTSYNC_TYPE_SEM); + + list_for_each_entry(entry, &sem->any_waiters, node) { + struct ntsync_q *q = entry->q; + int signaled = -1; + + if (!sem->u.sem.count) + break; + + if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { + sem->u.sem.count--; + wake_up_process(q->task); + } + } +} + +static void try_wake_any_mutex(struct ntsync_obj *mutex) +{ + struct ntsync_q_entry *entry; + + ntsync_assert_held(mutex); + lockdep_assert(mutex->type == NTSYNC_TYPE_MUTEX); + + list_for_each_entry(entry, &mutex->any_waiters, node) { + struct ntsync_q *q = entry->q; + int signaled = -1; + + if (mutex->u.mutex.count == UINT_MAX) + break; + if (mutex->u.mutex.owner && mutex->u.mutex.owner != q->owner) + continue; + + if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { + if (mutex->u.mutex.ownerdead) + q->ownerdead = true; + mutex->u.mutex.ownerdead = false; + mutex->u.mutex.count++; + mutex->u.mutex.owner = q->owner; + wake_up_process(q->task); + } + } +} + +static void try_wake_any_event(struct ntsync_obj *event) +{ + struct ntsync_q_entry *entry; + + ntsync_assert_held(event); + lockdep_assert(event->type == NTSYNC_TYPE_EVENT); + + list_for_each_entry(entry, &event->any_waiters, node) { + struct ntsync_q *q = entry->q; + int signaled = -1; + + if (!event->u.event.signaled) + break; + + if (atomic_try_cmpxchg(&q->signaled, &signaled, entry->index)) { + if (!event->u.event.manual) + event->u.event.signaled = false; + wake_up_process(q->task); + } + } +} + /* * Actually change the semaphore state, returning -EOVERFLOW if it is made * invalid. 
*/ -static int post_sem_state(struct ntsync_obj *sem, __u32 count) +static int release_sem_state(struct ntsync_obj *sem, __u32 count) { __u32 sum; - lockdep_assert_held(&sem->lock); + ntsync_assert_held(sem); if (check_add_overflow(sem->u.sem.count, count, &sum) || sum > sem->u.sem.max) @@ -71,11 +404,13 @@ static int post_sem_state(struct ntsync_obj *sem, __u32 count) return 0; } -static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp) +static int ntsync_sem_release(struct ntsync_obj *sem, void __user *argp) { + struct ntsync_device *dev = sem->dev; __u32 __user *user_args = argp; __u32 prev_count; __u32 args; + bool all; int ret; if (copy_from_user(&args, argp, sizeof(args))) @@ -84,12 +419,17 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp) if (sem->type != NTSYNC_TYPE_SEM) return -EINVAL; - spin_lock(&sem->lock); + all = ntsync_lock_obj(dev, sem); prev_count = sem->u.sem.count; - ret = post_sem_state(sem, args); + ret = release_sem_state(sem, args); + if (!ret) { + if (all) + try_wake_all_obj(dev, sem); + try_wake_any_sem(sem); + } - spin_unlock(&sem->lock); + ntsync_unlock_obj(dev, sem, all); if (!ret && put_user(prev_count, user_args)) ret = -EFAULT; @@ -97,6 +437,220 @@ static int ntsync_sem_post(struct ntsync_obj *sem, void __user *argp) return ret; } +/* + * Actually change the mutex state, returning -EPERM if not the owner. + */ +static int unlock_mutex_state(struct ntsync_obj *mutex, + const struct ntsync_mutex_args *args) +{ + ntsync_assert_held(mutex); + + if (mutex->u.mutex.owner != args->owner) + return -EPERM; + + if (!--mutex->u.mutex.count) + mutex->u.mutex.owner = 0; + return 0; +} + +static int ntsync_mutex_unlock(struct ntsync_obj *mutex, void __user *argp) +{ + struct ntsync_mutex_args __user *user_args = argp; + struct ntsync_device *dev = mutex->dev; + struct ntsync_mutex_args args; + __u32 prev_count; + bool all; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + if (!args.owner) + return -EINVAL; + + if (mutex->type != NTSYNC_TYPE_MUTEX) + return -EINVAL; + + all = ntsync_lock_obj(dev, mutex); + + prev_count = mutex->u.mutex.count; + ret = unlock_mutex_state(mutex, &args); + if (!ret) { + if (all) + try_wake_all_obj(dev, mutex); + try_wake_any_mutex(mutex); + } + + ntsync_unlock_obj(dev, mutex, all); + + if (!ret && put_user(prev_count, &user_args->count)) + ret = -EFAULT; + + return ret; +} + +/* + * Actually change the mutex state to mark its owner as dead, + * returning -EPERM if not the owner. 
+ */ +static int kill_mutex_state(struct ntsync_obj *mutex, __u32 owner) +{ + ntsync_assert_held(mutex); + + if (mutex->u.mutex.owner != owner) + return -EPERM; + + mutex->u.mutex.ownerdead = true; + mutex->u.mutex.owner = 0; + mutex->u.mutex.count = 0; + return 0; +} + +static int ntsync_mutex_kill(struct ntsync_obj *mutex, void __user *argp) +{ + struct ntsync_device *dev = mutex->dev; + __u32 owner; + bool all; + int ret; + + if (get_user(owner, (__u32 __user *)argp)) + return -EFAULT; + if (!owner) + return -EINVAL; + + if (mutex->type != NTSYNC_TYPE_MUTEX) + return -EINVAL; + + all = ntsync_lock_obj(dev, mutex); + + ret = kill_mutex_state(mutex, owner); + if (!ret) { + if (all) + try_wake_all_obj(dev, mutex); + try_wake_any_mutex(mutex); + } + + ntsync_unlock_obj(dev, mutex, all); + + return ret; +} + +static int ntsync_event_set(struct ntsync_obj *event, void __user *argp, bool pulse) +{ + struct ntsync_device *dev = event->dev; + __u32 prev_state; + bool all; + + if (event->type != NTSYNC_TYPE_EVENT) + return -EINVAL; + + all = ntsync_lock_obj(dev, event); + + prev_state = event->u.event.signaled; + event->u.event.signaled = true; + if (all) + try_wake_all_obj(dev, event); + try_wake_any_event(event); + if (pulse) + event->u.event.signaled = false; + + ntsync_unlock_obj(dev, event, all); + + if (put_user(prev_state, (__u32 __user *)argp)) + return -EFAULT; + + return 0; +} + +static int ntsync_event_reset(struct ntsync_obj *event, void __user *argp) +{ + struct ntsync_device *dev = event->dev; + __u32 prev_state; + bool all; + + if (event->type != NTSYNC_TYPE_EVENT) + return -EINVAL; + + all = ntsync_lock_obj(dev, event); + + prev_state = event->u.event.signaled; + event->u.event.signaled = false; + + ntsync_unlock_obj(dev, event, all); + + if (put_user(prev_state, (__u32 __user *)argp)) + return -EFAULT; + + return 0; +} + +static int ntsync_sem_read(struct ntsync_obj *sem, void __user *argp) +{ + struct ntsync_sem_args __user *user_args = argp; + struct ntsync_device *dev = sem->dev; + struct ntsync_sem_args args; + bool all; + + if (sem->type != NTSYNC_TYPE_SEM) + return -EINVAL; + + all = ntsync_lock_obj(dev, sem); + + args.count = sem->u.sem.count; + args.max = sem->u.sem.max; + + ntsync_unlock_obj(dev, sem, all); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return 0; +} + +static int ntsync_mutex_read(struct ntsync_obj *mutex, void __user *argp) +{ + struct ntsync_mutex_args __user *user_args = argp; + struct ntsync_device *dev = mutex->dev; + struct ntsync_mutex_args args; + bool all; + int ret; + + if (mutex->type != NTSYNC_TYPE_MUTEX) + return -EINVAL; + + all = ntsync_lock_obj(dev, mutex); + + args.count = mutex->u.mutex.count; + args.owner = mutex->u.mutex.owner; + ret = mutex->u.mutex.ownerdead ? 
-EOWNERDEAD : 0; + + ntsync_unlock_obj(dev, mutex, all); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return ret; +} + +static int ntsync_event_read(struct ntsync_obj *event, void __user *argp) +{ + struct ntsync_event_args __user *user_args = argp; + struct ntsync_device *dev = event->dev; + struct ntsync_event_args args; + bool all; + + if (event->type != NTSYNC_TYPE_EVENT) + return -EINVAL; + + all = ntsync_lock_obj(dev, event); + + args.manual = event->u.event.manual; + args.signaled = event->u.event.signaled; + + ntsync_unlock_obj(dev, event, all); + + if (copy_to_user(user_args, &args, sizeof(args))) + return -EFAULT; + return 0; +} + static int ntsync_obj_release(struct inode *inode, struct file *file) { struct ntsync_obj *obj = file->private_data; @@ -114,8 +668,24 @@ static long ntsync_obj_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)parm; switch (cmd) { - case NTSYNC_IOC_SEM_POST: - return ntsync_sem_post(obj, argp); + case NTSYNC_IOC_SEM_RELEASE: + return ntsync_sem_release(obj, argp); + case NTSYNC_IOC_SEM_READ: + return ntsync_sem_read(obj, argp); + case NTSYNC_IOC_MUTEX_UNLOCK: + return ntsync_mutex_unlock(obj, argp); + case NTSYNC_IOC_MUTEX_KILL: + return ntsync_mutex_kill(obj, argp); + case NTSYNC_IOC_MUTEX_READ: + return ntsync_mutex_read(obj, argp); + case NTSYNC_IOC_EVENT_SET: + return ntsync_event_set(obj, argp, false); + case NTSYNC_IOC_EVENT_RESET: + return ntsync_event_reset(obj, argp); + case NTSYNC_IOC_EVENT_PULSE: + return ntsync_event_set(obj, argp, true); + case NTSYNC_IOC_EVENT_READ: + return ntsync_event_read(obj, argp); default: return -ENOIOCTLCMD; } @@ -140,6 +710,9 @@ static struct ntsync_obj *ntsync_alloc_obj(struct ntsync_device *dev, obj->dev = dev; get_file(dev->file); spin_lock_init(&obj->lock); + INIT_LIST_HEAD(&obj->any_waiters); + INIT_LIST_HEAD(&obj->all_waiters); + atomic_set(&obj->all_hint, 0); return obj; } @@ -165,7 +738,6 @@ static int ntsync_obj_get_fd(struct ntsync_obj *obj) static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp) { - struct ntsync_sem_args __user *user_args = argp; struct ntsync_sem_args args; struct ntsync_obj *sem; int fd; @@ -182,12 +754,398 @@ static int ntsync_create_sem(struct ntsync_device *dev, void __user *argp) sem->u.sem.count = args.count; sem->u.sem.max = args.max; fd = ntsync_obj_get_fd(sem); - if (fd < 0) { + if (fd < 0) kfree(sem); - return fd; + + return fd; +} + +static int ntsync_create_mutex(struct ntsync_device *dev, void __user *argp) +{ + struct ntsync_mutex_args args; + struct ntsync_obj *mutex; + int fd; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + if (!args.owner != !args.count) + return -EINVAL; + + mutex = ntsync_alloc_obj(dev, NTSYNC_TYPE_MUTEX); + if (!mutex) + return -ENOMEM; + mutex->u.mutex.count = args.count; + mutex->u.mutex.owner = args.owner; + fd = ntsync_obj_get_fd(mutex); + if (fd < 0) + kfree(mutex); + + return fd; +} + +static int ntsync_create_event(struct ntsync_device *dev, void __user *argp) +{ + struct ntsync_event_args args; + struct ntsync_obj *event; + int fd; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + event = ntsync_alloc_obj(dev, NTSYNC_TYPE_EVENT); + if (!event) + return -ENOMEM; + event->u.event.manual = args.manual; + event->u.event.signaled = args.signaled; + fd = ntsync_obj_get_fd(event); + if (fd < 0) + kfree(event); + + return fd; +} + +static struct ntsync_obj *get_obj(struct ntsync_device *dev, int fd) +{ + struct file *file = 
fget(fd); + struct ntsync_obj *obj; + + if (!file) + return NULL; + + if (file->f_op != &ntsync_obj_fops) { + fput(file); + return NULL; } - return put_user(fd, &user_args->sem); + obj = file->private_data; + if (obj->dev != dev) { + fput(file); + return NULL; + } + + return obj; +} + +static void put_obj(struct ntsync_obj *obj) +{ + fput(obj->file); +} + +static int ntsync_schedule(const struct ntsync_q *q, const struct ntsync_wait_args *args) +{ + ktime_t timeout = ns_to_ktime(args->timeout); + clockid_t clock = CLOCK_MONOTONIC; + ktime_t *timeout_ptr; + int ret = 0; + + timeout_ptr = (args->timeout == U64_MAX ? NULL : &timeout); + + if (args->flags & NTSYNC_WAIT_REALTIME) + clock = CLOCK_REALTIME; + + do { + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + set_current_state(TASK_INTERRUPTIBLE); + if (atomic_read(&q->signaled) != -1) { + ret = 0; + break; + } + ret = schedule_hrtimeout_range_clock(timeout_ptr, 0, HRTIMER_MODE_ABS, clock); + } while (ret < 0); + __set_current_state(TASK_RUNNING); + + return ret; +} + +/* + * Allocate and initialize the ntsync_q structure, but do not queue us yet. + */ +static int setup_wait(struct ntsync_device *dev, + const struct ntsync_wait_args *args, bool all, + struct ntsync_q **ret_q) +{ + int fds[NTSYNC_MAX_WAIT_COUNT + 1]; + const __u32 count = args->count; + struct ntsync_q *q; + __u32 total_count; + __u32 i, j; + + if (args->pad || (args->flags & ~NTSYNC_WAIT_REALTIME)) + return -EINVAL; + + if (args->count > NTSYNC_MAX_WAIT_COUNT) + return -EINVAL; + + total_count = count; + if (args->alert) + total_count++; + + if (copy_from_user(fds, u64_to_user_ptr(args->objs), + array_size(count, sizeof(*fds)))) + return -EFAULT; + if (args->alert) + fds[count] = args->alert; + + q = kmalloc(struct_size(q, entries, total_count), GFP_KERNEL); + if (!q) + return -ENOMEM; + q->task = current; + q->owner = args->owner; + atomic_set(&q->signaled, -1); + q->all = all; + q->ownerdead = false; + q->count = count; + + for (i = 0; i < total_count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = get_obj(dev, fds[i]); + + if (!obj) + goto err; + + if (all) { + /* Check that the objects are all distinct. */ + for (j = 0; j < i; j++) { + if (obj == q->entries[j].obj) { + put_obj(obj); + goto err; + } + } + } + + entry->obj = obj; + entry->q = q; + entry->index = i; + } + + *ret_q = q; + return 0; + +err: + for (j = 0; j < i; j++) + put_obj(q->entries[j].obj); + kfree(q); + return -EINVAL; +} + +static void try_wake_any_obj(struct ntsync_obj *obj) +{ + switch (obj->type) { + case NTSYNC_TYPE_SEM: + try_wake_any_sem(obj); + break; + case NTSYNC_TYPE_MUTEX: + try_wake_any_mutex(obj); + break; + case NTSYNC_TYPE_EVENT: + try_wake_any_event(obj); + break; + } +} + +static int ntsync_wait_any(struct ntsync_device *dev, void __user *argp) +{ + struct ntsync_wait_args args; + __u32 i, total_count; + struct ntsync_q *q; + int signaled; + bool all; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + ret = setup_wait(dev, &args, false, &q); + if (ret < 0) + return ret; + + total_count = args.count; + if (args.alert) + total_count++; + + /* queue ourselves */ + + for (i = 0; i < total_count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = entry->obj; + + all = ntsync_lock_obj(dev, obj); + list_add_tail(&entry->node, &obj->any_waiters); + ntsync_unlock_obj(dev, obj, all); + } + + /* + * Check if we are already signaled. 
+ * + * Note that the API requires that normal objects are checked before + * the alert event. Hence we queue the alert event last, and check + * objects in order. + */ + + for (i = 0; i < total_count; i++) { + struct ntsync_obj *obj = q->entries[i].obj; + + if (atomic_read(&q->signaled) != -1) + break; + + all = ntsync_lock_obj(dev, obj); + try_wake_any_obj(obj); + ntsync_unlock_obj(dev, obj, all); + } + + /* sleep */ + + ret = ntsync_schedule(q, &args); + + /* and finally, unqueue */ + + for (i = 0; i < total_count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = entry->obj; + + all = ntsync_lock_obj(dev, obj); + list_del(&entry->node); + ntsync_unlock_obj(dev, obj, all); + + put_obj(obj); + } + + signaled = atomic_read(&q->signaled); + if (signaled != -1) { + struct ntsync_wait_args __user *user_args = argp; + + /* even if we caught a signal, we need to communicate success */ + ret = q->ownerdead ? -EOWNERDEAD : 0; + + if (put_user(signaled, &user_args->index)) + ret = -EFAULT; + } else if (!ret) { + ret = -ETIMEDOUT; + } + + kfree(q); + return ret; +} + +static int ntsync_wait_all(struct ntsync_device *dev, void __user *argp) +{ + struct ntsync_wait_args args; + struct ntsync_q *q; + int signaled; + __u32 i; + int ret; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + + ret = setup_wait(dev, &args, true, &q); + if (ret < 0) + return ret; + + /* queue ourselves */ + + mutex_lock(&dev->wait_all_lock); + + for (i = 0; i < args.count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = entry->obj; + + atomic_inc(&obj->all_hint); + + /* + * obj->all_waiters is protected by dev->wait_all_lock rather + * than obj->lock, so there is no need to acquire obj->lock + * here. + */ + list_add_tail(&entry->node, &obj->all_waiters); + } + if (args.alert) { + struct ntsync_q_entry *entry = &q->entries[args.count]; + struct ntsync_obj *obj = entry->obj; + + dev_lock_obj(dev, obj); + list_add_tail(&entry->node, &obj->any_waiters); + dev_unlock_obj(dev, obj); + } + + /* check if we are already signaled */ + + try_wake_all(dev, q, NULL); + + mutex_unlock(&dev->wait_all_lock); + + /* + * Check if the alert event is signaled, making sure to do so only + * after checking if the other objects are signaled. + */ + + if (args.alert) { + struct ntsync_obj *obj = q->entries[args.count].obj; + + if (atomic_read(&q->signaled) == -1) { + bool all = ntsync_lock_obj(dev, obj); + try_wake_any_obj(obj); + ntsync_unlock_obj(dev, obj, all); + } + } + + /* sleep */ + + ret = ntsync_schedule(q, &args); + + /* and finally, unqueue */ + + mutex_lock(&dev->wait_all_lock); + + for (i = 0; i < args.count; i++) { + struct ntsync_q_entry *entry = &q->entries[i]; + struct ntsync_obj *obj = entry->obj; + + /* + * obj->all_waiters is protected by dev->wait_all_lock rather + * than obj->lock, so there is no need to acquire it here. + */ + list_del(&entry->node); + + atomic_dec(&obj->all_hint); + + put_obj(obj); + } + + mutex_unlock(&dev->wait_all_lock); + + if (args.alert) { + struct ntsync_q_entry *entry = &q->entries[args.count]; + struct ntsync_obj *obj = entry->obj; + bool all; + + all = ntsync_lock_obj(dev, obj); + list_del(&entry->node); + ntsync_unlock_obj(dev, obj, all); + + put_obj(obj); + } + + signaled = atomic_read(&q->signaled); + if (signaled != -1) { + struct ntsync_wait_args __user *user_args = argp; + + /* even if we caught a signal, we need to communicate success */ + ret = q->ownerdead ? 
-EOWNERDEAD : 0; + + if (put_user(signaled, &user_args->index)) + ret = -EFAULT; + } else if (!ret) { + ret = -ETIMEDOUT; + } + + kfree(q); + return ret; } static int ntsync_char_open(struct inode *inode, struct file *file) @@ -198,6 +1156,8 @@ static int ntsync_char_open(struct inode *inode, struct file *file) if (!dev) return -ENOMEM; + mutex_init(&dev->wait_all_lock); + file->private_data = dev; dev->file = file; return nonseekable_open(inode, file); @@ -219,8 +1179,16 @@ static long ntsync_char_ioctl(struct file *file, unsigned int cmd, void __user *argp = (void __user *)parm; switch (cmd) { + case NTSYNC_IOC_CREATE_EVENT: + return ntsync_create_event(dev, argp); + case NTSYNC_IOC_CREATE_MUTEX: + return ntsync_create_mutex(dev, argp); case NTSYNC_IOC_CREATE_SEM: return ntsync_create_sem(dev, argp); + case NTSYNC_IOC_WAIT_ALL: + return ntsync_wait_all(dev, argp); + case NTSYNC_IOC_WAIT_ANY: + return ntsync_wait_any(dev, argp); default: return -ENOIOCTLCMD; } diff --git a/fs/proc/base.c b/fs/proc/base.c index a2541f5204af06..5cb8e097ce1fc3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3349,6 +3349,9 @@ static const struct pid_entry tgid_base_stuff[] = { REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), +#ifdef CONFIG_MEM_SOFT_DIRTY + REG("pagemap_reset", S_IRUSR, proc_pagemap_reset_operations), +#endif #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 4e0c5b57ffdbb8..7676322518cfb9 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -349,6 +349,7 @@ extern const struct file_operations proc_pid_smaps_operations; extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; +extern const struct file_operations proc_pagemap_reset_operations; extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 536b7dc4538182..af4fe0b299840b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1363,6 +1363,8 @@ enum clear_refs_types { struct clear_refs_private { enum clear_refs_types type; + unsigned long start, end; + bool clear_range; }; #ifdef CONFIG_MEM_SOFT_DIRTY @@ -1383,7 +1385,7 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, return folio_maybe_dma_pinned(folio); } -static inline void clear_soft_dirty(struct vm_area_struct *vma, +static inline bool clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { /* @@ -1393,37 +1395,46 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, * of how soft-dirty works. 
*/ pte_t ptent = ptep_get(pte); + bool ret = false; if (pte_present(ptent)) { pte_t old_pte; if (pte_is_pinned(vma, addr, ptent)) - return; + return ret; old_pte = ptep_modify_prot_start(vma, addr, pte); + ret = pte_soft_dirty(old_pte); ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { + ret = pte_swp_soft_dirty(ptent); ptent = pte_swp_clear_soft_dirty(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); } + return ret; } #else -static inline void clear_soft_dirty(struct vm_area_struct *vma, +static inline bool clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { + return false; } #endif #if defined(CONFIG_MEM_SOFT_DIRTY) && defined(CONFIG_TRANSPARENT_HUGEPAGE) -static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, +static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { pmd_t old, pmd = *pmdp; + bool ret = false; if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ old = pmdp_invalidate(vma, addr, pmdp); + + ret = pmd_soft_dirty(old); + if (pmd_dirty(old)) pmd = pmd_mkdirty(pmd); if (pmd_young(old)) @@ -1434,14 +1445,17 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + ret = pmd_swp_soft_dirty(pmd); pmd = pmd_swp_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } + return ret; } #else -static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, +static inline bool clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { + return false; } #endif @@ -1454,6 +1468,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, spinlock_t *ptl; struct folio *folio; + BUG_ON(addr < cp->start || end > cp->end); + ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { if (cp->type == CLEAR_REFS_SOFT_DIRTY) { @@ -1511,9 +1527,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end, struct clear_refs_private *cp = walk->private; struct vm_area_struct *vma = walk->vma; - if (vma->vm_flags & VM_PFNMAP) + if (!cp->clear_range && (vma->vm_flags & VM_PFNMAP)) return 1; + BUG_ON(start < cp->start || end > cp->end); + /* * Writing 1 to /proc/pid/clear_refs affects all pages. * Writing 2 to /proc/pid/clear_refs only affects anonymous pages. 
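A hedged user-space sketch of the proof-of-concept range interface added to clear_refs_write() below: the write is a leading '6' byte followed by the start and end addresses as two raw unsigned longs, 17 bytes in total. It assumes a 64-bit process whose unsigned long matches the kernel's, and elides error reporting:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Clear soft-dirty bits for [start, end) of another process using the
     * POC '6' command: one command byte plus two raw unsigned longs.
     */
    static int clear_soft_dirty_range(pid_t pid, unsigned long start, unsigned long end)
    {
            char path[64], buf[17];
            ssize_t n;
            int fd;

            snprintf(path, sizeof(path), "/proc/%d/clear_refs", (int)pid);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;

            buf[0] = '6';
            memcpy(buf + 1, &start, sizeof(start));   /* bytes 1..8  */
            memcpy(buf + 9, &end, sizeof(end));       /* bytes 9..16 */

            n = write(fd, buf, sizeof(buf));          /* must be exactly 17 bytes */
            close(fd);
            return n == (ssize_t)sizeof(buf) ? 0 : -1;
    }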
@@ -1537,10 +1555,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; - char buffer[PROC_NUMBUF] = {}; + char buffer[18] = {}; struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; + unsigned long start, end; + bool clear_range; int itype; int rv; @@ -1548,12 +1568,34 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; - rv = kstrtoint(strstrip(buffer), 10, &itype); - if (rv < 0) - return rv; - type = (enum clear_refs_types)itype; - if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) - return -EINVAL; + + if (buffer[0] == '6') + { + static int once; + + if (!once++) + printk(KERN_DEBUG "task_mmu: Using POC clear refs range implementation.\n"); + + if (count != 17) + return -EINVAL; + + type = CLEAR_REFS_SOFT_DIRTY; + start = *(unsigned long *)(buffer + 1); + end = *(unsigned long *)(buffer + 1 + 8); + } + else + { + rv = kstrtoint(strstrip(buffer), 10, &itype); + if (rv < 0) + return rv; + type = (enum clear_refs_types)itype; + + if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST) + return -EINVAL; + + start = 0; + end = -1UL; + } task = get_proc_task(file_inode(file)); if (!task) @@ -1566,40 +1608,86 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .type = type, }; - if (mmap_write_lock_killable(mm)) { - count = -EINTR; - goto out_mm; + if (start || end != -1UL) + { + start = min(start, -1UL) & PAGE_MASK; + end = min(end, -1UL) & PAGE_MASK; + + if (start >= end) + { + count = -EINVAL; + goto out_mm; + } + clear_range = true; } + else + { + clear_range = false; + } + + cp.start = start; + cp.end = end; + cp.clear_range = clear_range; + if (type == CLEAR_REFS_MM_HIWATER_RSS) { + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } + /* * Writing 5 to /proc/pid/clear_refs resets the peak * resident set size to this mm's current rss value. */ reset_mm_hiwater_rss(mm); - goto out_unlock; + mmap_write_unlock(mm); + goto out_mm; } if (type == CLEAR_REFS_SOFT_DIRTY) { - for_each_vma(vmi, vma) { - if (!(vma->vm_flags & VM_SOFTDIRTY)) - continue; - vm_flags_clear(vma, VM_SOFTDIRTY); - vma_set_page_prot(vma); + if (mmap_read_lock_killable(mm)) { + count = -EINTR; + goto out_mm; } - + if (!clear_range) + for_each_vma(vmi, vma) { + if (!(vma->vm_flags & VM_SOFTDIRTY)) + continue; + mmap_read_unlock(mm); + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } + for_each_vma(vmi, vma) { + vm_flags_clear(vma, VM_SOFTDIRTY); + vma_set_page_prot(vma); + } + mmap_write_downgrade(mm); + break; + } inc_tlb_flush_pending(mm); mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, - 0, mm, 0, -1UL); + 0, mm, start, end); mmu_notifier_invalidate_range_start(&range); } - walk_page_range(mm, 0, -1, &clear_refs_walk_ops, &cp); + else + { + if (mmap_write_lock_killable(mm)) { + count = -EINTR; + goto out_mm; + } + } + walk_page_range(mm, start, end == -1UL ? 
-1 : end, &clear_refs_walk_ops, &cp); if (type == CLEAR_REFS_SOFT_DIRTY) { mmu_notifier_invalidate_range_end(&range); flush_tlb_mm(mm); dec_tlb_flush_pending(mm); + mmap_read_unlock(mm); + } + else + { + mmap_write_unlock(mm); } -out_unlock: - mmap_write_unlock(mm); out_mm: mmput(mm); } @@ -1621,6 +1709,7 @@ struct pagemapread { int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ pagemap_entry_t *buffer; bool show_pfn; + bool reset; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) @@ -1631,6 +1720,7 @@ struct pagemapread { #define PM_PFRAME_MASK GENMASK_ULL(PM_PFRAME_BITS - 1, 0) #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) +#define PM_SOFT_DIRTY_PAGE BIT_ULL(57) #define PM_UFFD_WP BIT_ULL(57) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) @@ -1651,6 +1741,14 @@ static int add_to_pagemap(pagemap_entry_t *pme, struct pagemapread *pm) return 0; } +static int add_addr_to_pagemap(unsigned long addr, struct pagemapread *pm) +{ + ((unsigned long *)pm->buffer)[pm->pos++] = addr; + if (pm->pos >= pm->len) + return PM_END_OF_BUFFER; + return 0; +} + static int pagemap_pte_hole(unsigned long start, unsigned long end, __always_unused int depth, struct mm_walk *walk) { @@ -1658,6 +1756,9 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end, unsigned long addr = start; int err = 0; + if (pm->reset) + goto out; + while (addr < end) { struct vm_area_struct *vma = find_vma(walk->mm, addr); pagemap_entry_t pme = make_pme(0, 0); @@ -1704,13 +1805,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, flags |= PM_PRESENT; page = vm_normal_page(vma, addr, pte); if (pte_soft_dirty(pte)) - flags |= PM_SOFT_DIRTY; + flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE; if (pte_uffd_wp(pte)) flags |= PM_UFFD_WP; } else if (is_swap_pte(pte)) { swp_entry_t entry; if (pte_swp_soft_dirty(pte)) - flags |= PM_SOFT_DIRTY; + flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE; if (pte_swp_uffd_wp(pte)) flags |= PM_UFFD_WP; entry = pte_to_swp_entry(pte); @@ -1766,6 +1867,20 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, struct page *page = NULL; struct folio *folio = NULL; + if (pm->reset) + { + if (clear_soft_dirty_pmd(vma, addr, pmdp)) + { + for (; addr != end; addr += PAGE_SIZE) + { + err = add_addr_to_pagemap(addr, pm); + if (err) + break; + } + } + goto trans_huge_done; + } + if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1774,7 +1889,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, flags |= PM_PRESENT; if (pmd_soft_dirty(pmd)) - flags |= PM_SOFT_DIRTY; + flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE; if (pmd_uffd_wp(pmd)) flags |= PM_UFFD_WP; if (pm->show_pfn) @@ -1795,7 +1910,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, } flags |= PM_SWAP; if (pmd_swp_soft_dirty(pmd)) - flags |= PM_SOFT_DIRTY; + flags |= PM_SOFT_DIRTY | PM_SOFT_DIRTY_PAGE; if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; VM_BUG_ON(!is_pmd_migration_entry(pmd)); @@ -1828,6 +1943,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, frame += (1 << MAX_SWAPFILES_SHIFT); } } +trans_huge_done: spin_unlock(ptl); return err; } @@ -1843,10 +1959,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, return err; } for (; addr < end; pte++, addr += PAGE_SIZE) { - pagemap_entry_t pme; + if (pm->reset) + { + if (clear_soft_dirty(vma, addr, pte)) + err = add_addr_to_pagemap(addr, pm); + } + else + { + pagemap_entry_t pme; - pme 
= pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte)); - err = add_to_pagemap(&pme, pm); + pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte)); + err = add_to_pagemap(&pme, pm); + } if (err) break; } @@ -1946,8 +2070,8 @@ static const struct mm_walk_ops pagemap_ops = { * determine which areas of memory are actually mapped and llseek to * skip over unmapped regions. */ -static ssize_t pagemap_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t do_pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos, bool reset) { struct mm_struct *mm = file->private_data; struct pagemapread pm; @@ -1956,6 +2080,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, unsigned long start_vaddr; unsigned long end_vaddr; int ret = 0, copied = 0; + struct mmu_notifier_range range; + size_t buffer_len; if (!mm || !mmget_not_zero(mm)) goto out; @@ -1971,19 +2097,38 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, /* do not disclose physical addresses: attack vector */ pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN); + pm.reset = reset; + + buffer_len = min(PAGEMAP_WALK_SIZE >> PAGE_SHIFT, count / PM_ENTRY_BYTES); - pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); - pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL); + pm.buffer = kmalloc_array(buffer_len, PM_ENTRY_BYTES, GFP_KERNEL); ret = -ENOMEM; if (!pm.buffer) goto out_mm; src = *ppos; svpfn = src / PM_ENTRY_BYTES; - end_vaddr = mm->task_size; + + start_vaddr = svpfn << PAGE_SHIFT; + + if (reset) + { + if (count < sizeof(end_vaddr)) + { + ret = -EINVAL; + goto out_mm; + } + if (copy_from_user(&end_vaddr, buf, sizeof(end_vaddr))) + return -EFAULT; + end_vaddr = min(end_vaddr, mm->task_size); + } + else + { + end_vaddr = mm->task_size; + start_vaddr = end_vaddr; + } /* watch out for wraparound */ - start_vaddr = end_vaddr; if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) { unsigned long end; @@ -2008,18 +2153,35 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, unsigned long end; pm.pos = 0; - end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK; + pm.len = min(buffer_len, count / PM_ENTRY_BYTES); + + end = reset ? end_vaddr : (start_vaddr + (pm.len << PAGE_SHIFT)); /* overflow ? */ if (end < start_vaddr || end > end_vaddr) end = end_vaddr; + ret = mmap_read_lock_killable(mm); if (ret) goto out_free; + + if (reset) + { + inc_tlb_flush_pending(mm); + mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, + 0, mm, start_vaddr, end); + mmu_notifier_invalidate_range_start(&range); + } ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm); + if (reset) + { + mmu_notifier_invalidate_range_end(&range); + flush_tlb_mm(mm); + dec_tlb_flush_pending(mm); + } mmap_read_unlock(mm); - start_vaddr = end; len = min(count, PM_ENTRY_BYTES * pm.pos); + BUG_ON(ret && ret != PM_END_OF_BUFFER); if (copy_to_user(buf, pm.buffer, len)) { ret = -EFAULT; goto out_free; @@ -2027,6 +2189,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, copied += len; buf += len; count -= len; + + start_vaddr = reset && pm.pos == pm.len ? 
((unsigned long *)pm.buffer)[pm.pos - 1] + PAGE_SIZE : end; } *ppos += copied; if (!ret || ret == PM_END_OF_BUFFER) @@ -2040,6 +2204,18 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, return ret; } +static ssize_t pagemap_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return do_pagemap_read(file, buf, count, ppos, false); +} + +static ssize_t pagemap_reset_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return do_pagemap_read(file, buf, count, ppos, true); +} + static int pagemap_open(struct inode *inode, struct file *file) { struct mm_struct *mm; @@ -2831,6 +3007,14 @@ const struct file_operations proc_pagemap_operations = { .unlocked_ioctl = do_pagemap_cmd, .compat_ioctl = do_pagemap_cmd, }; + +const struct file_operations proc_pagemap_reset_operations = { + .llseek = mem_lseek, /* borrow this */ + .read = pagemap_reset_read, + .open = pagemap_open, + .release = pagemap_release, +}; + #endif /* CONFIG_PROC_PAGE_MONITOR */ #ifdef CONFIG_NUMA diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index b378fbf885ce37..98ddeddddca004 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -516,6 +516,9 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, #ifdef CONFIG_IRQ_FORCED_THREADING # ifdef CONFIG_PREEMPT_RT # define force_irqthreads() (true) +# elif defined(CONFIG_FORCE_IRQ_THREADING) +DECLARE_STATIC_KEY_TRUE(force_irqthreads_key); +# define force_irqthreads() (static_branch_likely(&force_irqthreads_key)) # else DECLARE_STATIC_KEY_FALSE(force_irqthreads_key); # define force_irqthreads() (static_branch_unlikely(&force_irqthreads_key)) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8617adc6becd1f..98a00ed2cdf9a3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -201,7 +201,7 @@ static inline void __mm_zero_struct_page(struct page *page) * that. 
*/ #define MAPCOUNT_ELF_CORE_MARGIN (5) -#define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) +#define DEFAULT_MAX_MAP_COUNT (INT_MAX - MAPCOUNT_ELF_CORE_MARGIN) extern int sysctl_max_map_count; diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index fc6b9c87cb0a8c..6ce05520553073 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -52,7 +52,11 @@ extern unsigned int pageblock_order; #else /* CONFIG_TRANSPARENT_HUGEPAGE */ /* If huge pages are not used, group by MAX_ORDER_NR_PAGES */ +#ifdef CONFIG_ZEN_INTERACTIVE +#define pageblock_order PAGE_ALLOC_COSTLY_ORDER +#else #define pageblock_order MAX_PAGE_ORDER +#endif #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0d1d70aded38f6..2a0aba074796b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -782,6 +782,32 @@ struct kmap_ctrl { #endif }; +#ifdef CONFIG_SCHED_BORE +#define BORE_BC_TIMESTAMP_SHIFT 16 + +struct bore_bc { + u64 timestamp: 48; + u64 penalty: 16; +}; + +struct bore_ctx { + struct bore_bc subtree; + struct bore_bc group; + u64 burst_time; + u16 prev_penalty; + u16 curr_penalty; + union { + u16 penalty; + struct { + u8 _; + u8 score; + }; + }; + bool stop_update; + bool futex_waiting; +}; +#endif /* CONFIG_SCHED_BORE */ + struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -842,6 +868,9 @@ struct task_struct { #ifdef CONFIG_SCHED_CLASS_EXT struct sched_ext_entity scx; #endif +#ifdef CONFIG_SCHED_BORE + struct bore_ctx bore; +#endif /* CONFIG_SCHED_BORE */ const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE diff --git a/include/linux/sched/bore.h b/include/linux/sched/bore.h new file mode 100644 index 00000000000000..5afb5fed4e102a --- /dev/null +++ b/include/linux/sched/bore.h @@ -0,0 +1,39 @@ +#ifndef _KERNEL_SCHED_BORE_H +#define _KERNEL_SCHED_BORE_H + +#include +#include +#include +#include +#include + +#define SCHED_BORE_AUTHOR "Masahito Suzuki" +#define SCHED_BORE_PROGNAME "BORE CPU Scheduler modification" + +#define SCHED_BORE_VERSION "6.5.2" + +extern u8 __read_mostly sched_bore; +extern u8 __read_mostly sched_burst_inherit_type; +extern u8 __read_mostly sched_burst_smoothness; +extern u8 __read_mostly sched_burst_penalty_offset; +extern uint __read_mostly sched_burst_penalty_scale; +extern uint __read_mostly sched_burst_cache_lifetime; + +extern u8 effective_prio_bore(struct task_struct *p); +extern void update_curr_bore(struct task_struct *p, u64 delta_exec); +extern void restart_burst_bore(struct task_struct *p); +extern void restart_burst_rescale_deadline_bore(struct task_struct *p); +extern void task_fork_bore(struct task_struct *p, struct task_struct *parent, + u64 clone_flags, u64 now); +extern void sched_init_bore(void); +extern void reset_task_bore(struct task_struct *p); + +extern int sched_bore_update_handler(const struct ctl_table *table, + int write, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int sched_burst_inherit_type_update_handler(const struct ctl_table *table, + int write, void __user *buffer, size_t *lenp, loff_t *ppos); + +extern void reweight_entity( + struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight); + +#endif /* _KERNEL_SCHED_BORE_H */ diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h index d2ee625ea18900..45b7b27f4c137b 100644 --- a/include/uapi/linux/futex.h +++ b/include/uapi/linux/futex.h @@ -22,6 +22,7 @@ #define FUTEX_WAIT_REQUEUE_PI 11 #define FUTEX_CMP_REQUEUE_PI 12 #define 
FUTEX_LOCK_PI2 13 +#define FUTEX_WAIT_MULTIPLE 31 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -93,6 +94,18 @@ struct futex_waitv { __u32 __reserved; }; +/** + * struct futex_wait_block - Block of futexes to be waited for + * @uaddr: User address of the futex + * @val: Futex value expected by userspace + * @bitset: Bitset for the optional bitmasked wakeup + */ +struct futex_wait_block { + __u32 __user *uaddr; + __u32 val; + __u32 bitset; +}; + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. diff --git a/include/uapi/linux/ntsync.h b/include/uapi/linux/ntsync.h index dcfa38fdc93c61..6d06793512b150 100644 --- a/include/uapi/linux/ntsync.h +++ b/include/uapi/linux/ntsync.h @@ -11,13 +11,49 @@ #include struct ntsync_sem_args { - __u32 sem; __u32 count; __u32 max; }; -#define NTSYNC_IOC_CREATE_SEM _IOWR('N', 0x80, struct ntsync_sem_args) +struct ntsync_mutex_args { + __u32 owner; + __u32 count; +}; + +struct ntsync_event_args { + __u32 manual; + __u32 signaled; +}; + +#define NTSYNC_WAIT_REALTIME 0x1 + +struct ntsync_wait_args { + __u64 timeout; + __u64 objs; + __u32 count; + __u32 index; + __u32 flags; + __u32 owner; + __u32 alert; + __u32 pad; +}; + +#define NTSYNC_MAX_WAIT_COUNT 64 + +#define NTSYNC_IOC_CREATE_SEM _IOW ('N', 0x80, struct ntsync_sem_args) +#define NTSYNC_IOC_WAIT_ANY _IOWR('N', 0x82, struct ntsync_wait_args) +#define NTSYNC_IOC_WAIT_ALL _IOWR('N', 0x83, struct ntsync_wait_args) +#define NTSYNC_IOC_CREATE_MUTEX _IOW ('N', 0x84, struct ntsync_mutex_args) +#define NTSYNC_IOC_CREATE_EVENT _IOW ('N', 0x87, struct ntsync_event_args) -#define NTSYNC_IOC_SEM_POST _IOWR('N', 0x81, __u32) +#define NTSYNC_IOC_SEM_RELEASE _IOWR('N', 0x81, __u32) +#define NTSYNC_IOC_MUTEX_UNLOCK _IOWR('N', 0x85, struct ntsync_mutex_args) +#define NTSYNC_IOC_MUTEX_KILL _IOW ('N', 0x86, __u32) +#define NTSYNC_IOC_EVENT_SET _IOR ('N', 0x88, __u32) +#define NTSYNC_IOC_EVENT_RESET _IOR ('N', 0x89, __u32) +#define NTSYNC_IOC_EVENT_PULSE _IOR ('N', 0x8a, __u32) +#define NTSYNC_IOC_SEM_READ _IOR ('N', 0x8b, struct ntsync_sem_args) +#define NTSYNC_IOC_MUTEX_READ _IOR ('N', 0x8c, struct ntsync_mutex_args) +#define NTSYNC_IOC_EVENT_READ _IOR ('N', 0x8d, struct ntsync_event_args) #endif diff --git a/init/Kconfig b/init/Kconfig index d3755b2264bdfb..c702425c758c84 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -162,6 +162,40 @@ config THREAD_INFO_IN_TASK menu "General setup" +config ZEN_INTERACTIVE + bool "Tune kernel for interactivity" + default y + help + Tunes the kernel for responsiveness at the cost of throughput and power usage. 
+ + --- Block Layer ---------------------------------------- + + Default scheduler for SQ..: mq-deadline -> bfq + Default scheduler for MQ..: none -> kyber + DM-Crypt workqueues.......: yes -> no + + --- Virtual Memory Subsystem --------------------------- + + Background-reclaim hugepages...: no -> yes + MG-LRU minimum cache TTL.......: 0 -> 1000 ms + Compact unevictable............: yes -> no + Watermark boost factor.........: 1.5 -> 0 + Pageblock order................: 10 -> 3 + Swap-in readahead..............: 3 -> 0 + + --- EEVDF CPU Scheduler -------------------------------- + + Minimal granularity............: 0.75 -> 0.4 ms + Migration cost.................: 0.5 -> 0.25 ms + Bandwidth slice size...........: 5 -> 3 ms + Task rebalancing threshold.....: 32 -> 8 + + --- CPUFreq Settings ----------------------------------- + + Ondemand sampling down factor..: 1 -> 5 + Ondemand default up threshold..: 80 -> 55 + Ondemand micro up threshold....: 95 -> 60 + config BROKEN bool @@ -1349,6 +1383,23 @@ config CHECKPOINT_RESTORE If unsure, say N here. +config SCHED_BORE + bool "Burst-Oriented Response Enhancer" + default y + help + In Desktop and Mobile computing, one might prefer interactive + tasks to keep responsive no matter what they run in the background. + + Enabling this kernel feature modifies the scheduler to discriminate + tasks by their burst time (runtime since it last went sleeping or + yielding state) and prioritize those that run less bursty. + Such tasks usually include window compositor, widgets backend, + terminal emulator, video playback, games and so on. + With a little impact to scheduling fairness, it may improve + responsiveness especially under heavy background workload. + + If unsure, say Y here. + config SCHED_AUTOGROUP bool "Automatic process group scheduling" select CGROUPS diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 38ef6d06888ef1..4ee584ce93dfe7 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -57,3 +57,20 @@ config HZ config SCHED_HRTICK def_bool HIGH_RES_TIMERS + +config MIN_BASE_SLICE_NS + int "Default value for min_base_slice_ns" + default 2000000 + help + The BORE Scheduler automatically calculates the optimal base + slice for the configured HZ using the following equation: + + base_slice_ns = + 1000000000/HZ * DIV_ROUNDUP(min_base_slice_ns, 1000000000/HZ) + + This option sets the default lower bound limit of the base slice + to prevent the loss of task throughput due to overscheduling. + + Setting this value too high can cause the system to boot with + an unnecessarily large base slice, resulting in high scheduling + latency and poor system responsiveness. diff --git a/kernel/fork.c b/kernel/fork.c index 97c9afe3efc38d..e14b29847c858a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -113,6 +113,10 @@ #include #include +#ifdef CONFIG_SCHED_BORE +#include +#endif /* CONFIG_SCHED_BORE */ + #include #define CREATE_TRACE_POINTS @@ -2524,6 +2528,10 @@ __latent_entropy struct task_struct *copy_process( * Need tasklist lock for parent etc handling! 
*/ write_lock_irq(&tasklist_lock); +#ifdef CONFIG_SCHED_BORE + if (likely(p->pid)) + task_fork_bore(p, current, clone_flags, p->start_time); +#endif /* CONFIG_SCHED_BORE */ /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 4b6da9116aa6c3..12beb7e0ad0f34 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -138,6 +138,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd) case FUTEX_LOCK_PI2: case FUTEX_WAIT_BITSET: case FUTEX_WAIT_REQUEUE_PI: + case FUTEX_WAIT_MULTIPLE: return true; } return false; @@ -150,13 +151,79 @@ futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) return -EINVAL; *t = timespec64_to_ktime(*ts); - if (cmd == FUTEX_WAIT) + if (cmd == FUTEX_WAIT || cmd == FUTEX_WAIT_MULTIPLE) *t = ktime_add_safe(ktime_get(), *t); else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME)) *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); return 0; } +/** + * futex_read_wait_block - Read an array of futex_wait_block from userspace + * @uaddr: Userspace address of the block + * @count: Number of blocks to be read + * + * This function creates and allocate an array of futex_q (we zero it to + * initialize the fields) and then, for each futex_wait_block element from + * userspace, fill a futex_q element with proper values. + */ +inline struct futex_vector *futex_read_wait_block(u32 __user *uaddr, u32 count) +{ + unsigned int i; + struct futex_vector *futexv; + struct futex_wait_block fwb; + struct futex_wait_block __user *entry = + (struct futex_wait_block __user *)uaddr; + + if (!count || count > FUTEX_WAITV_MAX) + return ERR_PTR(-EINVAL); + + futexv = kcalloc(count, sizeof(*futexv), GFP_KERNEL); + if (!futexv) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + if (copy_from_user(&fwb, &entry[i], sizeof(fwb))) { + kfree(futexv); + return ERR_PTR(-EFAULT); + } + + futexv[i].w.flags = FUTEX_32; + futexv[i].w.val = fwb.val; + futexv[i].w.uaddr = (uintptr_t) (fwb.uaddr); + futexv[i].q = futex_q_init; + } + + return futexv; +} + +int futex_wait_multiple(struct futex_vector *vs, unsigned int count, + struct hrtimer_sleeper *to); + +int futex_opcode_31(ktime_t *abs_time, u32 __user *uaddr, int count) +{ + int ret; + struct futex_vector *vs; + struct hrtimer_sleeper *to = NULL, timeout; + + to = futex_setup_timer(abs_time, &timeout, 0, 0); + + vs = futex_read_wait_block(uaddr, count); + + if (IS_ERR(vs)) + return PTR_ERR(vs); + + ret = futex_wait_multiple(vs, count, abs_time ? 
to : NULL); + kfree(vs); + + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } + + return ret; +} + SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, const struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, u32, val3) @@ -176,6 +243,9 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, tp = &t; } + if (cmd == FUTEX_WAIT_MULTIPLE) + return futex_opcode_31(tp, uaddr, val); + return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } @@ -506,6 +576,9 @@ SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, tp = &t; } + if (cmd == FUTEX_WAIT_MULTIPLE) + return futex_opcode_31(tp, uaddr, val); + return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } #endif /* CONFIG_COMPAT_32BIT_TIME */ diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index a9056acb75eef9..e6042508651c4b 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -4,6 +4,9 @@ #include #include #include +#ifdef CONFIG_SCHED_BORE +#include +#endif // CONFIG_SCHED_BORE #include "futex.h" @@ -367,7 +370,15 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, * is no timeout, or if it has yet to expire. */ if (!timeout || timeout->task) +#ifdef CONFIG_SCHED_BORE + { + current->bore.futex_waiting = true; +#endif // CONFIG_SCHED_BORE schedule(); +#ifdef CONFIG_SCHED_BORE + current->bore.futex_waiting = false; + } +#endif // CONFIG_SCHED_BORE } __set_current_state(TASK_RUNNING); } diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 529adb1f58593c..72aae01fc431b4 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -116,6 +116,23 @@ config GENERIC_IRQ_STAT_SNAPSHOT config IRQ_FORCED_THREADING bool +config FORCE_IRQ_THREADING + bool "Make IRQ threading compulsory" + depends on IRQ_FORCED_THREADING + default n + help + + Make IRQ threading mandatory for any IRQ handlers that support it + instead of being optional and requiring the threadirqs kernel + parameter. Instead they can be optionally disabled with the + nothreadirqs kernel parameter. + + Enabling this may make some architectures not boot with runqueue + sharing and MuQSS. + + Enable if you are building for a desktop or low latency system, + otherwise say N. 
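
For reference, the FUTEX_WAIT_MULTIPLE path wired up in kernel/futex/syscalls.c above is driven from user space with an array of struct futex_wait_block, the element count passed in the val argument, and the usual relative timeout in utime. The sketch below is illustrative only and is not part of the patch: the futex words, count, and timeout are made up, and the return-value handling assumes the vectorized-wait helper's convention of returning the index of the woken futex::

    /*
     * Hypothetical user-space caller of FUTEX_WAIT_MULTIPLE (opcode 31).
     * Not part of the patch; it only mirrors how futex_opcode_31() above
     * decodes its arguments.
     */
    #define _GNU_SOURCE
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/futex.h>

    #ifndef FUTEX_WAIT_MULTIPLE
    #define FUTEX_WAIT_MULTIPLE 31
    struct futex_wait_block {
            uint32_t *uaddr;
            uint32_t val;
            uint32_t bitset;
    };
    #endif

    static uint32_t word_a, word_b;        /* futex words; normally in shared memory */

    int main(void)
    {
            struct futex_wait_block blocks[2] = {
                    { &word_a, 0, ~0u },   /* sleep while word_a is still 0 */
                    { &word_b, 0, ~0u },   /* sleep while word_b is still 0 */
            };
            struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 };   /* relative */
            long ret;

            /* In this single-threaded sketch nothing wakes us, so this times out. */
            ret = syscall(SYS_futex, blocks, FUTEX_WAIT_MULTIPLE, 2,
                          &timeout, NULL, 0);
            if (ret < 0)
                    perror("FUTEX_WAIT_MULTIPLE");
            else
                    printf("woken on futex index %ld\n", ret);
            return 0;
    }

As with plain FUTEX_WAIT, the timeout is relative and measured against the monotonic clock.
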
+ config SPARSE_IRQ bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ help diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index f0803d6bd29698..f2dc23a9ece95b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -25,7 +25,18 @@ #include "internals.h" #if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT) +#ifdef CONFIG_FORCE_IRQ_THREADING +DEFINE_STATIC_KEY_TRUE(force_irqthreads_key); +#else DEFINE_STATIC_KEY_FALSE(force_irqthreads_key); +#endif + +static int __init setup_noforced_irqthreads(char *arg) +{ + static_branch_disable(&force_irqthreads_key); + return 0; +} +early_param("nothreadirqs", setup_noforced_irqthreads); static int __init setup_forced_irqthreads(char *arg) { diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 976092b7bd4520..83cbd093b8b586 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -32,3 +32,4 @@ obj-y += core.o obj-y += fair.o obj-y += build_policy.o obj-y += build_utility.o +obj-$(CONFIG_SCHED_BORE) += bore.o diff --git a/kernel/sched/bore.c b/kernel/sched/bore.c new file mode 100644 index 00000000000000..c9e76eda35c82d --- /dev/null +++ b/kernel/sched/bore.c @@ -0,0 +1,393 @@ +/* + * Burst-Oriented Response Enhancer (BORE) CPU Scheduler + * Copyright (C) 2021-2025 Masahito Suzuki + */ +#include +#include +#include +#include "sched.h" + +#ifdef CONFIG_SCHED_BORE +u8 __read_mostly sched_bore = 1; +u8 __read_mostly sched_burst_inherit_type = 2; +u8 __read_mostly sched_burst_smoothness = 1; +u8 __read_mostly sched_burst_penalty_offset = 24; +uint __read_mostly sched_burst_penalty_scale = 1536; +uint __read_mostly sched_burst_cache_lifetime = 75000000; +static int __maybe_unused maxval_prio = 39; +static int __maybe_unused maxval_6_bits = 63; +static int __maybe_unused maxval_8_bits = 255; +static int __maybe_unused maxval_12_bits = 4095; + +#define MAX_BURST_PENALTY ((40U << 8) - 1) +#define BURST_CACHE_STOP_COUNT 63 + +static u32 (*inherit_penalty_fn)(struct task_struct *, u64, u64); + +static inline u32 log2p1_u64_u32fp(u64 v, u8 fp) { + if (!v) return 0; + u32 exponent = fls64(v), + mantissa = (u32)(v << (64 - exponent) << 1 >> (64 - fp)); + return exponent << fp | mantissa; +} + +static inline u32 calc_burst_penalty(u64 burst_time) { + u32 greed = log2p1_u64_u32fp(burst_time, 8), + tolerance = sched_burst_penalty_offset << 8, + penalty = max(0, (s32)(greed - tolerance)), + scaled_penalty = penalty * sched_burst_penalty_scale >> 10; + return min(MAX_BURST_PENALTY, scaled_penalty); +} + +static inline u64 rescale_slice(u64 delta, u8 old_prio, u8 new_prio) { + u64 unscaled, rescaled; + unscaled = mul_u64_u32_shr(delta , sched_prio_to_weight[old_prio], 10); + rescaled = mul_u64_u32_shr(unscaled, sched_prio_to_wmult [new_prio], 22); + return rescaled; +} + +static inline u32 binary_smooth(u32 new, u32 old) { + if (new <= old) return new; + + u32 increment = new - old, + shift = sched_burst_smoothness, + divisor = 1U << shift; + + return old + ((increment + divisor - 1) >> shift); +} + +static void reweight_task_by_prio(struct task_struct *p, int prio) { + if (task_has_idle_policy(p)) return; + + struct sched_entity *se = &p->se; + unsigned long weight = scale_load(sched_prio_to_weight[prio]); + + if (se->on_rq) { + p->bore.stop_update = true; + reweight_entity(cfs_rq_of(se), se, weight); + p->bore.stop_update = false; + } else + se->load.weight = weight; + se->load.inv_weight = sched_prio_to_wmult[prio]; +} + +u8 effective_prio_bore(struct task_struct *p) { + int prio = p->static_prio - 
MAX_RT_PRIO; + if (likely(sched_bore)) + prio += p->bore.score; + return (u8)clamp(prio, 0, maxval_prio); +} + +static void update_penalty(struct task_struct *p) { + struct bore_ctx *ctx = &p->bore; + + u8 prev_prio = effective_prio_bore(p); + u32 penalty = 0; + + if (!(p->flags & PF_KTHREAD)) { + u32 curr_penalty = ctx->curr_penalty; + penalty = ctx->prev_penalty; + if (penalty < curr_penalty) + penalty = curr_penalty; + } + ctx->penalty = penalty; + + u8 new_prio = effective_prio_bore(p); + if (new_prio != prev_prio) + reweight_task_by_prio(p, new_prio); +} + +void update_curr_bore(struct task_struct *p, u64 delta_exec) { + struct bore_ctx *ctx = &p->bore; + if (ctx->stop_update) return; + + ctx->burst_time += delta_exec; + u32 curr_penalty = ctx->curr_penalty = calc_burst_penalty(ctx->burst_time); + + if (curr_penalty <= ctx->prev_penalty) return; + update_penalty(p); +} + +void restart_burst_bore(struct task_struct *p) { + struct bore_ctx *ctx = &p->bore; + u32 new_penalty = binary_smooth(ctx->curr_penalty, ctx->prev_penalty); + ctx->prev_penalty = new_penalty; + ctx->curr_penalty = 0; + ctx->burst_time = 0; + update_penalty(p); +} + +void restart_burst_rescale_deadline_bore(struct task_struct *p) { + struct sched_entity *se = &p->se; + s64 vscaled, vremain = se->deadline - se->vruntime; + + u8 old_prio = effective_prio_bore(p); + restart_burst_bore(p); + u8 new_prio = effective_prio_bore(p); + + if (old_prio > new_prio) { + vscaled = rescale_slice(abs(vremain), old_prio, new_prio); + if (unlikely(vremain < 0)) + vscaled = -vscaled; + se->deadline = se->vruntime + vscaled; + } +} + +static inline bool task_is_bore_eligible(struct task_struct *p) +{return p && p->sched_class == &fair_sched_class && !p->exit_state;} + +#ifndef for_each_child_task +#define for_each_child_task(p, t) \ + list_for_each_entry(t, &(p)->children, sibling) +#endif + +static inline u32 count_children_upto2(struct task_struct *p) { + struct list_head *head = &p->children; + struct list_head *next = head->next; + return (next != head) + (next->next != head); +} + +static inline bool burst_cache_expired(struct bore_bc *bc, u64 now) { + u64 timestamp = bc->timestamp << BORE_BC_TIMESTAMP_SHIFT; + return now - timestamp > sched_burst_cache_lifetime; +} + +static void update_burst_cache(struct bore_bc *bc, + struct task_struct *p, u32 count, u32 total, u64 now) { + u32 average = count ? 
total / count : 0; + bc->penalty = max(average, p->bore.penalty); + bc->timestamp = now >> BORE_BC_TIMESTAMP_SHIFT; +} + +static u32 inherit_none(struct task_struct *parent, + u64 clone_flags, u64 now) +{ return 0; } + +static u32 inherit_from_parent(struct task_struct *parent, + u64 clone_flags, u64 now) { + if (clone_flags & CLONE_PARENT) + parent = parent->real_parent; + + struct bore_bc *bc = &parent->bore.subtree; + + if (burst_cache_expired(bc, now)) { + struct task_struct *child; + u32 count = 0, total = 0; + for_each_child_task(parent, child) { + if (count >= BURST_CACHE_STOP_COUNT) break; + + if (!task_is_bore_eligible(child)) continue; + count++; + total += child->bore.penalty; + } + + update_burst_cache(bc, parent, count, total, now); + } + + return bc->penalty; +} + +static u32 inherit_from_ancestor_hub(struct task_struct *parent, + u64 clone_flags, u64 now) { + struct task_struct *ancestor = parent; + u32 sole_child_count = 0; + + if (clone_flags & CLONE_PARENT) { + ancestor = ancestor->real_parent; + sole_child_count = 1; + } + + for (struct task_struct *next; + (next = ancestor->real_parent) != ancestor && + count_children_upto2(ancestor) <= sole_child_count; + ancestor = next, sole_child_count = 1) {} + + struct bore_bc *bc = &ancestor->bore.subtree; + + if (burst_cache_expired(bc, now)) { + struct task_struct *direct_child; + u32 count = 0, total = 0; + for_each_child_task(ancestor, direct_child) { + if (count >= BURST_CACHE_STOP_COUNT) break; + + struct task_struct *descendant = direct_child; + while (count_children_upto2(descendant) == 1) + descendant = list_first_entry(&descendant->children, + struct task_struct, sibling); + + if (!task_is_bore_eligible(descendant)) continue; + count++; + total += descendant->bore.penalty; + } + + update_burst_cache(bc, ancestor, count, total, now); + } + + return bc->penalty; +} + +static u32 inherit_from_thread_group(struct task_struct *p, u64 now) { + struct task_struct *leader = p->group_leader; + struct bore_bc *bc = &leader->bore.group; + + if (burst_cache_expired(bc, now)) { + struct task_struct *sibling; + u32 count = 0, total = 0; + + for_each_thread(leader, sibling) { + if (count >= BURST_CACHE_STOP_COUNT) break; + + if (!task_is_bore_eligible(sibling)) continue; + count++; + total += sibling->bore.penalty; + } + + update_burst_cache(bc, leader, count, total, now); + } + + return bc->penalty; +} + +void task_fork_bore(struct task_struct *p, + struct task_struct *parent, u64 clone_flags, u64 now) { + if (!task_is_bore_eligible(p) || unlikely(!sched_bore)) return; + + struct bore_ctx *ctx = &p->bore; + u32 inherited_penalty = (clone_flags & CLONE_THREAD)? 
+ inherit_from_thread_group(parent, now): + inherit_penalty_fn(parent, clone_flags, now); + + if (ctx->prev_penalty < inherited_penalty) + ctx->prev_penalty = inherited_penalty; + ctx->curr_penalty = 0; + ctx->burst_time = 0; + ctx->stop_update = false; + ctx->futex_waiting = false; + update_penalty(p); +} + +void reset_task_bore(struct task_struct *p) +{ memset(&p->bore, 0, sizeof(struct bore_ctx)); } + +static void update_inherit_type(void) { + switch(sched_burst_inherit_type) { + case 1: + inherit_penalty_fn = inherit_from_parent; + break; + case 2: + inherit_penalty_fn = inherit_from_ancestor_hub; + break; + default: + inherit_penalty_fn = inherit_none; + } +} + +void __init sched_init_bore(void) { + printk(KERN_INFO "%s %s by %s\n", + SCHED_BORE_PROGNAME, SCHED_BORE_VERSION, SCHED_BORE_AUTHOR); + + reset_task_bore(&init_task); + update_inherit_type(); +} + +static void readjust_all_task_weights(void) { + struct task_struct *task; + struct rq *rq; + struct rq_flags rf; + + scoped_guard(write_lock_irq, &tasklist_lock) + for_each_process(task) { + if (!task_is_bore_eligible(task)) continue; + rq = task_rq_lock(task, &rf); + update_rq_clock(rq); + reweight_task_by_prio(task, effective_prio_bore(task)); + task_rq_unlock(rq, task, &rf); + } +} + +int sched_bore_update_handler(const struct ctl_table *table, + int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + readjust_all_task_weights(); + + return 0; +} + +int sched_burst_inherit_type_update_handler(const struct ctl_table *table, + int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + int ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos); + if (ret || !write) + return ret; + + update_inherit_type(); + + return 0; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table sched_bore_sysctls[] = { + { + .procname = "sched_bore", + .data = &sched_bore, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = sched_bore_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "sched_burst_inherit_type", + .data = &sched_burst_inherit_type, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = sched_burst_inherit_type_update_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { + .procname = "sched_burst_smoothness", + .data = &sched_burst_smoothness, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_THREE, + }, + { + .procname = "sched_burst_penalty_offset", + .data = &sched_burst_penalty_offset, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &maxval_6_bits, + }, + { + .procname = "sched_burst_penalty_scale", + .data = &sched_burst_penalty_scale, + .maxlen = sizeof(uint), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &maxval_12_bits, + }, + { + .procname = "sched_burst_cache_lifetime", + .data = &sched_burst_cache_lifetime, + .maxlen = sizeof(uint), + .mode = 0644, + .proc_handler = proc_douintvec, + }, +}; + +static int __init sched_bore_sysctl_init(void) { + register_sysctl_init("kernel", sched_bore_sysctls); + return 0; +} +late_initcall(sched_bore_sysctl_init); + +#endif // CONFIG_SYSCTL +#endif /* CONFIG_SCHED_BORE */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e9bb1b4c58421f..83b743b823a7eb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -97,6 
+97,10 @@ #include "../../io_uring/io-wq.h" #include "../smpboot.h" +#ifdef CONFIG_SCHED_BORE +#include +#endif /* CONFIG_SCHED_BORE */ + EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); @@ -1370,7 +1374,11 @@ int tg_nop(struct task_group *tg, void *data) void set_load_weight(struct task_struct *p, bool update_load) { +#ifdef CONFIG_SCHED_BORE + int prio = effective_prio_bore(p); +#else /* !CONFIG_SCHED_BORE */ int prio = p->static_prio - MAX_RT_PRIO; +#endif /* CONFIG_SCHED_BORE */ struct load_weight lw; if (task_has_idle_policy(p)) { @@ -8393,6 +8401,10 @@ void __init sched_init(void) BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class)); #endif +#ifdef CONFIG_SCHED_BORE + sched_init_bore(); +#endif /* CONFIG_SCHED_BORE */ + wait_bit_init(); #ifdef CONFIG_FAIR_GROUP_SCHED diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 1e3bc0774efd51..71fe8719a9bf28 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -167,7 +167,53 @@ static const struct file_operations sched_feat_fops = { }; #ifdef CONFIG_SMP +#ifdef CONFIG_SCHED_BORE +#define DEFINE_SYSCTL_SCHED_FUNC(name, update_func) \ +static ssize_t sched_##name##_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) \ +{ \ + char buf[16]; \ + unsigned int value; \ +\ + if (cnt > 15) \ + cnt = 15; \ +\ + if (copy_from_user(&buf, ubuf, cnt)) \ + return -EFAULT; \ + buf[cnt] = '\0'; \ +\ + if (kstrtouint(buf, 10, &value)) \ + return -EINVAL; \ +\ + sysctl_sched_##name = value; \ + sched_update_##update_func(); \ +\ + *ppos += cnt; \ + return cnt; \ +} \ +\ +static int sched_##name##_show(struct seq_file *m, void *v) \ +{ \ + seq_printf(m, "%d\n", sysctl_sched_##name); \ + return 0; \ +} \ +\ +static int sched_##name##_open(struct inode *inode, struct file *filp) \ +{ \ + return single_open(filp, sched_##name##_show, NULL); \ +} \ +\ +static const struct file_operations sched_##name##_fops = { \ + .open = sched_##name##_open, \ + .write = sched_##name##_write, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +}; + +DEFINE_SYSCTL_SCHED_FUNC(min_base_slice, min_base_slice) +#undef DEFINE_SYSCTL_SCHED_FUNC +#else /* !CONFIG_SCHED_BORE */ static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -213,7 +259,7 @@ static const struct file_operations sched_scaling_fops = { .llseek = seq_lseek, .release = single_release, }; - +#endif /* CONFIG_SCHED_BORE */ #endif /* SMP */ #ifdef CONFIG_PREEMPT_DYNAMIC @@ -504,13 +550,20 @@ static __init int sched_init_debug(void) debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); #endif +#ifdef CONFIG_SCHED_BORE + debugfs_create_file("min_base_slice_ns", 0644, debugfs_sched, NULL, &sched_min_base_slice_fops); + debugfs_create_u32("base_slice_ns", 0444, debugfs_sched, &sysctl_sched_base_slice); +#else /* !CONFIG_SCHED_BORE */ debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); +#endif /* CONFIG_SCHED_BORE */ debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); #ifdef CONFIG_SMP +#if !defined(CONFIG_SCHED_BORE) debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); +#endif /* CONFIG_SCHED_BORE */ debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); 
debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); @@ -755,6 +808,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); +#ifdef CONFIG_SCHED_BORE + SEQ_printf(m, " %2d", p->bore.score); +#endif /* CONFIG_SCHED_BORE */ #ifdef CONFIG_NUMA_BALANCING SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif @@ -1244,6 +1300,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P(se.load.weight); #ifdef CONFIG_SMP +#ifdef CONFIG_SCHED_BORE + P(bore.score); +#endif /* CONFIG_SCHED_BORE */ P(se.avg.load_sum); P(se.avg.runnable_sum); P(se.avg.util_sum); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 443f6a9ef3f8f6..0624f23cf0547e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -55,6 +55,10 @@ #include "stats.h" #include "autogroup.h" +#ifdef CONFIG_SCHED_BORE +#include +#endif /* CONFIG_SCHED_BORE */ + /* * The initial- and re-scaling of tunables is configurable * @@ -64,19 +68,42 @@ * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus) * SCHED_TUNABLESCALING_LINEAR - scaled linear, *ncpus * - * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) + * BORE : default SCHED_TUNABLESCALING_NONE = *1 constant + * EEVDF: default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)) */ +#ifdef CONFIG_SCHED_BORE +unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE; +#else /* !CONFIG_SCHED_BORE */ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG; +#endif /* CONFIG_SCHED_BORE */ /* * Minimal preemption granularity for CPU-bound tasks: * - * (default: 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds) + * BORE : base_slice = minimum multiple of nsecs_per_tick >= min_base_slice + * (default min_base_slice = 2000000 constant, units: nanoseconds) + * EEVDF: default 0.70 msec * (1 + ilog(ncpus)), units: nanoseconds */ + +#ifdef CONFIG_SCHED_BORE +static const unsigned int nsecs_per_tick = 1000000000ULL / HZ; +unsigned int sysctl_sched_min_base_slice = CONFIG_MIN_BASE_SLICE_NS; +__read_mostly uint sysctl_sched_base_slice = nsecs_per_tick; +#else /* !CONFIG_SCHED_BORE */ +# ifdef CONFIG_ZEN_INTERACTIVE +unsigned int sysctl_sched_base_slice = 400000ULL; +static unsigned int normalized_sysctl_sched_base_slice = 400000ULL; +# else /* !CONFIG_ZEN_INTERACTIVE */ unsigned int sysctl_sched_base_slice = 700000ULL; static unsigned int normalized_sysctl_sched_base_slice = 700000ULL; +# endif /* CONFIG_ZEN_INTERACTIVE */ +#endif /* CONFIG_SCHED_BORE */ +#ifdef CONFIG_ZEN_INTERACTIVE +const_debug unsigned int sysctl_sched_migration_cost = 250000UL; +#else const_debug unsigned int sysctl_sched_migration_cost = 500000UL; +#endif static int __init setup_sched_thermal_decay_shift(char *str) { @@ -121,8 +148,12 @@ int __weak arch_asym_cpu_priority(int cpu) * * (default: 5 msec, units: microseconds) */ +#ifdef CONFIG_ZEN_INTERACTIVE +static unsigned int sysctl_sched_cfs_bandwidth_slice = 3000UL; +#else static unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL; #endif +#endif #ifdef CONFIG_NUMA_BALANCING /* Restrict the NUMA promotion throughput (MB/s) for each target node. 
*/ @@ -188,6 +219,13 @@ static inline void update_load_set(struct load_weight *lw, unsigned long w) * * This idea comes from the SD scheduler of Con Kolivas: */ +#ifdef CONFIG_SCHED_BORE +static void update_sysctl(void) { + sysctl_sched_base_slice = nsecs_per_tick * + max(1UL, DIV_ROUND_UP(sysctl_sched_min_base_slice, nsecs_per_tick)); +} +void sched_update_min_base_slice(void) { update_sysctl(); } +#else /* !CONFIG_SCHED_BORE */ static unsigned int get_update_sysctl_factor(void) { unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8); @@ -218,6 +256,7 @@ static void update_sysctl(void) SET_SYSCTL(sched_base_slice); #undef SET_SYSCTL } +#endif /* CONFIG_SCHED_BORE */ void __init sched_init_granularity(void) { @@ -695,6 +734,9 @@ static s64 entity_lag(u64 avruntime, struct sched_entity *se) vlag = avruntime - se->vruntime; limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); +#ifdef CONFIG_SCHED_BORE + limit >>= !!sched_bore; +#endif /* CONFIG_SCHED_BORE */ return clamp(vlag, -limit, limit); } @@ -941,7 +983,16 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq) if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr))) curr = NULL; +#if !defined(CONFIG_SCHED_BORE) if (sched_feat(RUN_TO_PARITY) && curr && protect_slice(curr)) +#else /* CONFIG_SCHED_BORE */ + bool run_to_parity = likely(sched_bore) ? + sched_feat(RUN_TO_PARITY_BORE) : sched_feat(RUN_TO_PARITY); + if (run_to_parity && curr && protect_slice(curr) && + (!entity_is_task(curr) || + !task_of(curr)->bore.futex_waiting || + unlikely(!sched_bore))) +#endif /* CONFIG_SCHED_BORE */ return curr; /* Pick the leftmost entity if it's eligible */ @@ -1000,6 +1051,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) * Scheduling class statistics methods: */ #ifdef CONFIG_SMP +#if !defined(CONFIG_SCHED_BORE) int sched_update_scaling(void) { unsigned int factor = get_update_sysctl_factor(); @@ -1011,6 +1063,7 @@ int sched_update_scaling(void) return 0; } +#endif /* CONFIG_SCHED_BORE */ #endif #endif @@ -1248,6 +1301,9 @@ static void update_curr(struct cfs_rq *cfs_rq) if (entity_is_task(curr)) { struct task_struct *p = task_of(curr); +#ifdef CONFIG_SCHED_BORE + update_curr_bore(p, delta_exec); +#endif /* CONFIG_SCHED_BORE */ update_curr_task(p, delta_exec); /* @@ -3896,7 +3952,7 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime, se->deadline = avruntime + vslice; } -static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, +void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { bool curr = cfs_rq->curr == se; @@ -5296,12 +5352,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} static void place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) { - u64 vslice, vruntime = avg_vruntime(cfs_rq); + u64 vslice = 0, vruntime = avg_vruntime(cfs_rq); s64 lag = 0; if (!se->custom_slice) se->slice = sysctl_sched_base_slice; - vslice = calc_delta_fair(se->slice, se); /* * Due to how V is constructed as the weighted average of entities, @@ -5386,7 +5441,18 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->rel_deadline = 0; return; } - +#ifdef CONFIG_SCHED_BORE + if (entity_is_task(se) && + likely(sched_bore) && + task_of(se)->bore.futex_waiting) + goto vslice_found; +#endif /* !CONFIG_SCHED_BORE */ + vslice = calc_delta_fair(se->slice, se); +#ifdef CONFIG_SCHED_BORE + if (likely(sched_bore)) + vslice >>= !!(flags & (ENQUEUE_INITIAL | ENQUEUE_WAKEUP)); + 
else +#endif /* CONFIG_SCHED_BORE */ /* * When joining the competition; the existing tasks will be, * on average, halfway through their slice, as such start tasks @@ -5395,6 +5461,9 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL)) vslice /= 2; +#ifdef CONFIG_SCHED_BORE +vslice_found: +#endif /* CONFIG_SCHED_BORE */ /* * EEVDF: vd_i = ve_i + r_i/w_i */ @@ -5407,7 +5476,7 @@ static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq); static inline bool cfs_bandwidth_used(void); static void -requeue_delayed_entity(struct sched_entity *se); +requeue_delayed_entity(struct sched_entity *se, int flags); static void enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) @@ -5570,6 +5639,10 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) if (sched_feat(DELAY_DEQUEUE) && delay && !entity_eligible(cfs_rq, se)) { update_load_avg(cfs_rq, se, 0); +#ifdef CONFIG_SCHED_BORE + if (sched_feat(DELAY_ZERO) && likely(sched_bore)) + update_entity_lag(cfs_rq, se); +#endif /* CONFIG_SCHED_BORE */ set_delayed(se); return false; } @@ -6990,7 +7063,7 @@ static int sched_idle_cpu(int cpu) #endif static void -requeue_delayed_entity(struct sched_entity *se) +requeue_delayed_entity(struct sched_entity *se, int flags) { struct cfs_rq *cfs_rq = cfs_rq_of(se); @@ -7003,13 +7076,22 @@ requeue_delayed_entity(struct sched_entity *se) SCHED_WARN_ON(!se->on_rq); if (sched_feat(DELAY_ZERO)) { +#ifdef CONFIG_SCHED_BORE + if (likely(sched_bore)) + flags |= ENQUEUE_WAKEUP; + else { +#endif /* CONFIG_SCHED_BORE */ + flags = 0; update_entity_lag(cfs_rq, se); +#ifdef CONFIG_SCHED_BORE + } +#endif /* CONFIG_SCHED_BORE */ if (se->vlag > 0) { cfs_rq->nr_running--; if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); se->vlag = 0; - place_entity(cfs_rq, se, 0); + place_entity(cfs_rq, se, flags); if (se != cfs_rq->curr) __enqueue_entity(cfs_rq, se); cfs_rq->nr_running++; @@ -7046,7 +7128,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_enqueue(&rq->cfs, p); if (flags & ENQUEUE_DELAYED) { - requeue_delayed_entity(se); + requeue_delayed_entity(se, flags); return; } @@ -7064,7 +7146,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) for_each_sched_entity(se) { if (se->on_rq) { if (se->sched_delayed) - requeue_delayed_entity(se); + requeue_delayed_entity(se, flags); break; } cfs_rq = cfs_rq_of(se); @@ -7286,6 +7368,15 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) util_est_dequeue(&rq->cfs, p); util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP); +#ifdef CONFIG_SCHED_BORE + struct cfs_rq *cfs_rq = &rq->cfs; + struct sched_entity *se = &p->se; + if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) { + if (cfs_rq->curr == se) + update_curr(cfs_rq_of(&p->se)); + restart_burst_bore(p); + } +#endif /* CONFIG_SCHED_BORE */ if (dequeue_entities(rq, &p->se, flags) < 0) return false; @@ -7646,9 +7737,14 @@ static inline int sched_balance_find_dst_cpu(struct sched_domain *sd, struct tas return new_cpu; } +static inline bool is_idle_cpu(int cpu) +{ + return available_idle_cpu(cpu) || sched_idle_cpu(cpu); +} + static inline int __select_idle_cpu(int cpu, struct task_struct *p) { - if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) && + if (is_idle_cpu(cpu) && sched_cpu_cookie_match(cpu_rq(cpu), p)) return cpu; @@ -7659,6 +7755,24 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p) 
DEFINE_STATIC_KEY_FALSE(sched_smt_present); EXPORT_SYMBOL_GPL(sched_smt_present); +/* + * Return true if all the CPUs in the SMT core where @cpu belongs are idle, + * false otherwise. + */ +static bool is_idle_core(int cpu) +{ + int sibling; + + if (!sched_smt_active()) + return is_idle_cpu(cpu); + + for_each_cpu(sibling, cpu_smt_mask(cpu)) + if (!is_idle_cpu(sibling)) + return false; + + return true; +} + static inline void set_idle_cores(int cpu, int val) { struct sched_domain_shared *sds; @@ -7741,29 +7855,6 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu return -1; } -/* - * Scan the local SMT mask for idle CPUs. - */ -static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) -{ - int cpu; - - for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) { - if (cpu == target) - continue; - /* - * Check if the CPU is in the LLC scheduling domain of @target. - * Due to isolcpus, there is no guarantee that all the siblings are in the domain. - */ - if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) - continue; - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) - return cpu; - } - - return -1; -} - #else /* CONFIG_SCHED_SMT */ static inline void set_idle_cores(int cpu, int val) @@ -7780,9 +7871,9 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma return __select_idle_cpu(core, p); } -static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) +static inline bool is_idle_core(int cpu) { - return -1; + return is_idle_cpu(cpu); } #endif /* CONFIG_SCHED_SMT */ @@ -7879,7 +7970,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) for_each_cpu_wrap(cpu, cpus, target) { unsigned long cpu_cap = capacity_of(cpu); - if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) + if (!is_idle_cpu(cpu)) continue; fits = util_fits_cpu(task_util, util_min, util_max, cpu); @@ -7950,7 +8041,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ lockdep_assert_irqs_disabled(); - if ((available_idle_cpu(target) || sched_idle_cpu(target)) && + if (is_idle_core(target) && asym_fits_cpu(task_util, util_min, util_max, target)) return target; @@ -7958,7 +8049,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) * If the previous CPU is cache affine and idle, don't be stupid: */ if (prev != target && cpus_share_cache(prev, target) && - (available_idle_cpu(prev) || sched_idle_cpu(prev)) && + is_idle_core(prev) && asym_fits_cpu(task_util, util_min, util_max, prev)) { if (!static_branch_unlikely(&sched_cluster_active) || @@ -7990,7 +8081,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (recent_used_cpu != prev && recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && - (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && + is_idle_core(recent_used_cpu) && cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) && asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { @@ -8026,16 +8117,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (!sd) return target; - if (sched_smt_active()) { + if (sched_smt_active()) has_idle_core = test_idle_cores(target); - if (!has_idle_core && cpus_share_cache(prev, target)) { - i = select_idle_smt(p, sd, prev); - if ((unsigned int)i < nr_cpumask_bits) - return i; - } - } - i = select_idle_cpu(p, sd, has_idle_core, target); if ((unsigned)i < nr_cpumask_bits) return i; 
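
As a side note on the BORE tunables above: update_sysctl() in kernel/sched/fair.c derives sysctl_sched_base_slice by rounding min_base_slice_ns up to a whole number of scheduler ticks, exactly as described in the CONFIG_MIN_BASE_SLICE_NS help text. The stand-alone sketch below only reproduces that arithmetic for a few HZ values; the helper name and the chosen HZ values are illustrative::

    /*
     * User-space illustration of the BORE base-slice rounding; this is not
     * kernel code, it just mirrors update_sysctl() under CONFIG_SCHED_BORE.
     */
    #include <stdio.h>

    static unsigned long base_slice_ns(unsigned long hz, unsigned long min_base_slice)
    {
            unsigned long nsecs_per_tick = 1000000000UL / hz;
            unsigned long ticks = (min_base_slice + nsecs_per_tick - 1) / nsecs_per_tick;

            if (ticks < 1)
                    ticks = 1;
            return nsecs_per_tick * ticks;
    }

    int main(void)
    {
            /* Default CONFIG_MIN_BASE_SLICE_NS is 2000000 (2 ms). */
            printf("HZ=1000: %lu ns\n", base_slice_ns(1000, 2000000)); /* 2 ticks -> 2000000 */
            printf("HZ=300:  %lu ns\n", base_slice_ns(300, 2000000));  /* 1 tick  -> 3333333 */
            printf("HZ=250:  %lu ns\n", base_slice_ns(250, 2000000));  /* 1 tick  -> 4000000 */
            return 0;
    }

The effect is that the base slice can never be shorter than one tick nor shorter than the configured minimum, which is what the Kconfig help means by a "lower bound limit".
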
@@ -9106,16 +9190,25 @@ static void yield_task_fair(struct rq *rq) /* * Are we the only task in the tree? */ +#if !defined(CONFIG_SCHED_BORE) if (unlikely(rq->nr_running == 1)) return; clear_buddies(cfs_rq, se); +#endif /* CONFIG_SCHED_BORE */ update_rq_clock(rq); /* * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); +#ifdef CONFIG_SCHED_BORE + restart_burst_rescale_deadline_bore(curr); + if (unlikely(rq->nr_running == 1)) + return; + + clear_buddies(cfs_rq, se); +#endif /* CONFIG_SCHED_BORE */ /* * Tell update_rq_clock() that we've just updated, * so we don't do microscopic update in schedule() @@ -13294,6 +13387,9 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p) SCHED_WARN_ON(p->se.sched_delayed); attach_task_cfs_rq(p); +#ifdef CONFIG_SCHED_BORE + reset_task_bore(p); +#endif /* CONFIG_SCHED_BORE */ set_task_max_allowed_capacity(p); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 050d7503064e3a..b81b5a14750e16 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -18,6 +18,9 @@ SCHED_FEAT(PLACE_REL_DEADLINE, true) * 0-lag point or until is has exhausted it's slice. */ SCHED_FEAT(RUN_TO_PARITY, true) +#ifdef CONFIG_SCHED_BORE +SCHED_FEAT(RUN_TO_PARITY_BORE, false) +#endif /* CONFIG_SCHED_BORE */ /* * Allow wakeup of tasks with a shorter slice to cancel RESPECT_SLICE for * current. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d79de755c1c269..0610b03a69040f 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2084,7 +2084,11 @@ static inline void update_sched_domain_debugfs(void) { } static inline void dirty_sched_domain_sysctl(int cpu) { } #endif +#ifdef CONFIG_SCHED_BORE +extern void sched_update_min_base_slice(void); +#else /* !CONFIG_SCHED_BORE */ extern int sched_update_scaling(void); +#endif /* CONFIG_SCHED_BORE */ static inline const struct cpumask *task_user_cpus(struct task_struct *p) { @@ -2797,7 +2801,7 @@ extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); -#ifdef CONFIG_PREEMPT_RT +#if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_ZEN_INTERACTIVE) # define SCHED_NR_MIGRATE_BREAK 8 #else # define SCHED_NR_MIGRATE_BREAK 32 @@ -2806,7 +2810,12 @@ extern void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags); extern const_debug unsigned int sysctl_sched_nr_migrate; extern const_debug unsigned int sysctl_sched_migration_cost; +#ifdef CONFIG_SCHED_BORE +extern unsigned int sysctl_sched_min_base_slice; +extern __read_mostly uint sysctl_sched_base_slice; +#else /* !CONFIG_SCHED_BORE */ extern unsigned int sysctl_sched_base_slice; +#endif /* CONFIG_SCHED_BORE */ #ifdef CONFIG_SCHED_DEBUG extern int sysctl_resched_latency_warn_ms; diff --git a/mm/Kconfig b/mm/Kconfig index 33fa51d608dc51..2c9579015f885f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -648,7 +648,7 @@ config COMPACTION config COMPACT_UNEVICTABLE_DEFAULT int depends on COMPACTION - default 0 if PREEMPT_RT + default 0 if PREEMPT_RT || ZEN_INTERACTIVE default 1 # diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f94a9d41358555..5aa8937ae70f63 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -65,7 +65,11 @@ unsigned long transparent_hugepage_flags __read_mostly = #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE (1<1 _updater_ of zone percpu pageset ->high and ->batch fields */ static DEFINE_MUTEX(pcp_batch_high_lock); #define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8) @@ -271,7 +273,11 @@ const char * const 
migratetype_names[MIGRATE_TYPES] = { int min_free_kbytes = 1024; int user_min_free_kbytes = -1; +#ifdef CONFIG_ZEN_INTERACTIVE +static int watermark_boost_factor __read_mostly; +#else static int watermark_boost_factor __read_mostly = 15000; +#endif static int watermark_scale_factor = 10; /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ @@ -4255,6 +4261,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned int cpuset_mems_cookie; unsigned int zonelist_iter_cookie; int reserve_flags; + bool woke_kswapd = false; if (unlikely(nofail)) { /* @@ -4314,8 +4321,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto nopage; } - if (alloc_flags & ALLOC_KSWAPD) + if (alloc_flags & ALLOC_KSWAPD) { + if (!woke_kswapd) { + atomic_long_inc(&kswapd_waiters); + woke_kswapd = true; + } wake_all_kswapds(order, gfp_mask, ac); + } /* * The adjusted alloc_flags might result in immediate success, so try @@ -4525,9 +4537,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, goto retry; } fail: - warn_alloc(gfp_mask, ac->nodemask, - "page allocation failure: order:%u", order); got_pg: + if (woke_kswapd) + atomic_long_dec(&kswapd_waiters); + if (!page) + warn_alloc(gfp_mask, ac->nodemask, + "page allocation failure: order:%u", order); return page; } diff --git a/mm/swap.c b/mm/swap.c index 59f30a981c6f96..bbe49ea7b80edd 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -1080,6 +1080,10 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch) */ void __init swap_setup(void) { +#ifdef CONFIG_ZEN_INTERACTIVE + /* Only swap-in pages requested, avoid readahead */ + page_cluster = 0; +#else unsigned long megs = totalram_pages() >> (20 - PAGE_SHIFT); /* Use a smaller cluster for small-memory machines */ @@ -1091,4 +1095,5 @@ void __init swap_setup(void) * Right now other parts of the system means that we * _really_ don't want to cluster much more */ +#endif } diff --git a/mm/vmscan.c b/mm/vmscan.c index 0eb5d510d4f6b6..7bea4c6e1f520a 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4007,7 +4007,11 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc } /* to protect the working set of the last N jiffies */ +#ifdef CONFIG_ZEN_INTERACTIVE +static unsigned long lru_gen_min_ttl __read_mostly = HZ; +#else static unsigned long lru_gen_min_ttl __read_mostly; +#endif static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { @@ -6364,7 +6368,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, return 0; } -static bool allow_direct_reclaim(pg_data_t *pgdat) +static bool allow_direct_reclaim(pg_data_t *pgdat, bool using_kswapd) { struct zone *zone; unsigned long pfmemalloc_reserve = 0; @@ -6393,6 +6397,10 @@ static bool allow_direct_reclaim(pg_data_t *pgdat) wmark_ok = free_pages > pfmemalloc_reserve / 2; + /* The throttled direct reclaimer is now a kswapd waiter */ + if (unlikely(!using_kswapd && !wmark_ok)) + atomic_long_inc(&kswapd_waiters); + /* kswapd must be awake if processes are being throttled */ if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) { if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL) @@ -6458,7 +6466,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, /* Throttle based on the first usable node */ pgdat = zone->zone_pgdat; - if (allow_direct_reclaim(pgdat)) + if (allow_direct_reclaim(pgdat, gfp_mask & __GFP_KSWAPD_RECLAIM)) goto out; break; } @@ -6480,11 +6488,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, 
struct zonelist *zonelist, */ if (!(gfp_mask & __GFP_FS)) wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, - allow_direct_reclaim(pgdat), HZ); + allow_direct_reclaim(pgdat, true), HZ); else /* Throttle until kswapd wakes the process */ wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, - allow_direct_reclaim(pgdat)); + allow_direct_reclaim(pgdat, true)); + + if (unlikely(!(gfp_mask & __GFP_KSWAPD_RECLAIM))) + atomic_long_dec(&kswapd_waiters); if (fatal_signal_pending(current)) return true; @@ -6987,14 +6998,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) * able to safely make forward progress. Wake them */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && - allow_direct_reclaim(pgdat)) + allow_direct_reclaim(pgdat, true)) wake_up_all(&pgdat->pfmemalloc_wait); /* Check if kswapd should be suspending */ __fs_reclaim_release(_THIS_IP_); ret = kthread_freezable_should_stop(&was_frozen); __fs_reclaim_acquire(_THIS_IP_); - if (was_frozen || ret) + if (was_frozen || ret || !atomic_long_read(&kswapd_waiters)) break; /* diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9cf769d415687d..d2ae8d2db3c6bc 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -18,6 +18,7 @@ TARGETS += devices/error_logs TARGETS += devices/probe TARGETS += dmabuf-heaps TARGETS += drivers/dma-buf +TARGETS += drivers/ntsync TARGETS += drivers/s390x/uvdevice TARGETS += drivers/net TARGETS += drivers/net/bonding diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore new file mode 100644 index 00000000000000..848573a3d3eafc --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/.gitignore @@ -0,0 +1 @@ +ntsync diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile new file mode 100644 index 00000000000000..dbf2b055c0b287 --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/Makefile @@ -0,0 +1,7 @@ +# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only +TEST_GEN_PROGS := ntsync + +CFLAGS += $(KHDR_INCLUDES) +LDLIBS += -lpthread + +include ../../lib.mk diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config new file mode 100644 index 00000000000000..60539c826d0624 --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/config @@ -0,0 +1 @@ +CONFIG_WINESYNC=y diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c new file mode 100644 index 00000000000000..3aad311574c44e --- /dev/null +++ b/tools/testing/selftests/drivers/ntsync/ntsync.c @@ -0,0 +1,1343 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Various unit tests for the "ntsync" synchronization primitive driver. 
+ *
+ * Copyright (C) 2021-2022 Elizabeth Figura
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <time.h>
+#include <linux/ntsync.h>
+#include "../../kselftest_harness.h"
+
+static int read_sem_state(int sem, __u32 *count, __u32 *max)
+{
+	struct ntsync_sem_args args;
+	int ret;
+
+	memset(&args, 0xcc, sizeof(args));
+	ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args);
+	*count = args.count;
+	*max = args.max;
+	return ret;
+}
+
+#define check_sem_state(sem, count, max) \
+	({ \
+		__u32 __count, __max; \
+		int ret = read_sem_state((sem), &__count, &__max); \
+		EXPECT_EQ(0, ret); \
+		EXPECT_EQ((count), __count); \
+		EXPECT_EQ((max), __max); \
+	})
+
+static int release_sem(int sem, __u32 *count)
+{
+	return ioctl(sem, NTSYNC_IOC_SEM_RELEASE, count);
+}
+
+static int read_mutex_state(int mutex, __u32 *count, __u32 *owner)
+{
+	struct ntsync_mutex_args args;
+	int ret;
+
+	memset(&args, 0xcc, sizeof(args));
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args);
+	*count = args.count;
+	*owner = args.owner;
+	return ret;
+}
+
+#define check_mutex_state(mutex, count, owner) \
+	({ \
+		__u32 __count, __owner; \
+		int ret = read_mutex_state((mutex), &__count, &__owner); \
+		EXPECT_EQ(0, ret); \
+		EXPECT_EQ((count), __count); \
+		EXPECT_EQ((owner), __owner); \
+	})
+
+static int unlock_mutex(int mutex, __u32 owner, __u32 *count)
+{
+	struct ntsync_mutex_args args;
+	int ret;
+
+	args.owner = owner;
+	args.count = 0xdeadbeef;
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_UNLOCK, &args);
+	*count = args.count;
+	return ret;
+}
+
+static int read_event_state(int event, __u32 *signaled, __u32 *manual)
+{
+	struct ntsync_event_args args;
+	int ret;
+
+	memset(&args, 0xcc, sizeof(args));
+	ret = ioctl(event, NTSYNC_IOC_EVENT_READ, &args);
+	*signaled = args.signaled;
+	*manual = args.manual;
+	return ret;
+}
+
+#define check_event_state(event, signaled, manual) \
+	({ \
+		__u32 __signaled, __manual; \
+		int ret = read_event_state((event), &__signaled, &__manual); \
+		EXPECT_EQ(0, ret); \
+		EXPECT_EQ((signaled), __signaled); \
+		EXPECT_EQ((manual), __manual); \
+	})
+
+static int wait_objs(int fd, unsigned long request, __u32 count,
+		     const int *objs, __u32 owner, int alert, __u32 *index)
+{
+	struct ntsync_wait_args args = {0};
+	struct timespec timeout;
+	int ret;
+
+	clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+	args.timeout = timeout.tv_sec * 1000000000 + timeout.tv_nsec;
+	args.count = count;
+	args.objs = (uintptr_t)objs;
+	args.owner = owner;
+	args.index = 0xdeadbeef;
+	args.alert = alert;
+	ret = ioctl(fd, request, &args);
+	*index = args.index;
+	return ret;
+}
+
+static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
+{
+	return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index);
+}
+
+static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
+{
+	return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index);
+}
+
+static int wait_any_alert(int fd, __u32 count, const int *objs,
+			  __u32 owner, int alert, __u32 *index)
+{
+	return wait_objs(fd, NTSYNC_IOC_WAIT_ANY,
+			 count, objs, owner, alert, index);
+}
+
+static int wait_all_alert(int fd, __u32 count, const int *objs,
+			  __u32 owner, int alert, __u32 *index)
+{
+	return wait_objs(fd, NTSYNC_IOC_WAIT_ALL,
+			 count, objs, owner, alert, index);
+}
+
+TEST(semaphore_state)
+{
+	struct ntsync_sem_args sem_args;
+	struct timespec timeout;
+	__u32 count, index;
+	int fd, ret, sem;
+
+	clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
ASSERT_LE(0, fd); + + sem_args.count = 3; + sem_args.max = 2; + sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_EQ(-1, sem); + EXPECT_EQ(EINVAL, errno); + + sem_args.count = 2; + sem_args.max = 2; + sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, sem); + check_sem_state(sem, 2, 2); + + count = 0; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_sem_state(sem, 2, 2); + + count = 1; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 2, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(sem, 1, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(sem, 0, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + count = 3; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 0, 2); + + count = 2; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(sem, 2, 2); + + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + ret = wait_any(fd, 1, &sem, 123, &index); + EXPECT_EQ(0, ret); + + count = 1; + ret = release_sem(sem, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(sem, 1, 2); + + count = ~0u; + ret = release_sem(sem, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOVERFLOW, errno); + check_sem_state(sem, 1, 2); + + close(sem); + + close(fd); +} + +TEST(mutex_state) +{ + struct ntsync_mutex_args mutex_args; + __u32 owner, count, index; + struct timespec timeout; + int fd, ret, mutex; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + mutex_args.owner = 123; + mutex_args.count = 0; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(-1, mutex); + EXPECT_EQ(EINVAL, errno); + + mutex_args.owner = 0; + mutex_args.count = 2; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_EQ(-1, mutex); + EXPECT_EQ(EINVAL, errno); + + mutex_args.owner = 123; + mutex_args.count = 2; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, 2, 123); + + ret = unlock_mutex(mutex, 0, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = unlock_mutex(mutex, 456, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + check_mutex_state(mutex, 2, 123); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_mutex_state(mutex, 1, 123); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, count); + check_mutex_state(mutex, 0, 0); + + ret = unlock_mutex(mutex, 123, &count); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + + ret = wait_any(fd, 1, &mutex, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 456); + + ret = wait_any(fd, 1, &mutex, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 2, 456); + + ret = unlock_mutex(mutex, 456, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + check_mutex_state(mutex, 1, 456); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + owner = 0; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + owner = 123; + ret = ioctl(mutex, 
NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); + check_mutex_state(mutex, 1, 456); + + owner = 456; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + owner = 123; + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + memset(&mutex_args, 0xcc, sizeof(mutex_args)); + ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, mutex_args.count); + EXPECT_EQ(0, mutex_args.owner); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + close(mutex); + + mutex_args.owner = 0; + mutex_args.count = 0; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, 0, 0); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_mutex_state(mutex, 1, 123); + + close(mutex); + + mutex_args.owner = 123; + mutex_args.count = ~0u; + mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, mutex); + check_mutex_state(mutex, ~0u, 123); + + ret = wait_any(fd, 1, &mutex, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(mutex); + + close(fd); +} + +TEST(manual_event_state) +{ + struct ntsync_event_args event_args; + __u32 index, signaled; + int fd, event, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + event_args.manual = 1; + event_args.signaled = 0; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + check_event_state(event, 0, 1); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 1, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 1, 1); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_event_state(event, 1, 1); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 1); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 1); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 1); + + close(event); + + 
close(fd); +} + +TEST(auto_event_state) +{ + struct ntsync_event_args event_args; + __u32 index, signaled; + int fd, event, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + event_args.manual = 0; + event_args.signaled = 1; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + check_event_state(event, 1, 0); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 1, 0); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_event_state(event, 0, 0); + + signaled = 0xdeadbeef; + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 0); + + ret = wait_any(fd, 1, &event, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + check_event_state(event, 0, 0); + + ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(event, 0, 0); + + close(event); + + close(fd); +} + +TEST(test_wait_any) +{ + int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 owner, index, count, i; + struct timespec timeout; + + clock_gettime(CLOCK_MONOTONIC, &timeout); + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 0; + mutex_args.count = 0; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 0, 0); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 0, 0); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 1, 123); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_any(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + owner = 123; + ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + EXPECT_EQ(1, index); + + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + + close(objs[1]); + + /* test waiting on the same object twice */ + + count = 2; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + objs[1] = objs[0]; + ret = wait_any(fd, 2, objs, 456, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + 
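+	/* the duplicated entry should consume only one count from the semaphore */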
check_sem_state(objs[0], 1, 3); + + ret = wait_any(fd, 0, NULL, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + for (i = 1; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i) + objs[i] = objs[0]; + + ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + ret = wait_any(fd, -1, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + close(objs[0]); + + close(fd); +} + +TEST(test_wait_all) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 owner, index, count; + int objs[2], fd, ret; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 0; + mutex_args.count = 0; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_all(fd, 2, objs, 456, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + check_sem_state(objs[0], 0, 3); + check_mutex_state(objs[1], 2, 123); + + count = 3; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 2, 3); + check_mutex_state(objs[1], 3, 123); + + owner = 123; + ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner); + EXPECT_EQ(0, ret); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EOWNERDEAD, errno); + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 123); + + close(objs[1]); + + event_args.manual = true; + event_args.signaled = true; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + check_sem_state(objs[0], 0, 3); + check_event_state(objs[1], 1, 1); + + close(objs[1]); + + /* test waiting on the same object twice */ + objs[1] = objs[0]; + ret = wait_all(fd, 2, objs, 123, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + close(objs[0]); + + close(fd); +} + +struct wake_args { + int fd; + int obj; +}; + +struct wait_args { + int fd; + unsigned long request; + struct ntsync_wait_args *args; + int ret; + int err; +}; + +static void *wait_thread(void *arg) +{ + struct wait_args *args = arg; + + args->ret = ioctl(args->fd, args->request, args->args); + args->err = errno; + return NULL; +} + +static __u64 get_abs_timeout(unsigned int ms) +{ + struct timespec timeout; + clock_gettime(CLOCK_MONOTONIC, &timeout); + return (timeout.tv_sec * 1000000000) + timeout.tv_nsec + (ms * 1000000); +} + +static int wait_for_thread(pthread_t thread, unsigned int ms) +{ + struct timespec timeout; + + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_nsec += ms * 
1000000; + timeout.tv_sec += (timeout.tv_nsec / 1000000000); + timeout.tv_nsec %= 1000000000; + return pthread_timedjoin_np(thread, NULL, &timeout); +} + +TEST(wake_any) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 count, index, signaled; + int objs[2], fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 123; + mutex_args.count = 1; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + /* test waking the semaphore */ + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 456; + wait_args.index = 0xdeadbeef; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ANY; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(objs[0], 0, 3); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(0, wait_args.index); + + /* test waking the mutex */ + + /* first grab it again for owner 123 */ + ret = wait_any(fd, 1, &objs[1], 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.owner = 456; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, mutex_args.count); + check_mutex_state(objs[1], 1, 456); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + close(objs[1]); + + /* test waking events */ + + event_args.manual = false; + event_args.signaled = false; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + close(objs[1]); + + event_args.manual = true; + event_args.signaled = false; + 
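+	/* repeat the event wake tests with a manual-reset event */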
objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, objs[1]); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 1, 1); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + wait_args.timeout = get_abs_timeout(1000); + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + check_event_state(objs[1], 0, 1); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(1, wait_args.index); + + /* delete an object while it's being waited on */ + + wait_args.timeout = get_abs_timeout(200); + wait_args.owner = 123; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + close(objs[0]); + close(objs[1]); + + ret = wait_for_thread(thread, 200); + EXPECT_EQ(0, ret); + EXPECT_EQ(-1, thread_args.ret); + EXPECT_EQ(ETIMEDOUT, thread_args.err); + + close(fd); +} + +TEST(wake_all) +{ + struct ntsync_event_args manual_event_args = {0}; + struct ntsync_event_args auto_event_args = {0}; + struct ntsync_mutex_args mutex_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 count, index, signaled; + int objs[4], fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 3; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + mutex_args.owner = 123; + mutex_args.count = 1; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, objs[1]); + + manual_event_args.manual = true; + manual_event_args.signaled = true; + objs[2] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args); + EXPECT_LE(0, objs[2]); + + auto_event_args.manual = false; + auto_event_args.signaled = true; + objs[3] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args); + EXPECT_EQ(0, objs[3]); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 4; + wait_args.owner = 456; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ALL; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + check_sem_state(objs[0], 1, 3); + + ret = wait_any(fd, 1, &objs[0], 123, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = unlock_mutex(objs[1], 123, &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, count); + + ret = pthread_tryjoin_np(thread, NULL); + EXPECT_EQ(EBUSY, ret); + + check_mutex_state(objs[1], 0, 0); + + ret = ioctl(objs[2], 
NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + count = 2; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, count); + check_sem_state(objs[0], 2, 3); + + ret = ioctl(objs[3], NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, signaled); + + ret = ioctl(objs[2], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + ret = ioctl(objs[3], NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, signaled); + + check_sem_state(objs[0], 1, 3); + check_mutex_state(objs[1], 1, 456); + check_event_state(objs[2], 1, 1); + check_event_state(objs[3], 0, 0); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + + /* delete an object while it's being waited on */ + + wait_args.timeout = get_abs_timeout(200); + wait_args.owner = 123; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + close(objs[0]); + close(objs[1]); + close(objs[2]); + close(objs[3]); + + ret = wait_for_thread(thread, 200); + EXPECT_EQ(0, ret); + EXPECT_EQ(-1, thread_args.ret); + EXPECT_EQ(ETIMEDOUT, thread_args.err); + + close(fd); +} + +TEST(alert_any) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + __u32 index, count, signaled; + struct wait_args thread_args; + int objs[2], event, fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 0; + sem_args.max = 2; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + sem_args.count = 1; + sem_args.max = 2; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[1]); + + event_args.manual = true; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + ret = wait_any_alert(fd, 0, NULL, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 0, NULL, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(1, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + /* test wakeup via alert */ + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 123; + wait_args.index = 0xdeadbeef; + wait_args.alert = event; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ANY; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(2, wait_args.index); + + close(event); + + /* test with an auto-reset event */ + + event_args.manual = false; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, 
event); + + count = 1; + ret = release_sem(objs[0], &count); + EXPECT_EQ(0, ret); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = wait_any_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(event); + + close(objs[0]); + close(objs[1]); + + close(fd); +} + +TEST(alert_all) +{ + struct ntsync_event_args event_args = {0}; + struct ntsync_wait_args wait_args = {0}; + struct ntsync_sem_args sem_args = {0}; + struct wait_args thread_args; + __u32 index, count, signaled; + int objs[2], event, fd, ret; + pthread_t thread; + + fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, fd); + + sem_args.count = 2; + sem_args.max = 2; + objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[0]); + + sem_args.count = 1; + sem_args.max = 2; + objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args); + EXPECT_LE(0, objs[1]); + + event_args.manual = true; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + /* test wakeup via alert */ + + ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled); + EXPECT_EQ(0, ret); + + wait_args.timeout = get_abs_timeout(1000); + wait_args.objs = (uintptr_t)objs; + wait_args.count = 2; + wait_args.owner = 123; + wait_args.index = 0xdeadbeef; + wait_args.alert = event; + thread_args.fd = fd; + thread_args.args = &wait_args; + thread_args.request = NTSYNC_IOC_WAIT_ALL; + ret = pthread_create(&thread, NULL, wait_thread, &thread_args); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(ETIMEDOUT, ret); + + ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + ret = wait_for_thread(thread, 100); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, thread_args.ret); + EXPECT_EQ(2, wait_args.index); + + close(event); + + /* test with an auto-reset event */ + + event_args.manual = false; + event_args.signaled = true; + event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, event); + + count = 2; + ret = release_sem(objs[1], &count); + EXPECT_EQ(0, ret); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(0, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(0, ret); + EXPECT_EQ(2, index); + + ret = wait_all_alert(fd, 2, objs, 123, event, &index); + EXPECT_EQ(-1, ret); + EXPECT_EQ(ETIMEDOUT, errno); + + close(event); + + close(objs[0]); + close(objs[1]); + + close(fd); +} + +#define STRESS_LOOPS 10000 +#define STRESS_THREADS 4 + +static unsigned int stress_counter; +static int stress_device, stress_start_event, stress_mutex; + +static void *stress_thread(void *arg) +{ + struct ntsync_wait_args wait_args = {0}; + __u32 index, count, i; + int ret; + + wait_args.timeout = UINT64_MAX; + wait_args.count = 1; + wait_args.objs = (uintptr_t)&stress_start_event; + wait_args.owner = gettid(); + wait_args.index = 0xdeadbeef; + + ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args); + + wait_args.objs = (uintptr_t)&stress_mutex; + + for (i = 0; i < STRESS_LOOPS; ++i) { + ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args); + + ++stress_counter; + + unlock_mutex(stress_mutex, 
wait_args.owner, &count); + } + + return NULL; +} + +TEST(stress_wait) +{ + struct ntsync_event_args event_args; + struct ntsync_mutex_args mutex_args; + pthread_t threads[STRESS_THREADS]; + __u32 signaled, i; + int ret; + + stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY); + ASSERT_LE(0, stress_device); + + mutex_args.owner = 0; + mutex_args.count = 0; + stress_mutex = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args); + EXPECT_LE(0, stress_mutex); + + event_args.manual = 1; + event_args.signaled = 0; + stress_start_event = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args); + EXPECT_LE(0, stress_start_event); + + for (i = 0; i < STRESS_THREADS; ++i) + pthread_create(&threads[i], NULL, stress_thread, NULL); + + ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled); + EXPECT_EQ(0, ret); + + for (i = 0; i < STRESS_THREADS; ++i) { + ret = pthread_join(threads[i], NULL); + EXPECT_EQ(0, ret); + } + + EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter); + + close(stress_start_event); + close(stress_mutex); + close(stress_device); +} + +TEST_HARNESS_MAIN
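
The selftests above drive the ioctl interface through small wrappers (wait_objs(), release_sem() and friends). As a stand-alone illustration, and not part of the patch series itself, the sketch below shows the same create-and-poll pattern against a single ntsync instance; the owner value and the zero-length timeout are arbitrary choices for the example::

    /*
     * Stand-alone sketch (not from the patch series): create a semaphore
     * on a fresh ntsync instance and poll it once, mirroring the selftest
     * helpers above.  Error handling is omitted; the owner value is an
     * arbitrary nonzero identifier.
     */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <time.h>
    #include <unistd.h>
    #include <linux/ntsync.h>

    int main(void)
    {
        struct ntsync_sem_args sem_args = { .count = 1, .max = 2 };
        struct ntsync_wait_args wait_args = { 0 };
        struct timespec now;
        int dev, sem;

        dev = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
        sem = ioctl(dev, NTSYNC_IOC_CREATE_SEM, &sem_args);

        /*
         * The timeout is an absolute CLOCK_MONOTONIC value in nanoseconds;
         * passing the current time turns the wait into a poll.
         */
        clock_gettime(CLOCK_MONOTONIC, &now);
        wait_args.timeout = now.tv_sec * 1000000000ull + now.tv_nsec;
        wait_args.objs = (uintptr_t)&sem;
        wait_args.count = 1;
        wait_args.owner = 1;

        if (ioctl(dev, NTSYNC_IOC_WAIT_ANY, &wait_args) == 0)
            printf("acquired object %u\n", wait_args.index);
        else
            perror("wait");

        close(sem);
        close(dev);
        return 0;
    }

Passing the current CLOCK_MONOTONIC time as the absolute timeout makes the wait return immediately with ``ETIMEDOUT`` when the object is not signaled, which is how the selftests implement their non-blocking state checks.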