diff --git a/disaggregated_memory/boards/arm_dm_board.py b/disaggregated_memory/boards/arm_dm_board.py
new file mode 100644
index 0000000000..fb965d51b2
--- /dev/null
+++ b/disaggregated_memory/boards/arm_dm_board.py
@@ -0,0 +1,284 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import (
+ Port,
+ AddrRange,
+ VoltageDomain,
+ SrcClockDomain,
+ Terminal,
+ VncServer,
+ IOXBar,
+ BadAddr,
+ ArmSystem,
+)
+
+from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation
+from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease
+from m5.objects.ArmFsWorkload import ArmFsLinux
+
+from m5.util.fdthelper import (
+ Fdt,
+ FdtNode,
+ FdtProperty,
+ FdtPropertyStrings,
+ FdtPropertyWords,
+ FdtState,
+)
+
+import os
+import m5
+from abc import ABCMeta
+from gem5.components.boards.arm_board import ArmBoard
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+from gem5.utils.override import overrides
+
+from typing import List, Sequence, Tuple
+
+
+class ArmAbstractDMBoard(ArmBoard):
+ """
+    A high-level ARM board that can simulate zNUMA-capable systems with remote
+    memory. This board extends the ArmBoard from the gem5 standard
+    library. This board assumes that you will be booting Linux. This board can
+    be used to do disaggregated ARM system research while accelerating the
+    simulation using kvm.
+
+ **Limitations**
+ * kvm is only supported in a gem5-only setup.
+ """
+
+ __metaclass__ = ABCMeta
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        local_memory: AbstractMemorySystem,
+        cache_hierarchy: AbstractCacheHierarchy,
+        remote_memory_addr_range: AddrRange,
+        platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
+        release: ArmRelease = ArmDefaultRelease(),
+    ) -> None:
+        """Keep the local memory and remote range, then build the ArmBoard."""
+        self._localMemory = local_memory
+        # The remote memory may be a gem5 memory or an external simulator's
+        # memory; this abstract board only records its address range, from
+        # which the size is derived. NOTE(review): both fields are set before
+        # super().__init__(), presumably because it runs _setup_board().
+        self._remoteMemoryAddrRange = remote_memory_addr_range
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            memory=local_memory,
+            cache_hierarchy=cache_hierarchy,
+            platform=platform,
+            release=release,
+        )
+        self.local_memory = local_memory
+
+    @overrides(ArmBoard)
+    def get_memory(self) -> "AbstractMemory":
+        """Unsupported on this board: always raises NotImplementedError.
+
+        Use get_local_memory() or get_remote_memory() instead.
+        """
+        raise NotImplementedError
+
+    def get_local_memory(self) -> "AbstractMemory":
+        """Get the local memory system that was passed to the constructor.
+        :returns: The local memory system.
+        """
+        return self._localMemory
+
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the remote memory connected to the board.
+        This has to be implemented by the child class as we don't know if
+        this board is simulating gem5 memory or some external simulator
+        memory. The abstract board always raises NotImplementedError.
+        :returns: The remote memory system.
+        """
+        raise NotImplementedError
+
+    def get_remote_memory_size(self) -> int:
+        """Get the size of the remote address range (to set up NUMA nodes)."""
+        return self._remoteMemoryAddrRange.size()
+
+    @overrides(ArmBoard)
+    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        return self.get_local_memory().get_mem_ports()  # local memory only
+
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Get the ports of the remote memory connected to the board.
+        This has to be implemented by the child class as we don't know if
+        this board is simulating gem5 memory or some external simulator
+        memory. The abstract board always raises NotImplementedError.
+        :returns: A sequence of (address range, port) tuples.
+        """
+        raise NotImplementedError
+
+    def get_remote_memory_addr_range(self) -> AddrRange:
+        """Return the remote memory address range given to the constructor."""
+        return self._remoteMemoryAddrRange
+
+    @overrides(ArmBoard)
+    def _setup_board(self) -> None:
+        """Set up the workload, clock, I/O devices and memory ranges."""
+        # This board is expected to run full-system simulation.
+        # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py`
+        self.workload = ArmFsLinux()
+
+        # We are fixing the following variable for the ArmSystem to work. The
+        # security extension is checked while generating the dtb file in
+        # realview. This board does not have security extension enabled.
+        self._have_psci = False
+
+        # highest_el_is_64 is set to True. True if the register width of the
+        # highest implemented exception level is 64 bits.
+        self.highest_el_is_64 = True
+
+        # Setting up the voltage and the clock domain here for the ARM board.
+        # The ArmSystem/RealView expects voltage_domain to be a parameter.
+        # The voltage and the clock frequency are taken from the devices.py
+        # file from configs/example/arm. We set the clock to the same frequency
+        # as the user specified in the config script.
+        self.voltage_domain = VoltageDomain(voltage="1.0V")
+        self.clk_domain = SrcClockDomain(
+            clock=self._clk_freq, voltage_domain=self.voltage_domain
+        )
+
+        # The ARM board supports both Terminal and VncServer.
+        self.terminal = Terminal()
+        self.vncserver = VncServer()
+
+        # Incoherent I/O Bus
+        self.iobus = IOXBar()
+        self.iobus.badaddr_responder = BadAddr()
+        self.iobus.default = self.iobus.badaddr_responder.pio
+
+        # No DMA ports are known yet; the field starts out as None.
+        self._dma_ports = None
+
+        # RealView sets up most of the on-chip and off-chip devices and GIC
+        # for the ARM board. These devices' information is also used to
+        # generate the dtb file. We then connect the I/O devices to the
+        # I/O bus.
+        self._setup_io_devices()
+
+        # Once the realview is setup, we can continue setting up the memory
+        # ranges. ArmBoard's memory can only be setup once realview is
+        # initialized.
+        local_memory = self.get_local_memory()
+        mem_size = local_memory.get_size()
+
+        # The following code is taken from configs/example/arm/devices.py. It
+        # sets up all the memory ranges for the board.
+        self.mem_ranges = []
+        success = False
+        # Fill the platform's memory regions in order until mem_size is placed.
+        for mem_range in self.realview._mem_regions:
+            size_in_range = min(mem_size, mem_range.size())
+            self.mem_ranges.append(
+                AddrRange(start=mem_range.start, size=size_in_range)
+            )
+
+            mem_size -= size_in_range
+            if mem_size == 0:
+                success = True
+                break
+
+        if success:
+            local_memory.set_memory_range(self.mem_ranges)
+        else:
+            raise ValueError("Memory size too big for platform capabilities")
+        # After the local memory ranges, append the remote memory range.
+        self.mem_ranges.append(self._remoteMemoryAddrRange)
+
+        # The PCI Devices. PCI devices can be added via the `_add_pci_device`
+        # function.
+        self._pci_devices = []
+
+        # Let the child board configure the remote memory ranges.
+        self._set_remote_memory_ranges()
+
+    def _set_remote_memory_ranges(self):
+        raise NotImplementedError  # child boards configure the remote memory
+
+    @overrides(ArmSystem)
+    def generateDeviceTree(self, state):
+        """Build the device tree root with NUMA-tagged memory nodes.
+
+        Every range in self.mem_ranges becomes a NUMA node 0 memory node and
+        the remote range a NUMA node 1 one; child nodes named '/' are merged
+        into the root rather than appended."""
+
+        def generateMemNode(numa_node_id, mem_range):
+            node = FdtNode(f"memory@{int(mem_range.start):x}")
+            node.append(FdtPropertyStrings("device_type", ["memory"]))
+            node.append(
+                FdtPropertyWords(
+                    "reg",
+                    state.addrCells(mem_range.start)
+                    + state.sizeCells(mem_range.size()),
+                )
+            )
+            node.append(FdtPropertyWords("numa-node-id", [numa_node_id]))
+            return node
+
+        root = FdtNode("/")
+        root.append(state.addrCellsProperty())
+        root.append(state.sizeCellsProperty())
+
+        # NOTE(review): the remote range is also in mem_ranges; emitted twice?
+        for mem_range in self.mem_ranges:
+            root.append(generateMemNode(0, mem_range))
+        root.append(generateMemNode(1, self._remoteMemoryAddrRange))
+
+        for node in self.recurseDeviceTree(state):
+            # Merge root nodes instead of adding them (for children
+            # that need to add multiple root level nodes)
+            if node.get_name() == root.get_name():
+                root.merge(node)
+            else:
+                root.append(node)
+
+        return root
+
+    @overrides(ArmBoard)
+    def get_default_kernel_args(self) -> List[str]:
+        """Return the default kernel arguments (taken from devices.py)."""
+        kernel_args = [
+            "console=ttyAMA0",
+            "lpj=19988480",
+            "norandmaps",
+            "root={root_value}",
+            "rw",
+        ]
+        return kernel_args
diff --git a/disaggregated_memory/boards/arm_gem5_board.py b/disaggregated_memory/boards/arm_gem5_board.py
new file mode 100644
index 0000000000..1323256f6f
--- /dev/null
+++ b/disaggregated_memory/boards/arm_gem5_board.py
@@ -0,0 +1,198 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# import os
+# import sys
+
+# # all the source files are one directory above.
+# sys.path.append(
+# os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+# )
+
+from m5.objects import (
+ Port,
+ AddrRange,
+ VoltageDomain,
+ SrcClockDomain,
+ Terminal,
+ VncServer,
+ IOXBar,
+ BadAddr,
+ ArmSystem,
+)
+
+from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation
+from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease
+from m5.objects.ArmFsWorkload import ArmFsLinux
+
+from m5.util.fdthelper import (
+ Fdt,
+ FdtNode,
+ FdtProperty,
+ FdtPropertyStrings,
+ FdtPropertyWords,
+ FdtState,
+)
+
+import os
+import m5
+from abc import ABCMeta
+
+from memories.remote_memory import RemoteChanneledMemory
+from boards.arm_dm_board import ArmAbstractDMBoard
+
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+from gem5.utils.override import overrides
+
+from typing import List, Sequence, Tuple
+
+
+class ArmGem5DMBoard(ArmAbstractDMBoard):
+ __metaclass__ = ABCMeta
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        local_memory: AbstractMemorySystem,
+        remote_memory: AbstractMemorySystem,
+        cache_hierarchy: AbstractCacheHierarchy,
+        remote_memory_addr_range: AddrRange = None,
+        platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
+        release: ArmRelease = ArmDefaultRelease(),
+    ) -> None:
+        """Create a DM board whose remote memory is a gem5 memory system."""
+        self._localMemory, self._remoteMemory = local_memory, remote_memory
+        # Without an explicit range, the remote memory defaults to starting
+        # at 0x80000000 + local memory size and spanning its own size.
+        if remote_memory_addr_range is None:
+            remote_start = 0x80000000 + local_memory.get_size()
+            remote_memory_addr_range = AddrRange(
+                remote_start, size=remote_memory.get_size()
+            )
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            local_memory=local_memory,
+            remote_memory_addr_range=remote_memory_addr_range,
+            cache_hierarchy=cache_hierarchy,
+            platform=platform,
+            release=release,
+        )
+        self.local_memory = local_memory
+        self.remote_memory = remote_memory
+
+    @overrides(ArmAbstractDMBoard)
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the remote memory system that was passed to the constructor.
+        :returns: The remote memory system.
+        """
+        return self._remoteMemory
+
+    @overrides(ArmAbstractDMBoard)
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        return self.get_remote_memory().get_mem_ports()  # gem5 remote memory
+
+    @overrides(ArmAbstractDMBoard)
+    def _set_remote_memory_ranges(self):
+        """Assign the board's remote address range to the remote memory."""
+        remote_ranges = [self._remoteMemoryAddrRange]
+        self.get_remote_memory().set_memory_range(remote_ranges)
+
+    @overrides(ArmAbstractDMBoard)
+    def get_default_kernel_args(self) -> List[str]:
+        """Return the devices.py kernel arguments plus a custom init script."""
+        kernel_args = [
+            "console=ttyAMA0",
+            "lpj=19988480",
+            "norandmaps",
+            "root={root_value}",
+            "rw",
+            "init=/root/gem5-init.sh",
+        ]
+        return kernel_args
+
+    @overrides(ArmAbstractDMBoard)
+    def _connect_things(self) -> None:
+        """Connects all the components to the board.
+
+        The order of this board is always:
+
+        1. Connect the local and the remote memory.
+        2. Connect the cache hierarchy.
+        3. Connect the processor.
+
+        Developers may build upon this assumption when creating components.
+
+        Notes:
+        * The processor is incorporated after the cache hierarchy due to a bug
+          noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
+          bug is fixed, this ordering must be maintained.
+        * Once this function is called `_connect_things_called` *must* be set
+          to `True`.
+        """
+
+        if self._connect_things_called:
+            raise Exception(
+                "The `_connect_things` function has already been called."
+            )
+
+        # Incorporate the memory into the motherboard.
+        self.get_local_memory().incorporate_memory(self)
+        self.get_remote_memory().incorporate_memory(self)
+
+        # Incorporate the cache hierarchy for the motherboard.
+        hierarchy = self.get_cache_hierarchy()
+        if hierarchy:
+            hierarchy.incorporate_cache(self)
+            # Remote memory can only be attached to classic (non-Ruby) caches.
+            if hierarchy.is_ruby():
+                from m5.util import fatal  # not imported at module level
+                fatal(
+                    "remote memory is only supported in classic caches at "
+                    "the moment!"
+                )
+            if isinstance(self.get_remote_memory(), RemoteChanneledMemory):
+                for link in self.get_remote_memory().remote_links:
+                    hierarchy.membus.mem_side_ports = link.cpu_side_ports
+
+        # Incorporate the processor into the motherboard.
+        self.get_processor().incorporate_processor(self)
+
+        self._connect_things_called = True
+
+    @overrides(ArmAbstractDMBoard)
+    def _post_instantiate(self):
+        """Run the post-m5.instantiate hook of every board component."""
+        self.get_processor()._post_instantiate()
+        if self.get_cache_hierarchy():
+            self.get_cache_hierarchy()._post_instantiate()
+        for memory in (self.get_local_memory(), self.get_remote_memory()):
+            memory._post_instantiate()
diff --git a/disaggregated_memory/boards/arm_sst_board.py b/disaggregated_memory/boards/arm_sst_board.py
new file mode 100644
index 0000000000..1e60803ba8
--- /dev/null
+++ b/disaggregated_memory/boards/arm_sst_board.py
@@ -0,0 +1,239 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# import os
+# import sys
+
+# # all the source files are one directory above.
+# sys.path.append(
+# os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+# )
+
+from m5.objects import (
+ Port,
+ AddrRange,
+ VoltageDomain,
+ SrcClockDomain,
+ NoncoherentXBar,
+ Terminal,
+ VncServer,
+ IOXBar,
+ BadAddr,
+ ArmSystem,
+)
+
+from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation
+from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease
+from m5.objects.ArmFsWorkload import ArmFsLinux
+
+from m5.util.fdthelper import (
+ Fdt,
+ FdtNode,
+ FdtProperty,
+ FdtPropertyStrings,
+ FdtPropertyWords,
+ FdtState,
+)
+
+import os
+import m5
+from abc import ABCMeta
+
+from memories.remote_memory import RemoteChanneledMemory
+from boards.arm_dm_board import ArmAbstractDMBoard
+
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+from gem5.utils.override import overrides
+
+from typing import List, Sequence, Tuple
+
+
+class ArmSstDMBoard(ArmAbstractDMBoard):
+ __metaclass__ = ABCMeta
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        local_memory: AbstractMemorySystem,
+        remote_memory: "ExternalRemoteMemoryInterface",
+        cache_hierarchy: AbstractCacheHierarchy,
+        platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
+        release: ArmRelease = ArmDefaultRelease(),
+    ) -> None:
+        """Create a board whose remote memory is an external (SST) memory."""
+
+        self._localMemory = local_memory
+        # Since the remote memory is defined on SST's side, gem5 only needs
+        # the interface object describing its size / address range.
+        self._remoteMemory = remote_memory
+        inner_memory = remote_memory.remote_memory
+
+        # The remote memory is set up with either a size or an address range.
+        # When only the size is known, default the range to start at
+        # 0x80000000 + local_memory_size and to span the remote memory size.
+        if not remote_memory.get_set_using_addr_ranges():
+            remote_start = 0x80000000 + local_memory.get_size()
+            inner_memory.physical_address_ranges = [
+                AddrRange(remote_start, size=remote_memory.get_size())
+            ]
+
+        # Keep the size reported by the interface.
+        self._remoteMemorySize = remote_memory.get_size()
+
+        # The parent board receives the first configured address range.
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            local_memory=local_memory,
+            remote_memory_addr_range=inner_memory.physical_address_ranges[0],
+            cache_hierarchy=cache_hierarchy,
+            platform=platform,
+            release=release,
+        )
+
+        # Expose both memories as attributes of the board.
+        self.local_memory = local_memory
+        self.remote_memory = inner_memory
+
+    @overrides(ArmAbstractDMBoard)
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the remote memory object that was passed to the constructor.
+        :returns: The external remote memory interface.
+        """
+        return self._remoteMemory
+
+    @overrides(ArmAbstractDMBoard)
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Return the address ranges and port of the external remote memory."""
+        # Ranges and port live on the interface's inner `remote_memory` object.
+        external_memory = self.get_remote_memory().remote_memory
+        return [
+            (external_memory.physical_address_ranges, external_memory.port)
+        ]
+
+    @overrides(ArmAbstractDMBoard)
+    def _set_remote_memory_ranges(self):
+        """No-op: the external memory's address ranges are handled in
+        __init__, so nothing is configured here.
+        """
+        pass
+
+    @overrides(ArmAbstractDMBoard)
+    def get_default_kernel_args(self) -> List[str]:
+        """Return the devices.py kernel arguments plus a custom init script."""
+        kernel_args = [
+            "console=ttyAMA0",
+            "lpj=19988480",
+            "norandmaps",
+            "root={root_value}",
+            "rw",
+            "init=/root/gem5-init.sh",
+        ]
+        return kernel_args
+
+    @overrides(ArmAbstractDMBoard)
+    def _connect_things(self) -> None:
+        """Connects all the components to the board.
+
+        The order of this board is always:
+
+        1. Connect the local memory and the external remote memory port.
+        2. Connect the cache hierarchy.
+        3. Connect the processor.
+
+        Developers may build upon this assumption when creating components.
+
+        Notes:
+        * The processor is incorporated after the cache hierarchy due to a bug
+          noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
+          bug is fixed, this ordering must be maintained.
+        * Once this function is called `_connect_things_called` *must* be set
+          to `True`.
+        """
+
+        if self._connect_things_called:
+            raise Exception(
+                "The `_connect_things` function has already been called."
+            )
+
+        # Incorporate the memory into the motherboard.
+        self.get_local_memory().incorporate_memory(self)
+        # we need to find whether there is any external latency. if yes, then
+        # add xbar to add this latency.
+
+        if self.get_remote_memory().is_xbar_required():
+            self.remote_link = NoncoherentXBar(
+                frontend_latency=0,
+                forward_latency=0,
+                response_latency=self.get_remote_memory()._remote_memory_latency,
+                width=64,
+            )
+            # connect the remote memory port to the remote link
+            self.get_remote_memory().remote_memory.port = (
+                self.remote_link.mem_side_ports
+            )
+            # The remote link is then connected to the membus
+            self.get_cache_hierarchy().membus.mem_side_ports = (
+                self.remote_link.cpu_side_ports
+            )
+        else:
+            # Connect the external memory directly to the motherboard.
+            self.get_remote_memory().remote_memory.port = (
+                self.get_cache_hierarchy().membus.mem_side_ports
+            )
+
+        # Incorporate the cache hierarchy for the motherboard.
+        hierarchy = self.get_cache_hierarchy()
+        if hierarchy:
+            hierarchy.incorporate_cache(self)
+            # Remote memory can only be attached to classic (non-Ruby) caches.
+            if hierarchy.is_ruby():
+                from m5.util import fatal  # not imported at module level
+                fatal(
+                    "remote memory is only supported in classic caches at "
+                    "the moment!"
+                )
+            if isinstance(self.get_remote_memory(), RemoteChanneledMemory):
+                for link in self.get_remote_memory().remote_links:
+                    hierarchy.membus.mem_side_ports = link.cpu_side_ports
+
+        # Incorporate the processor into the motherboard.
+        self.get_processor().incorporate_processor(self)
+
+        self._connect_things_called = True
+
+    @overrides(ArmAbstractDMBoard)
+    def _post_instantiate(self):
+        """Post-instantiate the processor, caches and local memory only."""
+        self.get_processor()._post_instantiate()
+        if self.get_cache_hierarchy():
+            self.get_cache_hierarchy()._post_instantiate()
+        self.get_local_memory()._post_instantiate()
diff --git a/disaggregated_memory/boards/riscv_dm_board.py b/disaggregated_memory/boards/riscv_dm_board.py
new file mode 100644
index 0000000000..f9660748e8
--- /dev/null
+++ b/disaggregated_memory/boards/riscv_dm_board.py
@@ -0,0 +1,482 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from typing import List, Optional, Sequence, Tuple
+
+from gem5.components.boards.riscv_board import RiscvBoard
+
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.utils.override import overrides
+from gem5.resources.resource import AbstractResource
+from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload
+from gem5.components.boards.abstract_system_board import AbstractSystemBoard
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+
+from gem5.isas import ISA
+
+import m5
+
+from m5.objects import AddrRange, HiFive, Frequency, Port
+
+from m5.util.fdthelper import (
+ Fdt,
+ FdtNode,
+ FdtProperty,
+ FdtPropertyStrings,
+ FdtPropertyWords,
+ FdtState,
+)
+
+
+class RiscvAbstractDMBoard(RiscvBoard):
+ """
+    A high-level RISCV board that can simulate zNUMA-capable systems with
+    remote memory. This board extends the RiscvBoard from the gem5 standard
+    library. At a high-level, this is based on the HiFive Unmatched board from
+    SiFive. This board assumes that you will be booting Linux.
+
+ **Limitations**
+ * There is only one Plic and Clint controller supported by this board,
+ which make this board only capable of simulating zNUMA nodes and not
+ full-fledged NUMA nodes.
+ """
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        local_memory: AbstractMemorySystem,
+        remote_memory_addr_range: AddrRange,
+        cache_hierarchy: AbstractCacheHierarchy,
+    ) -> None:
+        """Keep the local memory and remote range, then build the RiscvBoard.
+        :raises Exception: if the processor does not use the RISCV ISA."""
+        self._localMemory = local_memory
+        # The remote memory may be gem5 or external memory; this abstract
+        # board only needs its address range (the size is derived from it).
+        self._remoteMemoryAddrRange = remote_memory_addr_range
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            memory=local_memory,
+            cache_hierarchy=cache_hierarchy,
+        )
+        self.local_memory = local_memory
+
+        if processor.get_isa() != ISA.RISCV:
+            raise Exception(
+                "The RISCVBoard requires a processor using the "
+                "RISCV ISA. Current processor ISA: "
+                f"'{processor.get_isa().name}'."
+            )
+
+    @overrides(AbstractSystemBoard)
+    def get_memory(self) -> "AbstractMemory":
+        """Unsupported on this board: always raises NotImplementedError.
+
+        Use get_local_memory() or get_remote_memory() instead.
+        """
+        raise NotImplementedError
+
+    def get_local_memory(self) -> "AbstractMemory":
+        """Get the local memory system that was passed to the constructor.
+        :returns: The local memory system.
+        """
+        return self._localMemory
+
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the remote memory connected to the board.
+        This has to be implemented by the child class as we don't know if
+        this board is simulating gem5 memory or some external simulator
+        memory. The abstract board always raises NotImplementedError.
+        :returns: The remote memory system.
+        """
+        raise NotImplementedError
+
+    def get_remote_memory_size(self) -> int:
+        """Get the size of the remote address range (to set up NUMA nodes)."""
+        return self._remoteMemoryAddrRange.size()
+
+    @overrides(AbstractSystemBoard)
+    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        return self.get_local_memory().get_mem_ports()  # local memory only
+
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Get the ports of the remote memory connected to the board.
+        This has to be implemented by the child class as we don't know if
+        this board is simulating gem5 memory or some external simulator
+        memory. The abstract board always raises NotImplementedError.
+        :returns: A sequence of (address range, port) tuples.
+        """
+        raise NotImplementedError
+
+    @overrides(AbstractSystemBoard)
+    def _setup_memory_ranges(self):
+        """Record the local, remote and global memory ranges of the board.
+
+        There is one local range, close to the host, and one remote range of
+        pure memory, far from the host. The combined list is used to generate
+        the dtb; the child board then incorporates the ranges.
+        """
+        local_mem_size = self.get_local_memory().get_size()
+
+        # local memory range will always start from 0x80000000. The remote
+        # memory can start and end anywhere as long as it is consistent
+        # with the dtb.
+        self._local_mem_ranges = [
+            AddrRange(start=0x80000000, size=local_mem_size)
+        ]
+
+        # The remote memory starts anywhere after the local memory ends. We
+        # rely on the user to start and end this range.
+        self._remote_mem_ranges = [self._remoteMemoryAddrRange]
+
+        # Keep all the memory ranges in one list, local range first. This
+        # list is used to generate the dtb for this machine.
+        self._global_mem_ranges = [
+            self._local_mem_ranges[0],
+            self._remote_mem_ranges[0],
+        ]
+
+        # The ranges cannot be applied to the memories by this abstract
+        # board, so that step is delegated to the child board.
+
+        self._incorporate_memory_range()
+
+    def _incorporate_memory_range(self):
+        """Apply the memory ranges to the memories; child boards must
+        override this, the abstract board always raises."""
+
+        raise NotImplementedError(
+            "Cannot incorporte the memory using an Abstract-like board."
+        )
+
+    @overrides(RiscvBoard)
+    def generate_device_tree(self, outdir: str) -> None:
+        """Creates the dtb and dts files, one NUMA node per memory range.
+        Creates two files in the outdir: 'device.dtb' and 'device.dts'
+        :param outdir: Directory to output the files
+        """
+        state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1)
+        root = FdtNode("/")
+        root.append(state.addrCellsProperty())
+        root.append(state.sizeCellsProperty())
+        root.appendCompatible(["riscv-virtio"])
+
+        for idx, mem_range in enumerate(self._global_mem_ranges):
+            node = FdtNode("memory@%x" % int(mem_range.start))
+            node.append(FdtPropertyStrings("device_type", ["memory"]))
+            node.append(
+                FdtPropertyWords(
+                    "reg",
+                    state.addrCells(mem_range.start)
+                    + state.sizeCells(mem_range.size()),
+                )
+            )
+            # adding the NUMA node information so that the OS can identify all
+            # the NUMA ranges.
+            node.append(FdtPropertyWords("numa-node-id", [idx]))
+            root.append(node)
+
+        # See Documentation/devicetree/bindings/riscv/cpus.txt for details.
+        cpus_node = FdtNode("cpus")
+        cpus_state = FdtState(addr_cells=1, size_cells=0)
+        cpus_node.append(cpus_state.addrCellsProperty())
+        cpus_node.append(cpus_state.sizeCellsProperty())
+        # Used by the CLINT driver to set the timer frequency. Value taken from
+        # RISC-V kernel docs (Note: freedom-u540 is actually 1MHz)
+        cpus_node.append(FdtPropertyWords("timebase-frequency", [100000000]))
+
+        for i, core in enumerate(self.get_processor().get_cores()):
+            node = FdtNode(f"cpu@{i}")
+            node.append(FdtPropertyStrings("device_type", "cpu"))
+            node.append(FdtPropertyWords("reg", state.CPUAddrCells(i)))
+            # The CPUs are also associated to the NUMA nodes. All the CPUs are
+            # bound to the first NUMA node.
+            node.append(FdtPropertyWords("numa-node-id", [0]))
+            node.append(FdtPropertyStrings("mmu-type", "riscv,sv48"))
+            node.append(FdtPropertyStrings("status", "okay"))
+            node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc"))
+            # TODO: Should probably get this from the core.
+            freq = self.clk_domain.clock[0].frequency
+            node.append(FdtPropertyWords("clock-frequency", freq))
+            node.appendCompatible(["riscv"])
+            int_phandle = state.phandle(f"cpu@{i}.int_state")
+            node.appendPhandle(f"cpu@{i}")
+
+            int_node = FdtNode("interrupt-controller")
+            int_state = FdtState(interrupt_cells=1)
+            int_phandle = int_state.phandle(f"cpu@{i}.int_state")
+            int_node.append(int_state.interruptCellsProperty())
+            int_node.append(FdtProperty("interrupt-controller"))
+            int_node.appendCompatible("riscv,cpu-intc")
+            int_node.append(FdtPropertyWords("phandle", [int_phandle]))
+
+            node.append(int_node)
+            cpus_node.append(node)
+
+        root.append(cpus_node)
+
+        soc_node = FdtNode("soc")
+        soc_state = FdtState(addr_cells=2, size_cells=2)
+        soc_node.append(soc_state.addrCellsProperty())
+        soc_node.append(soc_state.sizeCellsProperty())
+        soc_node.append(FdtProperty("ranges"))
+        soc_node.appendCompatible(["simple-bus"])
+
+        # CLINT node
+        clint = self.platform.clint
+        clint_node = clint.generateBasicPioDeviceNode(
+            soc_state, "clint", clint.pio_addr, clint.pio_size
+        )
+        int_extended = list()
+        for i, core in enumerate(self.get_processor().get_cores()):
+            phandle = soc_state.phandle(f"cpu@{i}.int_state")
+            int_extended.append(phandle)
+            int_extended.append(0x3)
+            int_extended.append(phandle)
+            int_extended.append(0x7)
+        clint_node.append(
+            FdtPropertyWords("interrupts-extended", int_extended)
+        )
+        # NUMA information is also associated with the CLINT controller.
+        # In this board, the objective to associate one NUMA node to the CPUs
+        # and the other node with no CPUs. To generalize this, an additional
+        # CLINT controller has to be created on this board, which will make it
+        # completely NUMA, instead of just disaggregated NUMA-like board.
+        clint_node.append(FdtPropertyWords("numa-node-id", [0]))
+        clint_node.appendCompatible(["riscv,clint0"])
+        soc_node.append(clint_node)
+
+        # PLIC node
+        plic = self.platform.plic
+        plic_node = plic.generateBasicPioDeviceNode(
+            soc_state, "plic", plic.pio_addr, plic.pio_size
+        )
+
+        int_state = FdtState(addr_cells=0, interrupt_cells=1)
+        plic_node.append(int_state.addrCellsProperty())
+        plic_node.append(int_state.interruptCellsProperty())
+
+        phandle = int_state.phandle(plic)
+        plic_node.append(FdtPropertyWords("phandle", [phandle]))
+        # Similar to the CLINT interrupt controller, another PLIC controller is
+        # required to make this board a general NUMA like board.
+        plic_node.append(FdtPropertyWords("numa-node-id", [0]))
+        plic_node.append(FdtPropertyWords("riscv,ndev", [plic.n_src - 1]))
+
+        int_extended = list()
+        for i, core in enumerate(self.get_processor().get_cores()):
+            phandle = state.phandle(f"cpu@{i}.int_state")
+            int_extended.append(phandle)
+            int_extended.append(0xB)
+            int_extended.append(phandle)
+            int_extended.append(0x9)
+
+        plic_node.append(FdtPropertyWords("interrupts-extended", int_extended))
+        plic_node.append(FdtProperty("interrupt-controller"))
+        plic_node.appendCompatible(["riscv,plic0"])
+
+        soc_node.append(plic_node)
+
+        # PCI
+        pci_state = FdtState(
+            addr_cells=3, size_cells=2, cpu_cells=1, interrupt_cells=1
+        )
+        pci_node = FdtNode("pci")
+
+        if int(self.platform.pci_host.conf_device_bits) == 8:
+            pci_node.appendCompatible("pci-host-cam-generic")
+        elif int(self.platform.pci_host.conf_device_bits) == 12:
+            pci_node.appendCompatible("pci-host-ecam-generic")
+        else:
+            m5.fatal("No compatibility string for the set conf_device_width")
+
+        pci_node.append(FdtPropertyStrings("device_type", ["pci"]))
+
+        # Cell sizes of child nodes/peripherals
+        pci_node.append(pci_state.addrCellsProperty())
+        pci_node.append(pci_state.sizeCellsProperty())
+        pci_node.append(pci_state.interruptCellsProperty())
+        # PCI address for CPU
+        pci_node.append(
+            FdtPropertyWords(
+                "reg",
+                soc_state.addrCells(self.platform.pci_host.conf_base)
+                + soc_state.sizeCells(self.platform.pci_host.conf_size),
+            )
+        )
+
+        # Ranges mapping
+        # For now some of this is hard coded, because the PCI module does not
+        # have a proper full understanding of the memory map, but adapting the
+        # PCI module is beyond the scope of what I'm trying to do here.
+        # Values are taken from the ARM VExpress_GEM5_V1 platform.
+        ranges = []
+        # Pio address range
+        ranges += self.platform.pci_host.pciFdtAddr(space=1, addr=0)
+        ranges += soc_state.addrCells(self.platform.pci_host.pci_pio_base)
+        ranges += pci_state.sizeCells(0x10000)  # Fixed size
+
+        # AXI memory address range
+        ranges += self.platform.pci_host.pciFdtAddr(space=2, addr=0)
+        ranges += soc_state.addrCells(self.platform.pci_host.pci_mem_base)
+        ranges += pci_state.sizeCells(0x40000000)  # Fixed size
+        pci_node.append(FdtPropertyWords("ranges", ranges))
+
+        # Interrupt mapping
+        plic_handle = int_state.phandle(plic)
+        int_base = self.platform.pci_host.int_base
+
+        interrupts = []
+
+        for i in range(int(self.platform.pci_host.int_count)):
+            interrupts += self.platform.pci_host.pciFdtAddr(
+                device=i, addr=0
+            ) + [int(i) + 1, plic_handle, int(int_base) + i]
+
+        pci_node.append(FdtPropertyWords("interrupt-map", interrupts))
+
+        int_count = int(self.platform.pci_host.int_count)
+        if int_count & (int_count - 1):
+            m5.fatal("PCI interrupt count should be power of 2")
+
+        intmask = self.platform.pci_host.pciFdtAddr(
+            device=int_count - 1, addr=0
+        ) + [0x0]
+        pci_node.append(FdtPropertyWords("interrupt-map-mask", intmask))
+
+        if self.platform.pci_host._dma_coherent:
+            pci_node.append(FdtProperty("dma-coherent"))
+
+        soc_node.append(pci_node)
+
+        # UART node
+        uart = self.platform.uart
+        uart_node = uart.generateBasicPioDeviceNode(
+            soc_state, "uart", uart.pio_addr, uart.pio_size
+        )
+        uart_node.append(
+            FdtPropertyWords("interrupts", [self.platform.uart_int_id])
+        )
+        uart_node.append(FdtPropertyWords("clock-frequency", [0x384000]))
+        uart_node.append(
+            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
+        )
+        uart_node.appendCompatible(["ns8250"])
+        soc_node.append(uart_node)
+
+        # VirtIO MMIO disk node
+        disk = self.disk
+        disk_node = disk.generateBasicPioDeviceNode(
+            soc_state, "virtio_mmio", disk.pio_addr, disk.pio_size
+        )
+        disk_node.append(FdtPropertyWords("interrupts", [disk.interrupt_id]))
+        disk_node.append(
+            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
+        )
+        disk_node.appendCompatible(["virtio,mmio"])
+        soc_node.append(disk_node)
+
+        # VirtIO MMIO rng node
+        rng = self.rng
+        rng_node = rng.generateBasicPioDeviceNode(
+            soc_state, "virtio_mmio", rng.pio_addr, rng.pio_size
+        )
+        rng_node.append(FdtPropertyWords("interrupts", [rng.interrupt_id]))
+        rng_node.append(
+            FdtPropertyWords("interrupt-parent", soc_state.phandle(plic))
+        )
+        rng_node.appendCompatible(["virtio,mmio"])
+        soc_node.append(rng_node)
+
+        root.append(soc_node)
+
+        fdt = Fdt()
+        fdt.add_rootnode(root)
+        fdt.writeDtsFile(os.path.join(outdir, "device.dts"))
+        fdt.writeDtbFile(os.path.join(outdir, "device.dtb"))
+
+ @overrides(KernelDiskWorkload)
+ def get_default_kernel_args(self) -> List[str]:
+ # Boots into a plain shell by default. To run the gem5 init script
+ # instead, replace the init argument with "init=/root/gem5_init.sh".
+ return ["console=ttyS0", "root={root_value}", "init=/bin/bash", "rw"]
+
+ # @overrides(AbstractBoard)
+ # def _connect_things(self) -> None:
+ # """Connects all the components to the board.
+
+ # The order of this board is always:
+
+ # 1. Connect the memory.
+ # 2. Connect the cache hierarchy.
+ # 3. Connect the processor.
+
+ # Developers may build upon this assumption when creating components.
+
+ # Notes
+ # -----
+
+ # * The processor is incorporated after the cache hierarchy due to a bug
+ # noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
+ # bug is fixed, this ordering must be maintained.
+ # * Once this function is called `_connect_things_called` *must* be set
+ # to `True`.
+ # """
+
+ # if self._connect_things_called:
+ # raise Exception(
+ # "The `_connect_things` function has already been called."
+ # )
+
+ # # Incorporate the memory into the motherboard.
+ # self.get_local_memory().incorporate_memory(self)
+ # self.get_remote_memory().incorporate_memory(self)
+
+ # # Incorporate the cache hierarchy for the motherboard.
+ # if self.get_cache_hierarchy():
+ # self.get_cache_hierarchy().incorporate_cache(self)
+
+ # # Incorporate the processor into the motherboard.
+ # self.get_processor().incorporate_processor(self)
+
+ # self._connect_things_called = True
+
+ # @overrides(AbstractBoard)
+ # def _post_instantiate(self):
+ # """Called to set up anything needed after m5.instantiate"""
+ # self.get_processor()._post_instantiate()
+ # if self.get_cache_hierarchy():
+ # self.get_cache_hierarchy()._post_instantiate()
+ # self.get_local_memory()._post_instantiate()
+ # self.get_remote_memory()._post_instantiate()
diff --git a/disaggregated_memory/boards/riscv_gem5_board.py b/disaggregated_memory/boards/riscv_gem5_board.py
new file mode 100644
index 0000000000..d304d00a5c
--- /dev/null
+++ b/disaggregated_memory/boards/riscv_gem5_board.py
@@ -0,0 +1,187 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from typing import List, Optional, Sequence, Tuple
+
+from memories.remote_memory import RemoteChanneledMemory
+from boards.riscv_dm_board import RiscvAbstractDMBoard
+
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.utils.override import overrides
+from gem5.resources.resource import AbstractResource
+from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload
+from gem5.components.boards.abstract_system_board import AbstractSystemBoard
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+
+from gem5.isas import ISA
+
+import m5
+
+from m5.objects import AddrRange, HiFive, Frequency, Port
+
+from m5.util.fdthelper import (
+ Fdt,
+ FdtNode,
+ FdtProperty,
+ FdtPropertyStrings,
+ FdtPropertyWords,
+ FdtState,
+)
+
+
+class RiscvGem5DMBoard(RiscvAbstractDMBoard):
+    """
+    A board capable of full system simulation for RISC-V, where both the
+    local and the remote memory are memory systems simulated by gem5.
+    At a high-level, this is based on the HiFive Unmatched board from SiFive.
+    This board assumes that you will be booting Linux.
+
+    **Limitations**
+    * Only works with classic caches
+    """
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        local_memory: AbstractMemorySystem,
+        remote_memory: AbstractMemorySystem,
+        cache_hierarchy: AbstractCacheHierarchy,
+        remote_memory_addr_range: Optional[AddrRange] = None,
+    ) -> None:
+        """
+        :param clk_freq: The clock frequency, passed on to the parent board.
+        :param processor: The processor. It must use the RISC-V ISA.
+        :param local_memory: The local memory system of the board.
+        :param remote_memory: The remote memory system of the board.
+        :param cache_hierarchy: The cache hierarchy of the board.
+        :param remote_memory_addr_range: The address range of the remote
+            memory. If it is not provided, the range is assumed to start at
+            0x80000000 + local memory size and to span the remote memory size.
+        :raises Exception: If the processor does not use the RISC-V ISA.
+        """
+        self._localMemory = local_memory
+        self._remoteMemory = remote_memory
+        if remote_memory_addr_range is None:
+            remote_memory_addr_range = AddrRange(
+                0x80000000 + self._localMemory.get_size(),
+                size=remote_memory.get_size(),
+            )
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            local_memory=local_memory,
+            remote_memory_addr_range=remote_memory_addr_range,
+            cache_hierarchy=cache_hierarchy,
+        )
+        self.local_memory = local_memory
+        self.remote_memory = remote_memory
+
+        if processor.get_isa() != ISA.RISCV:
+            raise Exception(
+                "The RISCVBoard requires a processor using the "
+                "RISCV ISA. Current processor ISA: "
+                f"'{processor.get_isa().name}'."
+            )
+
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the remote memory system connected to the board."""
+        return self._remoteMemory
+
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Get the memory ports of the remote memory system."""
+        return self.get_remote_memory().get_mem_ports()
+
+    @overrides(RiscvAbstractDMBoard)
+    def _incorporate_memory_range(self):
+        """Set the memory ranges of both memories, as both exist in gem5."""
+        self.get_local_memory().set_memory_range(self._local_mem_ranges)
+        self.get_remote_memory().set_memory_range(self._remote_mem_ranges)
+
+    @overrides(RiscvAbstractDMBoard)
+    def get_default_kernel_args(self) -> List[str]:
+        """Boot with /root/gem5-init.sh as the init process."""
+        return [
+            "console=ttyS0",
+            "root={root_value}",
+            "init=/root/gem5-init.sh",
+            "rw",
+        ]
+
+    @overrides(AbstractBoard)
+    def _connect_things(self) -> None:
+        """Connects all the components to the board.
+
+        The order is always: the local and the remote memory, the cache
+        hierarchy (together with the remote links), then the processor.
+        The processor is incorporated after the cache hierarchy due to a bug
+        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
+        bug is fixed, this ordering must be maintained.
+        :raises Exception: If this function has already been called.
+        """
+        if self._connect_things_called:
+            raise Exception(
+                "The `_connect_things` function has already been called."
+            )
+
+        # Incorporate the memory into the motherboard.
+        self.get_local_memory().incorporate_memory(self)
+        self.get_remote_memory().incorporate_memory(self)
+
+        # Incorporate the cache hierarchy for the motherboard.
+        if self.get_cache_hierarchy():
+            self.get_cache_hierarchy().incorporate_cache(self)
+            # need to connect the remote links to the board.
+            if self.get_cache_hierarchy().is_ruby():
+                # `fatal` is only reachable through the `m5` module here.
+                m5.fatal(
+                    "remote memory is only supported in classic caches at "
+                    "the moment!")
+            if isinstance(self.get_remote_memory(), RemoteChanneledMemory):
+                for ports in self.get_remote_memory().remote_links:
+                    self.get_cache_hierarchy().membus.mem_side_ports = \
+                        ports.cpu_side_ports
+
+        # Incorporate the processor into the motherboard.
+        self.get_processor().incorporate_processor(self)
+
+        # Once everything is connected, this flag *must* be set to `True`.
+        self._connect_things_called = True
+
+    @overrides(AbstractBoard)
+    def _post_instantiate(self):
+        """Called to set up anything needed after m5.instantiate"""
+        self.get_processor()._post_instantiate()
+        if self.get_cache_hierarchy():
+            self.get_cache_hierarchy()._post_instantiate()
+        self.get_local_memory()._post_instantiate()
+        self.get_remote_memory()._post_instantiate()
diff --git a/disaggregated_memory/boards/riscv_sst_board.py b/disaggregated_memory/boards/riscv_sst_board.py
new file mode 100644
index 0000000000..db3b6edae3
--- /dev/null
+++ b/disaggregated_memory/boards/riscv_sst_board.py
@@ -0,0 +1,244 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from typing import List, Optional, Sequence, Tuple
+
+from boards.riscv_dm_board import RiscvAbstractDMBoard
+
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.utils.override import overrides
+from gem5.resources.resource import AbstractResource
+from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload
+from gem5.components.boards.abstract_system_board import AbstractSystemBoard
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+
+from gem5.isas import ISA
+
+import m5
+
+from m5.objects import (
+ AddrRange,
+ HiFive,
+ Frequency,
+ Port,
+ OutgoingRequestBridge,
+ NoncoherentXBar,
+)
+
+from m5.util.fdthelper import (
+ Fdt,
+ FdtNode,
+ FdtProperty,
+ FdtPropertyStrings,
+ FdtPropertyWords,
+ FdtState,
+)
+
+
+class RiscvSstDMBoard(RiscvAbstractDMBoard):
+    """
+    A board capable of full system simulation for multiple RISC-V nodes.
+    At a high-level, this is based on the HiFive Unmatched board from SiFive.
+    This board assumes that you will be booting Linux. The remote memory of
+    this board is defined in SST's side.
+
+    **Limitations**
+    * Only works with classic caches
+    """
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        local_memory: AbstractMemorySystem,
+        remote_memory: "ExternalRemoteMemoryInterface",
+        cache_hierarchy: AbstractCacheHierarchy,
+    ) -> None:
+        """
+        :param clk_freq: The clock frequency, passed on to the parent board.
+        :param processor: The processor. It must use the RISC-V ISA.
+        :param local_memory: The local memory system of the board.
+        :param remote_memory: The interface to the remote memory.
+        :param cache_hierarchy: The cache hierarchy of the board.
+        :raises Exception: If the processor does not use the RISC-V ISA.
+        """
+        self._localMemory = local_memory
+        # Since the remote memory is defined in SST's side, we only need the
+        # size and the address range of this memory in gem5's side.
+        self._remoteMemory = remote_memory
+        remote_obj = self._remoteMemory.remote_memory
+        # The remote memory is set up with either a size or an address range.
+        # If the address range was not set, we assume that the range starts
+        # at 0x80000000 + local memory size and spans the remote memory size.
+        if not self._remoteMemory.get_set_using_addr_ranges():
+            remote_obj.physical_address_ranges = [
+                AddrRange(
+                    0x80000000 + self._localMemory.get_size(),
+                    size=remote_memory.get_size(),
+                )
+            ]
+        self._remoteMemorySize = self._remoteMemory.get_size()
+
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            local_memory=local_memory,
+            remote_memory_addr_range=remote_obj.physical_address_ranges[0],
+            cache_hierarchy=cache_hierarchy,
+        )
+        self.local_memory = local_memory
+        self.remote_memory = remote_obj
+
+        if processor.get_isa() != ISA.RISCV:
+            raise Exception(
+                "The RISCVBoard requires a processor using the "
+                "RISCV ISA. Current processor ISA: "
+                f"'{processor.get_isa().name}'."
+            )
+
+    @overrides(AbstractSystemBoard)
+    def get_memory(self) -> "AbstractMemory":
+        """Get the memory (RAM) connected to the board.
+
+        This board has a local and a remote memory instead of a single
+        memory; use `get_local_memory` or `get_remote_memory`.
+        :raises NotImplementedError: Always.
+        """
+        raise NotImplementedError
+
+    def get_local_memory(self) -> "AbstractMemory":
+        """Get the local memory system connected to the board."""
+        return self._localMemory
+
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the interface of the remote memory connected to the board."""
+        return self._remoteMemory
+
+    @overrides(AbstractSystemBoard)
+    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Get the memory ports of the local memory system."""
+        return self.get_local_memory().get_mem_ports()
+
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Get the address ranges and the port of the remote memory."""
+        # `get_remote_memory` returns the interface; the address ranges and
+        # the port live on its `remote_memory` object, as everywhere else in
+        # this board.
+        remote_obj = self.get_remote_memory().remote_memory
+        return [(remote_obj.physical_address_ranges, remote_obj.port)]
+
+    @overrides(RiscvAbstractDMBoard)
+    def _incorporate_memory_range(self):
+        """Set the memory range of the local memory."""
+        # Only the local memory range is set from gem5's side.
+        self.get_local_memory().set_memory_range(self._local_mem_ranges)
+
+    @overrides(RiscvAbstractDMBoard)
+    def _connect_things(self) -> None:
+        """Connects all the components to the board.
+
+        The order of this board is always:
+
+        1. Connect the local memory.
+        2. Connect the remote memory, through a crossbar if required.
+        3. Connect the cache hierarchy.
+        4. Connect the processor.
+
+        The processor is incorporated after the cache hierarchy due to a bug
+        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
+        bug is fixed, this ordering must be maintained.
+
+        :raises Exception: If this function has already been called.
+        """
+
+        if self._connect_things_called:
+            raise Exception(
+                "The `_connect_things` function has already been called."
+            )
+
+        # Incorporate the memory into the motherboard.
+        self.get_local_memory().incorporate_memory(self)
+        # we need to find whether there is any external latency. if yes, then
+        # add xbar to add this latency.
+        # NOTE(review): the membus of the cache hierarchy is used below
+        # before the hierarchy is checked against None, so a cache hierarchy
+        # with a membus looks mandatory for this board -- confirm.
+
+        if self.get_remote_memory().is_xbar_required():
+            self.remote_link = NoncoherentXBar(
+                frontend_latency=0,
+                forward_latency=0,
+                response_latency=self.get_remote_memory()._remote_memory_latency,
+                width=64,
+            )
+            # connect the remote memory port to the remote link
+            self.get_remote_memory().remote_memory.port = (
+                self.remote_link.mem_side_ports
+            )
+            # The remote link is then connected to the membus
+            self.get_cache_hierarchy().membus.mem_side_ports = (
+                self.remote_link.cpu_side_ports
+            )
+        else:
+            # Connect the external memory directly to the motherboard.
+            self.get_remote_memory().remote_memory.port = (
+                self.get_cache_hierarchy().membus.mem_side_ports
+            )
+
+        # Incorporate the cache hierarchy for the motherboard.
+        if self.get_cache_hierarchy():
+            self.get_cache_hierarchy().incorporate_cache(self)
+
+        # Incorporate the processor into the motherboard.
+        self.get_processor().incorporate_processor(self)
+
+        self._connect_things_called = True
+
+    @overrides(RiscvAbstractDMBoard)
+    def get_default_kernel_args(self) -> List[str]:
+        """Boot with /root/gem5-init.sh as the init process."""
+        return [
+            "console=ttyS0",
+            "root={root_value}",
+            "init=/root/gem5-init.sh",
+            "rw",
+        ]
+
+    @overrides(AbstractBoard)
+    def _post_instantiate(self):
+        """Called to set up anything needed after m5.instantiate"""
+        self.get_processor()._post_instantiate()
+        if self.get_cache_hierarchy():
+            self.get_cache_hierarchy()._post_instantiate()
+        # The remote memory is defined in SST's side, so only the local
+        # memory is post-instantiated here.
+        self.get_local_memory()._post_instantiate()
diff --git a/disaggregated_memory/boards/x86_dm_board.py b/disaggregated_memory/boards/x86_dm_board.py
new file mode 100644
index 0000000000..945ca12391
--- /dev/null
+++ b/disaggregated_memory/boards/x86_dm_board.py
@@ -0,0 +1,357 @@
+# Copyright (c) 2023-24 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Creating an x86 board that can simulate more than 3 GB memory.
+
+from m5.objects import (
+ AddrRange,
+ VoltageDomain,
+ SrcClockDomain,
+ Terminal,
+ VncServer,
+ IOXBar,
+ BadAddr,
+ Port,
+ Pc,
+ AddrRange,
+ X86FsLinux,
+ Addr,
+ X86SMBiosBiosInformation,
+ X86IntelMPProcessor,
+ X86IntelMPIOAPIC,
+ X86IntelMPBus,
+ X86IntelMPBusHierarchy,
+ X86IntelMPIOIntAssignment,
+ X86E820Entry,
+ Bridge,
+ IOXBar,
+ IdeDisk,
+ CowDiskImage,
+ RawDiskImage,
+ BaseXBar,
+ Port,
+ OutgoingRequestBridge,
+)
+
+import os
+import m5
+from abc import ABCMeta
+from gem5.components.boards.x86_board import X86Board
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+from gem5.utils.override import overrides
+
+from typing import List, Sequence, Tuple
+
+class X86AbstractDMBoard(X86Board):
+    """
+    A high-level X86 board that can model zNUMA-capable systems with a remote
+    memory. This board is extended from the X86Board of the gem5 standard
+    library. This board assumes that you will be booting Linux. This board can
+    be used to do disaggregated X86 system research while accelerating the
+    simulation using kvm.
+
+    The reason this board was created was to leverage the features X86 ISA has
+    over ARM and RISCV, e.g. memory hotplug and ACPI driver support in gem5.
+
+    This class never assigns `_remoteMemory`; the concrete boards (e.g.
+    `X86Gem5DMBoard` and `X86SstDMBoard`) assign it before calling this
+    constructor.
+
+    **Limitations**
+    * kvm is only supported in a gem5-only setup.
+    """
+    __metaclass__ = ABCMeta
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        cache_hierarchy: AbstractCacheHierarchy,
+        local_memory: AbstractMemorySystem,
+        remote_memory_addr_range: AddrRange,
+    ) -> None:
+        """Record both memories and forward the rest to `X86Board`.
+
+        :param clk_freq: Forwarded unchanged to `X86Board`.
+        :param processor: Forwarded unchanged to `X86Board`.
+        :param cache_hierarchy: Forwarded unchanged to `X86Board`.
+        :param local_memory: The local memory system; it is also handed to
+            `X86Board` as its `memory`.
+        :param remote_memory_addr_range: Address range of the remote memory.
+            The remote memory can be an interface or an external memory; this
+            abstract board only needs to know its address range.
+        """
+        # The structure of this board is similar to the RISCV DM board.
+        self._localMemory = local_memory
+        self._remoteMemoryAddrRange = remote_memory_addr_range
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            cache_hierarchy=cache_hierarchy,
+            memory=local_memory,
+        )
+        self.local_memory = local_memory
+
+    @overrides(X86Board)
+    def get_memory(self) -> "AbstractMemory":
+        """Disabled: this board has a local and a remote memory instead.
+
+        :raises NotImplementedError: always.
+        """
+        raise NotImplementedError
+
+    def get_local_memory(self) -> "AbstractMemory":
+        """Get the local memory (RAM) connected to the board.
+        :returns: The local memory system.
+        """
+        return self._localMemory
+
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the remote memory connected to the board.
+        :returns: The remote memory system.
+        """
+        # `_remoteMemory` is not assigned in this class; subclasses set it.
+        return self._remoteMemory
+
+    @overrides(X86Board)
+    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Return the memory ports of the local memory only."""
+        return self.get_local_memory().get_mem_ports()
+
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Return the remote memory's address ranges paired with its port."""
+        remote_memory = self.get_remote_memory()
+        return [(remote_memory.physical_address_ranges, remote_memory.port)]
+
+    @overrides(X86Board)
+    def _setup_memory_ranges(self):
+        """Assign fixed 1 GiB address ranges to the local and remote memory.
+
+        The local memory starts at 0x0 and the remote memory directly after it
+        at 0x40000000, which keeps all memory under 2 GiB. The ranges are
+        hard-coded: neither the memory sizes nor the
+        `remote_memory_addr_range` given to the constructor are consulted.
+        """
+        local_start, remote_start, slice_size = 0x0, 0x40000000, "1GiB"
+
+        self.get_local_memory().set_memory_range(
+            [AddrRange(start=local_start, size=slice_size)]
+        )
+        self.get_remote_memory().set_memory_range(
+            [AddrRange(start=remote_start, size=slice_size)]
+        )
+        # Both slices are visible to the board; `_setup_io_devices` reads
+        # this list when it builds the E820 table.
+        self.mem_ranges = [
+            AddrRange(start=local_start, size=slice_size),
+            AddrRange(start=remote_start, size=slice_size),
+        ]
+
+    @overrides(X86Board)
+    def get_default_kernel_args(self) -> List[str]:
+        """Return the default Linux kernel command-line arguments."""
+        return [
+            "earlyprintk=ttyS0",
+            "console=ttyS0",
+            "lpj=7999923",
+            "root=/dev/sda1",
+            # memmap=nn@ss: use the 1 GiB region starting at 0x0 as RAM.
+            "memmap=1G@0x0",
+        ]
+
+    @overrides(X86Board)
+    def _setup_io_devices(self):
+        """Sets up the x86 IO devices.
+
+        This connects the IO bus to the memory system, then fills in the
+        workload's SMBIOS table, Intel MP table and E820 memory map.
+
+        Note: This is mostly copy-paste from prior X86 FS setups. Some of it
+        may not be documented and there may be bugs.
+        """
+
+        # Constants similar to x86_traits.hh
+        IO_address_space_base = 0x8000000000000000
+        pci_config_address_space_base = 0xC000000000000000
+        interrupts_address_space_base = 0xA000000000000000
+        APIC_range_size = 1 << 12
+
+        # Setup memory system specific settings.
+        if self.get_cache_hierarchy().is_ruby():
+            # Ruby: the IO devices are attached together with the IDE DMA port.
+            self.pc.attachIO(self.get_io_bus(), [self.pc.south_bridge.ide.dma])
+        else:
+            # Classic caches: bridge the cache hierarchy to the IO bus for
+            # the address ranges listed below.
+            self.bridge = Bridge(delay="50ns")
+            self.bridge.mem_side_port = self.get_io_bus().cpu_side_ports
+            self.bridge.cpu_side_port = (
+                self.get_cache_hierarchy().get_mem_side_port()
+            )
+            self.bridge.ranges = [
+                AddrRange(0xC0000000, 0xFFFF0000),
+                AddrRange(
+                    IO_address_space_base, interrupts_address_space_base - 1
+                ),
+                AddrRange(pci_config_address_space_base, Addr.max),
+            ]
+
+            # A second bridge goes the other way, from the IO bus to the
+            # cache hierarchy, and covers one APIC range per core.
+            self.apicbridge = Bridge(delay="50ns")
+            self.apicbridge.cpu_side_port = self.get_io_bus().mem_side_ports
+            self.apicbridge.mem_side_port = (
+                self.get_cache_hierarchy().get_cpu_side_port()
+            )
+            self.apicbridge.ranges = [
+                AddrRange(
+                    interrupts_address_space_base,
+                    interrupts_address_space_base
+                    + self.get_processor().get_num_cores() * APIC_range_size
+                    - 1,
+                )
+            ]
+            self.pc.attachIO(self.get_io_bus())
+
+        # Add in a Bios information structure.
+        self.workload.smbios_table.structures = [X86SMBiosBiosInformation()]
+
+        # Set up the Intel MP table
+        base_entries = []
+        ext_entries = []
+        # One processor entry per core; core 0 is the bootstrap processor.
+        for i in range(self.get_processor().get_num_cores()):
+            bp = X86IntelMPProcessor(
+                local_apic_id=i,
+                local_apic_version=0x14,
+                enable=True,
+                bootstrap=(i == 0),
+            )
+            base_entries.append(bp)
+
+        # The IO APIC takes the first id after the cores' local APIC ids.
+        io_apic = X86IntelMPIOAPIC(
+            id=self.get_processor().get_num_cores(),
+            version=0x11,
+            enable=True,
+            address=0xFEC00000,
+        )
+
+        self.pc.south_bridge.io_apic.apic_id = io_apic.id
+        base_entries.append(io_apic)
+        # Bus 0 is PCI; bus 1 is ISA and is declared as a child of bus 0.
+        pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI ")
+        base_entries.append(pci_bus)
+        isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA ")
+        base_entries.append(isa_bus)
+        connect_busses = X86IntelMPBusHierarchy(
+            bus_id=1, subtractive_decode=True, parent_bus=0
+        )
+        ext_entries.append(connect_busses)
+
+        # Route the bus 0 interrupt source `0 + (4 << 2)` (named after INTA
+        # of PCI device 4) to pin 16 of the IO APIC.
+        pci_dev4_inta = X86IntelMPIOIntAssignment(
+            interrupt_type="INT",
+            polarity="ConformPolarity",
+            trigger="ConformTrigger",
+            source_bus_id=0,
+            source_bus_irq=0 + (4 << 2),
+            dest_io_apic_id=io_apic.id,
+            dest_io_apic_intin=16,
+        )
+
+        base_entries.append(pci_dev4_inta)
+
+        def assignISAInt(irq, apicPin):
+            """Add ExtInt (pin 0) and INT (`apicPin`) entries for `irq`."""
+
+            assign_8259_to_apic = X86IntelMPIOIntAssignment(
+                interrupt_type="ExtInt",
+                polarity="ConformPolarity",
+                trigger="ConformTrigger",
+                source_bus_id=1,
+                source_bus_irq=irq,
+                dest_io_apic_id=io_apic.id,
+                dest_io_apic_intin=0,
+            )
+            base_entries.append(assign_8259_to_apic)
+
+            assign_to_apic = X86IntelMPIOIntAssignment(
+                interrupt_type="INT",
+                polarity="ConformPolarity",
+                trigger="ConformTrigger",
+                source_bus_id=1,
+                source_bus_irq=irq,
+                dest_io_apic_id=io_apic.id,
+                dest_io_apic_intin=apicPin,
+            )
+            base_entries.append(assign_to_apic)
+
+        # IRQ 0 goes to pin 2 and IRQ 1 to pin 1; IRQ 2 is skipped and IRQs
+        # 3 to 14 map to the pin with the same number.
+        assignISAInt(0, 2)
+        assignISAInt(1, 1)
+
+        for i in range(3, 15):
+            assignISAInt(i, i)
+
+        self.workload.intel_mp_table.base_entries = base_entries
+        self.workload.intel_mp_table.ext_entries = ext_entries
+
+        # Build the E820 memory map. `_setup_memory_ranges` stores the local
+        # range first and the remote range second.
+        local_range = self.mem_ranges[0]
+        remote_range = self.mem_ranges[1]
+
+        entries = [
+            # The first 639kB are available; the rest of the first megabyte
+            # is reserved.
+            X86E820Entry(addr=0, size="639kB", range_type=1),
+            X86E820Entry(addr=0x9FC00, size="385kB", range_type=2),
+            # Mark the rest of the local memory as available
+            X86E820Entry(
+                addr=0x100000,
+                size=f"{local_range.size() - 0x100000:d}B",
+                range_type=1,
+            ),
+            # Describe the whole remote range with range_type=5 instead of
+            # "available". NOTE(review): presumably so that the kernel does
+            # not use remote memory at boot -- confirm the intended type.
+            X86E820Entry(
+                addr=int(remote_range.start),
+                size=f"{remote_range.size():d}B",
+                range_type=5,
+            ),
+            # Reserve the last 64kB of the 32-bit address space for m5ops
+            X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2),
+        ]
+        self.workload.e820_table.entries = entries
diff --git a/disaggregated_memory/boards/x86_gem5_board.py b/disaggregated_memory/boards/x86_gem5_board.py
new file mode 100644
index 0000000000..c9fa77ea80
--- /dev/null
+++ b/disaggregated_memory/boards/x86_gem5_board.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from m5.objects import (
+ AddrRange,
+ VoltageDomain,
+ SrcClockDomain,
+ Terminal,
+ VncServer,
+ IOXBar,
+ BadAddr,
+ Port,
+ Pc,
+ AddrRange,
+ X86FsLinux,
+ Addr,
+ X86SMBiosBiosInformation,
+ X86IntelMPProcessor,
+ X86IntelMPIOAPIC,
+ X86IntelMPBus,
+ X86IntelMPBusHierarchy,
+ X86IntelMPIOIntAssignment,
+ X86E820Entry,
+ Bridge,
+ IOXBar,
+ IdeDisk,
+ CowDiskImage,
+ RawDiskImage,
+ BaseXBar,
+ Port,
+ OutgoingRequestBridge,
+)
+
+import os
+import m5
+
+from abc import ABCMeta
+
+from gem5.utils.override import overrides
+
+from typing import List, Sequence, Tuple
+
+from memories.remote_memory import RemoteChanneledMemory
+from boards.x86_dm_board import X86AbstractDMBoard
+
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+
+class X86Gem5DMBoard(X86AbstractDMBoard):
+    """
+    An X86 disaggregated-memory board whose remote memory is a memory system
+    that is incorporated into the board like the local memory.
+
+    Only classic cache hierarchies are supported; `_connect_things` calls
+    `fatal` for a Ruby cache hierarchy.
+    """
+
+    __metaclass__ = ABCMeta
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        local_memory: AbstractMemorySystem,
+        remote_memory: AbstractMemorySystem,
+        cache_hierarchy: AbstractCacheHierarchy,
+        remote_memory_addr_range: AddrRange = None
+    ) -> None:
+        """
+        :param clk_freq: Forwarded unchanged to `X86AbstractDMBoard`.
+        :param processor: Forwarded unchanged to `X86AbstractDMBoard`.
+        :param local_memory: The memory system local to this board.
+        :param remote_memory: The memory system that models the remote memory.
+        :param cache_hierarchy: Forwarded unchanged to `X86AbstractDMBoard`.
+        :param remote_memory_addr_range: Address range of the remote memory.
+            If it is not provided, the range is assumed to start at
+            0x80000000 + the local memory size and to span the remote
+            memory's own size.
+        """
+        self._localMemory = local_memory
+        self._remoteMemory = remote_memory
+        # No explicit range: derive the default one from the memory sizes.
+        if remote_memory_addr_range is None:
+            remote_memory_addr_range = AddrRange(
+                0x80000000 + self._localMemory.get_size(),
+                size=remote_memory.get_size(),
+            )
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            local_memory=local_memory,
+            remote_memory_addr_range=remote_memory_addr_range,
+            cache_hierarchy=cache_hierarchy,
+        )
+        self.local_memory = local_memory
+        self.remote_memory = remote_memory
+
+    @overrides(X86AbstractDMBoard)
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the remote memory connected to the board.
+        :returns: The remote memory system.
+        """
+        return self._remoteMemory
+
+    @overrides(X86AbstractDMBoard)
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Return the memory ports reported by the remote memory system."""
+        return self.get_remote_memory().get_mem_ports()
+
+    @overrides(X86AbstractDMBoard)
+    def _connect_things(self) -> None:
+        """Connects all the components to the board.
+
+        The order of this board is always:
+
+        1. Connect the local and the remote memory.
+        2. Connect the cache hierarchy.
+        3. Connect the processor.
+
+        Developers may build upon this assumption when creating components.
+
+        Notes
+        -----
+
+        * The processor is incorporated after the cache hierarchy due to a bug
+        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
+        bug is fixed, this ordering must be maintained.
+        * Once this function is called `_connect_things_called` *must* be set
+        to `True`.
+
+        :raises Exception: if this function has already been called.
+        """
+        # `fatal` is not imported at the top of this file, so import it here.
+        from m5.util import fatal
+
+        if self._connect_things_called:
+            raise Exception(
+                "The `_connect_things` function has already been called."
+            )
+
+        # Incorporate the memory into the motherboard.
+        self.get_local_memory().incorporate_memory(self)
+        self.get_remote_memory().incorporate_memory(self)
+
+        # Incorporate the cache hierarchy for the motherboard.
+        cache_hierarchy = self.get_cache_hierarchy()
+        if cache_hierarchy:
+            cache_hierarchy.incorporate_cache(self)
+            if cache_hierarchy.is_ruby():
+                fatal(
+                    "remote memory is only supported in classic caches at "
+                    "the moment!"
+                )
+            # need to connect the remote links to the board.
+            remote_memory = self.get_remote_memory()
+            if isinstance(remote_memory, RemoteChanneledMemory):
+                for link in remote_memory.remote_links:
+                    cache_hierarchy.membus.mem_side_ports = link.cpu_side_ports
+
+        # Incorporate the processor into the motherboard.
+        self.get_processor().incorporate_processor(self)
+
+        self._connect_things_called = True
+
+    @overrides(X86AbstractDMBoard)
+    def _post_instantiate(self):
+        """Called to set up anything needed after m5.instantiate"""
+        self.get_processor()._post_instantiate()
+        if self.get_cache_hierarchy():
+            self.get_cache_hierarchy()._post_instantiate()
+        self.get_local_memory()._post_instantiate()
+        self.get_remote_memory()._post_instantiate()
diff --git a/disaggregated_memory/boards/x86_sst_board.py b/disaggregated_memory/boards/x86_sst_board.py
new file mode 100644
index 0000000000..a32275ad24
--- /dev/null
+++ b/disaggregated_memory/boards/x86_sst_board.py
@@ -0,0 +1,235 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from m5.objects import (
+ AddrRange,
+ VoltageDomain,
+ SrcClockDomain,
+ Terminal,
+ VncServer,
+ IOXBar,
+ BadAddr,
+ Port,
+ Pc,
+ AddrRange,
+ X86FsLinux,
+ Addr,
+ X86SMBiosBiosInformation,
+ X86IntelMPProcessor,
+ X86IntelMPIOAPIC,
+ X86IntelMPBus,
+ X86IntelMPBusHierarchy,
+ X86IntelMPIOIntAssignment,
+ X86E820Entry,
+ Bridge,
+ IOXBar,
+ IdeDisk,
+ CowDiskImage,
+ RawDiskImage,
+ BaseXBar,
+ Port,
+ NoncoherentXBar,
+ OutgoingRequestBridge,
+)
+
+import os
+import m5
+
+from abc import ABCMeta
+
+from gem5.utils.override import overrides
+
+from typing import List, Sequence, Tuple
+
+from memories.remote_memory import RemoteChanneledMemory
+from boards.x86_dm_board import X86AbstractDMBoard
+
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+
+class X86SstDMBoard(X86AbstractDMBoard):
+    """
+    An X86 disaggregated-memory board whose remote memory is defined on
+    SST's side. gem5 only sees an `ExternalRemoteMemoryInterface`, whose
+    `remote_memory` object is connected to the memory bus of the cache
+    hierarchy.
+
+    Only classic cache hierarchies are supported; `_connect_things` calls
+    `fatal` for a Ruby cache hierarchy.
+    """
+
+    __metaclass__ = ABCMeta
+
+    def __init__(
+        self,
+        clk_freq: str,
+        processor: AbstractProcessor,
+        local_memory: AbstractMemorySystem,
+        remote_memory: "ExternalRemoteMemoryInterface",
+        cache_hierarchy: AbstractCacheHierarchy,
+        remote_memory_addr_range: AddrRange = None
+    ) -> None:
+        """
+        :param clk_freq: Forwarded unchanged to `X86AbstractDMBoard`.
+        :param processor: Forwarded unchanged to `X86AbstractDMBoard`.
+        :param local_memory: The memory system local to this board.
+        :param remote_memory: Interface to the remote memory. Since the remote
+            memory is defined in SST's side, only its size or address range
+            is needed while setting things up from gem5's side.
+        :param cache_hierarchy: Forwarded unchanged to `X86AbstractDMBoard`.
+        :param remote_memory_addr_range: Forwarded unchanged to
+            `X86AbstractDMBoard`; it is not used to configure `remote_memory`.
+        """
+        self._localMemory = local_memory
+        self._remoteMemory = remote_memory
+        # The remote memory is either setup with a size or an address range.
+        # If the address range was not set, the range is assumed to start at
+        # 0x80000000 + local_memory_size and to span the remote memory's size.
+        if not self._remoteMemory.get_set_using_addr_ranges():
+            self._remoteMemory.remote_memory.physical_address_ranges = [
+                AddrRange(
+                    0x80000000 + self._localMemory.get_size(),
+                    size=remote_memory.get_size(),
+                )
+            ]
+        # Cache the size reported by the remote memory interface.
+        self._remoteMemorySize = self._remoteMemory.get_size()
+        super().__init__(
+            clk_freq=clk_freq,
+            processor=processor,
+            local_memory=local_memory,
+            remote_memory_addr_range=remote_memory_addr_range,
+            cache_hierarchy=cache_hierarchy,
+        )
+        self.local_memory = local_memory
+        self.remote_memory = self._remoteMemory.remote_memory
+
+    @overrides(X86AbstractDMBoard)
+    def get_remote_memory(self) -> "AbstractMemory":
+        """Get the remote memory connected to the board.
+        :returns: The remote memory interface passed to the constructor.
+        """
+        return self._remoteMemory
+
+    @overrides(X86AbstractDMBoard)
+    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        """Return the remote memory's address ranges paired with its port."""
+        # The ranges and the port are read from the interface's
+        # `remote_memory` object, which is where `__init__` and
+        # `_connect_things` access them.
+        external_memory = self.get_remote_memory().remote_memory
+        return [
+            (external_memory.physical_address_ranges, external_memory.port)
+        ]
+
+    @overrides(X86AbstractDMBoard)
+    def _connect_things(self) -> None:
+        """Connects all the components to the board.
+
+        The order of this board is always:
+
+        1. Connect the local memory and the link to the remote memory.
+        2. Connect the cache hierarchy.
+        3. Connect the processor.
+
+        Developers may build upon this assumption when creating components.
+
+        Notes
+        -----
+
+        * The processor is incorporated after the cache hierarchy due to a bug
+        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
+        bug is fixed, this ordering must be maintained.
+        * Once this function is called `_connect_things_called` *must* be set
+        to `True`.
+
+        :raises Exception: if this function has already been called.
+        """
+        # `fatal` is not imported at the top of this file, so import it here.
+        from m5.util import fatal
+
+        if self._connect_things_called:
+            raise Exception(
+                "The `_connect_things` function has already been called."
+            )
+
+        # Incorporate the memory into the motherboard.
+        self.get_local_memory().incorporate_memory(self)
+
+        remote_memory = self.get_remote_memory()
+        # The membus is used before the cache hierarchy is incorporated below.
+        membus = self.get_cache_hierarchy().membus
+        # we need to find whether there is any external latency. if yes, then
+        # add xbar to add this latency.
+        if remote_memory.is_xbar_required():
+            self.remote_link = NoncoherentXBar(
+                frontend_latency=0,
+                forward_latency=0,
+                response_latency=remote_memory._remote_memory_latency,
+                width=64,
+            )
+            # connect the remote memory port to the remote link
+            remote_memory.remote_memory.port = self.remote_link.mem_side_ports
+            # The remote link is then connected to the membus
+            membus.mem_side_ports = self.remote_link.cpu_side_ports
+        else:
+            # Connect the external memory directly to the motherboard.
+            remote_memory.remote_memory.port = membus.mem_side_ports
+
+        # Incorporate the cache hierarchy for the motherboard.
+        cache_hierarchy = self.get_cache_hierarchy()
+        if cache_hierarchy:
+            cache_hierarchy.incorporate_cache(self)
+            if cache_hierarchy.is_ruby():
+                fatal(
+                    "remote memory is only supported in classic caches at "
+                    "the moment!"
+                )
+            # need to connect the remote links to the board.
+            if isinstance(remote_memory, RemoteChanneledMemory):
+                for link in remote_memory.remote_links:
+                    cache_hierarchy.membus.mem_side_ports = link.cpu_side_ports
+
+        # Incorporate the processor into the motherboard.
+        self.get_processor().incorporate_processor(self)
+
+        self._connect_things_called = True
+
+    @overrides(X86AbstractDMBoard)
+    def _post_instantiate(self):
+        """Called to set up anything needed after m5.instantiate
+
+        The processor, the cache hierarchy (if there is one) and the local
+        memory are notified; the remote memory interface is not.
+        """
+        self.get_processor()._post_instantiate()
+        if self.get_cache_hierarchy():
+            self.get_cache_hierarchy()._post_instantiate()
+        self.get_local_memory()._post_instantiate()
diff --git a/disaggregated_memory/cachehierarchies/dm_caches.py b/disaggregated_memory/cachehierarchies/dm_caches.py
new file mode 100644
index 0000000000..40fc96ca1d
--- /dev/null
+++ b/disaggregated_memory/cachehierarchies/dm_caches.py
@@ -0,0 +1,223 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+ PrivateL1PrivateL2CacheHierarchy,
+)
+from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache
+from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache
+from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache
+from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.isas import ISA
+from m5.objects import L2XBar
+
+from gem5.utils.override import overrides
+
+from cachehierarchies.private_l1_private_l2_shared_l3_cache_hierarchy import (
+ PrivateL1PrivateL2SharedL3CacheHierarchy)
+
+class ClassicPrivateL1PrivateL2SharedL3DMCache(
+    PrivateL1PrivateL2SharedL3CacheHierarchy
+):
+    """Classic cache hierarchy with private L1/L2 caches and a shared L3.
+
+    Besides building the caches, `incorporate_cache` attaches the memory
+    controllers of both the local and the remote memory to the membus.
+    """
+
+    def __init__(
+        self,
+        l1d_size: str,
+        l1i_size: str,
+        l2_size: str,
+        l3_size: str,
+        l3_assoc: int = 16
+    ):
+        """Hand every cache parameter to the parent hierarchy by keyword."""
+        super().__init__(
+            l1d_size=l1d_size,
+            l1i_size=l1i_size,
+            l2_size=l2_size,
+            l3_size=l3_size,
+            l3_assoc=l3_assoc
+        )
+
+    @overrides(PrivateL1PrivateL2SharedL3CacheHierarchy)
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+        """Create the caches and connect them between the cores and memory.
+
+        :param board: The board providing the processor and both memories.
+        """
+        membus = self.membus
+        processor = board.get_processor()
+        core_ids = range(processor.get_num_cores())
+
+        # Set up the system port for functional access from the simulator.
+        board.connect_system_port(membus.cpu_side_ports)
+
+        # The local memory controllers are attached before the remote ones.
+        for memory in (board.get_local_memory(), board.get_remote_memory()):
+            for controller in memory.get_memory_controllers():
+                controller.port = membus.mem_side_ports
+
+        self.l1icaches = [L1ICache(size=self._l1i_size) for _ in core_ids]
+        self.l1dcaches = [L1DCache(size=self._l1d_size) for _ in core_ids]
+        self.l2buses = [L2XBar() for _ in core_ids]
+        self.l2caches = [L2Cache(size=self._l2_size) for _ in core_ids]
+        # The shared L3 is an `L2Cache` built with the `_l3_*` settings.
+        self.l3cache = L2Cache(
+            size=self._l3_size,
+            assoc=self._l3_assoc,
+            tag_latency=self._l3_tag_latency,
+            data_latency=self._l3_data_latency,
+            response_latency=self._l3_response_latency,
+            mshrs=self._l3_mshrs,
+            tgts_per_mshr=self._l3_tgts_per_mshr,
+        )
+        # There is only one l3 bus, which connects l3 to the membus
+        self.l3bus = L2XBar()
+        # ITLB and DTLB page walk caches
+        self.iptw_caches = [MMUCache(size="8KiB") for _ in core_ids]
+        self.dptw_caches = [MMUCache(size="8KiB") for _ in core_ids]
+
+        if board.has_coherent_io():
+            self._setup_io_cache(board)
+
+        for i, cpu in enumerate(processor.get_cores()):
+            icache, dcache = self.l1icaches[i], self.l1dcaches[i]
+            iptw_cache, dptw_cache = self.iptw_caches[i], self.dptw_caches[i]
+            l2bus, l2cache = self.l2buses[i], self.l2caches[i]
+
+            cpu.connect_icache(icache.cpu_side)
+            cpu.connect_dcache(dcache.cpu_side)
+
+            # The L2 bus ports are taken in this order: L1I, L1D, walkers.
+            for upstream in (icache, dcache, iptw_cache, dptw_cache):
+                upstream.mem_side = l2bus.cpu_side_ports
+
+            l2bus.mem_side_ports = l2cache.cpu_side
+            l2cache.mem_side = self.l3bus.cpu_side_ports
+
+            cpu.connect_walker_ports(iptw_cache.cpu_side, dptw_cache.cpu_side)
+
+            if processor.get_isa() == ISA.X86:
+                # X86 interrupts get a request and a response port on membus.
+                cpu.connect_interrupt(
+                    membus.mem_side_ports, membus.cpu_side_ports
+                )
+            else:
+                cpu.connect_interrupt()
+
+        self.l3bus.mem_side_ports = self.l3cache.cpu_side
+        membus.cpu_side_ports = self.l3cache.mem_side
+
+
+class ClassicPrivateL1PrivateL2DMCache(PrivateL1PrivateL2CacheHierarchy):
+    """Classic cache hierarchy with private L1 and L2 caches per core.
+
+    Besides building the caches, `incorporate_cache` attaches the memory
+    controllers of both the local and the remote memory to the membus.
+    """
+
+    def __init__(
+        self,
+        l1d_size: str,
+        l1i_size: str,
+        l2_size: str,
+    ) -> None:
+        """Create the hierarchy from the sizes of its three cache levels.
+
+        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
+        :type l1d_size: str
+        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
+        :type l1i_size: str
+        :param l2_size: The size of the L2 Cache (e.g., "256kB").
+        :type l2_size: str
+        """
+        # The sizes are passed by keyword so that the data and instruction
+        # cache sizes cannot be swapped, whatever positional order the
+        # parent constructor uses.
+        super().__init__(
+            l1d_size=l1d_size,
+            l1i_size=l1i_size,
+            l2_size=l2_size,
+        )
+
+    @overrides(PrivateL1PrivateL2CacheHierarchy)
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+        """Create the caches and connect them between the cores and memory.
+
+        :param board: The board providing the processor and both memories.
+        """
+        membus = self.membus
+        processor = board.get_processor()
+        core_ids = range(processor.get_num_cores())
+
+        # Set up the system port for functional access from the simulator.
+        board.connect_system_port(membus.cpu_side_ports)
+
+        # The local memory controllers are attached before the remote ones.
+        for memory in (board.get_local_memory(), board.get_remote_memory()):
+            for controller in memory.get_memory_controllers():
+                controller.port = membus.mem_side_ports
+
+        self.l1icaches = [L1ICache(size=self._l1i_size) for _ in core_ids]
+        self.l1dcaches = [L1DCache(size=self._l1d_size) for _ in core_ids]
+        self.l2buses = [L2XBar() for _ in core_ids]
+        self.l2caches = [L2Cache(size=self._l2_size) for _ in core_ids]
+        # ITLB and DTLB page walk caches
+        self.iptw_caches = [MMUCache(size="8KiB") for _ in core_ids]
+        self.dptw_caches = [MMUCache(size="8KiB") for _ in core_ids]
+
+        if board.has_coherent_io():
+            self._setup_io_cache(board)
+
+        for i, cpu in enumerate(processor.get_cores()):
+            icache, dcache = self.l1icaches[i], self.l1dcaches[i]
+            iptw_cache, dptw_cache = self.iptw_caches[i], self.dptw_caches[i]
+            l2bus, l2cache = self.l2buses[i], self.l2caches[i]
+
+            cpu.connect_icache(icache.cpu_side)
+            cpu.connect_dcache(dcache.cpu_side)
+
+            # The L2 bus ports are taken in this order: L1I, L1D, walkers.
+            for upstream in (icache, dcache, iptw_cache, dptw_cache):
+                upstream.mem_side = l2bus.cpu_side_ports
+
+            l2bus.mem_side_ports = l2cache.cpu_side
+            # Each private L2 is connected to the membus directly.
+            membus.cpu_side_ports = l2cache.mem_side
+
+            cpu.connect_walker_ports(iptw_cache.cpu_side, dptw_cache.cpu_side)
+
+            if processor.get_isa() == ISA.X86:
+                # X86 interrupts get a request and a response port on membus.
+                cpu.connect_interrupt(
+                    membus.mem_side_ports, membus.cpu_side_ports
+                )
+            else:
+                cpu.connect_interrupt()
diff --git a/disaggregated_memory/cachehierarchies/dm_caches_sst.py b/disaggregated_memory/cachehierarchies/dm_caches_sst.py
new file mode 100644
index 0000000000..00edf5d69e
--- /dev/null
+++ b/disaggregated_memory/cachehierarchies/dm_caches_sst.py
@@ -0,0 +1,218 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+ PrivateL1PrivateL2CacheHierarchy,
+)
+from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache
+from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache
+from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache
+from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.isas import ISA
+from m5.objects import L2XBar
+
+from gem5.utils.override import overrides
+
+from cachehierarchies.private_l1_private_l2_shared_l3_cache_hierarchy import (
+ PrivateL1PrivateL2SharedL3CacheHierarchy)
+
+class ClassicPrivateL1PrivateL2SharedL3SstDMCache(
+    PrivateL1PrivateL2SharedL3CacheHierarchy):
+    def __init__(
+        self,
+        l1d_size: str,
+        l1i_size: str,
+        l2_size: str,
+        l3_size: str,
+        l3_assoc: int = 16
+    ) -> None:
+        super().__init__(
+            l1d_size=l1d_size,
+            l1i_size=l1i_size,
+            l2_size=l2_size,
+            l3_size=l3_size,
+            l3_assoc=l3_assoc
+        )
+
+    @overrides(PrivateL1PrivateL2SharedL3CacheHierarchy)
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+        """Build the L1/L2/L3 caches and attach local memory to the membus."""
+        # Set up the system port for functional access from the simulator.
+        board.connect_system_port(self.membus.cpu_side_ports)
+        # Only the board's local memory controllers are attached here.
+        for cntr in board.get_local_memory().get_memory_controllers():
+            cntr.port = self.membus.mem_side_ports
+
+        self.l1icaches = [
+            L1ICache(size=self._l1i_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l1dcaches = [
+            L1DCache(size=self._l1d_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l2buses = [
+            L2XBar() for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l2caches = [
+            L2Cache(size=self._l2_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l3cache = L2Cache(size=self._l3_size,
+                               assoc=self._l3_assoc,
+                               tag_latency=self._l3_tag_latency,
+                               data_latency=self._l3_data_latency,
+                               response_latency=self._l3_response_latency,
+                               mshrs=self._l3_mshrs,
+                               tgts_per_mshr=self._l3_tgts_per_mshr)
+        # A single L3 bus joins every private L2 to the shared L3 cache.
+        self.l3bus = L2XBar()
+        # ITLB Page walk caches
+        self.iptw_caches = [
+            MMUCache(size="8KiB")
+            for _ in range(board.get_processor().get_num_cores())
+        ]
+        # DTLB Page walk caches
+        self.dptw_caches = [
+            MMUCache(size="8KiB")
+            for _ in range(board.get_processor().get_num_cores())
+        ]
+
+        if board.has_coherent_io():
+            self._setup_io_cache(board)
+
+        for i, cpu in enumerate(board.get_processor().get_cores()):
+            # Per-core wiring: L1s and walk caches -> L2 bus -> L2 -> L3 bus.
+            cpu.connect_icache(self.l1icaches[i].cpu_side)
+            cpu.connect_dcache(self.l1dcaches[i].cpu_side)
+
+            self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
+
+            self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side
+
+            self.l2caches[i].mem_side = self.l3bus.cpu_side_ports
+
+            cpu.connect_walker_ports(
+                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
+            )
+            # X86 cores are given membus ports for interrupts; other ISAs none.
+            if board.get_processor().get_isa() == ISA.X86:
+                int_req_port = self.membus.mem_side_ports
+                int_resp_port = self.membus.cpu_side_ports
+                cpu.connect_interrupt(int_req_port, int_resp_port)
+            else:
+                cpu.connect_interrupt()
+        self.l3bus.mem_side_ports = self.l3cache.cpu_side
+        self.membus.cpu_side_ports = self.l3cache.mem_side
+
+class ClassicPrivateL1PrivateL2SstDMCache(PrivateL1PrivateL2CacheHierarchy):
+    def __init__(
+        self,
+        l1d_size: str,
+        l1i_size: str,
+        l2_size: str,
+    ) -> None:
+        """We need a specific version of DM caches for the external memory,
+        which does not connect the remote memory controller ports directly.
+
+        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
+        :type l1d_size: str
+        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
+        :type l1i_size: str
+        :param l2_size: The size of the L2 Cache (e.g., "256kB").
+        :type l2_size: str
+        """
+        # Pass sizes by keyword so the L1D and L1I sizes cannot be swapped.
+        super().__init__(
+            l1d_size=l1d_size, l1i_size=l1i_size, l2_size=l2_size
+        )
+
+    @overrides(PrivateL1PrivateL2CacheHierarchy)
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+        """Build the L1/L2 caches and attach only local memory to the membus."""
+        # Set up the system port for functional access from the simulator.
+        board.connect_system_port(self.membus.cpu_side_ports)
+        # Only the board's local memory controllers are attached here.
+        for cntr in board.get_local_memory().get_memory_controllers():
+            cntr.port = self.membus.mem_side_ports
+
+        self.l1icaches = [
+            L1ICache(size=self._l1i_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l1dcaches = [
+            L1DCache(size=self._l1d_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l2buses = [
+            L2XBar() for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l2caches = [
+            L2Cache(size=self._l2_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        # ITLB Page walk caches
+        self.iptw_caches = [
+            MMUCache(size="8KiB")
+            for _ in range(board.get_processor().get_num_cores())
+        ]
+        # DTLB Page walk caches
+        self.dptw_caches = [
+            MMUCache(size="8KiB")
+            for _ in range(board.get_processor().get_num_cores())
+        ]
+
+        if board.has_coherent_io():
+            self._setup_io_cache(board)
+
+        for i, cpu in enumerate(board.get_processor().get_cores()):
+
+            cpu.connect_icache(self.l1icaches[i].cpu_side)
+            cpu.connect_dcache(self.l1dcaches[i].cpu_side)
+
+            self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
+
+            self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side
+
+            self.membus.cpu_side_ports = self.l2caches[i].mem_side
+
+            cpu.connect_walker_ports(
+                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
+            )
+
+            if board.get_processor().get_isa() == ISA.X86:
+                int_req_port = self.membus.mem_side_ports
+                int_resp_port = self.membus.cpu_side_ports
+                cpu.connect_interrupt(int_req_port, int_resp_port)
+            else:
+                cpu.connect_interrupt()
diff --git a/disaggregated_memory/cachehierarchies/mi_example_dm_caches.py b/disaggregated_memory/cachehierarchies/mi_example_dm_caches.py
new file mode 100644
index 0000000000..d07e058dee
--- /dev/null
+++ b/disaggregated_memory/cachehierarchies/mi_example_dm_caches.py
@@ -0,0 +1,191 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import (
+ MIExampleCacheHierarchy,
+)
+from gem5.components.cachehierarchies.ruby.caches.mi_example.l1_cache import (
+ L1Cache,
+)
+from gem5.components.cachehierarchies.ruby.caches.mi_example.dma_controller import (
+ DMAController,
+)
+from gem5.components.cachehierarchies.ruby.caches.mi_example.directory import (
+ Directory,
+)
+from gem5.components.cachehierarchies.ruby.topologies.simple_pt2pt import (
+ SimplePt2Pt,
+)
+
+# from gem5.components.cachehierarchies.ruby.abstract_ruby_cache_hierarchy import AbstractRubyCacheHierarchy
+from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
+ AbstractCacheHierarchy,
+)
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.coherence_protocol import CoherenceProtocol
+from gem5.isas import ISA
+from gem5.utils.override import overrides
+from gem5.utils.requires import requires
+from m5.objects import RubySystem, RubySequencer, DMASequencer, RubyPortProxy
+
+# from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache
+# from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache
+# from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache
+# from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache
+# from gem5.components.boards.abstract_board import AbstractBoard
+# from gem5.isas import ISA
+# from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port
+
+# from gem5.utils.override import overrides
+
+
+class MIExampleDMCache(MIExampleCacheHierarchy):
+    def __init__(self, size: str, assoc: str):
+        """
+        :param size: The size of each cache in the hierarchy.
+        :param assoc: The associativity of each cache.
+        """
+        super().__init__(size, assoc)
+
+    @overrides(MIExampleCacheHierarchy)
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+        """Set up Ruby with a directory per board and remote memory port."""
+        requires(coherence_protocol_required=CoherenceProtocol.MI_EXAMPLE)
+
+        self.ruby_system = RubySystem()
+
+        # Ruby's global network.
+        self.ruby_system.network = SimplePt2Pt(self.ruby_system)
+
+        # MI Example uses 5 virtual networks.
+        self.ruby_system.number_of_virtual_networks = 5
+        self.ruby_system.network.number_of_virtual_networks = 5
+
+        # There is a single global list of all of the controllers to make it
+        # easier to connect everything to the global network. This can be
+        # customized depending on the topology/network requirements.
+        # Create one controller for each L1 cache (and the cache mem obj.)
+        # Create one directory controller per memory port (the memory cntrl).
+        self._controllers = []
+        for i, core in enumerate(board.get_processor().get_cores()):
+            cache = L1Cache(
+                size=self._size,
+                assoc=self._assoc,
+                network=self.ruby_system.network,
+                core=core,
+                cache_line_size=board.get_cache_line_size(),
+                target_isa=board.get_processor().get_isa(),
+                clk_domain=board.get_clock_domain(),
+            )
+
+            cache.sequencer = RubySequencer(
+                version=i,
+                dcache=cache.cacheMemory,
+                clk_domain=cache.clk_domain,
+            )
+
+            if board.has_io_bus():
+                cache.sequencer.connectIOPorts(board.get_io_bus())
+
+            cache.ruby_system = self.ruby_system
+
+            core.connect_icache(cache.sequencer.in_ports)
+            core.connect_dcache(cache.sequencer.in_ports)
+
+            core.connect_walker_ports(
+                cache.sequencer.in_ports, cache.sequencer.in_ports
+            )
+
+            # Connect the interrupt ports
+            if board.get_processor().get_isa() == ISA.X86:
+                int_req_port = cache.sequencer.interrupt_out_port
+                int_resp_port = cache.sequencer.in_ports
+                core.connect_interrupt(int_req_port, int_resp_port)
+            else:
+                core.connect_interrupt()
+
+            # Track the controller; it is connected to the network below.
+            self._controllers.append(cache)
+
+        # Create the directory controllers
+        self._directory_controllers = []
+        for mem_range, port in board.get_mem_ports():
+            directory = Directory(
+                self.ruby_system.network,
+                board.get_cache_line_size(),
+                mem_range,
+                port,
+            )
+            directory.ruby_system = self.ruby_system
+            self._directory_controllers.append(directory)
+        # Remote memory ports get their own directory controllers as well.
+        for mem_range, port in board.get_remote_mem_ports():
+            directory = Directory(
+                self.ruby_system.network,
+                board.get_cache_line_size(),
+                mem_range,
+                port,
+            )
+            directory.ruby_system = self.ruby_system
+            self._directory_controllers.append(directory)
+
+        # Create the DMA Controllers, if required.
+        self._dma_controllers = []
+        if board.has_dma_ports():
+            dma_ports = board.get_dma_ports()
+            for i, port in enumerate(dma_ports):
+                ctrl = DMAController(
+                    self.ruby_system.network, board.get_cache_line_size()
+                )
+                ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port)
+
+                ctrl.ruby_system = self.ruby_system
+                ctrl.dma_sequencer.ruby_system = self.ruby_system
+
+                self._dma_controllers.append(ctrl)
+
+        self.ruby_system.num_of_sequencers = len(self._controllers) + len(
+            self._dma_controllers
+        )
+
+        # Connect the controllers.
+        self.ruby_system.controllers = self._controllers
+        self.ruby_system.directory_controllers = self._directory_controllers
+
+        if len(self._dma_controllers) != 0:
+            self.ruby_system.dma_controllers = self._dma_controllers
+
+        self.ruby_system.network.connectControllers(
+            self._controllers
+            + self._directory_controllers
+            + self._dma_controllers
+        )
+        self.ruby_system.network.setup_buffers()
+
+        # Set up a proxy port for the system_port. Used for load binaries and
+        # other functional-only things.
+        self.ruby_system.sys_port_proxy = RubyPortProxy()
+        board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)
diff --git a/disaggregated_memory/cachehierarchies/private_l1_private_l2_shared_l3_cache_hierarchy.py b/disaggregated_memory/cachehierarchies/private_l1_private_l2_shared_l3_cache_hierarchy.py
new file mode 100644
index 0000000000..db674460ba
--- /dev/null
+++ b/disaggregated_memory/cachehierarchies/private_l1_private_l2_shared_l3_cache_hierarchy.py
@@ -0,0 +1,162 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache
+from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache
+from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache
+from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import PrivateL1PrivateL2CacheHierarchy
+from gem5.components.boards.abstract_board import AbstractBoard
+from gem5.isas import ISA
+
+from m5.objects import (
+ Cache,
+ L2XBar,
+ BaseXBar,
+ SystemXBar,
+ BadAddr,
+ Port
+)
+
+from gem5.utils.override import overrides
+
+
+class PrivateL1PrivateL2SharedL3CacheHierarchy(
+    PrivateL1PrivateL2CacheHierarchy):
+    """
+    A cache setup where each core has a private L1 Data and Instruction Cache
+    and a private L2 cache, with a single L3 cache shared by all the cores.
+    """
+
+    def __init__(
+        self,
+        l1d_size: str,
+        l1i_size: str,
+        l2_size: str,
+        l3_size: str,
+        l3_assoc: int = 16
+    ) -> None:
+        """
+        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
+        :type l1d_size: str
+        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
+        :type l1i_size: str
+        :param l2_size: The size of the L2 Cache (e.g., "256kB").
+        :type l2_size: str
+        :param l3_size: The size of the shared L3 Cache (e.g., "1MiB").
+        :type l3_size: str
+        :param l3_assoc: The associativity of the shared L3 Cache.
+        :type l3_assoc: int
+        """
+        super().__init__(
+            l1d_size=l1d_size,
+            l1i_size=l1i_size,
+            l2_size=l2_size
+        )
+        # L3 geometry plus fixed timing/MSHR settings used by incorporate_cache.
+        self._l3_size = l3_size
+        self._l3_assoc = l3_assoc
+        self._l3_tag_latency = 20
+        self._l3_data_latency = 20
+        self._l3_response_latency = 40
+        self._l3_mshrs = 32
+        self._l3_tgts_per_mshr = 12
+
+
+    @overrides(PrivateL1PrivateL2CacheHierarchy)
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+        """Build the L1/L2/L3 caches and connect board memory to the membus."""
+        # Set up the system port for functional access from the simulator.
+        board.connect_system_port(self.membus.cpu_side_ports)
+
+        for _, port in board.get_memory().get_mem_ports():
+            self.membus.mem_side_ports = port
+
+        self.l1icaches = [
+            L1ICache(size=self._l1i_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l1dcaches = [
+            L1DCache(size=self._l1d_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l2buses = [
+            L2XBar() for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l2caches = [
+            L2Cache(size=self._l2_size)
+            for i in range(board.get_processor().get_num_cores())
+        ]
+        self.l3cache = L2Cache(size=self._l3_size,
+                               assoc=self._l3_assoc,
+                               tag_latency=self._l3_tag_latency,
+                               data_latency=self._l3_data_latency,
+                               response_latency=self._l3_response_latency,
+                               mshrs=self._l3_mshrs,
+                               tgts_per_mshr=self._l3_tgts_per_mshr)
+        # A single L3 bus joins every private L2 to the shared L3 cache.
+        self.l3bus = L2XBar()
+        # ITLB Page walk caches
+        self.iptw_caches = [
+            MMUCache(size="8KiB")
+            for _ in range(board.get_processor().get_num_cores())
+        ]
+        # DTLB Page walk caches
+        self.dptw_caches = [
+            MMUCache(size="8KiB")
+            for _ in range(board.get_processor().get_num_cores())
+        ]
+
+        if board.has_coherent_io():
+            self._setup_io_cache(board)
+
+        for i, cpu in enumerate(board.get_processor().get_cores()):
+            # Per-core wiring: L1s and walk caches -> L2 bus -> L2 -> L3 bus.
+            cpu.connect_icache(self.l1icaches[i].cpu_side)
+            cpu.connect_dcache(self.l1dcaches[i].cpu_side)
+
+            self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
+            self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
+
+            self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side
+
+            self.l2caches[i].mem_side = self.l3bus.cpu_side_ports
+
+            cpu.connect_walker_ports(
+                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
+            )
+            # X86 cores are given membus ports for interrupts; other ISAs none.
+            if board.get_processor().get_isa() == ISA.X86:
+                int_req_port = self.membus.mem_side_ports
+                int_resp_port = self.membus.cpu_side_ports
+                cpu.connect_interrupt(int_req_port, int_resp_port)
+            else:
+                cpu.connect_interrupt()
+        self.l3bus.mem_side_ports = self.l3cache.cpu_side
+        self.membus.cpu_side_ports = self.l3cache.mem_side
+
diff --git a/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py b/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py
new file mode 100644
index 0000000000..ed796214ab
--- /dev/null
+++ b/disaggregated_memory/configs/arm-gem5-numa-nodes-w-kvm.py
@@ -0,0 +1,153 @@
+# Copyright (c) 2023-24 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script shows an example of running a full system ARM Ubuntu boot
+simulation using the gem5 library. This simulation boots Ubuntu 20.04 using
+1 KVM CPU core and executes `STREAM`. The simulation ends when the guest
+script finishes and calls `m5 exit`.
+
+This config works if the host ARM machine supports KVM. The changes from
+PR https://github.com/gem5/gem5/pull/725 must be present in the gem5 source.
+
+Limitations:
+This only works with VExpress_GEM5_V1 and bootloader-v1
+"""
+
+import os
+import sys
+
+# all the source files are one directory above.
+sys.path.append(
+ os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+)
+
+import m5
+from m5.objects import Root, VExpress_GEM5_V1, ArmDefaultRelease
+
+from boards.arm_gem5_board import ArmGem5DMBoard
+from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2SharedL3DMCache
+from memories.remote_memory import RemoteChanneledMemory
+from gem5.utils.requires import requires
+from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
+from gem5.components.memory import DualChannelDDR4_2400
+from gem5.components.memory.multi_channel import *
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.simulate.simulator import Simulator
+from gem5.resources.workload import Workload
+from gem5.resources.workload import *
+from gem5.resources.resource import *
+
+# This runs a check to ensure the gem5 binary is compiled for ARM.
+
+requires(isa_required=ISA.ARM)
+
+# Defines a new type of memory with latency added (`remote_offset_latency` is
+# forwarded to RemoteChanneledMemory); use it to model disaggregated memory.
+def RemoteDualChannelDDR4_2400(
+    size: Optional[str] = None, remote_offset_latency: int = 300
+) -> AbstractMemorySystem:
+    """
+    A dual channel RemoteChanneledMemory built from DDR4_2400_8x8 based DIMMs.
+    """
+    return RemoteChanneledMemory(
+        DDR4_2400_8x8,
+        2,
+        64,
+        size=size,
+        remote_offset_latency=remote_offset_latency,
+    )
+
+# Here we set up the sizes of the private L1/L2 caches and the shared L3.
+cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3DMCache(
+    l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB"
+)
+# Memory: Dual Channel DDR4 2400 DRAM device.
+local_memory = DualChannelDDR4_2400(size="1GiB")
+# The remote memory can either be a simple Memory Interface, which is from a
+# different memory range, or it can be a Remote Memory Range, which has an
+# inherent delay while performing reads and writes into that memory. For simple
+# memory, use any MemInterfaces available in gem5 standard library. For remote
+# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this
+# config script to extend any existing MemInterface class and add latency value
+# to that memory.
+remote_memory = RemoteDualChannelDDR4_2400(
+    size="1GB", remote_offset_latency=750
+)
+# Here we set up the processor. We use a simple processor with one KVM core.
+processor = SimpleProcessor(cpu_type=CPUTypes.KVM, isa=ISA.ARM, num_cores=1)
+release = ArmDefaultRelease()
+platform = VExpress_GEM5_V1()
+# Here we set up the board which allows us to do Full-System ARM simulations.
+board = ArmGem5DMBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    local_memory=local_memory,
+    remote_memory=remote_memory,
+    cache_hierarchy=cache_hierarchy,
+    release=release,
+    platform=platform
+)
+# Guest script: run STREAM bound to node 0, interleaved, then bound to node 1.
+cmd = [
+    "mount -t sysfs - /sys;",
+    "mount -t proc - /proc;",
+    "numastat;",
+    "numactl --membind=0 -- " +
+    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+    "stream.hw.m5 1000000;",
+    "numastat;",
+    "numactl --interleave=0,1 -- " +
+    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+    "stream.hw.m5 1000000;",
+    "numastat;",
+    "numactl --membind=1 -- " +
+    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+    "stream.hw.m5 1000000;",
+    "numastat;",
+    "m5 exit;",
+]
+# NOTE(review): the resource paths below are machine-specific absolute paths.
+board.set_kernel_disk_workload(
+    # NUMA, SPARSEMEM and HOTPLUG enabled kernel.
+    kernel=CustomResource("/home/kaustavg/kernel/arm/linux-6.7/vmlinux"),
+    bootloader=CustomResource(
+        "/home/kaustavg/.cache/gem5/arm64-bootloader"
+    ),
+    disk_image=DiskImageResource(
+        "/home/kaustavg/disk-images/arm/arm64sve-hpc-2204-20230526-numa.img",
+        root_partition="1",
+    ),
+    readfile_contents=" ".join(cmd),
+)
+# This script boots a full system simulation with two NUMA nodes: the local
+# memory and the latency-offset remote memory, both instantiated in gem5. The
+# simulation exits after printing numastat information to the terminal, which
+# can be viewed from board.terminal.
+simulator = Simulator(board=board)
+simulator.run()
diff --git a/disaggregated_memory/configs/arm-gem5-numa-nodes.py b/disaggregated_memory/configs/arm-gem5-numa-nodes.py
new file mode 100644
index 0000000000..a36ce52f5d
--- /dev/null
+++ b/disaggregated_memory/configs/arm-gem5-numa-nodes.py
@@ -0,0 +1,142 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script shows an example of running a full system ARM Ubuntu boot
+simulation using the gem5 library. This simulation boots Ubuntu 20.04 using
+1 ATOMIC CPU core and executes `STREAM`. The simulation ends when the guest
+script finishes and calls `m5 exit`.
+"""
+
+import os
+import sys
+
+# all the source files are one directory above.
+sys.path.append(
+ os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+)
+
+import m5
+from m5.objects import Root
+
+from boards.arm_gem5_board import ArmGem5DMBoard
+from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2SharedL3DMCache
+from memories.remote_memory import RemoteChanneledMemory
+from gem5.utils.requires import requires
+from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
+from gem5.components.memory import DualChannelDDR4_2400
+from gem5.components.memory.multi_channel import *
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.simulate.simulator import Simulator
+from gem5.resources.workload import Workload
+from gem5.resources.workload import *
+from gem5.resources.resource import *
+
+# This runs a check to ensure the gem5 binary is compiled for ARM.
+
+requires(isa_required=ISA.ARM)
+
+# Defines a new type of memory with latency added (`remote_offset_latency` is
+# forwarded to RemoteChanneledMemory); use it to model disaggregated memory.
+def RemoteDualChannelDDR4_2400(
+    size: Optional[str] = None, remote_offset_latency: int = 300
+) -> AbstractMemorySystem:
+    """
+    A dual channel RemoteChanneledMemory built from DDR4_2400_8x8 based DIMMs.
+    """
+    return RemoteChanneledMemory(
+        DDR4_2400_8x8,
+        2,
+        64,
+        size=size,
+        remote_offset_latency=remote_offset_latency,
+    )
+
+# Here we set up the sizes of the private L1/L2 caches and the shared L3.
+cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3DMCache(
+    l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB"
+)
+# Memory: Dual Channel DDR4 2400 DRAM device.
+local_memory = DualChannelDDR4_2400(size="1GiB")
+# The remote memory can either be a simple Memory Interface, which is from a
+# different memory range, or it can be a Remote Memory Range, which has an
+# inherent delay while performing reads and writes into that memory. For simple
+# memory, use any MemInterfaces available in gem5 standard library. For remote
+# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this
+# config script to extend any existing MemInterface class and add latency value
+# to that memory.
+remote_memory = RemoteDualChannelDDR4_2400(
+    size="1GB", remote_offset_latency=750
+)
+# Here we set up the processor. We use a simple processor with one ATOMIC core.
+processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.ARM, num_cores=1)
+# Here we set up the board which allows us to do Full-System ARM simulations.
+board = ArmGem5DMBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    local_memory=local_memory,
+    remote_memory=remote_memory,
+    cache_hierarchy=cache_hierarchy,
+)
+# Guest script: run STREAM bound to node 0, interleaved, then bound to node 1.
+cmd = [
+    "mount -t sysfs - /sys;",
+    "mount -t proc - /proc;",
+    "numastat;",
+    "numactl --membind=0 -- " +
+    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+    "stream.hw.m5 1000000;",
+    "numastat;",
+    "numactl --interleave=0,1 -- " +
+    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+    "stream.hw.m5 1000000;",
+    "numastat;",
+    "numactl --membind=1 -- " +
+    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+    "stream.hw.m5 1000000;",
+    "numastat;",
+    "m5 exit;",
+]
+# NOTE(review): the resource paths below are machine-specific absolute paths.
+board.set_kernel_disk_workload(
+    kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"),
+    bootloader=CustomResource(
+        "/home/kaustavg/.cache/gem5/arm64-bootloader-foundation"
+    ),
+    disk_image=DiskImageResource(
+        "/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img",
+        root_partition="1",
+    ),
+    readfile_contents=" ".join(cmd),
+)
+# This script boots a full system simulation with two NUMA nodes: the local
+# memory and the latency-offset remote memory, both instantiated in gem5. The
+# simulation exits after printing numastat information to the terminal, which
+# can be viewed from board.terminal.
+simulator = Simulator(board=board)
+simulator.run()
diff --git a/disaggregated_memory/configs/arm-sst-numa-nodes.py b/disaggregated_memory/configs/arm-sst-numa-nodes.py
new file mode 100644
index 0000000000..8e2414f51f
--- /dev/null
+++ b/disaggregated_memory/configs/arm-sst-numa-nodes.py
@@ -0,0 +1,182 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script shows an example of running a full system ARM Ubuntu boot
+simulation using the gem5 library. This simulation boots Ubuntu 20.04 using
+4 O3 CPU cores and executes `STREAM`. The simulation ends when the
+startup is completed successfully.
+
+* This script has to be executed from SST
+"""
+
+import os
+import sys
+import argparse
+
+# all the source files are one directory above.
+sys.path.append(
+ os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+)
+
+import m5
+from m5.objects import Root, AddrRange
+
+from boards.arm_sst_board import ArmSstDMBoard
+from cachehierarchies.dm_caches_sst import (
+ ClassicPrivateL1PrivateL2SharedL3SstDMCache
+)
+from memories.external_remote_memory import ExternalRemoteMemoryInterface
+from gem5.utils.requires import requires
+from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.simulate.simulator import Simulator
+from gem5.resources.workload import Workload
+from gem5.resources.workload import *
+from gem5.resources.resource import *
+
+# SST passes a couple of arguments for this system to simulate.
+parser = argparse.ArgumentParser()
+parser.add_argument("--command", type=str, help="Command run by guest")
+parser.add_argument(
+ "--cpu-type",
+ type=str,
+ choices=["atomic", "timing", "o3"],
+ default="atomic",
+ help="CPU type",
+)
+parser.add_argument(
+ "--cpu-clock-rate",
+ type=str,
+ required=True,
+ help="CPU Clock",
+)
+parser.add_argument(
+ "--local-memory-size",
+ type=str,
+ required=True,
+ help="Local memory size",
+)
+parser.add_argument(
+ "--remote-memory-addr-range",
+ type=str,
+ required=True,
+ help="Remote memory range",
+)
+parser.add_argument(
+ "--remote-memory-latency",
+ type=int,
+ required=True,
+ help="Remote memory latency in Ticks (has to be converted prior)",
+)
+args = parser.parse_args()
+cpu_type = {
+ "o3" : CPUTypes.O3,
+ "atomic": CPUTypes.ATOMIC,
+ "timing": CPUTypes.TIMING}[args.cpu_type]
+
+remote_memory_range = list(map(int, args.remote_memory_addr_range.split(",")))
+remote_memory_range = AddrRange(remote_memory_range[0], remote_memory_range[1])
+
+# This runs a check to ensure the gem5 binary is compiled for ARM.
+requires(isa_required=ISA.ARM)
+# Here we setup the parameters of the l1 and l2 caches.
+cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3SstDMCache(
+ l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB"
+)
+# Memory: Single Channel DDR4 2400 DRAM device.
+
+local_memory = SingleChannelDDR4_2400(size=args.local_memory_size)
+
+# Either supply the size of the remote memory or the address range of the
+# remote memory. Since this is inside the external memory, it does not matter
+# what type of memory is being simulated. This can either be initialized with
+# a size or a memory address range, which is more flexible. Adding remote
+# memory latency automatically adds a non-coherent crossbar to simulate latency
+
+remote_memory = ExternalRemoteMemoryInterface(
+ addr_range=remote_memory_range,
+ remote_memory_latency=args.remote_memory_latency
+)
+
+# Here we setup the processor. NOTE(review): the parsed --cpu-type is unused.
+processor = SimpleProcessor(
+ cpu_type=CPUTypes.O3, isa=ISA.ARM, num_cores=4
+)
+
+# Here we setup the board which allows us to do Full-System ARM simulations.
+board = ArmSstDMBoard(
+ clk_freq=args.cpu_clock_rate,
+ processor=processor,
+ local_memory=local_memory,
+ remote_memory=remote_memory,
+ cache_hierarchy=cache_hierarchy,
+)
+
+cmd = [
+ "mount -t sysfs - /sys;",
+ "mount -t proc - /proc;",
+ "numastat;",
+ "numactl --membind=0 -- " +
+ "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+ "stream.hw.m5 1000000;",
+ "numastat;",
+ "numactl --interleave=0,1 -- " +
+ "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+ "stream.hw.m5 1000000;",
+ "numastat;",
+ "numactl --membind=1 -- " +
+ "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/" +
+ "stream.hw.m5 1000000;",
+ "numastat;",
+ "m5 exit;",
+]
+
+workload = CustomWorkload(
+ function="set_kernel_disk_workload",
+ parameters={
+ "kernel" : CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"),
+ "bootloader" : CustomResource("/home/kaustavg/.cache/gem5/arm64-bootloader-foundation"),
+ "disk_image" : DiskImageResource(
+ "/home/kaustavg/disk-images/arm/arm64sve-hpc-2204-20230526-numa.img",
+ # local_path="/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img",
+ root_partition="1",
+ ),
+ "readfile_contents" : " ".join(cmd)
+ },
+)
+# This disk image needs to have NUMA tools installed.
+board.set_workload(workload)
+# This script will boot two NUMA nodes in a full system simulation where the
+# gem5 node will be sending instructions to the SST node. The simulation will
+# exit after displaying numastat information on the terminal, which can be
+# viewed from board.terminal.
+board._pre_instantiate()
+root = Root(full_system=True, board=board)
+board._post_instantiate()
+m5.instantiate()
diff --git a/disaggregated_memory/configs/riscv-gem5-numa-nodes.py b/disaggregated_memory/configs/riscv-gem5-numa-nodes.py
new file mode 100644
index 0000000000..e011298803
--- /dev/null
+++ b/disaggregated_memory/configs/riscv-gem5-numa-nodes.py
@@ -0,0 +1,130 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script shows an example of running a full system RISCV Ubuntu boot
+simulation using the gem5 library. This simulation boots Ubuntu 20.04 using
+1 ATOMIC CPU core and executes `numastat`. The simulation ends when the
+startup is completed successfully.
+"""
+
+import os
+import sys
+
+# all the source files are one directory above.
+sys.path.append(
+ os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+)
+
+import m5
+from m5.objects import Root
+
+from boards.riscv_gem5_board import RiscvGem5DMBoard
+from cachehierarchies.dm_caches import ClassicPL1PL2DMCache
+from gem5.utils.requires import requires
+from gem5.components.memory import DualChannelDDR4_2400
+from memories.remote_memory import RemoteChanneledMemory
+from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.simulate.simulator import Simulator
+from gem5.resources.workload import Workload
+from gem5.resources.workload import *
+from gem5.resources.resource import *
+
+# This runs a check to ensure the gem5 binary is compiled for RISCV.
+
+requires(isa_required=ISA.RISCV)
+
+# defining a new type of memory with latency added.
+def RemoteDualChannelDDR4_2400(
+    size: "Optional[str]" = None, remote_offset_latency=300
+) -> "AbstractMemorySystem":  # quoted: these names are not imported explicitly
+    """Return a dual channel DDR4_2400_8x8 memory system; `size` and
+    `remote_offset_latency` are forwarded unchanged to RemoteChanneledMemory.
+    """
+    return RemoteChanneledMemory(
+        DDR4_2400_8x8,
+        2,  # number of channels ("Dual")
+        64,  # presumably the interleaving size, as in ChanneledMemory -- confirm
+        size=size,
+        remote_offset_latency=remote_offset_latency,
+    )
+# Here we setup the parameters of the l1 and l2 caches.
+cache_hierarchy = ClassicPL1PL2DMCache(  # the cache class this file imports
+    l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB"
+)
+# Memory: Dual Channel DDR4 2400 DRAM device.
+local_memory = DualChannelDDR4_2400(size="2GiB")
+# The remote memory can either be a simple Memory Interface, which is from a
+# different memory range, or it can be a Remote Memory Range, which has an
+# inherent delay while performing reads and writes into that memory. For simple
+# memory, use any MemInterfaces available in gem5 standard library. For remote
+# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this
+# config script to extend any existing MemInterface class and add latency value
+# to that memory.
+remote_memory = RemoteDualChannelDDR4_2400(
+ size="1GB", remote_offset_latency=750
+)
+# Here we setup the processor. We use a simple processor.
+processor = SimpleProcessor(
+ cpu_type=CPUTypes.ATOMIC, isa=ISA.RISCV, num_cores=1
+)
+# Here we setup the board which allows us to do Full-System RISCV simulations.
+board = RiscvGem5DMBoard(
+ clk_freq="3GHz",
+ processor=processor,
+ local_memory=local_memory,
+ remote_memory=remote_memory,
+ cache_hierarchy=cache_hierarchy,
+)
+cmd = [  # guest commands; joined with spaces to form the readfile contents
+    "mount -t sysfs - /sys;",
+    "mount -t proc - /proc;",
+    "numastat;",
+    "m5 exit;",
+]
+workload = CustomWorkload(
+ function="set_kernel_disk_workload",
+ parameters={
+ "disk_image": DiskImageResource(
+ local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img",
+ root_partition="1",
+ ),
+ "kernel": CustomResource(
+ "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/SST13/kernels/gem5-resources/src/riscv-boot-exit-nodisk/riscv-pk/build/bbl"
+ ),
+ "readfile_contents": " ".join(cmd),
+ },
+)
+# This script will boot two NUMA nodes in a full system simulation that runs
+# entirely inside gem5 (the remote memory is not an SST node). The simulation
+# will exit after displaying numastat information on the terminal, which can
+# be viewed from board.terminal.
+board.set_workload(workload)
+simulator = Simulator(board=board)
+simulator.run()
diff --git a/disaggregated_memory/configs/riscv-sst-numa-nodes.py b/disaggregated_memory/configs/riscv-sst-numa-nodes.py
new file mode 100644
index 0000000000..2cf0ddb788
--- /dev/null
+++ b/disaggregated_memory/configs/riscv-sst-numa-nodes.py
@@ -0,0 +1,164 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script shows an example of running a full system RISCV Ubuntu boot
+simulation using the gem5 library. This simulation boots Ubuntu 20.04 using
+1 TIMING CPU cores and executes `numastat`. The simulation ends when the
+startup is completed successfully.
+
+* This script has to be executed from SST
+"""
+
+import os
+import sys
+import argparse
+
+# all the source files are one directory above.
+sys.path.append(
+ os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+)
+
+import m5
+from m5.objects import Root, AddrRange
+
+from boards.riscv_sst_board import RiscvSstDMBoard
+from cachehierarchies.dm_caches_sst import ClassicPrivateL1PrivateL2SstDMCache
+from memories.external_remote_memory import ExternalRemoteMemoryInterface
+
+from gem5.utils.requires import requires
+from gem5.components.memory import DualChannelDDR4_2400
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.simulate.simulator import Simulator
+from gem5.resources.workload import Workload
+from gem5.resources.workload import *
+from gem5.resources.resource import *
+
+# SST passes a couple of arguments for this system to simulate.
+parser = argparse.ArgumentParser()
+parser.add_argument("--command", type=str, help="Command run by guest")
+parser.add_argument(
+ "--cpu-type",
+ type=str,
+ choices=["atomic", "timing", "o3"],
+ default="atomic",
+ help="CPU type",
+)
+parser.add_argument(
+ "--cpu-clock-rate",
+ type=str,
+ required=True,
+ help="CPU Clock",
+)
+parser.add_argument(
+ "--local-memory-size",
+ type=str,
+ required=True,
+ help="Local memory size",
+)
+parser.add_argument(
+ "--remote-memory-addr-range",
+ type=str,
+ required=True,
+ help="Remote memory range",
+)
+parser.add_argument(
+ "--remote-memory-latency",
+ type=int,
+ required=True,
+ help="Remote memory latency in Ticks (has to be converted prior)",
+)
+args = parser.parse_args()
+cpu_type = {
+ "o3" : CPUTypes.O3,
+ "atomic": CPUTypes.ATOMIC,
+ "timing": CPUTypes.TIMING}[args.cpu_type]
+
+# The remote range arrives as "start,end" (two comma-separated integers).
+remote_memory_range = list(map(int, args.remote_memory_addr_range.split(",")))
+remote_memory_range = AddrRange(remote_memory_range[0], remote_memory_range[1])
+# This runs a check to ensure the gem5 binary is compiled for RISCV.
+requires(isa_required=ISA.RISCV)
+# Here we setup the parameters of the l1 and l2 caches.
+cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache(
+    l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB"
+)
+# Memory: Dual Channel DDR4 2400 DRAM device, sized by --local-memory-size.
+local_memory = DualChannelDDR4_2400(size=args.local_memory_size)
+# Either supply the size of the remote memory or the address range of the
+# remote memory. Since this is inside the external memory, it does not matter
+# what type of memory is being simulated. This can either be initialized with
+# a size or a memory address range, which is more flexible. Adding remote
+# memory latency automatically adds a non-coherent crossbar to simulate latency
+remote_memory = ExternalRemoteMemoryInterface(
+ addr_range=remote_memory_range,
+ remote_memory_latency=args.remote_memory_latency
+)
+# Here we setup the processor. NOTE(review): the parsed --cpu-type is unused.
+processor = SimpleProcessor(
+ cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1
+)
+# Here we setup the board which allows us to do Full-System RISCV simulations.
+board = RiscvSstDMBoard(
+    clk_freq=args.cpu_clock_rate,  # honour the required --cpu-clock-rate option
+    processor=processor,
+    local_memory=local_memory,
+    remote_memory=remote_memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+cmd = [
+ "mount -t sysfs - /sys;",
+ "mount -t proc - /proc;",
+ "numastat;",
+ "m5 exit;"
+]
+
+workload = CustomWorkload(
+ function="set_kernel_disk_workload",
+ parameters={
+ "disk_image": DiskImageResource(
+ local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img",
+ root_partition="1",
+ ),
+ "kernel": CustomResource(
+ "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/SST13/kernels/gem5-resources/src/riscv-boot-exit-nodisk/riscv-pk/build/bbl"
+ ),
+ "readfile_contents": " ".join(cmd),
+ },
+)
+# This disk image needs to have NUMA tools installed.
+board.set_workload(workload)
+# This script will boot two NUMA nodes in a full system simulation where the
+# gem5 node will be sending instructions to the SST node. The simulation will
+# exit after displaying numastat information on the terminal, which can be
+# viewed from board.terminal.
+board._pre_instantiate()
+root = Root(full_system=True, board=board)
+board._post_instantiate()
+m5.instantiate()
diff --git a/disaggregated_memory/configs/x86-gem5-numa-nodes.py b/disaggregated_memory/configs/x86-gem5-numa-nodes.py
new file mode 100644
index 0000000000..2c03f13160
--- /dev/null
+++ b/disaggregated_memory/configs/x86-gem5-numa-nodes.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2023-24 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script shows an example of running a full system X86 Ubuntu boot
+simulation using the gem5 library. This simulation boots Ubuntu 20.04 using
+1 KVM CPU core and executes `STREAM`. The simulation ends when the
+startup is completed successfully.
+"""
+
+import os
+import sys
+
+# all the source files are one directory above.
+sys.path.append(
+ os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+)
+
+import m5
+from m5.objects import Root
+
+from boards.x86_gem5_board import X86Gem5DMBoard
+from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2DMCache, ClassicPrivateL1PrivateL2SharedL3DMCache
+from memories.remote_memory import RemoteChanneledMemory
+from gem5.utils.requires import requires
+from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
+from gem5.components.memory import SingleChannelDDR4_2400
+from gem5.components.memory.multi_channel import *
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.simulate.simulator import Simulator
+from gem5.resources.workload import Workload
+from gem5.resources.workload import *
+from gem5.resources.resource import *
+
+# This runs a check to ensure the gem5 binary is compiled for X86.
+
+requires(isa_required=ISA.X86)
+
+# defining a new type of memory with latency added. This memory interface can
+# be used as a remote memory interface to simulate disaggregated memory.
+def RemoteDualChannelDDR4_2400(
+ size: Optional[str] = None, remote_offset_latency=300
+) -> AbstractMemorySystem:
+ """
+ A DDR4_2400_8x8 based memory system with added remote access latency.
+ """
+ return RemoteChanneledMemory(
+ DDR4_2400_8x8,
+ 1,  # NOTE(review): one channel although the name says "Dual" -- confirm
+ 64,
+ size=size,
+ remote_offset_latency=remote_offset_latency,
+ )
+
+# Here we setup the parameters of the l1 and l2 caches.
+# cache_hierarchy = ClassicPrivateL1PrivateL2DMCache(
+# l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB"
+# )
+cache_hierarchy = ClassicPrivateL1PrivateL2DMCache(
+ l1d_size="32KiB",
+ l1i_size="32KiB",
+ l2_size="256KiB",
+)
+# Memory: Single Channel DDR4 2400 DRAM device.
+local_memory = SingleChannelDDR4_2400(size="1GiB")
+# The remote memory can either be a simple Memory Interface, which is from a
+# different memory range, or it can be a Remote Memory Range, which has an
+# inherent delay while performing reads and writes into that memory. For simple
+# memory, use any MemInterfaces available in gem5 standard library. For remote
+# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this
+# config script to extend any existing MemInterface class and add latency value
+# to that memory.
+remote_memory = RemoteDualChannelDDR4_2400(
+ size="1GB", remote_offset_latency=1050
+)
+# Here we setup the processor. We use a simple processor.
+processor = SimpleProcessor(cpu_type=CPUTypes.KVM, isa=ISA.X86, num_cores=1)
+# Here we setup the board which allows us to do Full-System X86 simulations.
+board = X86Gem5DMBoard(
+ clk_freq="3GHz",
+ processor=processor,
+ local_memory=local_memory,
+ remote_memory=remote_memory,
+ cache_hierarchy=cache_hierarchy,
+)
+cmd = [
+ "mount -t sysfs - /sys;",
+ "mount -t proc - /proc;",
+ "bin/bash"
+]
+
+# "numastat;",
+# "m5 dumpresetstats 0 ;",
+# # "numactl --preferred=0 -- " +
+# "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " +
+# "1000000;",
+# "numastat;",
+# "m5 dumpresetstats 0;",
+# "numactl --interleave=0,1 -- " +
+# "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " +
+# "1000000;",
+# "numastat;",
+# "m5 dumpresetstats 0;",
+# "numactl --membind=1 -- " +
+# "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " +
+# "1000000;",
+# "numastat;",
+# "m5 dumpresetstats 0;",
+# "m5 exit;",
+# ]
+board.set_kernel_disk_workload(
+ # kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"),
+ # kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49/vmlinux"),
+ kernel=CustomResource("/home/kaustavg/kernel/x86/linux-6.7/vmlinux"),
+ # bootloader=CustomResource(
+ # "/home/kaustavg/.cache/gem5/x86-npb"
+ # ),
+ disk_image=DiskImageResource(
+ "/home/kaustavg/.cache/gem5/x86-ubuntu-img",
+ root_partition="1",
+ ),
+ readfile_contents=" ".join(cmd),
+)
+# This script will boot two NUMA nodes in a full system simulation that runs
+# entirely inside gem5. Guest output (e.g. numastat information) can be viewed
+# from board.terminal. NOTE(review): `cmd` currently ends in a shell with no
+# `m5 exit`, so nothing in the guest script ends the run -- confirm.
+simulator = Simulator(board=board)
+simulator.run()
+simulator.run()  # NOTE(review): duplicated call -- confirm it is intentional
diff --git a/disaggregated_memory/configs/x86-sst-numa-nodes.py b/disaggregated_memory/configs/x86-sst-numa-nodes.py
new file mode 100644
index 0000000000..e8d80ba434
--- /dev/null
+++ b/disaggregated_memory/configs/x86-sst-numa-nodes.py
@@ -0,0 +1,129 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This script shows an example of running a full system X86 Ubuntu boot
+simulation using the gem5 library. This simulation boots Ubuntu 20.04 using
+1 ATOMIC CPU core and executes `STREAM`. The simulation ends when the
+startup is completed successfully.
+
+* This script has to be executed from SST
+"""
+
+import os
+import sys
+
+# all the source files are one directory above.
+sys.path.append(
+ os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
+)
+
+import m5
+from m5.objects import Root, AddrRange
+
+from boards.x86_sst_board import X86SstDMBoard
+from cachehierarchies.dm_caches_sst import ClassicPrivateL1PrivateL2SstDMCache
+from memories.external_remote_memory import ExternalRemoteMemoryInterface
+from gem5.utils.requires import requires
+from gem5.components.memory import DualChannelDDR4_2400, SingleChannelDDR4_2400
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.simulate.simulator import Simulator
+from gem5.resources.workload import Workload
+from gem5.resources.workload import *
+from gem5.resources.resource import *
+
+# This runs a check to ensure the gem5 binary is compiled for X86.
+requires(isa_required=ISA.X86)
+# Here we setup the parameters of the l1 and l2 caches.
+cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache(
+ l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB"
+)
+# Memory: Single Channel DDR4 2400 DRAM device.
+local_memory = SingleChannelDDR4_2400(size="1GiB")
+# Either supply the size of the remote memory or the address range of the
+# remote memory. Since this is inside the external memory, it does not matter
+# what type of memory is being simulated. This can either be initialized with
+# a size or a memory address range, which is more flexible. Adding remote
+# memory latency automatically adds a non-coherent crossbar to simulate latency
+remote_memory = ExternalRemoteMemoryInterface(
+ addr_range=AddrRange(0x40000000, size="1GiB"), remote_memory_latency=0
+)
+# Here we setup the processor. We use a simple processor.
+processor = SimpleProcessor(
+ cpu_type=CPUTypes.ATOMIC, isa=ISA.X86, num_cores=1
+)
+# Here we setup the board which allows us to do Full-System X86 simulations.
+board = X86SstDMBoard(
+ clk_freq="1GHz",
+ processor=processor,
+ local_memory=local_memory,
+ remote_memory=remote_memory,
+ cache_hierarchy=cache_hierarchy,
+)
+
+cmd = [
+ "mount -t sysfs - /sys;",
+ "mount -t proc - /proc;",
+ "numastat;",
+ "m5 dumpresetstats 0 ;",
+ "numactl --cpubind=0 --membind=0 -- " +
+ "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " +
+ "1000000;",
+ "m5 dumpresetstats 0;",
+ "numactl --cpubind=0 --membind=0,1 -- " +
+ "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " +
+ "1000000;",
+ "m5 dumpresetstats 0;",
+ "numactl --cpubind=0 --membind=1 -- " +
+ "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw " +
+ "1000000;",
+ "m5 dumpresetstats 0;",
+ "m5 exit;",
+]
+
+board.set_kernel_disk_workload(
+ # kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"),
+ # kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49/vmlinux"),
+ kernel=CustomResource("/home/kaustavg/kernel/x86/linux-6.7/vmlinux"),
+ # bootloader=CustomResource(
+ # "/home/kaustavg/.cache/gem5/x86-npb"
+ # ),
+ disk_image=DiskImageResource(
+ "/home/kaustavg/.cache/gem5/x86-ubuntu-img",
+ root_partition="1",
+ ),
+ readfile_contents=" ".join(cmd),
+)
+# This script will boot two NUMA nodes in a full system simulation where the
+# gem5 node will be sending instructions to the SST node. The simulation will
+# exit after displaying numastat information on the terminal, which can be
+# viewed from board.terminal.
+board._pre_instantiate()
+root = Root(full_system=True, board=board)
+board._post_instantiate()
+m5.instantiate()
diff --git a/disaggregated_memory/memories/external_remote_memory.py b/disaggregated_memory/memories/external_remote_memory.py
new file mode 100644
index 0000000000..6211f1679a
--- /dev/null
+++ b/disaggregated_memory/memories/external_remote_memory.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""We need a class that extends the outgoing bridge from gem5. The goal
+of this class to have a MemInterface like class in the future, where we'll
+append mem_ranges within this interface."""
+
+import m5
+from m5.util import fatal
+from m5.objects.XBar import NoncoherentXBar
+from m5.objects import OutgoingRequestBridge, AddrRange, Tick
+
+
+class ExternalRemoteMemoryInterface:
+ def __init__(
+ self,
+ size: "str" = None,
+ addr_range: AddrRange = None,
+ remote_memory_latency: Tick = None,
+ ):
+ # We will create a non-coherent cross bar if the user wants to simulate
+ # latency for the remote memory links.
+ self._xbar_required = False
+ # We setup the remote memory with size or address range. This allows us
+ # to quickly scale the setup with N nodes.
+ self._size = None
+ self._set_using_addr_ranges = False
+ self.remote_memory = OutgoingRequestBridge()
+ # The user needs to provide either the size of the remote memory or the
+ # range of the remote memory.
+ if size is None and addr_range is None:
+ fatal("External memory needs to either have a size or a range!")
+ else:
+ if addr_range is not None:
+ self.remote_memory.physical_address_ranges = [addr_range]
+ self._size = self.remote_memory.physical_address_ranges[
+ 0
+ ].size()
+ self._set_using_addr_ranges = True
+ # The size will be setup in the board in case ranges are not given
+ # by the user.
+ else:
+ self._size = size
+
+ # If there is a remote latency specified, create a non_coherent
+ # cross_bar.
+ if remote_memory_latency is not None:
+ self._xbar_required = True
+ self._remote_memory_latency = remote_memory_latency
+
+ def get_size(self):
+ return self._size
+
+ # def set_size(self):
+ # self._size = self.remote_memory.physical_addr_ranges[0].size()
+
+ def is_xbar_required(self):
+ # If an XBar is required, it should be added in the connect_things to
+ # avoid initializing an orphan node.
+ return self._xbar_required
+
+ def get_set_using_addr_ranges(self):
+ return self._set_using_addr_ranges
diff --git a/disaggregated_memory/memories/remote_memory.py b/disaggregated_memory/memories/remote_memory.py
new file mode 100644
index 0000000000..367d29830c
--- /dev/null
+++ b/disaggregated_memory/memories/remote_memory.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2021 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+""" Channeled "generic" DDR memory controllers
+"""
+
+import m5
+from gem5.utils.override import overrides
+from m5.objects import AddrRange, DRAMInterface, MemCtrl, Port
+from m5.objects.XBar import NoncoherentXBar
+from typing import Type, Sequence, Tuple, Optional, Union
+
+from gem5.components.memory.memory import ChanneledMemory
+
+
+class RemoteChanneledMemory(ChanneledMemory):
+ def __init__(
+ self,
+ dram_interface_class: Type[DRAMInterface],
+ num_channels: Union[int, str],
+ interleaving_size: Union[int, str],
+ size: Optional[str] = None,
+ addr_mapping: Optional[str] = None,
+ remote_offset_latency: Union[int, str] = 0,
+ ) -> None:
+ self._remote_latency = remote_offset_latency
+ super().__init__(
+ dram_interface_class,
+ num_channels,
+ interleaving_size,
+ size,
+ addr_mapping,
+ )
+
+ @overrides(ChanneledMemory)
+ def _create_mem_interfaces_controller(self):
+ self._dram = [
+ self._dram_class(addr_mapping=self._addr_mapping)
+ for _ in range(self._num_channels)
+ ]
+ self.remote_links = [
+ NoncoherentXBar(
+ frontend_latency=self._remote_latency,
+ forward_latency=0,
+ response_latency=0,
+ width=64,
+ )
+ for _ in range(self._num_channels)
+ ]
+ self.mem_ctrl = [
+ MemCtrl(
+ dram=self._dram[i], port=self.remote_links[i].mem_side_ports
+ )
+ for i in range(self._num_channels)
+ ]
+
+ @overrides(ChanneledMemory)
+ def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+ return [
+ (self.mem_ctrl[i].dram.range, self.remote_links[i].cpu_side_ports)
+ for i in range(self._num_channels)
+ ]
+
+ @overrides(ChanneledMemory)
+ def get_memory_controllers(self):
+ return [
+ (self.remote_links[i].cpu_side_ports)
+ for i in range(self._num_channels)
+ ]
diff --git a/ext/sst/INSTALL.md b/ext/sst/INSTALL.md
index ba61996b32..46fa26a8e4 100644
--- a/ext/sst/INSTALL.md
+++ b/ext/sst/INSTALL.md
@@ -1,5 +1,8 @@
# Installing SST
+The links to download SST source code are available at
+.
+This guide is using the most recent SST version (13.0.0) as of September 2023.
The links to download SST source code are available at
.
This guide is using the most recent SST version (13.0.0) as of September 2023.
@@ -12,13 +15,16 @@ installed.
```sh
wget https://github.com/sstsimulator/sst-core/releases/download/v13.0.0_Final/sstcore-13.0.0.tar.gz
-tar xzf sstcore-13.0.0.tar.gz
+tar xvf sstcore-13.0.0.tar.gz
+wget https://github.com/sstsimulator/sst-core/releases/download/v13.0.0_Final/sstcore-13.0.0.tar.gz
+tar xvf sstcore-13.0.0.tar.gz
```
### Installing SST-Core
```sh
cd sstcore-13.0.0
+cd sstcore-13.0.0
./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \
--disable-mpi # optional, used when MPI is not available.
make all -j$(nproc)
@@ -37,13 +43,16 @@ export PATH=$SST_CORE_HOME/bin:$PATH
```sh
wget https://github.com/sstsimulator/sst-elements/releases/download/v13.0.0_Final/sstelements-13.0.0.tar.gz
-tar xzf sstelements-13.0.0.tar.gz
+tar xvf sstelements-13.0.0.tar.gz
+wget https://github.com/sstsimulator/sst-elements/releases/download/v13.0.0_Final/sstelements-13.0.0.tar.gz
+tar xvf sstelements-13.0.0.tar.gz
```
### Installing SST-Elements
```sh
cd sst-elements-library-13.0.0
+cd sst-elements-library-13.0.0
./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \
--with-sst-core=$SST_CORE_HOME
make all -j$(nproc)
@@ -59,8 +68,9 @@ echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$SST_CORE_HOME/lib/pkgconfig/" >>
### Building gem5 library
At the root of the gem5 folder, you need to compile gem5 as a library. This
-varies dependent on which OS you are using. If you're using Linux, then
-execute the following:
+varies depending on which OS you use. If you're using Linux, then type the following:
+At the root of the gem5 folder, you need to compile gem5 as a library. This
+varies depending on which OS you use. If you're using Linux, then type the following:
```sh
scons build/RISCV/libgem5_opt.so -j $(nproc) --without-tcmalloc --duplicate-sources
```
@@ -68,7 +78,15 @@ In case you're using Mac, then type the following:
```sh
scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-sources
```
+In case you're using Mac, then type the following:
+```sh
+scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-sources
+```
+**Note:**
+* `--without-tcmalloc` is required to avoid a conflict with SST's malloc.
+* `--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory.
+* The Mac version was tested on a Macbook Air with M2 processor.
**Note:**
* `--without-tcmalloc` is required to avoid a conflict with SST's malloc.
* `--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory.
@@ -76,18 +94,40 @@ scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-s
### Compiling the SST integration
+Go to the SST directory in the gem5 repo.
Go to the SST directory in the gem5 repo.
```sh
cd ext/sst
```
-Depending on your OS, you need to copy the correct `Makefile.xxx` file to
-`Makefile`.
+Depending on the OS that you're using, you need to copy the matching `Makefile.xxx` to `Makefile`.
+```sh
+cp Makefile.xxx Makefile # linux or mac
+make -j4
+```
+If you are compiling this on Mac, then you'd need to export `DYLD_LIBRARY_PATH`
+```sh
+# go to the base gem5 directory
+cd ../..
+export DYLD_LIBRARY_PATH=:`pwd`/build/RISCV/
+```
+
+Change `ARCH=RISCV` to `ARCH=ARM` in the `Makefile` in case you're compiling
+for ARM.
+```
+Depending on the OS that you're using, you need to copy the matching `Makefile.xxx` to `Makefile`.
```sh
cp Makefile.xxx Makefile # linux or mac
make -j4
```
-The make file is hardcoded to RISC-V. IN the case you wish to compile to ARM,
-edit the Makefile or pass `ARCH=RISCV` to `ARCH=ARM` while compiling.
+If you are compiling this on Mac, then you'd need to export `DYLD_LIBRARY_PATH`
+```sh
+# go to the base gem5 directory
+cd ../..
+export DYLD_LIBRARY_PATH=:`pwd`/build/RISCV/
+```
+
+Change `ARCH=RISCV` to `ARCH=ARM` in the `Makefile` in case you're compiling
+for ARM.
### Running an example simulation
See `README.md`
diff --git a/ext/sst/Makefile b/ext/sst/Makefile
deleted file mode 100644
index 9213d266e9..0000000000
--- a/ext/sst/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-SST_VERSION=SST-11.1.0 # Name of the .pc file in lib/pkgconfig where SST is installed
-GEM5_LIB=gem5_opt
-ARCH=RISCV
-OFLAG=3
-
-LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH}
-CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext
-CPPFLAGS+=-MMD -MP
-SRC=$(wildcard *.cc)
-
-.PHONY: clean all
-
-all: libgem5.so
-
-libgem5.so: $(SRC:%.cc=%.o)
- ${CXX} ${CPPFLAGS} ${LDFLAGS} $? -o $@ -l${GEM5_LIB}
-
--include $(SRC:%.cc=%.d)
-
-clean:
- ${RM} *.[do] libgem5.so
diff --git a/ext/sst/gem5.cc b/ext/sst/gem5.cc
index 3ea6127ecd..f9357937a6 100644
--- a/ext/sst/gem5.cc
+++ b/ext/sst/gem5.cc
@@ -182,7 +182,6 @@ gem5Component::gem5Component(SST::ComponentId_t id, SST::Params& params):
// Split the port names using the util method defined.
splitPortNames(ports);
for (int i = 0 ; i < sstPortCount ; i++) {
- std::cout << sstPortNames[i] << std::endl;
sstPorts.push_back(
loadUserSubComponent(sstPortNames[i], 0)
);
@@ -447,4 +446,4 @@ gem5Component::splitPortNames(std::string port_names)
sstPortNames.push_back(strdup(part.c_str()));
sstPortCount++;
}
-}
+}
\ No newline at end of file
diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh
index f9f00beabd..1941691edd 100644
--- a/ext/sst/gem5.hh
+++ b/ext/sst/gem5.hh
@@ -143,8 +143,9 @@ class gem5Component: public SST::Component
SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS(
// These are the generally expected ports.
- {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"},
- {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"}
+ {"ports",
+ "Connection to gem5's outgoing ports to SST's ports",
+ "gem5.gem5Bridge"}
)
};
diff --git a/ext/sst/sst/arm_example.py b/ext/sst/sst/arm_example.py
index 4bc111cb86..b655e80d8b 100644
--- a/ext/sst/sst/arm_example.py
+++ b/ext/sst/sst/arm_example.py
@@ -35,6 +35,30 @@
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Copyright (c) 2021 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2021 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
import sst
import sys
@@ -50,6 +74,10 @@
# memory currently, we do not subtract 0x80000000 from the request's address to
# get the "real" address so, the mem_size would always be 2GiB larger than the
# desired memory size
+# gem5 will send requests to physical addresses of range [0x80000000, inf) to
+# memory currently, we do not subtract 0x80000000 from the request's address to
+# get the "real" address so, the mem_size would always be 2GiB larger than the
+# desired memory size
memory_size_gem5 = "4GiB"
memory_size_sst = "16GiB"
addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue()
@@ -76,6 +104,17 @@
"cache_port" : "system.memory_outgoing_bridge"
}
+# We need a list of ports.
+port_list = []
+for port in sst_ports:
+ port_list.append(port)
+
+# We keep a track of all the memory ports that we have.
+sst_ports = {
+ "system_port" : "system.system_outgoing_bridge",
+ "cache_port" : "system.memory_outgoing_bridge"
+}
+
# We need a list of ports.
port_list = []
for port in sst_ports:
@@ -86,6 +125,8 @@
"cmd": gem5_command,
"ports" : " ".join(port_list),
"debug_flags" : ""
+ "ports" : " ".join(port_list),
+ "debug_flags" : ""
}
gem5_node = sst.Component("gem5_node", "gem5.gem5Component")
@@ -95,13 +136,19 @@
cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } )
# for initialization
system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0)
+# for initialization
+system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0)
system_port.addParams({
+ "response_receiver_name": sst_ports["system_port"],
"response_receiver_name": sst_ports["system_port"],
"mem_size": memory_size_sst
})
# SST -> gem5
cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0)
+# SST -> gem5
+cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0)
cache_port.addParams({
+ "response_receiver_name": sst_ports["cache_port"],
"response_receiver_name": sst_ports["cache_port"],
"mem_size": memory_size_sst
})
@@ -113,11 +160,13 @@
# Memory
memctrl = sst.Component("memory", "memHierarchy.MemController")
# `addr_range_end` should be changed accordingly to memory_size_sst
+# `addr_range_end` should be changed accordingly to memory_size_sst
memctrl.addParams({
"debug" : "0",
"clock" : "1GHz",
"request_width" : "64",
"addr_range_end" : addr_range_end,
+ "addr_range_end" : addr_range_end,
})
memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
memory.addParams({
diff --git a/ext/sst/sst/example.py b/ext/sst/sst/example.py
index 1c35bc3f83..e66de07ab2 100644
--- a/ext/sst/sst/example.py
+++ b/ext/sst/sst/example.py
@@ -38,6 +38,10 @@
# memory currently, we do not subtract 0x80000000 from the request's address to
# get the "real" address so, the mem_size would always be 2GiB larger than the
# desired memory size
+# gem5 will send requests to physical addresses of range [0x80000000, inf) to
+# memory currently, we do not subtract 0x80000000 from the request's address to
+# get the "real" address so, the mem_size would always be 2GiB larger than the
+# desired memory size
memory_size_gem5 = "4GiB"
memory_size_sst = "6GiB"
addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue()
@@ -59,6 +63,17 @@
"cache_port" : "system.memory_outgoing_bridge"
}
+# We need a list of ports.
+port_list = []
+for port in sst_ports:
+ port_list.append(port)
+
+# We keep a track of all the memory ports that we have.
+sst_ports = {
+ "system_port" : "system.system_outgoing_bridge",
+ "cache_port" : "system.memory_outgoing_bridge"
+}
+
# We need a list of ports.
port_list = []
for port in sst_ports:
@@ -71,6 +86,11 @@
+ f" --memory-size {memory_size_gem5}",
"debug_flags": "",
"ports" : " ".join(port_list)
+ "cmd": " ../../configs/example/sst/riscv_fs.py"
+ + f" --cpu-clock-rate {cpu_clock_rate}"
+ + f" --memory-size {memory_size_gem5}",
+ "debug_flags": "",
+ "ports" : " ".join(port_list)
}
gem5_node = sst.Component("gem5_node", "gem5.gem5Component")
@@ -84,6 +104,14 @@
# tell the SubComponent the name of the corresponding SimObject
system_port.addParams({ "response_receiver_name": sst_ports["system_port"]})
+# SST -> gem5
+cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0)
+cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]})
+# for initialization
+system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0)
+# tell the SubComponent the name of the corresponding SimObject
+system_port.addParams({ "response_receiver_name": sst_ports["system_port"]})
+
# SST -> gem5
cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0)
cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]})
@@ -95,11 +123,13 @@
# Memory
memctrl = sst.Component("memory", "memHierarchy.MemController")
# `addr_range_end` should be changed accordingly to memory_size_sst
+# `addr_range_end` should be changed accordingly to memory_size_sst
memctrl.addParams({
"debug" : "0",
"clock" : "1GHz",
"request_width" : "64",
"addr_range_end" : addr_range_end,
+ "addr_range_end" : addr_range_end,
})
memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
memory.addParams({
diff --git a/ext/sst/sst/example_arm_dm_board.py b/ext/sst/sst/example_arm_dm_board.py
new file mode 100644
index 0000000000..bbbf2e5172
--- /dev/null
+++ b/ext/sst/sst/example_arm_dm_board.py
@@ -0,0 +1,171 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This SST configuration file connects gem5 ARM nodes to a shared SST memory.
+import sst
+import sys
+import os
+
+from sst import UnitAlgebra
+
+cache_link_latency = "1ps"
+cpu_clock_rate = "4.2GHz"
+def connect_components(link_name: str,
+ low_port_name: str, low_port_idx: int,
+ high_port_name: str, high_port_idx: int,
+ port = False, direct_link = False):
+ link = sst.Link(link_name)
+ low_port = "low_network_" + str(low_port_idx)
+ if port == True:
+ low_port = "port"
+ high_port = "high_network_" + str(high_port_idx)
+ if direct_link == True:
+ high_port = "direct_link"
+ link.connect(
+ (low_port_name, low_port, cache_link_latency),
+ (high_port_name, high_port, cache_link_latency)
+ )
+
+# =========================================================================== #
+
+# Define the number of gem5 nodes in the system. anything more than 1 needs
+# mpirun to run the sst binary.
+system_nodes = 2
+
+# Define the total number of SST Memory nodes
+memory_nodes = 1
+
+# This example uses fixed number of node size -> 2 GiB
+# The directory controller decides where the addresses are mapped to.
+node_memory_slice = "2GiB"
+remote_memory_slice = "2GiB"
+
+# SST memory node size. Each system gets a 2 GiB slice of fixed memory.
+sst_memory_size = str(
+ (memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) +"GiB"
+addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue()
+
+# There is one cache bus connecting all gem5 ports to the remote memory.
+mem_bus = sst.Component("membus", "memHierarchy.Bus")
+mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } )
+
+memctrl = sst.Component("memory", "memHierarchy.MemController")
+memctrl.setRank(0, 0)
+# `addr_range_end` should be changed accordingly to memory_size_sst
+memctrl.addParams({
+ "debug" : "0",
+ "clock" : "1.2GHz",
+ "request_width" : "64",
+ "addr_range_end" : addr_range_end,
+})
+memory = memctrl.setSubComponent( "backend", "memHierarchy.timingDRAM")
+memory.addParams({
+ "id" : 0,
+ "addrMapper" : "memHierarchy.simpleAddrMapper",
+ "addrMapper.interleave_size" : "64B",
+ "addrMapper.row_size" : "1KiB",
+ "clock" : "1.2GHz",
+ "mem_size" : sst_memory_size,
+ "channels" : 4,
+ "channel.numRanks" : 2,
+ "channel.rank.numBanks" : 16,
+ "channel.rank.bank.TRP" : 14,
+ "printconfig" : 1,
+})
+
+# Add all the Gem5 nodes to this list.
+gem5_nodes = []
+memory_ports = []
+
+# Create each of these nodes and connect it to the SST memory bus
+for node in range(system_nodes):
+ # Each of the nodes needs to have the initial parameters. We might need to
+ # to supply the instance count to the Gem5 side. This will enable range
+ # adjustments to be made to the DTB File.
+ node_range = [0x80000000 + (node + 1) * 0x80000000,
+ 0x80000000 + (node + 2) * 0x80000000]
+ print(node_range)
+ cmd = [
+ f"--outdir=m5out_arm_node_{node}",
+ "../../disaggregated_memory/configs/arm-sst-numa-nodes.py",
+ f"--cpu-clock-rate {cpu_clock_rate}",
+ "--cpu-type o3",
+ f"--local-memory-size {node_memory_slice}",
+ f"--remote-memory-addr-range {node_range[0]},{node_range[1]}",
+ f"--remote-memory-latency \
+ {int(float(cpu_clock_rate[0:cpu_clock_rate.find('G')]) * 250)}"
+ ]
+ ports = {
+ "remote_memory_port" : "board.remote_memory"
+ }
+ port_list = []
+ for port in ports:
+ port_list.append(port)
+ cpu_params = {
+ "frequency" : cpu_clock_rate,
+ "cmd" : " ".join(cmd),
+ "debug_flags" : "",
+ "ports" : " ".join(port_list)
+ }
+ # Each of the Gem5 node has to be separately simulated. TODO: Figure out
+ # this part on the mpirun side.
+ gem5_nodes.append(
+ sst.Component("gem5_node_{}".format(node), "gem5.gem5Component")
+ )
+ gem5_nodes[node].addParams(cpu_params)
+ gem5_nodes[node].setRank(node + 1, 0)
+
+ memory_ports.append(
+ gem5_nodes[node].setSubComponent(
+ "remote_memory_port", "gem5.gem5Bridge", 0
+ )
+ )
+ memory_ports[node].addParams({
+ "response_receiver_name" : ports["remote_memory_port"]
+ })
+
+ # we dont need directory controllers in this example case. The start and
+ # end ranges does not really matter as the OS is doing this management in
+ # in this case.
+ connect_components(f"node_{node}_mem_port_2_mem_bus",
+ memory_ports[node], 0,
+ mem_bus, node,
+ port = True)
+
+# All system nodes are setup. Now create a SST memory. Keep it simplemem for
+# avoiding extra simulation time. There is only one memory node in SST's side.
+# This will be updated in the future to use number of sst_memory_nodes
+
+connect_components("membus_2_memory",
+ mem_bus, 0,
+ memctrl, 0,
+ direct_link = True)
+
+# enable Statistics
+stat_params = { "rate" : "0ns" }
+sst.setStatisticLoadLevel(10)
+sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./arm-board.txt"})
+sst.enableAllStatisticsForAllComponents()
diff --git a/ext/sst/sst/example_multiISA.py b/ext/sst/sst/example_multiISA.py
new file mode 100644
index 0000000000..5cf5e5030f
--- /dev/null
+++ b/ext/sst/sst/example_multiISA.py
@@ -0,0 +1,182 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This SST configuration connects RISC-V and ARM gem5 nodes to SST memory.
+import sst
+import sys
+import os
+
+from sst import UnitAlgebra
+
+cache_link_latency = "1ps"
+
+cpu_clock_rate = "3GHz"
+
+def connect_components(link_name: str,
+ low_port_name: str, low_port_idx: int,
+ high_port_name: str, high_port_idx: int,
+ port = False, direct_link = False):
+ link = sst.Link(link_name)
+ low_port = "low_network_" + str(low_port_idx)
+ if port == True:
+ low_port = "port"
+ high_port = "high_network_" + str(high_port_idx)
+ if direct_link == True:
+ high_port = "direct_link"
+ link.connect(
+ (low_port_name, low_port, cache_link_latency),
+ (high_port_name, high_port, cache_link_latency)
+ )
+
+# =========================================================================== #
+
+# Define the number of gem5 nodes in the system.
+system_nodes = 2
+
+# Define the total number of SST Memory nodes
+memory_nodes = 1
+
+# This example uses fixed number of node size -> 2 GiB
+# TODO: Fix this in the later version of the script.
+# The directory controller decides where the addresses are mapped to.
+node_memory_slice = "2GiB"
+remote_memory_slice = "2GiB"
+
+# SST memory node size. Each system gets a 2 GiB slice of fixed memory.
+# SST memory node size. Each system gets a 2 GiB slice of fixed memory.
+sst_memory_size = str(
+ (memory_nodes * int(node_memory_slice[0])) + (system_nodes) * 2 + 2) +"GiB"
+addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue()
+
+# There is one cache bus connecting all gem5 ports to the remote memory.
+mem_bus = sst.Component("membus", "memHierarchy.Bus")
+mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } )
+
+memctrl = sst.Component("memory", "memHierarchy.MemController")
+memctrl.setRank(0, 0)
+# `addr_range_end` should be changed accordingly to memory_size_sst
+memctrl.addParams({
+ "debug" : "0",
+ "clock" : "2.4GHz",
+ "request_width" : "64",
+ "addr_range_end" : addr_range_end,
+})
+memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
+memory.addParams({
+ "access_time" : "30ns",
+ "mem_size" : sst_memory_size
+})
+
+# Add all the Gem5 nodes to this list.
+gem5_nodes = []
+memory_ports = []
+
+# Create each of these nodes and connect it to the SST memory bus
+for node in range(system_nodes):
+ # Each of the nodes needs to have the initial parameters. We might need to
+ # to supply the instance count to the Gem5 side. This will enable range
+ # adjustments to be made to the DTB File.
+ cmd = []
+ ports = {}
+ script = []
+ # Each of the nodes needs to have the initial parameters. We might need to
+ # to supply the instance count to the Gem5 side. This will enable range
+ # adjustments to be made to the DTB File.
+ node_range = [0x80000000 + (node + 1) * 0x80000000,
+ 0x80000000 + (node + 2) * 0x80000000]
+ if node % 2 == 0:
+ # This is a RISCV node. We need to call the RISCV script.
+ script = [
+ f"--outdir=m5out_riscv_node_{node}",
+ "../../disaggregated_memory/configs/riscv-sst-numa-nodes.py",
+ ]
+ else:
+ script = [
+ f"--outdir=m5out_arm_node_{node}",
+ "../../disaggregated_memory/configs/arm-sst-numa-nodes.py",
+ ]
+
+ cmd = script + [
+ f"--cpu-clock-rate {cpu_clock_rate}",
+ "--cpu-type timing",
+ f"--local-memory-size {node_memory_slice}",
+ f"--remote-memory-addr-range {node_range[0]},{node_range[1]}",
+ f"--remote-memory-latency \
+ {int(float(cpu_clock_rate[0:cpu_clock_rate.find('G')]) * 250)}"
+ ]
+ ports = {
+ "remote_memory_port" : "board.remote_memory"
+ }
+ port_list = []
+ for port in ports:
+ port_list.append(port)
+ cpu_params = {
+ "frequency" : cpu_clock_rate,
+ "cmd" : " ".join(cmd),
+ "debug_flags" : "",
+ "ports" : " ".join(port_list)
+ }
+ # Each of the Gem5 node has to be separately simulated. TODO: Figure out
+ # this part on the mpirun side.
+ gem5_nodes.append(
+ sst.Component("gem5_node_{}".format(node), "gem5.gem5Component")
+ )
+
+ gem5_nodes[node].addParams(cpu_params)
+ gem5_nodes[node].setRank(node + 1, 0)
+ memory_ports.append(
+ gem5_nodes[node].setSubComponent(
+ "remote_memory_port", "gem5.gem5Bridge", 0
+ )
+ )
+
+ memory_ports[node].addParams({
+ "response_receiver_name" : ports["remote_memory_port"]
+ })
+
+ # we dont need directory controllers in this example case. The start and
+ # end ranges does not really matter as the OS is doing this management in
+ # in this case.
+ connect_components(f"node_{node}_mem_port_2_mem_bus",
+ memory_ports[node], 0,
+ mem_bus, node,
+ port = True)
+
+# All system nodes are setup. Now create a SST memory. Keep it simplemem for
+# avoiding extra simulation time. There is only one memory node in SST's side.
+# This will be updated in the future to use number of sst_memory_nodes
+
+connect_components("membus_2_memory",
+ mem_bus, 0,
+ memctrl, 0,
+ direct_link = True)
+
+# enable Statistics
+stat_params = { "rate" : "0ns" }
+sst.setStatisticLoadLevel(10)
+sst.setStatisticOutput("sst.statOutputTXT",
+ {"filepath" : "./multiISA-board.txt"})
+sst.enableAllStatisticsForAllComponents()
diff --git a/ext/sst/sst/example_riscv_dm_board.py b/ext/sst/sst/example_riscv_dm_board.py
new file mode 100644
index 0000000000..9a6c07b6e6
--- /dev/null
+++ b/ext/sst/sst/example_riscv_dm_board.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This SST configuration connects gem5 RISC-V nodes to one shared SST memory.
+import sst
+import sys
+import os
+
+from sst import UnitAlgebra
+
+cache_link_latency = "1ps"
+cpu_clock_rate = "4.2GHz"
+def connect_components(link_name: str,
+                       low_port_name, low_port_idx: int,
+                       high_port_name, high_port_idx: int,
+                       port = False, direct_link = False):
+    """Link two SST (sub)components with latency `cache_link_latency`.
+
+    The low side uses "port" if `port` is set, else "low_network_<idx>"; the
+    high side uses "direct_link" if `direct_link`, else "high_network_<idx>".
+    """
+    low_port = "port" if port else f"low_network_{low_port_idx}"
+    high_port = ("direct_link" if direct_link
+                 else f"high_network_{high_port_idx}")
+    sst.Link(link_name).connect(
+        (low_port_name, low_port, cache_link_latency),
+        (high_port_name, high_port, cache_link_latency))
+
+# =========================================================================== #
+
+# Define the number of gem5 nodes in the system. Anything more than 1 needs
+# mpirun to run the sst binary.
+system_nodes = 2
+
+# Define the total number of SST memory nodes.
+memory_nodes = 1
+
+# This example uses a fixed node size of 2 GiB (written as "<n>GiB").
+# The directory controller decides where the addresses are mapped to.
+node_memory_slice = "2GiB"
+remote_memory_slice = "2GiB"
+
+# SST memory size; the whole slice number is parsed (e.g. "16GiB" -> 16).
+node_slice_gib = int(node_memory_slice[:-len("GiB")])
+sst_memory_size = f"{memory_nodes * node_slice_gib + system_nodes * 2 + 2}GiB"
+addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue()
+
+# A single memHierarchy bus connects all gem5 ports to the remote memory.
+mem_bus = sst.Component("membus", "memHierarchy.Bus")
+mem_bus.addParams( { "bus_frequency" : cpu_clock_rate } )
+
+memctrl = sst.Component("memory", "memHierarchy.MemController")
+memctrl.setRank(0, 0)
+# `addr_range_end` is derived from `sst_memory_size`; keep the two in sync.
+memctrl.addParams({
+ "debug" : "0",
+ "clock" : "2.4GHz",
+ "request_width" : "64",
+ "addr_range_end" : addr_range_end,
+})
+memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
+memory.addParams({
+ "access_time" : "50ns",
+ "mem_size" : sst_memory_size
+})
+
+# Add all the Gem5 nodes to this list.
+gem5_nodes = []
+memory_ports = []
+
+# Remote memory latency passed to gem5: 250 times the clock rate in GHz.
+# It is the same for every node, so it is computed once outside the loop.
+remote_memory_latency = int(
+    float(cpu_clock_rate[0:cpu_clock_rate.find('G')]) * 250)
+
+# Create each of these nodes and connect it to the SST memory bus.
+for node in range(system_nodes):
+    # Each of the nodes needs to have the initial parameters. We might need
+    # to supply the instance count to the Gem5 side. This will enable range
+    # adjustments to be made to the DTB File.
+    node_range = [0x80000000 + (node + 1) * 0x80000000,
+                  0x80000000 + (node + 2) * 0x80000000]
+    cmd = [
+        f"--outdir=m5out_riscv_node_{node}",
+        "../../disaggregated_memory/configs/riscv-sst-numa-nodes.py",
+        f"--cpu-clock-rate {cpu_clock_rate}",
+        "--cpu-type o3",
+        f"--local-memory-size {node_memory_slice}",
+        f"--remote-memory-addr-range {node_range[0]},{node_range[1]}",
+        # Single-line f-string: a "\" continuation embeds stray indentation.
+        f"--remote-memory-latency {remote_memory_latency}",
+    ]
+    ports = {
+        "remote_memory_port" : "board.remote_memory"
+    }
+    cpu_params = {
+        "frequency" : cpu_clock_rate,
+        "cmd" : " ".join(cmd),
+        "debug_flags" : "",
+        "ports" : " ".join(ports)
+    }
+    # Each of the Gem5 node has to be separately simulated. TODO: Figure out
+    # this part on the mpirun side.
+    gem5_node = sst.Component(f"gem5_node_{node}", "gem5.gem5Component")
+    gem5_node.addParams(cpu_params)
+    gem5_node.setRank(node + 1, 0)
+    gem5_nodes.append(gem5_node)
+
+    memory_port = gem5_node.setSubComponent(
+        "remote_memory_port", "gem5.gem5Bridge", 0
+    )
+    memory_port.addParams({
+        "response_receiver_name" : ports["remote_memory_port"]
+    })
+    memory_ports.append(memory_port)
+
+    # We don't need directory controllers in this example. The start and end
+    # ranges do not really matter because the OS manages the address mapping
+    # in this case.
+    connect_components(f"node_{node}_mem_port_2_mem_bus",
+                       memory_port, 0,
+                       mem_bus, node,
+                       port = True)
+
+# All system nodes are set up. Link the bus to the single SST memory
+# controller created above (a simpleMem backend keeps simulation time low).
+# This will be updated in the future to use the number of memory_nodes.
+
+connect_components("membus_2_memory",
+ mem_bus, 0,
+ memctrl, 0,
+ direct_link = True)
+
+# Enable statistics. NOTE(review): `stat_params` below is never used.
+stat_params = { "rate" : "0ns" }
+sst.setStatisticLoadLevel(10)
+sst.setStatisticOutput("sst.statOutputTXT", {"filepath" : "./riscv-board.txt"})
+sst.enableAllStatisticsForAllComponents()
diff --git a/ext/sst/sst_responder.hh b/ext/sst/sst_responder.hh
index 5f483be845..e9607bdc84 100644
--- a/ext/sst/sst_responder.hh
+++ b/ext/sst/sst_responder.hh
@@ -37,6 +37,7 @@
#include
#include
+#include
#include
#include
diff --git a/ext/sst/sst_responder_subcomponent.cc b/ext/sst/sst_responder_subcomponent.cc
index 8cd2c04628..2a7a325fc8 100644
--- a/ext/sst/sst_responder_subcomponent.cc
+++ b/ext/sst/sst_responder_subcomponent.cc
@@ -108,6 +108,8 @@ SSTResponderSubComponent::init(unsigned phase)
addr, data.size(), data);
memoryInterface->sendUntimedData(request);
}
+ responseReceiver->initData.clear();
+ responseReceiver->initPhaseComplete(true);
}
memoryInterface->init(phase);
}
@@ -200,11 +202,16 @@ SSTResponderSubComponent::portEventHandler(
responseQueue.push(pkt);
}
} else {
- // we can handle unexpected invalidates, but nothing else.
+ // we can handle a few types of requests.
if (SST::Interfaces::StandardMem::Read* test =
dynamic_cast(request)) {
return;
}
+ else if (SST::Interfaces::StandardMem::ReadResp* test =
+ dynamic_cast(
+ request)) {
+ return;
+ }
else if (SST::Interfaces::StandardMem::WriteResp* test =
dynamic_cast(
request)) {
@@ -241,6 +248,43 @@ SSTResponderSubComponent::handleRecvRespRetry()
void
SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt)
{
+ // SST does not understand what is a functional access in gem5 since SST
+ // only allows functional accesses at init time. Since it
+ // has all the stored in it's memory, any functional access made to SST has
+ // to be correctly handled. The idea here is to convert this functional
+ // access into a timing access and keep the SST memory consistent.
+
+ gem5::Addr addr = pkt->getAddr();
+ uint8_t* ptr = pkt->getPtr();
+ uint64_t size = pkt->getSize();
+
+ // Create a new request to handle this request immediately.
+ SST::Interfaces::StandardMem::Request* request = nullptr;
+
+ // we need a minimal translator here which does reads and writes. Any other
+ // command type is unexpected and the program should crash immediately.
+ switch((gem5::MemCmd::Command)pkt->cmd.toInt()) {
+ case gem5::MemCmd::WriteReq: {
+ std::vector data(ptr, ptr+size);
+ request = new SST::Interfaces::StandardMem::Write(
+ addr, data.size(), data);
+ break;
+ }
+ case gem5::MemCmd::ReadReq: {
+ request = new SST::Interfaces::StandardMem::Read(addr, size);
+ break;
+ }
+ default:
+ panic(
+ "handleRecvFunctional: Unable to convert gem5 packet: %s\n",
+ pkt->cmd.toString()
+ );
+ }
+ if(pkt->req->isUncacheable()) {
+ request->setFlag(
+ SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE);
+ }
+ memoryInterface->send(request);
}
bool
diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc
index 58abfbad46..edff92c923 100644
--- a/src/sst/outgoing_request_bridge.cc
+++ b/src/sst/outgoing_request_bridge.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2021 The Regents of the University of California
+// Copyright (c) 2021-2023 The Regents of the University of California
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -30,6 +30,7 @@
 #include
 #include
+#include "base/logging.hh"
 #include "base/trace.hh"
+#include "sim/stats.hh"
namespace gem5
@@ -38,11 +39,13 @@ namespace gem5
OutgoingRequestBridge::OutgoingRequestBridge(
const OutgoingRequestBridgeParams ¶ms) :
SimObject(params),
+ stats(this),
outgoingPort(std::string(name()), this),
sstResponder(nullptr),
physicalAddressRanges(params.physical_address_ranges.begin(),
params.physical_address_ranges.end())
{
+ this->init_phase_bool = false;
}
OutgoingRequestBridge::~OutgoingRequestBridge()
@@ -62,6 +65,7 @@ OutgoingRequestPort::~OutgoingRequestPort()
{
}
+
void
OutgoingRequestBridge::init()
{
@@ -96,7 +100,14 @@ OutgoingRequestBridge::setResponder(SSTResponderInterface* responder)
bool
OutgoingRequestBridge::sendTimingResp(gem5::PacketPtr pkt)
{
- return outgoingPort.sendTimingResp(pkt);
+ // see if the responder responded true or false. if it's true, then we
+ // increment the stats counters.
+ bool return_status = outgoingPort.sendTimingResp(pkt);
+ if (return_status == true) {
+ ++stats.numIncomingPackets;
+ stats.sizeIncomingPackets += pkt->getSize();
+ }
+ return return_status;
}
void
@@ -105,19 +116,56 @@ OutgoingRequestBridge::sendTimingSnoopReq(gem5::PacketPtr pkt)
outgoingPort.sendTimingSnoopReq(pkt);
}
+// Record whether SST's INIT phase has finished: once set to `true`,
+// handleRecvFunctional() forwards packets instead of buffering them.
+void
+OutgoingRequestBridge::initPhaseComplete(bool value) {
+ init_phase_bool = value;
+}
+// Return the flag set by initPhaseComplete(); it is `false` while the
+// bridge is still buffering init data.
+bool
+OutgoingRequestBridge::getInitPhaseStatus() {
+ return init_phase_bool;
+}
void
OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt)
{
- uint8_t* ptr = pkt->getPtr();
- uint64_t size = pkt->getSize();
- std::vector data(ptr, ptr+size);
- initData.push_back(std::make_pair(pkt->getAddr(), data));
+ // This should not receive any functional accesses
+ // gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt();
+ // std::cout << "Recv Functional : 0x" << std::hex << pkt->getAddr() <<
+ // std::dec << " " << pktCmd << " " << gem5::MemCmd::WriteReq << " " <<
+ // getInitPhaseStatus() << std::endl;
+ // Check at which stage are we at. If we are at INIT phase, then queue all
+ // these packets.
+ if (!getInitPhaseStatus())
+ {
+ // sstResponder->recvAtomic(pkt);
+ uint8_t* ptr = pkt->getPtr();
+ uint64_t size = pkt->getSize();
+ std::vector data(ptr, ptr+size);
+ initData.push_back(std::make_pair(pkt->getAddr(), data));
+ }
+ // This is the RUN phase. SST does not allow any sendUntimedData (AKA
+ // functional accesses) to it's memory. We need to convert these accesses
+ // to timing to at least store the correct data in the memory.
+ else {
+ // These packets have to translated at runtime. We convert these
+ // packets to timing as its data has to be stored correctly in SST
+ // memory. Otherwise reads from the SST memory will fail. To reproduce
+ // this error, don not handle any functional accesses and the kernel
+ // boot will fail while reading the correct partition from the vda
+ // device.
+
+ // we cannot allow any functional reads to go to SST
+ if (pkt->isRead()) {
+ assert(false && "Outgoing bridge cannot handle functional reads!");
+ }
+ sstResponder->handleRecvFunctional(pkt);
+ }
}
 Tick
 OutgoingRequestBridge::
 OutgoingRequestPort::recvAtomic(PacketPtr pkt)
 {
+ // Atomic accesses are not implemented by this port.
 assert(false && "OutgoingRequestPort::recvAtomic not implemented");
 return Tick();
 }
@@ -133,8 +181,19 @@ bool
OutgoingRequestBridge::
OutgoingRequestPort::recvTimingReq(PacketPtr pkt)
{
- owner->sstResponder->handleRecvTimingReq(pkt);
- return true;
+ return owner->handleTiming(pkt);
+}
+
+bool
+OutgoingRequestBridge::handleTiming(PacketPtr pkt)
+{
+    // Forward the request to the SST responder; only a request it accepts
+    // is counted in the outgoing stats. The size is read before the
+    // hand-off so that pkt is not dereferenced after the responder has it.
+    const auto pkt_size = pkt->getSize();
+    if (!sstResponder->handleRecvTimingReq(pkt)) {
+        return false;
+    }
+    ++stats.numOutgoingPackets;
+    stats.sizeOutgoingPackets += pkt_size;
+    return true;
+}
void
@@ -151,4 +210,16 @@ OutgoingRequestPort::getAddrRanges() const
return owner->physicalAddressRanges;
}
+// Register the four packet counters under the statistics group `parent`,
+// each with its unit and a description.
+OutgoingRequestBridge::StatGroup::StatGroup(statistics::Group *parent)
+ : statistics::Group(parent),
+ ADD_STAT(numOutgoingPackets, statistics::units::Count::get(),
+ "Number of packets going out of the gem5 port"),
+ ADD_STAT(sizeOutgoingPackets, statistics::units::Byte::get(),
+ "Cumulative size of all the outgoing packets"),
+ ADD_STAT(numIncomingPackets, statistics::units::Count::get(),
+ "Number of packets coming into the gem5 port"),
+ ADD_STAT(sizeIncomingPackets, statistics::units::Byte::get(),
+ "Cumulative size of all the incoming packets")
+{
+}
}; // namespace gem5
diff --git a/src/sst/outgoing_request_bridge.hh b/src/sst/outgoing_request_bridge.hh
index af049af45a..dfb2a60dbc 100644
--- a/src/sst/outgoing_request_bridge.hh
+++ b/src/sst/outgoing_request_bridge.hh
@@ -30,6 +30,7 @@
#include
#include
+#include "base/statistics.hh"
#include "mem/port.hh"
#include "params/OutgoingRequestBridge.hh"
#include "sim/sim_object.hh"
@@ -53,6 +54,7 @@ namespace gem5
class OutgoingRequestBridge: public SimObject
{
+
public:
class OutgoingRequestPort: public ResponsePort
{
@@ -69,6 +71,36 @@ class OutgoingRequestBridge: public SimObject
AddrRangeList getAddrRanges() const;
};
+ // We need a boolean variable to distinguish between INIT and RUN phases in
+ // SST. Gem5 does functional accesses to the SST memory when:
+ // (a) It loads the kernel (at the start of the simulation).
+ // (b) During VIO/disk accesses.
+ // While loading the kernel, it is easy to handle all functional accesses
+ // as SST allows initializing of untimed data during its INIT phase.
+ // However, functional accesses done to the SST memory during the RUN phase
+ // have to be handled separately. In this implementation, we convert all
+ // such functional accesses to timing accesses so that the data is
+ // correctly read from the memory.
+ bool init_phase_bool;
+
+ public:
+ // we need a statistics counter for this simobject to find out how many
+ // requests were sent to or received from the outgoing port.
+ struct StatGroup : public statistics::Group
+ {
+ /** Register the counters below under the group `parent`. */
+ StatGroup(statistics::Group *parent);
+ /** Count the number of outgoing packets */
+ statistics::Scalar numOutgoingPackets;
+
+
+ /** Cumulative size of all the outgoing packets */
+ statistics::Scalar sizeOutgoingPackets;
+
+ /** Count the number of incoming packets */
+ statistics::Scalar numIncomingPackets;
+ /** Cumulative size of all the incoming packets */
+ statistics::Scalar sizeIncomingPackets;
+ } stats;
public:
// a gem5 ResponsePort
OutgoingRequestPort outgoingPort;
@@ -85,7 +117,8 @@ class OutgoingRequestBridge: public SimObject
// Required to let the OutgoingRequestPort to send range change request.
void init();
-
+
+ bool handleTiming(PacketPtr pkt);
// Returns the range of addresses that the ports will handle.
// Currently, it will return the range of [0x80000000, inf), which is
// specific to RISCV (SiFive's HiFive boards).
@@ -97,8 +130,18 @@ class OutgoingRequestBridge: public SimObject
// Returns the buffered data for initialization. This is necessary as
// when gem5 sends functional requests to memory for initialization,
// the connection in SST Memory Hierarchy has not been constructed yet.
+ // This buffer is only used during the INIT phase.
std::vector>> getInitData() const;
+ // We need Set/Get functions to set the init_phase_bool.
+ // `initPhaseComplete` is used to signal the outgoing bridge that INIT
+ // phase is completed and RUN phase will start.
+ void initPhaseComplete(bool value);
+
+ // We read the value of the init_phase_bool using `getInitPhaseStatus`
+ // method.
+
+ bool getInitPhaseStatus();
// gem5 Component (from SST) will call this function to let set the
// bridge's corresponding SSTResponderSubComponent (which implemented
// SSTResponderInterface). I.e., this will connect this bridge to the
@@ -115,6 +158,8 @@ class OutgoingRequestBridge: public SimObject
// to SST. Should only be called during the SST construction phase, i.e.
// not at the simulation time.
void handleRecvFunctional(PacketPtr pkt);
+
+
};
}; // namespace gem5