diff --git a/disaggregated_memory/boards/arm_dm_board.py b/disaggregated_memory/boards/arm_dm_board.py new file mode 100644 index 0000000000..fb965d51b2 --- /dev/null +++ b/disaggregated_memory/boards/arm_dm_board.py @@ -0,0 +1,284 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from m5.objects import (
    Port,
    AddrRange,
    VoltageDomain,
    SrcClockDomain,
    Terminal,
    VncServer,
    IOXBar,
    BadAddr,
    ArmSystem,
)

from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation
from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease
from m5.objects.ArmFsWorkload import ArmFsLinux

from m5.util.fdthelper import (
    Fdt,
    FdtNode,
    FdtProperty,
    FdtPropertyStrings,
    FdtPropertyWords,
    FdtState,
)

import os
import m5
from abc import ABCMeta
from gem5.components.boards.arm_board import ArmBoard
from gem5.components.processors.abstract_processor import AbstractProcessor
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
    AbstractCacheHierarchy,
)
from gem5.utils.override import overrides

from typing import List, Sequence, Tuple


class ArmAbstractDMBoard(ArmBoard):
    """
    A high-level ARM board that can simulate zNUMA-capable systems with
    remote memories. This board is extended from the ArmBoard from the gem5
    standard library. This board assumes that you will be booting Linux. This
    board can be used to do disaggregated ARM system research while
    accelerating the simulation using kvm.

    The board only knows the *address range* of the remote memory. Child
    classes decide what actually backs that range (a gem5 memory or an
    external simulator) by implementing `get_remote_memory`,
    `get_remote_mem_ports` and `_set_remote_memory_ranges`.

    **Limitations**
    * kvm is only supported in a gem5-only setup.
    """

    # NOTE(review): `__metaclass__` is the Python 2 spelling and is ignored by
    # Python 3, so this class is not actually abstract. It is kept as-is
    # because switching to `metaclass=ABCMeta` may conflict with the metaclass
    # used by the gem5 base classes -- confirm before changing.
    __metaclass__ = ABCMeta

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
        remote_memory_addr_range: AddrRange,
        platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
        release: ArmRelease = ArmDefaultRelease(),
    ) -> None:
        """
        :param clk_freq: Clock frequency of the board.
        :param processor: The processor for this board.
        :param local_memory: The memory system local to the host.
        :param cache_hierarchy: The cache hierarchy for this board.
        :param remote_memory_addr_range: Address range occupied by the remote
            memory. Its size is derived from this range.
        :param platform: The RealView platform to build the board on.
        :param release: The ARM release (set of extensions) to simulate.
        """
        # The structure of this board is similar to the RISCV DM board.
        # These private attributes must exist before `super().__init__`
        # runs, because `_setup_board` reads them.
        self._localMemory = local_memory
        # remote_memory can either be an interface or an external memory.
        # This abstract disaggregated memory board does not know what this
        # type of memory is. It only needs to know the address range for this
        # memory. From this range, we'll figure out the size.
        self._remoteMemoryAddrRange = remote_memory_addr_range
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            memory=local_memory,
            cache_hierarchy=cache_hierarchy,
            platform=platform,
            release=release,
        )
        self.local_memory = local_memory

    @overrides(ArmBoard)
    def get_memory(self) -> "AbstractMemory":
        """Not supported on a disaggregated board.

        Use `get_local_memory` or `get_remote_memory` instead.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError

    def get_local_memory(self) -> AbstractMemorySystem:
        """Get the local memory (RAM) connected to the board.

        :returns: The local memory system.
        """
        return self._localMemory

    def get_remote_memory(self) -> "AbstractMemory":
        """Get the remote memory (RAM) connected to the board.

        This has to be implemented by the child class as we don't know if
        this board is simulating gem5 memory or some external simulator
        memory.

        :returns: The remote memory system.
        :raises NotImplementedError: if the child class does not override it.
        """
        raise NotImplementedError

    def get_remote_memory_size(self) -> int:
        """Get the remote memory size to setup the NUMA nodes.

        :returns: The size of the remote memory address range, as reported by
            `AddrRange.size()`.
        """
        return self._remoteMemoryAddrRange.size()

    @overrides(ArmBoard)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the memory ports of the *local* memory only."""
        return self.get_local_memory().get_mem_ports()

    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the remote memory (RAM) ports connected to the board.

        This has to be implemented by the child class as we don't know if
        this board is simulating gem5 memory or some external simulator
        memory.

        :returns: A sequence of (address range, port) tuples.
        :raises NotImplementedError: if the child class does not override it.
        """
        raise NotImplementedError

    def get_remote_memory_addr_range(self) -> AddrRange:
        """Get the address range occupied by the remote memory.

        :returns: The range that was passed to the constructor as
            `remote_memory_addr_range`.
        """
        # The original body raised NotImplementedError in front of a return
        # of `self._remote_memory_range`, an attribute that is never set. The
        # range is known to this class, so simply return it.
        return self._remoteMemoryAddrRange

    @overrides(ArmBoard)
    def _setup_board(self) -> None:
        """Set up the workload, clock/voltage domains, I/O and memory ranges.

        :raises ValueError: if the local memory does not fit in the memory
            regions offered by the platform.
        """
        # This board is expected to run full-system simulation.
        # Loading ArmFsLinux() from `src/arch/arm/ArmFsWorkload.py`
        self.workload = ArmFsLinux()

        # We are fixing the following variable for the ArmSystem to work. The
        # security extension is checked while generating the dtb file in
        # realview. This board does not have security extension enabled.
        self._have_psci = False

        # highest_el_is_64 is set to True. True if the register width of the
        # highest implemented exception level is 64 bits.
        self.highest_el_is_64 = True

        # Setting up the voltage and the clock domain here for the ARM board.
        # The ArmSystem/RealView expects voltage_domain to be a parameter.
        # The voltage and the clock frequency are taken from the devices.py
        # file from configs/example/arm. We set the clock to the same frequency
        # as the user specified in the config script.
        self.voltage_domain = VoltageDomain(voltage="1.0V")
        self.clk_domain = SrcClockDomain(
            clock=self._clk_freq, voltage_domain=self.voltage_domain
        )

        # The ARM board supports both Terminal and VncServer.
        self.terminal = Terminal()
        self.vncserver = VncServer()

        # Incoherent I/O Bus
        self.iobus = IOXBar()
        self.iobus.badaddr_responder = BadAddr()
        self.iobus.default = self.iobus.badaddr_responder.pio

        # We now need to setup the dma_ports.
        self._dma_ports = None

        # RealView sets up most of the on-chip and off-chip devices and GIC
        # for the ARM board. These devices' information is also used to
        # generate the dtb file. We then connect the I/O devices to the
        # I/O bus.
        self._setup_io_devices()

        # Once the realview is setup, we can continue setting up the memory
        # ranges. ArmBoard's memory can only be setup once realview is
        # initialized.
        local_memory = self.get_local_memory()
        mem_size = local_memory.get_size()

        # The following code is taken from configs/example/arm/devices.py. It
        # sets up all the memory ranges for the board: the local memory is
        # spread over the platform's memory regions until nothing is left.
        self.mem_ranges = []
        success = False
        for mem_range in self.realview._mem_regions:
            size_in_range = min(mem_size, mem_range.size())
            self.mem_ranges.append(
                AddrRange(start=mem_range.start, size=size_in_range)
            )

            mem_size -= size_in_range
            if mem_size == 0:
                success = True
                break

        if success:
            local_memory.set_memory_range(self.mem_ranges)
        else:
            raise ValueError("Memory size too big for platform capabilities")
        # At the end of the local_memory, append the remote memory range.
        self.mem_ranges.append(self._remoteMemoryAddrRange)

        # The PCI Devices. PCI devices can be added via the `_add_pci_device`
        # function.
        self._pci_devices = []

        # Set the remote memory ranges in the child board.
        self._set_remote_memory_ranges()

    def _set_remote_memory_ranges(self):
        """Assign the remote address range to the remote memory.

        Must be implemented by the child class.

        :raises NotImplementedError: if the child class does not override it.
        """
        raise NotImplementedError

    @overrides(ArmSystem)
    def generateDeviceTree(self, state):
        """Generate the device tree root node for the system.

        Creates the root node and adds the generated subnodes of all
        children. When a child needs to add multiple nodes, this is done by
        also creating a node called '/' which will then be merged with the
        root instead of appended.

        :param state: The FdtState used to encode address and size cells.
        :returns: The root FdtNode.
        """

        def generate_mem_node(numa_node_id, mem_range):
            # One `memory@<start>` node tagged with its NUMA node id.
            node = FdtNode(f"memory@{int(mem_range.start):x}")
            node.append(FdtPropertyStrings("device_type", ["memory"]))
            node.append(
                FdtPropertyWords(
                    "reg",
                    state.addrCells(mem_range.start)
                    + state.sizeCells(mem_range.size()),
                )
            )
            node.append(FdtPropertyWords("numa-node-id", [numa_node_id]))
            return node

        root = FdtNode("/")
        root.append(state.addrCellsProperty())
        root.append(state.sizeCellsProperty())

        # Add memory nodes.
        # NOTE(review): `_setup_board` appends the remote range to
        # `self.mem_ranges`, so the remote range gets a node with
        # numa-node-id 0 in this loop and a second, same-named node with
        # numa-node-id 1 right after it. Confirm that `FdtNode.append` merges
        # same-named nodes so that the id 1 is the one that survives.
        for mem_range in self.mem_ranges:
            root.append(generate_mem_node(0, mem_range))
        root.append(generate_mem_node(1, self._remoteMemoryAddrRange))

        for node in self.recurseDeviceTree(state):
            # Merge root nodes instead of adding them (for children
            # that need to add multiple root level nodes)
            if node.get_name() == root.get_name():
                root.merge(node)
            else:
                root.append(node)

        return root

    @overrides(ArmBoard)
    def get_default_kernel_args(self) -> List[str]:
        """Get the default kernel command line arguments.

        :returns: The list of kernel arguments; `{root_value}` is left as a
            placeholder in the `root=` entry.
        """
        # The default kernel string is taken from the devices.py file.
        return [
            "console=ttyAMA0",
            "lpj=19988480",
            "norandmaps",
            "root={root_value}",
            "rw",
        ]


# diff --git a/disaggregated_memory/boards/arm_gem5_board.py b/disaggregated_memory/boards/arm_gem5_board.py
# new file mode 100644
# index 0000000000..1323256f6f
# --- /dev/null
# +++ b/disaggregated_memory/boards/arm_gem5_board.py
# @@ -0,0 +1,198 @@
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# import os +# import sys + +# # all the source files are one directory above. 
# sys.path.append(
#     os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
# )

from m5.objects import (
    Port,
    AddrRange,
    VoltageDomain,
    SrcClockDomain,
    Terminal,
    VncServer,
    IOXBar,
    BadAddr,
    ArmSystem,
)

from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation
from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease
from m5.objects.ArmFsWorkload import ArmFsLinux

# `fatal` is called in `_connect_things` but was never imported.
from m5.util import fatal
from m5.util.fdthelper import (
    Fdt,
    FdtNode,
    FdtProperty,
    FdtPropertyStrings,
    FdtPropertyWords,
    FdtState,
)

import os
import m5
from abc import ABCMeta

from memories.remote_memory import RemoteChanneledMemory
from boards.arm_dm_board import ArmAbstractDMBoard

from gem5.components.processors.abstract_processor import AbstractProcessor
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
    AbstractCacheHierarchy,
)
from gem5.utils.override import overrides

from typing import List, Sequence, Tuple


class ArmGem5DMBoard(ArmAbstractDMBoard):
    """
    A disaggregated-memory ARM board whose remote memory is a gem5 memory
    system that is incorporated into the board alongside the local memory.
    """

    # NOTE(review): Python 2 style metaclass declaration; ignored by
    # Python 3. Kept unchanged.
    __metaclass__ = ABCMeta

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
        remote_memory_addr_range: AddrRange = None,
        platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
        release: ArmRelease = ArmDefaultRelease(),
    ) -> None:
        """
        :param clk_freq: Clock frequency of the board.
        :param processor: The processor for this board.
        :param local_memory: The memory system local to the host.
        :param remote_memory: The gem5 memory system used as remote memory.
        :param cache_hierarchy: The cache hierarchy for this board.
        :param remote_memory_addr_range: Address range of the remote memory.
            When omitted, the range starts at 0x80000000 plus the local
            memory size and spans the remote memory's own size.
        :param platform: The RealView platform to build the board on.
        :param release: The ARM release (set of extensions) to simulate.
        """
        # Both attributes are read by `_setup_board`, which runs inside
        # `super().__init__`, so they have to be set first.
        self._localMemory = local_memory
        self._remoteMemory = remote_memory
        # If the remote_memory_addr_range is not provided, we'll assume that
        # it starts at 0x80000000 + local_memory_size and spans its own size.
        if remote_memory_addr_range is None:
            remote_memory_addr_range = AddrRange(
                0x80000000 + self._localMemory.get_size(),
                size=remote_memory.get_size(),
            )
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            local_memory=local_memory,
            remote_memory_addr_range=remote_memory_addr_range,
            cache_hierarchy=cache_hierarchy,
            platform=platform,
            release=release,
        )
        self.local_memory = local_memory
        self.remote_memory = remote_memory

    @overrides(ArmAbstractDMBoard)
    def get_remote_memory(self) -> "AbstractMemory":
        """Get the remote memory (RAM) connected to the board.

        :returns: The remote memory system.
        """
        return self._remoteMemory

    @overrides(ArmAbstractDMBoard)
    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the memory ports of the remote memory system."""
        return self.get_remote_memory().get_mem_ports()

    @overrides(ArmAbstractDMBoard)
    def _set_remote_memory_ranges(self):
        """Assign the remote address range to the remote memory system."""
        self.get_remote_memory().set_memory_range(
            [self._remoteMemoryAddrRange]
        )

    @overrides(ArmAbstractDMBoard)
    def get_default_kernel_args(self) -> List[str]:
        """Get the default kernel command line arguments.

        Same as the parent board plus an explicit `init=` entry.
        """
        # The default kernel string is taken from the devices.py file.
        return [
            "console=ttyAMA0",
            "lpj=19988480",
            "norandmaps",
            "root={root_value}",
            "rw",
            "init=/root/gem5-init.sh",
        ]

    @overrides(ArmAbstractDMBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory.
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.

        :raises Exception: if this function has already been called.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)
        self.get_remote_memory().incorporate_memory(self)

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)
            # need to connect the remote links to the board.
            if self.get_cache_hierarchy().is_ruby():
                fatal(
                    "remote memory is only supported in classic caches at "
                    "the moment!"
                )
            if isinstance(self.get_remote_memory(), RemoteChanneledMemory):
                # Every remote link hangs off the membus as one more
                # memory-side port.
                for ports in self.get_remote_memory().remote_links:
                    self.get_cache_hierarchy().membus.mem_side_ports = (
                        ports.cpu_side_ports
                    )

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(ArmAbstractDMBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
        self.get_remote_memory()._post_instantiate()


# diff --git a/disaggregated_memory/boards/arm_sst_board.py b/disaggregated_memory/boards/arm_sst_board.py
# new file mode 100644
# index 0000000000..1e60803ba8
# --- /dev/null
# +++ b/disaggregated_memory/boards/arm_sst_board.py
# @@ -0,0 +1,239 @@
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# import os
# import sys

# # all the source files are one directory above.
# sys.path.append(
#     os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
# )

from m5.objects import (
    Port,
    AddrRange,
    VoltageDomain,
    SrcClockDomain,
    NoncoherentXBar,
    Terminal,
    VncServer,
    IOXBar,
    BadAddr,
    ArmSystem,
)

from m5.objects.RealView import VExpress_GEM5_Base, VExpress_GEM5_Foundation
from m5.objects.ArmSystem import ArmRelease, ArmDefaultRelease
from m5.objects.ArmFsWorkload import ArmFsLinux

# `fatal` is called in `_connect_things` but was never imported.
from m5.util import fatal
from m5.util.fdthelper import (
    Fdt,
    FdtNode,
    FdtProperty,
    FdtPropertyStrings,
    FdtPropertyWords,
    FdtState,
)

import os
import m5
from abc import ABCMeta

from memories.remote_memory import RemoteChanneledMemory
from boards.arm_dm_board import ArmAbstractDMBoard

from gem5.components.processors.abstract_processor import AbstractProcessor
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
    AbstractCacheHierarchy,
)
from gem5.utils.override import overrides

from typing import List, Sequence, Tuple


class ArmSstDMBoard(ArmAbstractDMBoard):
    """
    A disaggregated-memory ARM board whose remote memory lives in an external
    simulator (SST). `remote_memory` is an interface object that wraps the
    actual external memory in its `remote_memory` attribute.
    """

    # NOTE(review): Python 2 style metaclass declaration; ignored by
    # Python 3. Kept unchanged.
    __metaclass__ = ABCMeta

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory: "ExternalRemoteMemoryInterface",
        cache_hierarchy: AbstractCacheHierarchy,
        platform: VExpress_GEM5_Base = VExpress_GEM5_Foundation(),
        release: ArmRelease = ArmDefaultRelease(),
    ) -> None:
        """
        :param clk_freq: Clock frequency of the board.
        :param processor: The processor for this board.
        :param local_memory: The memory system local to the host.
        :param remote_memory: Interface to the externally simulated memory.
        :param cache_hierarchy: The cache hierarchy for this board.
        :param platform: The RealView platform to build the board on.
        :param release: The ARM release (set of extensions) to simulate.
        """
        self._localMemory = local_memory
        # Since the remote memory is defined in SST's side, we only need the
        # size of this memory while setting up stuff from gem5's side.
        self._remoteMemory = remote_memory
        # The remote memory is either setup with a size or an address range.
        # We need to determine if the address range is set. If not, then we
        # need to find the start and the end of the external memory range.
        if not self._remoteMemory.get_set_using_addr_ranges():
            # Address ranges were not set, but the system knows the size.
            # We'll assume that the range starts at
            # 0x80000000 + local_memory_size and spans its own size.
            self._remoteMemory.remote_memory.physical_address_ranges = [
                AddrRange(
                    0x80000000 + self._localMemory.get_size(),
                    size=remote_memory.get_size(),
                )
            ]
        # We need a size as a string to setup this memory.
        self._remoteMemorySize = self._remoteMemory.get_size()
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            local_memory=local_memory,
            # Only the first physical range is handed to the parent board.
            remote_memory_addr_range=(
                self._remoteMemory.remote_memory.physical_address_ranges[0]
            ),
            cache_hierarchy=cache_hierarchy,
            platform=platform,
            release=release,
        )
        self.local_memory = local_memory
        self.remote_memory = self._remoteMemory.remote_memory

    @overrides(ArmAbstractDMBoard)
    def get_remote_memory(self) -> "AbstractMemory":
        """Get the remote memory connected to the board.

        :returns: The external remote memory *interface* (not the wrapped
            memory object).
        """
        return self._remoteMemory

    @overrides(ArmAbstractDMBoard)
    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the address ranges and port of the external memory.

        :returns: A single-element list holding the external memory's
            physical address ranges and its port.
        """
        # Everywhere else in this class the ranges and the port are read from
        # the object wrapped by the interface (`.remote_memory`), so do the
        # same here instead of reading them from the interface itself.
        external_memory = self.get_remote_memory().remote_memory
        return [
            (
                external_memory.physical_address_ranges,
                external_memory.port,
            )
        ]

    @overrides(ArmAbstractDMBoard)
    def _set_remote_memory_ranges(self):
        """Nothing to do: the ranges are set on the external memory in
        `__init__`."""
        pass

    @overrides(ArmAbstractDMBoard)
    def get_default_kernel_args(self) -> List[str]:
        """Get the default kernel command line arguments.

        Same as the parent board plus an explicit `init=` entry.
        """
        # The default kernel string is taken from the devices.py file.
        return [
            "console=ttyAMA0",
            "lpj=19988480",
            "norandmaps",
            "root={root_value}",
            "rw",
            "init=/root/gem5-init.sh",
        ]

    @overrides(ArmAbstractDMBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory.
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.

        :raises Exception: if this function has already been called.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)

        # We need to find whether there is any external latency. If yes, then
        # add an xbar to model this latency.
        # NOTE(review): the membus is dereferenced here before the
        # `if self.get_cache_hierarchy()` guard below, so a board without a
        # cache hierarchy would fail at this point -- confirm that a cache
        # hierarchy is mandatory for this board.
        if self.get_remote_memory().is_xbar_required():
            self.remote_link = NoncoherentXBar(
                frontend_latency=0,
                forward_latency=0,
                response_latency=(
                    self.get_remote_memory()._remote_memory_latency
                ),
                width=64,
            )
            # connect the remote memory port to the remote link
            self.get_remote_memory().remote_memory.port = (
                self.remote_link.mem_side_ports
            )
            # The remote link is then connected to the membus
            self.get_cache_hierarchy().membus.mem_side_ports = (
                self.remote_link.cpu_side_ports
            )
        else:
            # Connect the external memory directly to the motherboard.
            self.get_remote_memory().remote_memory.port = (
                self.get_cache_hierarchy().membus.mem_side_ports
            )

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)
            # need to connect the remote links to the board.
            if self.get_cache_hierarchy().is_ruby():
                fatal(
                    "remote memory is only supported in classic caches at "
                    "the moment!"
                )
            if isinstance(self.get_remote_memory(), RemoteChanneledMemory):
                for ports in self.get_remote_memory().remote_links:
                    self.get_cache_hierarchy().membus.mem_side_ports = (
                        ports.cpu_side_ports
                    )

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(ArmAbstractDMBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate.

        The remote memory is not visited here, unlike the processor, cache
        hierarchy and local memory.
        """
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()


# diff --git a/disaggregated_memory/boards/riscv_dm_board.py b/disaggregated_memory/boards/riscv_dm_board.py
# new file mode 100644
# index 0000000000..f9660748e8
# --- /dev/null
# +++ b/disaggregated_memory/boards/riscv_dm_board.py
# @@ -0,0 +1,482 @@
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os + +from typing import List, Optional, Sequence, Tuple + +from gem5.components.boards.riscv_board import RiscvBoard + +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.utils.override import overrides +from gem5.resources.resource import AbstractResource +from gem5.components.boards.kernel_disk_workload import KernelDiskWorkload +from gem5.components.boards.abstract_system_board import AbstractSystemBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) + +from gem5.isas import ISA + +import m5 + +from m5.objects import AddrRange, HiFive, Frequency, Port + +from m5.util.fdthelper import ( + Fdt, + FdtNode, + FdtProperty, + FdtPropertyStrings, + FdtPropertyWords, + FdtState, +) + + +class RiscvAbstractDMBoard(RiscvBoard): + """ + A high-level RISCV board that can zNUMA-capable systems with a remote + memories. This board is extended from the RiscvBoard from Gem5 standard + library. At a high-level, this is based on the HiFive Unmatched board from + SiFive. This board assumes that you will be booting Linux. + + **Limitations** + * There is only one Plic and Clint controller supported by this board, + which make this board only capable of simulating zNUMA nodes and not + full-fledged NUMA nodes. + """ + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + local_memory: AbstractMemorySystem, + remote_memory_addr_range: AddrRange, + cache_hierarchy: AbstractCacheHierarchy, + ) -> None: + self._localMemory = local_memory + # remote_memory can either be an interface or an external memory + # This abstract disaggregated memory does not know what this type of + # memory is. it only needs to know the address range for this memory. + # from this range, we'll figure out the size. 
+ self._remoteMemoryAddrRange = remote_memory_addr_range + super().__init__( + clk_freq=clk_freq, + processor=processor, + memory=local_memory, + cache_hierarchy=cache_hierarchy, + ) + self.local_memory = local_memory + + if processor.get_isa() != ISA.RISCV: + raise Exception( + "The RISCVBoard requires a processor using the" + "RISCV ISA. Current processor ISA: " + f"'{processor.get_isa().name}'." + ) + + @overrides(AbstractSystemBoard) + def get_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + + :returns: The memory system. + """ + raise NotImplementedError + + def get_local_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The local memory system. + """ + return self._localMemory + + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + This has to be implemeted by the child class as we don't know if + this board is simulating Gem5 memory or some external simulator + memory. + :returns: The remote memory system. + """ + raise NotImplementedError + + def get_remote_memory_size(self) -> "str": + """Get the remote memory size to setup the NUMA nodes.""" + return self._remoteMemoryAddrRange.size() + + @overrides(AbstractSystemBoard) + def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_local_memory().get_mem_ports() + + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + """Get the memory (RAM) ports connected to the board. + This has to be implemeted by the child class as we don't know if + this board is simulating Gem5 memory or some external simulator + memory. + :returns: A tuple of mem_ports. + """ + raise NotImplementedError + + @overrides(AbstractSystemBoard) + def _setup_memory_ranges(self): + # the memory has to be setup for both the memory ranges. there is one + # local memory range, close to the host machine and the other range is + # pure memory, far from the host. 
+ local_memory = self.get_local_memory() + # remote_memory = self.get_remote_memory_size() + + local_mem_size = local_memory.get_size() + remote_mem_size = self.get_remote_memory_size() + + # local memory range will always start from 0x80000000. The remote + # memory can start and end anywhere as long as it is consistent + # with the dtb. + self._local_mem_ranges = [ + AddrRange(start=0x80000000, size=local_mem_size) + ] + + # The remote memory starts anywhere after the local memory ends. We + # rely on the user to start and end this range. + self._remote_mem_ranges = [self._remoteMemoryAddrRange] + + # using a _global_ memory range to keep a track of all the memory + # ranges. This is used to generate the dtb for this machine + self._global_mem_ranges = [] + self._global_mem_ranges.append(self._local_mem_ranges[0]) + self._global_mem_ranges.append(self._remote_mem_ranges[0]) + + # setting the memory ranges for both of the memory ranges. we cannot + # incorporate the memory at using this abstract board. + + self._incorporate_memory_range() + + def _incorporate_memory_range(self): + """ + The child board only can incorporate this memory range""" + + raise NotImplementedError( + "Cannot incorporte the memory using an Abstract-like board." + ) + + @overrides(RiscvBoard) + def generate_device_tree(self, outdir: str) -> None: + """Creates the dtb and dts files. 
+ Creates two files in the outdir: 'device.dtb' and 'device.dts' + :param outdir: Directory to output the files + """ + state = FdtState(addr_cells=2, size_cells=2, cpu_cells=1) + root = FdtNode("/") + root.append(state.addrCellsProperty()) + root.append(state.sizeCellsProperty()) + root.appendCompatible(["riscv-virtio"]) + + for idx, mem_range in enumerate(self._global_mem_ranges): + node = FdtNode("memory@%x" % int(mem_range.start)) + node.append(FdtPropertyStrings("device_type", ["memory"])) + node.append( + FdtPropertyWords( + "reg", + state.addrCells(mem_range.start) + + state.sizeCells(mem_range.size()), + ) + ) + # adding the NUMA node information so that the OS can identify all + # the NUMA ranges. + node.append(FdtPropertyWords("numa-node-id", [idx])) + root.append(node) + + # See Documentation/devicetree/bindings/riscv/cpus.txt for details. + cpus_node = FdtNode("cpus") + cpus_state = FdtState(addr_cells=1, size_cells=0) + cpus_node.append(cpus_state.addrCellsProperty()) + cpus_node.append(cpus_state.sizeCellsProperty()) + # Used by the CLINT driver to set the timer frequency. Value taken from + # RISC-V kernel docs (Note: freedom-u540 is actually 1MHz) + cpus_node.append(FdtPropertyWords("timebase-frequency", [100000000])) + + for i, core in enumerate(self.get_processor().get_cores()): + node = FdtNode(f"cpu@{i}") + node.append(FdtPropertyStrings("device_type", "cpu")) + node.append(FdtPropertyWords("reg", state.CPUAddrCells(i))) + # The CPUs are also associated to the NUMA nodes. All the CPUs are + # bound to the first NUMA node. + node.append(FdtPropertyWords("numa-node-id", [0])) + node.append(FdtPropertyStrings("mmu-type", "riscv,sv48")) + node.append(FdtPropertyStrings("status", "okay")) + node.append(FdtPropertyStrings("riscv,isa", "rv64imafdc")) + # TODO: Should probably get this from the core. 
+ freq = self.clk_domain.clock[0].frequency + node.append(FdtPropertyWords("clock-frequency", freq)) + node.appendCompatible(["riscv"]) + int_phandle = state.phandle(f"cpu@{i}.int_state") + node.appendPhandle(f"cpu@{i}") + + int_node = FdtNode("interrupt-controller") + int_state = FdtState(interrupt_cells=1) + int_phandle = int_state.phandle(f"cpu@{i}.int_state") + int_node.append(int_state.interruptCellsProperty()) + int_node.append(FdtProperty("interrupt-controller")) + int_node.appendCompatible("riscv,cpu-intc") + int_node.append(FdtPropertyWords("phandle", [int_phandle])) + + node.append(int_node) + cpus_node.append(node) + + root.append(cpus_node) + + soc_node = FdtNode("soc") + soc_state = FdtState(addr_cells=2, size_cells=2) + soc_node.append(soc_state.addrCellsProperty()) + soc_node.append(soc_state.sizeCellsProperty()) + soc_node.append(FdtProperty("ranges")) + soc_node.appendCompatible(["simple-bus"]) + + # CLINT node + clint = self.platform.clint + clint_node = clint.generateBasicPioDeviceNode( + soc_state, "clint", clint.pio_addr, clint.pio_size + ) + int_extended = list() + for i, core in enumerate(self.get_processor().get_cores()): + phandle = soc_state.phandle(f"cpu@{i}.int_state") + int_extended.append(phandle) + int_extended.append(0x3) + int_extended.append(phandle) + int_extended.append(0x7) + clint_node.append( + FdtPropertyWords("interrupts-extended", int_extended) + ) + # NUMA information is also associated with the CLINT controller. + # In this board, the objective to associate one NUMA node to the CPUs + # and the other node with no CPUs. To generalize this, an additional + # CLINT controller has to be created on this board, which will make it + # completely NUMA, instead of just disaggregated NUMA-like board. 
+ clint_node.append(FdtPropertyWords("numa-node-id", [0])) + clint_node.appendCompatible(["riscv,clint0"]) + soc_node.append(clint_node) + + # PLIC node + plic = self.platform.plic + plic_node = plic.generateBasicPioDeviceNode( + soc_state, "plic", plic.pio_addr, plic.pio_size + ) + + int_state = FdtState(addr_cells=0, interrupt_cells=1) + plic_node.append(int_state.addrCellsProperty()) + plic_node.append(int_state.interruptCellsProperty()) + + phandle = int_state.phandle(plic) + plic_node.append(FdtPropertyWords("phandle", [phandle])) + # Similar to the CLINT interrupt controller, another PLIC controller is + # required to make this board a general NUMA like board. + plic_node.append(FdtPropertyWords("numa-node-id", [0])) + plic_node.append(FdtPropertyWords("riscv,ndev", [plic.n_src - 1])) + + int_extended = list() + for i, core in enumerate(self.get_processor().get_cores()): + phandle = state.phandle(f"cpu@{i}.int_state") + int_extended.append(phandle) + int_extended.append(0xB) + int_extended.append(phandle) + int_extended.append(0x9) + + plic_node.append(FdtPropertyWords("interrupts-extended", int_extended)) + plic_node.append(FdtProperty("interrupt-controller")) + plic_node.appendCompatible(["riscv,plic0"]) + + soc_node.append(plic_node) + + # PCI + pci_state = FdtState( + addr_cells=3, size_cells=2, cpu_cells=1, interrupt_cells=1 + ) + pci_node = FdtNode("pci") + + if int(self.platform.pci_host.conf_device_bits) == 8: + pci_node.appendCompatible("pci-host-cam-generic") + elif int(self.platform.pci_host.conf_device_bits) == 12: + pci_node.appendCompatible("pci-host-ecam-generic") + else: + m5.fatal("No compatibility string for the set conf_device_width") + + pci_node.append(FdtPropertyStrings("device_type", ["pci"])) + + # Cell sizes of child nodes/peripherals + pci_node.append(pci_state.addrCellsProperty()) + pci_node.append(pci_state.sizeCellsProperty()) + pci_node.append(pci_state.interruptCellsProperty()) + # PCI address for CPU + pci_node.append( + 
FdtPropertyWords( + "reg", + soc_state.addrCells(self.platform.pci_host.conf_base) + + soc_state.sizeCells(self.platform.pci_host.conf_size), + ) + ) + + # Ranges mapping + # For now some of this is hard coded, because the PCI module does not + # have a proper full understanding of the memory map, but adapting the + # PCI module is beyond the scope of what I'm trying to do here. + # Values are taken from the ARM VExpress_GEM5_V1 platform. + ranges = [] + # Pio address range + ranges += self.platform.pci_host.pciFdtAddr(space=1, addr=0) + ranges += soc_state.addrCells(self.platform.pci_host.pci_pio_base) + ranges += pci_state.sizeCells(0x10000) # Fixed size + + # AXI memory address range + ranges += self.platform.pci_host.pciFdtAddr(space=2, addr=0) + ranges += soc_state.addrCells(self.platform.pci_host.pci_mem_base) + ranges += pci_state.sizeCells(0x40000000) # Fixed size + pci_node.append(FdtPropertyWords("ranges", ranges)) + + # Interrupt mapping + plic_handle = int_state.phandle(plic) + int_base = self.platform.pci_host.int_base + + interrupts = [] + + for i in range(int(self.platform.pci_host.int_count)): + interrupts += self.platform.pci_host.pciFdtAddr( + device=i, addr=0 + ) + [int(i) + 1, plic_handle, int(int_base) + i] + + pci_node.append(FdtPropertyWords("interrupt-map", interrupts)) + + int_count = int(self.platform.pci_host.int_count) + if int_count & (int_count - 1): + fatal("PCI interrupt count should be power of 2") + + intmask = self.platform.pci_host.pciFdtAddr( + device=int_count - 1, addr=0 + ) + [0x0] + pci_node.append(FdtPropertyWords("interrupt-map-mask", intmask)) + + if self.platform.pci_host._dma_coherent: + pci_node.append(FdtProperty("dma-coherent")) + + soc_node.append(pci_node) + + # UART node + uart = self.platform.uart + uart_node = uart.generateBasicPioDeviceNode( + soc_state, "uart", uart.pio_addr, uart.pio_size + ) + uart_node.append( + FdtPropertyWords("interrupts", [self.platform.uart_int_id]) + ) + 
uart_node.append(FdtPropertyWords("clock-frequency", [0x384000])) + uart_node.append( + FdtPropertyWords("interrupt-parent", soc_state.phandle(plic)) + ) + uart_node.appendCompatible(["ns8250"]) + soc_node.append(uart_node) + + # VirtIO MMIO disk node + disk = self.disk + disk_node = disk.generateBasicPioDeviceNode( + soc_state, "virtio_mmio", disk.pio_addr, disk.pio_size + ) + disk_node.append(FdtPropertyWords("interrupts", [disk.interrupt_id])) + disk_node.append( + FdtPropertyWords("interrupt-parent", soc_state.phandle(plic)) + ) + disk_node.appendCompatible(["virtio,mmio"]) + soc_node.append(disk_node) + + # VirtIO MMIO rng node + rng = self.rng + rng_node = rng.generateBasicPioDeviceNode( + soc_state, "virtio_mmio", rng.pio_addr, rng.pio_size + ) + rng_node.append(FdtPropertyWords("interrupts", [rng.interrupt_id])) + rng_node.append( + FdtPropertyWords("interrupt-parent", soc_state.phandle(plic)) + ) + rng_node.appendCompatible(["virtio,mmio"]) + soc_node.append(rng_node) + + root.append(soc_node) + + fdt = Fdt() + fdt.add_rootnode(root) + fdt.writeDtsFile(os.path.join(outdir, "device.dts")) + fdt.writeDtbFile(os.path.join(outdir, "device.dtb")) + + @overrides(KernelDiskWorkload) + def get_default_kernel_args(self) -> List[str]: + # return ["console=ttyS0", "root={root_value}", + # "init=/root/gem5_init.sh", "rw"] + return ["console=ttyS0", "root={root_value}", "init=/bin/bash", "rw"] + + # @overrides(AbstractBoard) + # def _connect_things(self) -> None: + # """Connects all the components to the board. + + # The order of this board is always: + + # 1. Connect the memory. + # 2. Connect the cache hierarchy. + # 3. Connect the processor. + + # Developers may build upon this assumption when creating components. + + # Notes + # ----- + + # * The processor is incorporated after the cache hierarchy due to a bug + # noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this + # bug is fixed, this ordering must be maintained. 
+ # * Once this function is called `_connect_things_called` *must* be set + # to `True`. + # """ + + # if self._connect_things_called: + # raise Exception( + # "The `_connect_things` function has already been called." + # ) + + # # Incorporate the memory into the motherboard. + # self.get_local_memory().incorporate_memory(self) + # self.get_remote_memory().incorporate_memory(self) + + # # Incorporate the cache hierarchy for the motherboard. + # if self.get_cache_hierarchy(): + # self.get_cache_hierarchy().incorporate_cache(self) + + # # Incorporate the processor into the motherboard. + # self.get_processor().incorporate_processor(self) + + # self._connect_things_called = True + + # @overrides(AbstractBoard) + # def _post_instantiate(self): + # """Called to set up anything needed after m5.instantiate""" + # self.get_processor()._post_instantiate() + # if self.get_cache_hierarchy(): + # self.get_cache_hierarchy()._post_instantiate() + # self.get_local_memory()._post_instantiate() + # self.get_remote_memory()._post_instantiate() diff --git a/disaggregated_memory/boards/riscv_gem5_board.py b/disaggregated_memory/boards/riscv_gem5_board.py new file mode 100644 index 0000000000..d304d00a5c --- /dev/null +++ b/disaggregated_memory/boards/riscv_gem5_board.py @@ -0,0 +1,187 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class RiscvGem5DMBoard(RiscvAbstractDMBoard):
    """
    A board capable of full system simulation for RISC-V
    At a high-level, this is based on the HiFive Unmatched board from SiFive.
    This board assumes that you will be booting Linux.

    Both the local and the remote memory are memory systems that are
    incorporated into this board by ``_connect_things``.

    **Limitations**
    * Only works with classic caches
    """

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
        remote_memory_addr_range: Optional[AddrRange] = None,
    ) -> None:
        """Build the board.

        :param clk_freq: Clock frequency, forwarded to the parent board.
        :param processor: Processor to place on the board; must use the
            RISC-V ISA.
        :param local_memory: Memory system used as the local memory.
        :param remote_memory: Memory system used as the remote memory.
        :param cache_hierarchy: Cache hierarchy, forwarded to the parent
            board.
        :param remote_memory_addr_range: Address range of the remote memory.
            When omitted, the range starts at
            ``0x80000000 + local_memory.get_size()`` and has the size of
            ``remote_memory``.
        :raises Exception: If ``processor`` does not use the RISC-V ISA.
        """
        # NOTE(review): these are assigned before super().__init__(),
        # presumably because the parent constructor calls back into the
        # memory accessors while setting up the memory ranges -- confirm
        # before reordering.
        self._localMemory = local_memory
        self._remoteMemory = remote_memory
        # If the remote_memory_addr_range is not provided, we'll assume that
        # it starts at 0x80000000 + local_memory_size and ends at its own size
        if remote_memory_addr_range is None:
            remote_memory_addr_range = AddrRange(
                0x80000000 + self._localMemory.get_size(),
                size=remote_memory.get_size(),
            )
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            local_memory=local_memory,
            remote_memory_addr_range=remote_memory_addr_range,
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = local_memory
        self.remote_memory = remote_memory

        if processor.get_isa() != ISA.RISCV:
            # The adjacent literals are concatenated by the parser, so the
            # first one needs its own trailing space.
            raise Exception(
                "The RISCVBoard requires a processor using the "
                "RISCV ISA. Current processor ISA: "
                f"'{processor.get_isa().name}'."
            )

    def get_remote_memory(self) -> "AbstractMemory":
        """Get the memory (RAM) connected to the board.

        :returns: The remote memory system.
        """
        return self._remoteMemory

    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the memory ports of the remote memory system.

        :returns: Whatever ``get_mem_ports()`` of the remote memory returns.
        """
        return self.get_remote_memory().get_mem_ports()

    @overrides(RiscvAbstractDMBoard)
    def _incorporate_memory_range(self):
        """Hand the local and remote address ranges to their memories."""
        # If the memory exists in gem5, then, we need to incorporate this
        # memory range.
        self.get_local_memory().set_memory_range(self._local_mem_ranges)
        self.get_remote_memory().set_memory_range(self._remote_mem_ranges)

    @overrides(RiscvAbstractDMBoard)
    def get_default_kernel_args(self) -> List[str]:
        """Return the default kernel command-line arguments."""
        return [
            "console=ttyS0",
            "root={root_value}",
            "init=/root/gem5-init.sh",
            "rw",
        ]

    @overrides(AbstractBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory.
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)
        self.get_remote_memory().incorporate_memory(self)

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)
            # need to connect the remote links to the board.
            if self.get_cache_hierarchy().is_ruby():
                # Was a bare ``fatal(...)``; only the ``m5`` module itself is
                # imported here, so the bare name raised a NameError instead
                # of reporting the unsupported configuration.
                m5.fatal(
                    "remote memory is only supported in classic caches at "
                    + "the moment!"
                )
            if isinstance(self.get_remote_memory(), RemoteChanneledMemory):
                for ports in self.get_remote_memory().remote_links:
                    self.get_cache_hierarchy().membus.mem_side_ports = (
                        ports.cpu_side_ports
                    )

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(AbstractBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
        self.get_remote_memory()._post_instantiate()
class RiscvSstDMBoard(RiscvAbstractDMBoard):
    """
    A board capable of full system simulation for multiple RISC-V nodes.
    At a high-level, this is based on the HiFive Unmatched board from SiFive.
    This board assumes that you will be booting Linux.

    The remote memory is passed in as an interface object whose
    ``remote_memory`` attribute carries the port and the physical address
    ranges used by this board.

    **Limitations**
    * Only works with classic caches
    """

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory: "ExternalRemoteMemoryInterface",
        cache_hierarchy: AbstractCacheHierarchy,
    ) -> None:
        """Build the board.

        :param clk_freq: Clock frequency, forwarded to the parent board.
        :param processor: Processor to place on the board; must use the
            RISC-V ISA.
        :param local_memory: Memory system used as the local memory.
        :param remote_memory: Interface describing the external remote
            memory.
        :param cache_hierarchy: Cache hierarchy, forwarded to the parent
            board.
        :raises Exception: If ``processor`` does not use the RISC-V ISA.
        """
        self._localMemory = local_memory
        # Since the remote memory is defined in SST's side, we only need the
        # size of this memory while setting up stuff from Gem5's side.
        self._remoteMemory = remote_memory
        # The remote memory is either setup with a size or an address range.
        # We need to determine if the address range is set. if not, then we
        # need to find the starting and ending of the the external memory
        # range.
        if not self._remoteMemory.get_set_using_addr_ranges():
            # Address ranges were not set, but the system knows the size
            # If the remote_memory_addr_range is not provided, we'll assume
            # that it starts at 0x80000000 + local_memory_size and ends at its
            # own size
            self._remoteMemory.remote_memory.physical_address_ranges = [
                AddrRange(
                    0x80000000 + self._localMemory.get_size(),
                    size=remote_memory.get_size(),
                )
            ]
        # We need a size as a string to setup this memory.
        self._remoteMemorySize = self._remoteMemory.get_size()

        # Only the first physical address range is handed to the parent.
        remote_ranges = self._remoteMemory.remote_memory.physical_address_ranges
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            local_memory=local_memory,
            remote_memory_addr_range=remote_ranges[0],
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = local_memory
        self.remote_memory = self._remoteMemory.remote_memory

        if processor.get_isa() != ISA.RISCV:
            # The adjacent literals are concatenated by the parser, so the
            # first one needs its own trailing space.
            raise Exception(
                "The RISCVBoard requires a processor using the "
                "RISCV ISA. Current processor ISA: "
                f"'{processor.get_isa().name}'."
            )

    @overrides(AbstractSystemBoard)
    def get_memory(self) -> "AbstractMemory":
        """Get the memory (RAM) connected to the board.

        :raises NotImplementedError: Always; use ``get_local_memory`` or
            ``get_remote_memory``.
        """
        raise NotImplementedError

    def get_local_memory(self) -> "AbstractMemory":
        """Get the memory (RAM) connected to the board.

        :returns: The local memory system.
        """
        return self._localMemory

    def get_remote_memory(self) -> "AbstractMemory":
        """Get the remote memory object given to the constructor.

        :returns: The remote memory interface.
        """
        return self._remoteMemory

    @overrides(AbstractSystemBoard)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the memory ports of the local memory system."""
        return self.get_local_memory().get_mem_ports()

    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the address ranges and port of the remote memory.

        :returns: A one-element list holding
            ``(physical_address_ranges, port)``.
        """
        # The constructor and ``_connect_things`` both reach the ranges and
        # the port through the interface's ``remote_memory`` attribute; the
        # previous code read them from the interface object itself.
        bridge = self.get_remote_memory().remote_memory
        return [(bridge.physical_address_ranges, bridge.port)]

    @overrides(RiscvAbstractDMBoard)
    def _incorporate_memory_range(self):
        """Hand the local address range to the local memory."""
        self.get_local_memory().set_memory_range(self._local_mem_ranges)

    @overrides(RiscvAbstractDMBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory.
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.
        """

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)
        # we need to find whether there is any external latency. if yes, then
        # add xbar to add this latency.

        if self.get_remote_memory().is_xbar_required():
            self.remote_link = NoncoherentXBar(
                frontend_latency=0,
                forward_latency=0,
                response_latency=self.get_remote_memory()._remote_memory_latency,
                width=64,
            )
            # connect the remote memory port to the remote link
            self.get_remote_memory().remote_memory.port = (
                self.remote_link.mem_side_ports
            )
            # The remote link is then connected to the membus
            self.get_cache_hierarchy().membus.mem_side_ports = (
                self.remote_link.cpu_side_ports
            )
        else:
            # Connect the external memory directly to the motherboard.
            self.get_remote_memory().remote_memory.port = (
                self.get_cache_hierarchy().membus.mem_side_ports
            )

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    # This class used to define ``get_default_kernel_args`` twice. The later
    # definition replaced the earlier one (which used ``init=/bin/bash``), so
    # only the effective definition is kept.
    @overrides(RiscvAbstractDMBoard)
    def get_default_kernel_args(self) -> List[str]:
        """Return the default kernel command-line arguments."""
        return [
            "console=ttyS0",
            "root={root_value}",
            "init=/root/gem5-init.sh",
            "rw",
        ]

    @overrides(AbstractBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Creating an x86 board that can simulate more than 3 GB memory. 
+ +from m5.objects import ( + AddrRange, + VoltageDomain, + SrcClockDomain, + Terminal, + VncServer, + IOXBar, + BadAddr, + Port, + Pc, + AddrRange, + X86FsLinux, + Addr, + X86SMBiosBiosInformation, + X86IntelMPProcessor, + X86IntelMPIOAPIC, + X86IntelMPBus, + X86IntelMPBusHierarchy, + X86IntelMPIOIntAssignment, + X86E820Entry, + Bridge, + IOXBar, + IdeDisk, + CowDiskImage, + RawDiskImage, + BaseXBar, + Port, + OutgoingRequestBridge, +) + +import os +import m5 +from abc import ABCMeta +from gem5.components.boards.x86_board import X86Board +from gem5.components.boards.abstract_board import AbstractBoard +from gem5.components.processors.abstract_processor import AbstractProcessor +from gem5.components.memory.abstract_memory_system import AbstractMemorySystem +from gem5.components.cachehierarchies.abstract_cache_hierarchy import ( + AbstractCacheHierarchy, +) +from gem5.utils.override import overrides + +from typing import List, Sequence, Tuple + +class X86AbstractDMBoard(X86Board): + """ + A high-level X86 board that can zNUMA-capable systems with a remote + memories. This board is extended from the ArmBoard from Gem5 standard + library. This board assumes that you will be booting Linux. This board can + be used to do disaggregated ARM system research while accelerating the + simulation using kvm. + + The reason this board was created was to leverage the features X86 ISA has + over ARM and RISCV, e.g. memory hotplug and ACPI driver support in gem5. + + **Limitations** + * kvm is only supported in a gem5-only setup. + """ + __metaclass__ = ABCMeta + + def __init__( + self, + clk_freq: str, + processor: AbstractProcessor, + cache_hierarchy: AbstractCacheHierarchy, + local_memory: AbstractMemorySystem, + remote_memory_addr_range: AddrRange, + ) -> None: + # The structure of this board is similar to the RISCV DM board. 
+ self._localMemory = local_memory + # remote_memory can either be an interface or an external memory + # This abstract disaggregated memory does not know what this type of + # memory is. it only needs to know the address range for this memory. + # from this range, we'll figure out the size. + self._remoteMemoryAddrRange = remote_memory_addr_range + super().__init__( + clk_freq=clk_freq, + processor=processor, + cache_hierarchy=cache_hierarchy, + memory=local_memory, + ) + self.local_memory = local_memory + + @overrides(X86Board) + def get_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + + :returns: The memory system. + """ + raise NotImplementedError + + def get_local_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The local memory system. + """ + return self._localMemory + + def get_remote_memory(self) -> "AbstractMemory": + """Get the memory (RAM) connected to the board. + :returns: The remote memory system. + """ + # raise Exception("cannot call this method") + return self._remoteMemory + + @overrides(X86Board) + def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + return self.get_local_memory().get_mem_ports() + + def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]: + # return self.get_remote_memory().get_mem_ports() + return [ + ( + self.get_remote_memory().physical_address_ranges, + self.get_remote_memory().port, + ) + ] + + @overrides(X86Board) + def _setup_memory_ranges(self): + # Need to create 2 entries for the memory ranges. + # make the local memory as 3 gb for now. 
+ + self.get_local_memory().set_memory_range([AddrRange(start=0x0, size="1GiB")]) + self.get_remote_memory().set_memory_range([AddrRange(start=0x40000000, size="1GiB")]) + # remote_memory = self.get_remote_memory() + + # local_mem_size = local_memory.get_size() + # remote_mem_size = remote_memory.get_size() + + # self._local_mem_ranges = [ + # "2GiB" + # # AddrRange(local_mem_size) + # ] + + # The remote memory starts where the local memory ends. Therefore it + # has to be offset by the local memory's size. + # self._remote_mem_ranges = [ + # AddrRange(start=0x100000000, size=remote_mem_size) + # # AddrRange(remote_mem_size) + # ] + # Keep it under 2 GB for this case. Each slice of memory is 1 GB. + + self.mem_ranges = [ + AddrRange(start=0x0, size="1GiB"), + AddrRange(start=0x40000000, size="1GiB"), + # AddrRange(0xC0000000, size=0x100000), # For I/0 + ] + + @overrides(X86Board) + def get_default_kernel_args(self) -> List[str]: + return [ + "earlyprintk=ttyS0", + "console=ttyS0", + "lpj=7999923", + "root=/dev/sda1", + # "numa=fake=2", + # "movable_node", + # "kernelcore=1G", + # "mem=1G", + "memmap=1G@0x0" + # "init=/bin/bash", + ] + + @overrides(X86Board) + def _setup_io_devices(self): + """Sets up the x86 IO devices. + + Note: This is mostly copy-paste from prior X86 FS setups. Some of it + may not be documented and there may be bugs. + """ + + # Constants similar to x86_traits.hh + IO_address_space_base = 0x8000000000000000 + pci_config_address_space_base = 0xC000000000000000 + interrupts_address_space_base = 0xA000000000000000 + APIC_range_size = 1 << 12 + + # Setup memory system specific settings. 
+ if self.get_cache_hierarchy().is_ruby(): + self.pc.attachIO(self.get_io_bus(), [self.pc.south_bridge.ide.dma]) + else: + self.bridge = Bridge(delay="50ns") + self.bridge.mem_side_port = self.get_io_bus().cpu_side_ports + self.bridge.cpu_side_port = ( + self.get_cache_hierarchy().get_mem_side_port() + ) + + # # Constants similar to x86_traits.hh + IO_address_space_base = 0x8000000000000000 + pci_config_address_space_base = 0xC000000000000000 + interrupts_address_space_base = 0xA000000000000000 + APIC_range_size = 1 << 12 + + self.bridge.ranges = [ + AddrRange(0xC0000000, 0xFFFF0000), + AddrRange( + IO_address_space_base, interrupts_address_space_base - 1 + ), + AddrRange(pci_config_address_space_base, Addr.max), + ] + + self.apicbridge = Bridge(delay="50ns") + self.apicbridge.cpu_side_port = self.get_io_bus().mem_side_ports + self.apicbridge.mem_side_port = ( + self.get_cache_hierarchy().get_cpu_side_port() + ) + self.apicbridge.ranges = [ + AddrRange( + interrupts_address_space_base, + interrupts_address_space_base + + self.get_processor().get_num_cores() * APIC_range_size + - 1, + ) + ] + self.pc.attachIO(self.get_io_bus()) + + # Add in a Bios information structure. 
+ self.workload.smbios_table.structures = [X86SMBiosBiosInformation()] + + # Set up the Intel MP table + base_entries = [] + ext_entries = [] + for i in range(self.get_processor().get_num_cores()): + bp = X86IntelMPProcessor( + local_apic_id=i, + local_apic_version=0x14, + enable=True, + bootstrap=(i == 0), + ) + base_entries.append(bp) + + io_apic = X86IntelMPIOAPIC( + id=self.get_processor().get_num_cores(), + version=0x11, + enable=True, + address=0xFEC00000, + ) + + self.pc.south_bridge.io_apic.apic_id = io_apic.id + base_entries.append(io_apic) + pci_bus = X86IntelMPBus(bus_id=0, bus_type="PCI ") + base_entries.append(pci_bus) + isa_bus = X86IntelMPBus(bus_id=1, bus_type="ISA ") + base_entries.append(isa_bus) + connect_busses = X86IntelMPBusHierarchy( + bus_id=1, subtractive_decode=True, parent_bus=0 + ) + ext_entries.append(connect_busses) + + pci_dev4_inta = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=0, + source_bus_irq=0 + (4 << 2), + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=16, + ) + + base_entries.append(pci_dev4_inta) + + def assignISAInt(irq, apicPin): + + assign_8259_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="ExtInt", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=0, + ) + base_entries.append(assign_8259_to_apic) + + assign_to_apic = X86IntelMPIOIntAssignment( + interrupt_type="INT", + polarity="ConformPolarity", + trigger="ConformTrigger", + source_bus_id=1, + source_bus_irq=irq, + dest_io_apic_id=io_apic.id, + dest_io_apic_intin=apicPin, + ) + base_entries.append(assign_to_apic) + + assignISAInt(0, 2) + assignISAInt(1, 1) + + for i in range(3, 15): + assignISAInt(i, i) + + self.workload.intel_mp_table.base_entries = base_entries + self.workload.intel_mp_table.ext_entries = ext_entries + + entries = [ + # Mark the first megabyte of memory as reserved 
+ X86E820Entry(addr=0, size="639kB", range_type=1), + X86E820Entry(addr=0x9FC00, size="385kB", range_type=2), + # Mark the rest of physical memory as available + X86E820Entry( + addr=0x100000, + size=f"{self.mem_ranges[0].size() - 0x100000:d}B", + range_type=1, + ) + ] + # Reserve the last 16kB of the 32-bit address space for m5ops + entries.append( + X86E820Entry( + addr=0x40000000, + size="%dB" % (self.mem_ranges[0].size()), + range_type=5, + ) + ) + entries.append(X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2)) + # X86E820Entry( + # addr=0x100000000, + # size=f"{self.mem_ranges[1].size()}B", + # range_type=1, + # ), + # print("____", self.mem_ranges[0].size() + 0x100000) + + # Reserve the last 16kB of the 32-bit address space for m5ops + # entries.append( + # X86E820Entry(addr=0xFFFF0000, size="64kB", range_type=2) + # ) + + print(entries) + print() + self.workload.e820_table.entries = entries diff --git a/disaggregated_memory/boards/x86_gem5_board.py b/disaggregated_memory/boards/x86_gem5_board.py new file mode 100644 index 0000000000..c9fa77ea80 --- /dev/null +++ b/disaggregated_memory/boards/x86_gem5_board.py @@ -0,0 +1,199 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
class X86Gem5DMBoard(X86AbstractDMBoard):
    """X86 disaggregated-memory board whose remote memory lives in gem5.

    The board holds a local and a remote memory system. Both are
    incorporated into the board, and the links of a `RemoteChanneledMemory`
    are attached to the cache hierarchy's memory bus.
    """

    # Python 2 style metaclass declaration; Python 3 ignores this attribute.
    __metaclass__ = ABCMeta

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory: AbstractMemorySystem,
        cache_hierarchy: AbstractCacheHierarchy,
        remote_memory_addr_range: AddrRange = None,
    ) -> None:
        """
        :param clk_freq: The clock frequency of the board.
        :param processor: The processor of the board.
        :param local_memory: The local memory system.
        :param remote_memory: The remote memory system.
        :param cache_hierarchy: The cache hierarchy of the board.
        :param remote_memory_addr_range: The address range of the remote
            memory. When omitted, the range starts at
            0x80000000 + the local memory size and spans the remote
            memory's size.
        """
        self._localMemory = local_memory
        self._remoteMemory = remote_memory
        if remote_memory_addr_range is None:
            remote_memory_addr_range = AddrRange(
                0x80000000 + self._localMemory.get_size(),
                size=remote_memory.get_size(),
            )
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            local_memory=local_memory,
            remote_memory_addr_range=remote_memory_addr_range,
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = local_memory
        self.remote_memory = remote_memory

    @overrides(X86AbstractDMBoard)
    def get_remote_memory(self) -> "AbstractMemory":
        """Get the remote memory connected to the board.

        :returns: The remote memory system.
        """
        return self._remoteMemory

    @overrides(X86AbstractDMBoard)
    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the memory ports of the remote memory.

        :returns: The value of the remote memory's `get_mem_ports`.
        """
        return self.get_remote_memory().get_mem_ports()

    @overrides(X86AbstractDMBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory (local, then remote).
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.

        :raises Exception: if this function was already called.
        """
        # `fatal` is not imported at the top of this file, so it is imported
        # here to keep the Ruby error path from raising a NameError.
        from m5.util import fatal

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)
        self.get_remote_memory().incorporate_memory(self)

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)
            if self.get_cache_hierarchy().is_ruby():
                fatal(
                    "remote memory is only supported in classic caches at "
                    + "the moment!"
                )
            # Connect the remote links to the board's memory bus.
            if isinstance(self.get_remote_memory(), RemoteChanneledMemory):
                for link in self.get_remote_memory().remote_links:
                    self.get_cache_hierarchy().membus.mem_side_ports = (
                        link.cpu_side_ports
                    )

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(X86AbstractDMBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate"""
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
        self.get_remote_memory()._post_instantiate()
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class X86SstDMBoard(X86AbstractDMBoard):
    """X86 disaggregated-memory board whose remote memory is external.

    The remote memory is defined on SST's side; gem5 only holds an interface
    object for it and wires that interface's port to the memory bus,
    optionally through a crossbar that models the remote latency.
    """

    # Python 2 style metaclass declaration; Python 3 ignores this attribute.
    __metaclass__ = ABCMeta

    def __init__(
        self,
        clk_freq: str,
        processor: AbstractProcessor,
        local_memory: AbstractMemorySystem,
        remote_memory: "ExternalRemoteMemoryInterface",
        cache_hierarchy: AbstractCacheHierarchy,
        remote_memory_addr_range: AddrRange = None,
    ) -> None:
        """
        :param clk_freq: The clock frequency of the board.
        :param processor: The processor of the board.
        :param local_memory: The local memory system.
        :param remote_memory: The interface to the external remote memory.
        :param cache_hierarchy: The cache hierarchy of the board.
        :param remote_memory_addr_range: Forwarded unchanged to the base
            class.
        """
        self._localMemory = local_memory
        # Since the remote memory is defined in SST's side, we only need the
        # size of this memory while setting up stuff from gem5's side.
        self._remoteMemory = remote_memory
        # The remote memory is either set up with a size or an address range.
        # If no address range was set, place the memory at
        # 0x80000000 + the local memory size, spanning its own size.
        if not self._remoteMemory.get_set_using_addr_ranges():
            self._remoteMemory.remote_memory.physical_address_ranges = [
                AddrRange(
                    0x80000000 + self._localMemory.get_size(),
                    size=remote_memory.get_size(),
                )
            ]
        # We need a size as a string to set up this memory.
        self._remoteMemorySize = self._remoteMemory.get_size()
        # NOTE(review): when the caller omits `remote_memory_addr_range`,
        # None is forwarded even though a range was computed above --
        # confirm the base class tolerates that.
        super().__init__(
            clk_freq=clk_freq,
            processor=processor,
            local_memory=local_memory,
            remote_memory_addr_range=remote_memory_addr_range,
            cache_hierarchy=cache_hierarchy,
        )
        self.local_memory = local_memory
        self.remote_memory = self._remoteMemory.remote_memory

    @overrides(X86AbstractDMBoard)
    def get_remote_memory(self) -> "AbstractMemory":
        """Get the remote memory connected to the board.

        :returns: The external remote memory interface given to the
            constructor.
        """
        return self._remoteMemory

    @overrides(X86AbstractDMBoard)
    def get_remote_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Get the port of the remote memory.

        :returns: A one-element list pairing the remote memory's
            `physical_address_ranges` with its `port`.
        """
        # NOTE(review): elsewhere in this class these attributes are reached
        # through `.remote_memory` (e.g. `.remote_memory.port`); confirm the
        # interface object exposes them directly as well.
        remote_memory = self.get_remote_memory()
        return [
            (
                remote_memory.physical_address_ranges,
                remote_memory.port,
            )
        ]

    @overrides(X86AbstractDMBoard)
    def _connect_things(self) -> None:
        """Connects all the components to the board.

        The order of this board is always:

        1. Connect the memory (local, then the external remote port).
        2. Connect the cache hierarchy.
        3. Connect the processor.

        Developers may build upon this assumption when creating components.

        Notes
        -----

        * The processor is incorporated after the cache hierarchy due to a bug
        noted here: https://gem5.atlassian.net/browse/GEM5-1113. Until this
        bug is fixed, this ordering must be maintained.
        * Once this function is called `_connect_things_called` *must* be set
        to `True`.

        :raises Exception: if this function was already called.
        """
        # `fatal` is not imported at the top of this file, so it is imported
        # here to keep the Ruby error path from raising a NameError.
        from m5.util import fatal

        if self._connect_things_called:
            raise Exception(
                "The `_connect_things` function has already been called."
            )

        # Incorporate the memory into the motherboard.
        self.get_local_memory().incorporate_memory(self)

        # If there is any external latency, model it with a crossbar placed
        # between the memory bus and the remote memory port.
        if self.get_remote_memory().is_xbar_required():
            remote_latency = self.get_remote_memory()._remote_memory_latency
            self.remote_link = NoncoherentXBar(
                frontend_latency=0,
                forward_latency=0,
                response_latency=remote_latency,
                width=64,
            )
            # Connect the remote memory port to the remote link.
            self.get_remote_memory().remote_memory.port = (
                self.remote_link.mem_side_ports
            )
            # The remote link is then connected to the membus.
            self.get_cache_hierarchy().membus.mem_side_ports = (
                self.remote_link.cpu_side_ports
            )
        else:
            # Connect the external memory directly to the motherboard.
            self.get_remote_memory().remote_memory.port = (
                self.get_cache_hierarchy().membus.mem_side_ports
            )

        # Incorporate the cache hierarchy for the motherboard.
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy().incorporate_cache(self)
            if self.get_cache_hierarchy().is_ruby():
                fatal(
                    "remote memory is only supported in classic caches at "
                    + "the moment!"
                )
            # Connect the remote links to the board's memory bus.
            if isinstance(self.get_remote_memory(), RemoteChanneledMemory):
                for link in self.get_remote_memory().remote_links:
                    self.get_cache_hierarchy().membus.mem_side_ports = (
                        link.cpu_side_ports
                    )

        # Incorporate the processor into the motherboard.
        self.get_processor().incorporate_processor(self)

        self._connect_things_called = True

    @overrides(X86AbstractDMBoard)
    def _post_instantiate(self):
        """Called to set up anything needed after m5.instantiate.

        Only the processor, the cache hierarchy and the local memory are
        post-instantiated; the remote memory is not touched here.
        """
        self.get_processor()._post_instantiate()
        if self.get_cache_hierarchy():
            self.get_cache_hierarchy()._post_instantiate()
        self.get_local_memory()._post_instantiate()
class ClassicPrivateL1PrivateL2SharedL3DMCache(
    PrivateL1PrivateL2SharedL3CacheHierarchy
):
    """Classic private-L1/private-L2/shared-L3 hierarchy for DM boards.

    In addition to the caches, the memory controllers of both the local and
    the remote memory of the board are attached to the memory bus.
    """

    def __init__(
        self,
        l1d_size: str,
        l1i_size: str,
        l2_size: str,
        l3_size: str,
        l3_assoc: int = 16,
    ):
        """
        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
        :param l2_size: The size of each private L2 Cache (e.g., "256kB").
        :param l3_size: The size of the shared L3 Cache.
        :param l3_assoc: The associativity of the shared L3 Cache.
        """
        super().__init__(
            l1d_size=l1d_size,
            l1i_size=l1i_size,
            l2_size=l2_size,
            l3_size=l3_size,
            l3_assoc=l3_assoc,
        )

    @overrides(PrivateL1PrivateL2SharedL3CacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Build the caches and connect them to the board.

        :param board: The board to connect to. It must provide
            `get_local_memory` and `get_remote_memory`.
        """
        num_cores = board.get_processor().get_num_cores()

        # Set up the system port for functional access from the simulator.
        board.connect_system_port(self.membus.cpu_side_ports)

        # Both the local and the remote controllers hang off the membus.
        for cntr in board.get_local_memory().get_memory_controllers():
            cntr.port = self.membus.mem_side_ports

        for cntr in board.get_remote_memory().get_memory_controllers():
            cntr.port = self.membus.mem_side_ports

        self.l1icaches = [
            L1ICache(size=self._l1i_size) for _ in range(num_cores)
        ]
        self.l1dcaches = [
            L1DCache(size=self._l1d_size) for _ in range(num_cores)
        ]
        self.l2buses = [L2XBar() for _ in range(num_cores)]
        self.l2caches = [
            L2Cache(size=self._l2_size) for _ in range(num_cores)
        ]
        # The L3 reuses the L2Cache class with the L3 parameters.
        self.l3cache = L2Cache(
            size=self._l3_size,
            assoc=self._l3_assoc,
            tag_latency=self._l3_tag_latency,
            data_latency=self._l3_data_latency,
            response_latency=self._l3_response_latency,
            mshrs=self._l3_mshrs,
            tgts_per_mshr=self._l3_tgts_per_mshr,
        )
        # There is only one l3 bus, which connects l3 to the membus
        self.l3bus = L2XBar()
        # ITLB Page walk caches
        self.iptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]
        # DTLB Page walk caches
        self.dptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]

        if board.has_coherent_io():
            self._setup_io_cache(board)

        for i, cpu in enumerate(board.get_processor().get_cores()):
            l2bus = self.l2buses[i]

            cpu.connect_icache(self.l1icaches[i].cpu_side)
            cpu.connect_dcache(self.l1dcaches[i].cpu_side)

            self.l1icaches[i].mem_side = l2bus.cpu_side_ports
            self.l1dcaches[i].mem_side = l2bus.cpu_side_ports
            self.iptw_caches[i].mem_side = l2bus.cpu_side_ports
            self.dptw_caches[i].mem_side = l2bus.cpu_side_ports

            l2bus.mem_side_ports = self.l2caches[i].cpu_side

            # Every private L2 feeds the single shared L3 bus.
            self.l2caches[i].mem_side = self.l3bus.cpu_side_ports

            cpu.connect_walker_ports(
                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
            )

            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = self.membus.mem_side_ports
                int_resp_port = self.membus.cpu_side_ports
                cpu.connect_interrupt(int_req_port, int_resp_port)
            else:
                cpu.connect_interrupt()

        self.l3bus.mem_side_ports = self.l3cache.cpu_side
        self.membus.cpu_side_ports = self.l3cache.mem_side


class ClassicPrivateL1PrivateL2DMCache(PrivateL1PrivateL2CacheHierarchy):
    """Classic private-L1/private-L2 hierarchy for DM boards.

    In addition to the caches, the memory controllers of both the local and
    the remote memory of the board are attached to the memory bus.
    """

    def __init__(
        self,
        l1d_size: str,
        l1i_size: str,
        l2_size: str,
    ) -> None:
        """
        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
        :type l1d_size: str
        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
        :type l1i_size: str
        :param l2_size: The size of the L2 Cache (e.g., "256kB").
        :type l2_size: str
        """
        # Pass by keyword: the base class takes the data cache size first,
        # so a positional (l1i, l1d, l2) call would swap the two L1 sizes.
        super().__init__(
            l1d_size=l1d_size,
            l1i_size=l1i_size,
            l2_size=l2_size,
        )

    @overrides(PrivateL1PrivateL2CacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Build the caches and connect them to the board.

        :param board: The board to connect to. It must provide
            `get_local_memory` and `get_remote_memory`.
        """
        num_cores = board.get_processor().get_num_cores()

        # Set up the system port for functional access from the simulator.
        board.connect_system_port(self.membus.cpu_side_ports)

        # Both the local and the remote controllers hang off the membus.
        for cntr in board.get_local_memory().get_memory_controllers():
            cntr.port = self.membus.mem_side_ports

        for cntr in board.get_remote_memory().get_memory_controllers():
            cntr.port = self.membus.mem_side_ports

        self.l1icaches = [
            L1ICache(size=self._l1i_size) for _ in range(num_cores)
        ]
        self.l1dcaches = [
            L1DCache(size=self._l1d_size) for _ in range(num_cores)
        ]
        self.l2buses = [L2XBar() for _ in range(num_cores)]
        self.l2caches = [
            L2Cache(size=self._l2_size) for _ in range(num_cores)
        ]
        # ITLB Page walk caches
        self.iptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]
        # DTLB Page walk caches
        self.dptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]

        if board.has_coherent_io():
            self._setup_io_cache(board)

        for i, cpu in enumerate(board.get_processor().get_cores()):
            l2bus = self.l2buses[i]

            cpu.connect_icache(self.l1icaches[i].cpu_side)
            cpu.connect_dcache(self.l1dcaches[i].cpu_side)

            self.l1icaches[i].mem_side = l2bus.cpu_side_ports
            self.l1dcaches[i].mem_side = l2bus.cpu_side_ports
            self.iptw_caches[i].mem_side = l2bus.cpu_side_ports
            self.dptw_caches[i].mem_side = l2bus.cpu_side_ports

            l2bus.mem_side_ports = self.l2caches[i].cpu_side

            # Each private L2 connects straight to the membus.
            self.membus.cpu_side_ports = self.l2caches[i].mem_side

            cpu.connect_walker_ports(
                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
            )

            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = self.membus.mem_side_ports
                int_resp_port = self.membus.cpu_side_ports
                cpu.connect_interrupt(int_req_port, int_resp_port)
            else:
                cpu.connect_interrupt()
b/disaggregated_memory/cachehierarchies/dm_caches_sst.py new file mode 100644 index 0000000000..00edf5d69e --- /dev/null +++ b/disaggregated_memory/cachehierarchies/dm_caches_sst.py @@ -0,0 +1,218 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class ClassicPrivateL1PrivateL2SharedL3SstDMCache(
    PrivateL1PrivateL2SharedL3CacheHierarchy
):
    """Classic private-L1/private-L2/shared-L3 hierarchy for SST DM boards.

    Only the local memory controllers are attached to the memory bus here;
    the remote memory controller ports are not connected by this class.
    """

    def __init__(
        self,
        l1d_size: str,
        l1i_size: str,
        l2_size: str,
        l3_size: str,
        l3_assoc: int = 16,
    ):
        """
        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
        :param l2_size: The size of each private L2 Cache (e.g., "256kB").
        :param l3_size: The size of the shared L3 Cache.
        :param l3_assoc: The associativity of the shared L3 Cache.
        """
        super().__init__(
            l1d_size=l1d_size,
            l1i_size=l1i_size,
            l2_size=l2_size,
            l3_size=l3_size,
            l3_assoc=l3_assoc,
        )

    @overrides(PrivateL1PrivateL2SharedL3CacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Build the caches and connect them to the board.

        :param board: The board to connect to. It must provide
            `get_local_memory`.
        """
        num_cores = board.get_processor().get_num_cores()

        # Set up the system port for functional access from the simulator.
        board.connect_system_port(self.membus.cpu_side_ports)

        for cntr in board.get_local_memory().get_memory_controllers():
            cntr.port = self.membus.mem_side_ports

        self.l1icaches = [
            L1ICache(size=self._l1i_size) for _ in range(num_cores)
        ]
        self.l1dcaches = [
            L1DCache(size=self._l1d_size) for _ in range(num_cores)
        ]
        self.l2buses = [L2XBar() for _ in range(num_cores)]
        self.l2caches = [
            L2Cache(size=self._l2_size) for _ in range(num_cores)
        ]
        # The L3 reuses the L2Cache class with the L3 parameters.
        self.l3cache = L2Cache(
            size=self._l3_size,
            assoc=self._l3_assoc,
            tag_latency=self._l3_tag_latency,
            data_latency=self._l3_data_latency,
            response_latency=self._l3_response_latency,
            mshrs=self._l3_mshrs,
            tgts_per_mshr=self._l3_tgts_per_mshr,
        )
        # There is only one l3 bus, which connects l3 to the membus
        self.l3bus = L2XBar()
        # ITLB Page walk caches
        self.iptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]
        # DTLB Page walk caches
        self.dptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]

        if board.has_coherent_io():
            self._setup_io_cache(board)

        for i, cpu in enumerate(board.get_processor().get_cores()):
            l2bus = self.l2buses[i]

            cpu.connect_icache(self.l1icaches[i].cpu_side)
            cpu.connect_dcache(self.l1dcaches[i].cpu_side)

            self.l1icaches[i].mem_side = l2bus.cpu_side_ports
            self.l1dcaches[i].mem_side = l2bus.cpu_side_ports
            self.iptw_caches[i].mem_side = l2bus.cpu_side_ports
            self.dptw_caches[i].mem_side = l2bus.cpu_side_ports

            l2bus.mem_side_ports = self.l2caches[i].cpu_side

            # Every private L2 feeds the single shared L3 bus.
            self.l2caches[i].mem_side = self.l3bus.cpu_side_ports

            cpu.connect_walker_ports(
                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
            )

            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = self.membus.mem_side_ports
                int_resp_port = self.membus.cpu_side_ports
                cpu.connect_interrupt(int_req_port, int_resp_port)
            else:
                cpu.connect_interrupt()

        self.l3bus.mem_side_ports = self.l3cache.cpu_side
        self.membus.cpu_side_ports = self.l3cache.mem_side


class ClassicPrivateL1PrivateL2SstDMCache(PrivateL1PrivateL2CacheHierarchy):
    """Classic private-L1/private-L2 hierarchy for SST DM boards.

    We need a specific version of DM caches for the external memory, which
    does not connect the remote memory controller ports directly.
    """

    def __init__(
        self,
        l1d_size: str,
        l1i_size: str,
        l2_size: str,
    ) -> None:
        """
        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
        :type l1d_size: str
        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
        :type l1i_size: str
        :param l2_size: The size of the L2 Cache (e.g., "256kB").
        :type l2_size: str
        """
        # Pass by keyword: the base class takes the data cache size first,
        # so a positional (l1i, l1d, l2) call would swap the two L1 sizes.
        super().__init__(
            l1d_size=l1d_size,
            l1i_size=l1i_size,
            l2_size=l2_size,
        )

    @overrides(PrivateL1PrivateL2CacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Build the caches and connect them to the board.

        :param board: The board to connect to. It must provide
            `get_local_memory`.
        """
        num_cores = board.get_processor().get_num_cores()

        # Set up the system port for functional access from the simulator.
        board.connect_system_port(self.membus.cpu_side_ports)

        for cntr in board.get_local_memory().get_memory_controllers():
            cntr.port = self.membus.mem_side_ports

        self.l1icaches = [
            L1ICache(size=self._l1i_size) for _ in range(num_cores)
        ]
        self.l1dcaches = [
            L1DCache(size=self._l1d_size) for _ in range(num_cores)
        ]
        self.l2buses = [L2XBar() for _ in range(num_cores)]
        self.l2caches = [
            L2Cache(size=self._l2_size) for _ in range(num_cores)
        ]
        # ITLB Page walk caches
        self.iptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]
        # DTLB Page walk caches
        self.dptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]

        if board.has_coherent_io():
            self._setup_io_cache(board)

        for i, cpu in enumerate(board.get_processor().get_cores()):
            l2bus = self.l2buses[i]

            cpu.connect_icache(self.l1icaches[i].cpu_side)
            cpu.connect_dcache(self.l1dcaches[i].cpu_side)

            self.l1icaches[i].mem_side = l2bus.cpu_side_ports
            self.l1dcaches[i].mem_side = l2bus.cpu_side_ports
            self.iptw_caches[i].mem_side = l2bus.cpu_side_ports
            self.dptw_caches[i].mem_side = l2bus.cpu_side_ports

            l2bus.mem_side_ports = self.l2caches[i].cpu_side

            # Each private L2 connects straight to the membus.
            self.membus.cpu_side_ports = self.l2caches[i].mem_side

            cpu.connect_walker_ports(
                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
            )

            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = self.membus.mem_side_ports
                int_resp_port = self.membus.cpu_side_ports
                cpu.connect_interrupt(int_req_port, int_resp_port)
            else:
                cpu.connect_interrupt()
b/disaggregated_memory/cachehierarchies/mi_example_dm_caches.py @@ -0,0 +1,191 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import (
    MIExampleCacheHierarchy,
)
from gem5.components.cachehierarchies.ruby.caches.mi_example.l1_cache import (
    L1Cache,
)
from gem5.components.cachehierarchies.ruby.caches.mi_example.dma_controller import (
    DMAController,
)
from gem5.components.cachehierarchies.ruby.caches.mi_example.directory import (
    Directory,
)
from gem5.components.cachehierarchies.ruby.topologies.simple_pt2pt import (
    SimplePt2Pt,
)

from gem5.components.cachehierarchies.abstract_cache_hierarchy import (
    AbstractCacheHierarchy,
)
from gem5.components.boards.abstract_board import AbstractBoard
from gem5.coherence_protocol import CoherenceProtocol
from gem5.isas import ISA
from gem5.utils.override import overrides
from gem5.utils.requires import requires
from m5.objects import RubySystem, RubySequencer, DMASequencer, RubyPortProxy


class MIExampleDMCache(MIExampleCacheHierarchy):
    """MI_example Ruby cache hierarchy for disaggregated-memory boards.

    In addition to one directory per local memory port, a directory is
    created for every port returned by ``board.get_remote_mem_ports()``.
    """

    def __init__(self, size: str, assoc: str) -> None:
        """
        :param size: The size of each cache in the hierarchy.
        :param assoc: The associativity of each cache.
        """
        super().__init__(size, assoc)

    @overrides(MIExampleCacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Build the Ruby system (L1 controllers, directories, optional DMA
        controllers) and connect it to the board.

        :param board: The board this cache hierarchy is incorporated into.
        """
        requires(coherence_protocol_required=CoherenceProtocol.MI_EXAMPLE)

        self.ruby_system = RubySystem()

        # Ruby's global network.
        self.ruby_system.network = SimplePt2Pt(self.ruby_system)

        # MI Example uses 5 virtual networks.
        self.ruby_system.number_of_virtual_networks = 5
        self.ruby_system.network.number_of_virtual_networks = 5

        # There is a single global list of all of the controllers to make it
        # easier to connect everything to the global network. This can be
        # customized depending on the topology/network requirements.
        # Create one controller for each L1 cache (and the cache mem obj.)
        # Create a single directory controller (Really the memory cntrl).
        self._controllers = []
        for i, core in enumerate(board.get_processor().get_cores()):
            cache = L1Cache(
                size=self._size,
                assoc=self._assoc,
                network=self.ruby_system.network,
                core=core,
                cache_line_size=board.get_cache_line_size(),
                target_isa=board.get_processor().get_isa(),
                clk_domain=board.get_clock_domain(),
            )

            cache.sequencer = RubySequencer(
                version=i,
                dcache=cache.cacheMemory,
                clk_domain=cache.clk_domain,
            )

            if board.has_io_bus():
                cache.sequencer.connectIOPorts(board.get_io_bus())

            cache.ruby_system = self.ruby_system

            # Instruction, data and page-walker traffic all enter Ruby
            # through the same sequencer.
            core.connect_icache(cache.sequencer.in_ports)
            core.connect_dcache(cache.sequencer.in_ports)

            core.connect_walker_ports(
                cache.sequencer.in_ports, cache.sequencer.in_ports
            )

            # Connect the interrupt ports
            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = cache.sequencer.interrupt_out_port
                int_resp_port = cache.sequencer.in_ports
                core.connect_interrupt(int_req_port, int_resp_port)
            else:
                core.connect_interrupt()

            self._controllers.append(cache)

        # Create the directory controllers: first for the local memory
        # ports, then for the remote memory ports. The getters are called
        # lazily, in that order, exactly as two separate loops would.
        self._directory_controllers = []
        for get_ports in (board.get_mem_ports, board.get_remote_mem_ports):
            for addr_range, port in get_ports():
                directory = Directory(
                    self.ruby_system.network,
                    board.get_cache_line_size(),
                    addr_range,
                    port,
                )
                directory.ruby_system = self.ruby_system
                self._directory_controllers.append(directory)

        # Create the DMA Controllers, if required.
        self._dma_controllers = []
        if board.has_dma_ports():
            for i, port in enumerate(board.get_dma_ports()):
                ctrl = DMAController(
                    self.ruby_system.network, board.get_cache_line_size()
                )
                ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port)

                ctrl.ruby_system = self.ruby_system
                ctrl.dma_sequencer.ruby_system = self.ruby_system

                self._dma_controllers.append(ctrl)

        self.ruby_system.num_of_sequencers = len(self._controllers) + len(
            self._dma_controllers
        )

        # Connect the controllers.
        self.ruby_system.controllers = self._controllers
        self.ruby_system.directory_controllers = self._directory_controllers

        if self._dma_controllers:
            self.ruby_system.dma_controllers = self._dma_controllers

        self.ruby_system.network.connectControllers(
            self._controllers
            + self._directory_controllers
            + self._dma_controllers
        )
        self.ruby_system.network.setup_buffers()

        # Set up a proxy port for the system_port. Used for load binaries and
        # other functional-only things.
        self.ruby_system.sys_port_proxy = RubyPortProxy()
        board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)
from gem5.components.cachehierarchies.classic.caches.l1dcache import L1DCache
from gem5.components.cachehierarchies.classic.caches.l1icache import L1ICache
from gem5.components.cachehierarchies.classic.caches.l2cache import L2Cache
from gem5.components.cachehierarchies.classic.caches.mmu_cache import MMUCache
from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
    PrivateL1PrivateL2CacheHierarchy,
)
from gem5.components.boards.abstract_board import AbstractBoard
from gem5.isas import ISA

from m5.objects import (
    Cache,
    L2XBar,
    BaseXBar,
    SystemXBar,
    BadAddr,
    Port,
)

from gem5.utils.override import overrides


class PrivateL1PrivateL2SharedL3CacheHierarchy(
    PrivateL1PrivateL2CacheHierarchy
):
    """
    A cache setup where each core has a private L1 Data and Instruction Cache
    and a private L2 cache, and all cores share a single L3 cache that sits
    between the L2 caches and the memory bus.
    """

    def __init__(
        self,
        l1d_size: str,
        l1i_size: str,
        l2_size: str,
        l3_size: str,
        l3_assoc: int = 16,
    ) -> None:
        """
        :param l1d_size: The size of the L1 Data Cache (e.g., "32kB").
        :type l1d_size: str
        :param l1i_size: The size of the L1 Instruction Cache (e.g., "32kB").
        :type l1i_size: str
        :param l2_size: The size of the L2 Cache (e.g., "256kB").
        :type l2_size: str
        :param l3_size: The size of the shared L3 Cache (e.g., "1MiB").
        :type l3_size: str
        :param l3_assoc: The associativity of the shared L3 Cache.
        :type l3_assoc: int
        """
        super().__init__(
            l1d_size=l1d_size,
            l1i_size=l1i_size,
            l2_size=l2_size,
        )

        self._l3_size = l3_size
        self._l3_assoc = l3_assoc
        # Fixed L3 timing and MSHR parameters used when the cache is built.
        self._l3_tag_latency = 20
        self._l3_data_latency = 20
        self._l3_response_latency = 40
        self._l3_mshrs = 32
        self._l3_tgts_per_mshr = 12

    @overrides(PrivateL1PrivateL2CacheHierarchy)
    def incorporate_cache(self, board: AbstractBoard) -> None:
        """Instantiate the caches and wire cores -> L1 -> L2 -> L3 -> membus.

        :param board: The board this cache hierarchy is incorporated into.
        """
        # Set up the system port for functional access from the simulator.
        board.connect_system_port(self.membus.cpu_side_ports)

        for _, port in board.get_memory().get_mem_ports():
            self.membus.mem_side_ports = port

        num_cores = board.get_processor().get_num_cores()

        self.l1icaches = [
            L1ICache(size=self._l1i_size) for _ in range(num_cores)
        ]
        self.l1dcaches = [
            L1DCache(size=self._l1d_size) for _ in range(num_cores)
        ]
        self.l2buses = [L2XBar() for _ in range(num_cores)]
        self.l2caches = [L2Cache(size=self._l2_size) for _ in range(num_cores)]
        # The shared L3 reuses the L2Cache class with its own parameters.
        self.l3cache = L2Cache(
            size=self._l3_size,
            assoc=self._l3_assoc,
            tag_latency=self._l3_tag_latency,
            data_latency=self._l3_data_latency,
            response_latency=self._l3_response_latency,
            mshrs=self._l3_mshrs,
            tgts_per_mshr=self._l3_tgts_per_mshr,
        )
        # There is only one l3 bus, which connects l3 to the membus
        self.l3bus = L2XBar()
        # ITLB Page walk caches
        self.iptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]
        # DTLB Page walk caches
        self.dptw_caches = [MMUCache(size="8KiB") for _ in range(num_cores)]

        if board.has_coherent_io():
            self._setup_io_cache(board)

        for i, cpu in enumerate(board.get_processor().get_cores()):
            cpu.connect_icache(self.l1icaches[i].cpu_side)
            cpu.connect_dcache(self.l1dcaches[i].cpu_side)

            # L1s and page-walk caches of a core share that core's L2 bus.
            self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports
            self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports
            self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
            self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports

            self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side

            # Every private L2 feeds the single shared L3 bus.
            self.l2caches[i].mem_side = self.l3bus.cpu_side_ports

            cpu.connect_walker_ports(
                self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
            )

            if board.get_processor().get_isa() == ISA.X86:
                int_req_port = self.membus.mem_side_ports
                int_resp_port = self.membus.cpu_side_ports
                cpu.connect_interrupt(int_req_port, int_resp_port)
            else:
                cpu.connect_interrupt()

        # The shared L3 is connected once, after all cores are wired up.
        self.l3bus.mem_side_ports = self.l3cache.cpu_side
        self.membus.cpu_side_ports = self.l3cache.mem_side
"""
This script shows an example of running a full system ARM Ubuntu boot
simulation using the gem5 library. The simulation uses 1 KVM CPU core, a local
memory and a remote memory (with added latency), and executes `STREAM` bound
to NUMA node 0, interleaved across nodes 0 and 1, and bound to node 1. The
simulation ends when the guest executes `m5 exit`.

This config works if the host ARM machine supports KVM. The
PR https://github.com/gem5/gem5/pull/725 is needed to be present in the source.

Limitations:
This only works with VExpress_GEM5_V1 and bootloader-v1
"""

import os
import sys
from typing import Optional

# all the source files are one directory above.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
)

import m5
from m5.objects import Root, VExpress_GEM5_V1, ArmDefaultRelease

from boards.arm_gem5_board import ArmGem5DMBoard
from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2SharedL3DMCache
from memories.remote_memory import RemoteChanneledMemory
from gem5.utils.requires import requires
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.resource import CustomResource, DiskImageResource

# This runs a check to ensure the gem5 binary is compiled for ARM.
requires(isa_required=ISA.ARM)


# defining a new type of memory with latency added. This memory interface can
# be used as a remote memory interface to simulate disaggregated memory.
def RemoteDualChannelDDR4_2400(
    size: Optional[str] = None, remote_offset_latency=300
) -> AbstractMemorySystem:
    """
    A dual channel memory system using DDR4_2400_8x8 based DIMM with an
    additional remote access latency.

    :param size: The size of the memory, forwarded to RemoteChanneledMemory.
    :param remote_offset_latency: The extra latency of the remote memory,
        forwarded to RemoteChanneledMemory.
    """
    # NOTE(review): the positional 2 and 64 are presumably the number of
    # channels and the interleaving size -- confirm against
    # RemoteChanneledMemory.
    return RemoteChanneledMemory(
        DDR4_2400_8x8,
        2,
        64,
        size=size,
        remote_offset_latency=remote_offset_latency,
    )


# Here we setup the parameters of the l1, l2 and shared l3 caches.
cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3DMCache(
    l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB"
)
# Memory: Dual Channel DDR4 2400 DRAM device.
local_memory = DualChannelDDR4_2400(size="1GiB")
# The remote memory can either be a simple Memory Interface, which is from a
# different memory range or it can be a Remote Memory Range, which has an
# inherent delay while performing reads and writes into that memory. For simple
# memory, use any MemInterfaces available in gem5 standard library. For remote
# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this
# config script to extend any existing MemInterface class and add latency value
# to that memory.
remote_memory = RemoteDualChannelDDR4_2400(
    size="1GB", remote_offset_latency=750
)
# Here we setup the processor. We use a simple processor with one KVM core.
processor = SimpleProcessor(cpu_type=CPUTypes.KVM, isa=ISA.ARM, num_cores=1)
release = ArmDefaultRelease()
platform = VExpress_GEM5_V1()
# Here we setup the board which allows us to do Full-System ARM simulations.
board = ArmGem5DMBoard(
    clk_freq="3GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
    release=release,
    platform=platform,
)

# The STREAM invocation that is run under each NUMA policy.
stream_cmd = (
    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/"
    "stream.hw.m5 1000000;"
)
cmd = [
    "mount -t sysfs - /sys;",
    "mount -t proc - /proc;",
    "numastat;",
    "numactl --membind=0 -- " + stream_cmd,
    "numastat;",
    "numactl --interleave=0,1 -- " + stream_cmd,
    "numastat;",
    "numactl --membind=1 -- " + stream_cmd,
    "numastat;",
    "m5 exit;",
]

# NOTE: the kernel, bootloader and disk image paths below are specific to the
# machine this config was written on; adjust them to your setup.
board.set_kernel_disk_workload(
    # NUMA, SPARSEMEM and HOTPLUG enabled kernel.
    kernel=CustomResource("/home/kaustavg/kernel/arm/linux-6.7/vmlinux"),
    bootloader=CustomResource(
        "/home/kaustavg/.cache/gem5/arm64-bootloader"
    ),
    disk_image=DiskImageResource(
        "/home/kaustavg/disk-images/arm/arm64sve-hpc-2204-20230526-numa.img",
        root_partition="1",
    ),
    readfile_contents=" ".join(cmd),
)
# This script boots two NUMA nodes (local and remote memory) in a full system
# simulation that runs entirely inside gem5. The simulation exits after
# displaying numastat information on the terminal, which can be viewed from
# board.terminal.
simulator = Simulator(board=board)
simulator.run()
"""
This script shows an example of running a full system ARM Ubuntu boot
simulation using the gem5 library. The simulation uses 1 ATOMIC CPU core, a
local memory and a remote memory (with added latency), and executes `STREAM`
bound to NUMA node 0, interleaved across nodes 0 and 1, and bound to node 1.
The simulation ends when the guest executes `m5 exit`.
"""

import os
import sys
from typing import Optional

# all the source files are one directory above.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
)

import m5
from m5.objects import Root

from boards.arm_gem5_board import ArmGem5DMBoard
from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2SharedL3DMCache
from memories.remote_memory import RemoteChanneledMemory
from gem5.utils.requires import requires
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.resource import CustomResource, DiskImageResource

# This runs a check to ensure the gem5 binary is compiled for ARM.
requires(isa_required=ISA.ARM)


# defining a new type of memory with latency added. This memory interface can
# be used as a remote memory interface to simulate disaggregated memory.
def RemoteDualChannelDDR4_2400(
    size: Optional[str] = None, remote_offset_latency=300
) -> AbstractMemorySystem:
    """
    A dual channel memory system using DDR4_2400_8x8 based DIMM with an
    additional remote access latency.

    :param size: The size of the memory, forwarded to RemoteChanneledMemory.
    :param remote_offset_latency: The extra latency of the remote memory,
        forwarded to RemoteChanneledMemory.
    """
    # NOTE(review): the positional 2 and 64 are presumably the number of
    # channels and the interleaving size -- confirm against
    # RemoteChanneledMemory.
    return RemoteChanneledMemory(
        DDR4_2400_8x8,
        2,
        64,
        size=size,
        remote_offset_latency=remote_offset_latency,
    )


# Here we setup the parameters of the l1, l2 and shared l3 caches.
cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3DMCache(
    l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB"
)
# Memory: Dual Channel DDR4 2400 DRAM device.
local_memory = DualChannelDDR4_2400(size="1GiB")
# The remote memory can either be a simple Memory Interface, which is from a
# different memory range or it can be a Remote Memory Range, which has an
# inherent delay while performing reads and writes into that memory. For simple
# memory, use any MemInterfaces available in gem5 standard library. For remote
# memory, please refer to the `RemoteDualChannelDDR4_2400` method in this
# config script to extend any existing MemInterface class and add latency value
# to that memory.
remote_memory = RemoteDualChannelDDR4_2400(
    size="1GB", remote_offset_latency=750
)
# Here we setup the processor. We use a simple processor with one ATOMIC core.
processor = SimpleProcessor(cpu_type=CPUTypes.ATOMIC, isa=ISA.ARM, num_cores=1)
# Here we setup the board which allows us to do Full-System ARM simulations.
board = ArmGem5DMBoard(
    clk_freq="3GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

# The STREAM invocation that is run under each NUMA policy.
stream_cmd = (
    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/"
    "stream.hw.m5 1000000;"
)
cmd = [
    "mount -t sysfs - /sys;",
    "mount -t proc - /proc;",
    "numastat;",
    "numactl --membind=0 -- " + stream_cmd,
    "numastat;",
    "numactl --interleave=0,1 -- " + stream_cmd,
    "numastat;",
    "numactl --membind=1 -- " + stream_cmd,
    "numastat;",
    "m5 exit;",
]

# NOTE: the kernel, bootloader and disk image paths below are specific to the
# machine this config was written on; adjust them to your setup.
board.set_kernel_disk_workload(
    kernel=CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"),
    bootloader=CustomResource(
        "/home/kaustavg/.cache/gem5/arm64-bootloader-foundation"
    ),
    disk_image=DiskImageResource(
        "/projects/gem5/hn/DISK_IMAGES/arm64sve-hpc-2204-20230526-numa.img",
        root_partition="1",
    ),
    readfile_contents=" ".join(cmd),
)
# This script boots two NUMA nodes (local and remote memory) in a full system
# simulation that runs entirely inside gem5. The simulation exits after
# displaying numastat information on the terminal, which can be viewed from
# board.terminal.
simulator = Simulator(board=board)
simulator.run()
"""
This script shows an example of running a full system ARM Ubuntu boot
simulation using the gem5 library, with the remote memory simulated outside
gem5. The guest executes `STREAM` bound to NUMA node 0, interleaved across
nodes 0 and 1, and bound to node 1, and then executes `m5 exit`.

* This script has to be executed from SST
"""

import os
import sys
import argparse

# all the source files are one directory above.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
)

import m5
from m5.objects import Root, AddrRange

from boards.arm_sst_board import ArmSstDMBoard
from cachehierarchies.dm_caches_sst import (
    ClassicPrivateL1PrivateL2SharedL3SstDMCache,
)
from memories.external_remote_memory import ExternalRemoteMemoryInterface
from gem5.utils.requires import requires
from gem5.components.memory import SingleChannelDDR4_2400, DualChannelDDR4_2400
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import CustomWorkload, Workload
from gem5.resources.resource import CustomResource, DiskImageResource

# SST passes a couple of arguments for this system to simulate.
parser = argparse.ArgumentParser()
# NOTE(review): --command is accepted but not used anywhere in this script;
# the guest commands are taken from the `cmd` list below.
parser.add_argument("--command", type=str, help="Command run by guest")
# The processor used to be hard-coded to O3 regardless of this flag. The flag
# is now honoured; the default is "o3" so that omitting it keeps the previous
# behaviour.
parser.add_argument(
    "--cpu-type",
    type=str,
    choices=["atomic", "timing", "o3"],
    default="o3",
    help="CPU type",
)
parser.add_argument(
    "--cpu-clock-rate",
    type=str,
    required=True,
    help="CPU Clock",
)
parser.add_argument(
    "--local-memory-size",
    type=str,
    required=True,
    help="Local memory size",
)
parser.add_argument(
    "--remote-memory-addr-range",
    type=str,
    required=True,
    help="Remote memory range",
)
parser.add_argument(
    "--remote-memory-latency",
    type=int,
    required=True,
    help="Remote memory latency in Ticks (has to be converted prior)",
)
args = parser.parse_args()
cpu_type = {
    "o3": CPUTypes.O3,
    "atomic": CPUTypes.ATOMIC,
    "timing": CPUTypes.TIMING,
}[args.cpu_type]

# The range is given as "<start>,<end>"; reject anything that is not exactly
# two integers instead of failing later with an IndexError or silently
# ignoring extra values.
try:
    range_start, range_end = (
        int(bound) for bound in args.remote_memory_addr_range.split(",")
    )
except ValueError:
    parser.error(
        "--remote-memory-addr-range must be two comma separated integers"
    )
remote_memory_range = AddrRange(range_start, range_end)

# This runs a check to ensure the gem5 binary is compiled for ARM.
requires(isa_required=ISA.ARM)
# Here we setup the parameters of the l1, l2 and shared l3 caches.
cache_hierarchy = ClassicPrivateL1PrivateL2SharedL3SstDMCache(
    l1d_size="32KiB", l1i_size="32KiB", l2_size="256KiB", l3_size="1MiB"
)
# Memory: Single Channel DDR4 2400 DRAM device.
local_memory = SingleChannelDDR4_2400(size=args.local_memory_size)

# Either supply the size of the remote memory or the address range of the
# remote memory. Since this is inside the external memory, it does not matter
# what type of memory is being simulated. This can either be initialized with
# a size or a memory address range, which is more flexible. Adding remote
# memory latency automatically adds a non-coherent crossbar to simulate latency
remote_memory = ExternalRemoteMemoryInterface(
    addr_range=remote_memory_range,
    remote_memory_latency=args.remote_memory_latency,
)

# Here we setup the processor. We use a simple processor with the CPU type
# selected on the command line.
processor = SimpleProcessor(cpu_type=cpu_type, isa=ISA.ARM, num_cores=4)

# Here we setup the board which allows us to do Full-System ARM simulations.
board = ArmSstDMBoard(
    clk_freq=args.cpu_clock_rate,
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

# The STREAM invocation that is run under each NUMA policy.
stream_cmd = (
    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream-annotated/"
    "stream.hw.m5 1000000;"
)
cmd = [
    "mount -t sysfs - /sys;",
    "mount -t proc - /proc;",
    "numastat;",
    "numactl --membind=0 -- " + stream_cmd,
    "numastat;",
    "numactl --interleave=0,1 -- " + stream_cmd,
    "numastat;",
    "numactl --membind=1 -- " + stream_cmd,
    "numastat;",
    "m5 exit;",
]

# NOTE: the kernel, bootloader and disk image paths below are specific to the
# machine this config was written on; adjust them to your setup.
workload = CustomWorkload(
    function="set_kernel_disk_workload",
    parameters={
        "kernel": CustomResource("/home/kaustavg/vmlinux-5.4.49-NUMA.arm64"),
        "bootloader": CustomResource(
            "/home/kaustavg/.cache/gem5/arm64-bootloader-foundation"
        ),
        "disk_image": DiskImageResource(
            "/home/kaustavg/disk-images/arm/arm64sve-hpc-2204-20230526-numa.img",
            root_partition="1",
        ),
        "readfile_contents": " ".join(cmd),
    },
)
# This disk image needs to have NUMA tools installed.
board.set_workload(workload)
# This script will boot two numa nodes in a full system simulation where the
# gem5 node will be sending instructions to the SST node. The simulation will
# exit after displaying numastat information on the terminal, which can be
# viewed from board.terminal.
# NOTE(review): _post_instantiate() is called before m5.instantiate() here;
# the order is kept as it was -- confirm it is what the SST bridge expects.
board._pre_instantiate()
root = Root(full_system=True, board=board)
board._post_instantiate()
m5.instantiate()
"""
Example full-system RISC-V simulation with a local and a remote NUMA memory,
both modelled inside gem5.

The board is built with one ATOMIC core, a dual-channel DDR4-2400 local
memory and a dual-channel DDR4-2400 "remote" memory that sits behind an extra
latency. The guest mounts sysfs/proc, runs `numastat` and then `m5 exit`,
which ends the simulation.
"""

import os
import sys
from typing import Optional

# all the source files are one directory above.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
)

import m5
from m5.objects import Root

from boards.riscv_gem5_board import RiscvGem5DMBoard

# NOTE(review): the original imported `ClassicPL1PL2DMCache` here but then
# instantiated `ClassicPrivateL1PrivateL2SstDMCache`, which was never imported
# (NameError). `ClassicPrivateL1PrivateL2DMCache` is the name the x86 gem5-only
# config imports from this same module -- confirm it against dm_caches.py.
from cachehierarchies.dm_caches import ClassicPrivateL1PrivateL2DMCache
from gem5.utils.requires import requires
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from memories.remote_memory import RemoteChanneledMemory
from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.workload import *
from gem5.resources.resource import *

# Ask gem5 to check that this binary was built for the RISC-V ISA.
requires(isa_required=ISA.RISCV)


def RemoteDualChannelDDR4_2400(
    size: Optional[str] = None, remote_offset_latency=300
) -> AbstractMemorySystem:
    """
    Build a two-channel DDR4_2400_8x8 memory with an added access latency.

    :param size: Memory size string forwarded to RemoteChanneledMemory
        (None leaves the choice to that class).
    :param remote_offset_latency: Value forwarded as
        RemoteChanneledMemory's `remote_offset_latency`.
    :returns: A RemoteChanneledMemory with 2 channels and an interleaving
        size of 64.
    """
    return RemoteChanneledMemory(
        DDR4_2400_8x8,
        2,
        64,
        size=size,
        remote_offset_latency=remote_offset_latency,
    )


# Cache hierarchy: private L1I/L1D and a private L2.
cache_hierarchy = ClassicPrivateL1PrivateL2DMCache(
    l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB"
)

# Local memory: dual-channel DDR4-2400.
local_memory = DualChannelDDR4_2400(size="2GiB")

# The remote memory can be any memory system from the gem5 standard library
# (a plain second address range), or a memory with an extra access delay. For
# the latter, see `RemoteDualChannelDDR4_2400` above, which wraps an existing
# DRAM interface class and adds a latency value.
remote_memory = RemoteDualChannelDDR4_2400(
    size="1GB", remote_offset_latency=750
)

# Processor: a single ATOMIC RISC-V core.
processor = SimpleProcessor(
    cpu_type=CPUTypes.ATOMIC, isa=ISA.RISCV, num_cores=1
)

# Board that ties processor, caches and both memories together.
board = RiscvGem5DMBoard(
    clk_freq="3GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

# Guest script. The original list lacked a comma after "numastat;", so Python
# silently fused it with "m5 exit;" into one element.
cmd = [
    "mount -t sysfs - /sys;",
    "mount -t proc - /proc;",
    "numastat;",
    "m5 exit;",
]

# Resources are given as local paths on the author's machine.
workload = CustomWorkload(
    function="set_kernel_disk_workload",
    parameters={
        "disk_image": DiskImageResource(
            local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img",
            root_partition="1",
        ),
        "kernel": CustomResource(
            "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/SST13/kernels/gem5-resources/src/riscv-boot-exit-nodisk/riscv-pk/build/bbl"
        ),
        "readfile_contents": " ".join(cmd),
    },
)

# Both NUMA nodes are simulated inside gem5 here. The numastat output can be
# viewed from board.terminal.
board.set_workload(workload)
simulator = Simulator(board=board)
simulator.run()
"""
Example full-system RISC-V configuration whose remote NUMA memory is
simulated outside gem5.

The board is built with one TIMING core and a dual-channel DDR4-2400 local
memory; the remote memory is only described by an address range and a
latency. The guest mounts sysfs/proc, runs `numastat` and then `m5 exit`.

* This script has to be executed from SST
"""

import os
import sys
import argparse

# all the source files are one directory above.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
)

import m5
from m5.objects import Root, AddrRange

from boards.riscv_sst_board import RiscvSstDMBoard
from cachehierarchies.dm_caches_sst import ClassicPrivateL1PrivateL2SstDMCache
from memories.external_remote_memory import ExternalRemoteMemoryInterface

from gem5.utils.requires import requires
from gem5.components.memory import DualChannelDDR4_2400
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.workload import *
from gem5.resources.resource import *

# SST passes a couple of arguments for this system to simulate.
parser = argparse.ArgumentParser()
# NOTE(review): --command is accepted but `args.command` is never read.
parser.add_argument("--command", type=str, help="Command run by guest")
parser.add_argument(
    "--cpu-type",
    type=str,
    choices=["atomic", "timing", "o3"],
    default="atomic",
    help="CPU type",
)
parser.add_argument(
    "--cpu-clock-rate",
    type=str,
    required=True,
    help="CPU Clock",
)
parser.add_argument(
    "--local-memory-size",
    type=str,
    required=True,
    help="Local memory size",
)
parser.add_argument(
    "--remote-memory-addr-range",
    type=str,
    required=True,
    help="Remote memory range",
)
parser.add_argument(
    "--remote-memory-latency",
    type=int,
    required=True,
    help="Remote memory latency in Ticks (has to be converted prior)",
)
args = parser.parse_args()

# NOTE(review): `cpu_type` is computed from --cpu-type but the processor below
# is still built with CPUTypes.TIMING; left as-is pending confirmation that
# other CPU models are meant to be selectable here.
cpu_type = {
    "o3": CPUTypes.O3,
    "atomic": CPUTypes.ATOMIC,
    "timing": CPUTypes.TIMING,
}[args.cpu_type]

# argparse stores "--remote-memory-addr-range" as `remote_memory_addr_range`;
# the original read `args.remote_memory_range`, which does not exist.
# The value is a comma-separated pair of decimal integers.
remote_memory_range = list(
    map(int, args.remote_memory_addr_range.split(","))
)
remote_memory_range = AddrRange(remote_memory_range[0], remote_memory_range[1])

# Ask gem5 to check that this binary was built for the RISC-V ISA.
requires(isa_required=ISA.RISCV)

# Cache hierarchy: private L1I/L1D and a private L2.
cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache(
    l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB"
)

# Local memory: dual-channel DDR4-2400. The size comes from
# --local-memory-size; the original read `args.local_memory_range`, which
# does not exist.
local_memory = DualChannelDDR4_2400(size=args.local_memory_size)

# The remote memory lives in the external simulator, so no DRAM model is
# chosen here; it is described by an address range (or a size) only. Passing
# a latency marks the interface as needing a non-coherent crossbar.
remote_memory = ExternalRemoteMemoryInterface(
    addr_range=remote_memory_range,
    remote_memory_latency=args.remote_memory_latency,
)

# Processor: a single TIMING RISC-V core.
processor = SimpleProcessor(
    cpu_type=CPUTypes.TIMING, isa=ISA.RISCV, num_cores=1
)

# Board that ties processor, caches and both memories together. The clock
# now honours the required --cpu-clock-rate option (previously a hard-coded
# "3GHz" made that option a no-op).
board = RiscvSstDMBoard(
    clk_freq=args.cpu_clock_rate,
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

cmd = [
    "mount -t sysfs - /sys;",
    "mount -t proc - /proc;",
    "numastat;",
    "m5 exit;",
]

# Resources are given as local paths on the author's machine.
workload = CustomWorkload(
    function="set_kernel_disk_workload",
    parameters={
        "disk_image": DiskImageResource(
            local_path="/home/kaustavg/disk-images/rv64gc-hpc-2204.img",
            root_partition="1",
        ),
        "kernel": CustomResource(
            "/scr/kaustavg/simulators-at-scratch/DArchR/WorkingDir/SST13/kernels/gem5-resources/src/riscv-boot-exit-nodisk/riscv-pk/build/bbl"
        ),
        "readfile_contents": " ".join(cmd),
    },
)
# This disk image needs to have NUMA tools installed.
board.set_workload(workload)

# The script stops after m5.instantiate() and never starts a simulation loop
# itself. The numastat output can be viewed from board.terminal.
board._pre_instantiate()
root = Root(full_system=True, board=board)
board._post_instantiate()
m5.instantiate()
"""
Example full-system X86 simulation with a local and a remote NUMA memory,
both modelled inside gem5.

The board is built with one KVM core, a single-channel DDR4-2400 local memory
and a one-channel DDR4-2400 "remote" memory that sits behind an extra
latency. After mounting sysfs/proc the guest script starts a bash shell.
"""

import os
import sys
from typing import Optional

# all the source files are one directory above.
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))
)

import m5
from m5.objects import Root

from boards.x86_gem5_board import X86Gem5DMBoard
from cachehierarchies.dm_caches import (
    ClassicPrivateL1PrivateL2DMCache,
    ClassicPrivateL1PrivateL2SharedL3DMCache,
)
from memories.remote_memory import RemoteChanneledMemory
from gem5.utils.requires import requires
from gem5.components.memory.dram_interfaces.ddr4 import DDR4_2400_8x8
from gem5.components.memory import SingleChannelDDR4_2400

# Imported explicitly so the function signature below does not depend on what
# the star import from multi_channel happens to re-export.
from gem5.components.memory.abstract_memory_system import AbstractMemorySystem
from gem5.components.memory.multi_channel import *
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.workload import *
from gem5.resources.resource import *

# Ask gem5 to check that this binary was built for the X86 ISA.
requires(isa_required=ISA.X86)


# A memory with an added latency, usable as the remote memory of a
# disaggregated-memory system.
def RemoteDualChannelDDR4_2400(
    size: Optional[str] = None, remote_offset_latency=300
) -> AbstractMemorySystem:
    """
    Build a DDR4_2400_8x8 memory with an added access latency.

    NOTE(review): despite the "DualChannel" name, this variant passes 1 as
    the channel count -- confirm whether that is intentional.

    :param size: Memory size string forwarded to RemoteChanneledMemory
        (None leaves the choice to that class).
    :param remote_offset_latency: Value forwarded as
        RemoteChanneledMemory's `remote_offset_latency`.
    :returns: A RemoteChanneledMemory with 1 channel and an interleaving
        size of 64.
    """
    return RemoteChanneledMemory(
        DDR4_2400_8x8,
        1,
        64,
        size=size,
        remote_offset_latency=remote_offset_latency,
    )


# Cache hierarchy: private L1I/L1D and a private L2.
cache_hierarchy = ClassicPrivateL1PrivateL2DMCache(
    l1d_size="32KiB",
    l1i_size="32KiB",
    l2_size="256KiB",
)

# Local memory: single-channel DDR4-2400.
local_memory = SingleChannelDDR4_2400(size="1GiB")

# The remote memory can be any memory system from the gem5 standard library
# (a plain second address range), or a memory with an extra access delay. For
# the latter, see `RemoteDualChannelDDR4_2400` above, which wraps an existing
# DRAM interface class and adds a latency value.
remote_memory = RemoteDualChannelDDR4_2400(
    size="1GB", remote_offset_latency=1050
)

# Processor: a single KVM X86 core.
processor = SimpleProcessor(cpu_type=CPUTypes.KVM, isa=ISA.X86, num_cores=1)

# Board that ties processor, caches and both memories together.
board = X86Gem5DMBoard(
    clk_freq="3GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

# Guest script: mount sysfs/proc and hand over to a shell. The shell is named
# by absolute path; the original used the relative "bin/bash", which only
# resolves when the script happens to run with "/" as its working directory.
cmd = [
    "mount -t sysfs - /sys;",
    "mount -t proc - /proc;",
    "/bin/bash",
]

# Kernel and disk image are given as local paths on the author's machine.
board.set_kernel_disk_workload(
    kernel=CustomResource("/home/kaustavg/kernel/x86/linux-6.7/vmlinux"),
    disk_image=DiskImageResource(
        "/home/kaustavg/.cache/gem5/x86-ubuntu-img",
        root_partition="1",
    ),
    readfile_contents=" ".join(cmd),
)

# Both NUMA nodes are simulated inside gem5 here. Guest output can be viewed
# from board.terminal.
# NOTE(review): the original called simulator.run() twice in a row; the second
# call looked like an accidental duplicate and has been dropped. Re-add it if
# continuing past the first exit event was intended.
simulator = Simulator(board=board)
simulator.run()
"""
Example full-system X86 configuration whose remote NUMA memory is simulated
outside gem5.

The board is built with one ATOMIC core and a single-channel DDR4-2400 local
memory; the remote memory is described by a fixed 1GiB address range. The
guest runs `STREAM` under three numactl memory bindings and then `m5 exit`.

* This script has to be executed from SST
"""

import os
import sys

# The packages imported below (boards/, cachehierarchies/, memories/) live one
# directory above this config script, so that directory is put on sys.path.
_parent_dir = os.path.join(os.path.dirname(__file__), os.path.pardir)
sys.path.append(os.path.abspath(_parent_dir))

import m5
from m5.objects import Root, AddrRange

from boards.x86_sst_board import X86SstDMBoard
from cachehierarchies.dm_caches_sst import ClassicPrivateL1PrivateL2SstDMCache
from memories.external_remote_memory import ExternalRemoteMemoryInterface
from gem5.utils.requires import requires
from gem5.components.memory import DualChannelDDR4_2400, SingleChannelDDR4_2400
from gem5.components.processors.simple_processor import SimpleProcessor
from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.simulate.simulator import Simulator
from gem5.resources.workload import Workload
from gem5.resources.workload import *
from gem5.resources.resource import *

# Ask gem5 to check that this binary was built for the X86 ISA.
requires(isa_required=ISA.X86)

# Cache hierarchy: private L1I/L1D and a private L2.
cache_hierarchy = ClassicPrivateL1PrivateL2SstDMCache(
    l1d_size="32KiB", l1i_size="32KiB", l2_size="1MB"
)

# Local memory: single-channel DDR4-2400.
local_memory = SingleChannelDDR4_2400(size="1GiB")

# The remote memory lives in the external simulator, so no DRAM model is
# chosen here; it is described by an address range (or a size) only. Passing
# a latency -- even 0 -- marks the interface as needing a non-coherent
# crossbar.
_remote_range = AddrRange(0x40000000, size="1GiB")
remote_memory = ExternalRemoteMemoryInterface(
    addr_range=_remote_range, remote_memory_latency=0
)

# Processor: a single ATOMIC X86 core.
processor = SimpleProcessor(
    cpu_type=CPUTypes.ATOMIC, isa=ISA.X86, num_cores=1
)

# Board that ties processor, caches and both memories together.
board = X86SstDMBoard(
    clk_freq="1GHz",
    processor=processor,
    local_memory=local_memory,
    remote_memory=remote_memory,
    cache_hierarchy=cache_hierarchy,
)

# Guest script: mount sysfs/proc, print numastat, then run STREAM pinned to
# CPU node 0 with three different memory bindings, dumping and resetting the
# gem5 stats after each step.
_STREAM_BINARY = (
    "/home/ubuntu/simple-vectorizable-microbenchmarks/stream/stream.hw"
)
cmd = [
    "mount -t sysfs - /sys;",
    "mount -t proc - /proc;",
    "numastat;",
    "m5 dumpresetstats 0 ;",
]
for _mem_nodes in ("0", "0,1", "1"):
    cmd.append(
        f"numactl --cpubind=0 --membind={_mem_nodes} -- "
        f"{_STREAM_BINARY} 1000000;"
    )
    cmd.append("m5 dumpresetstats 0;")
cmd.append("m5 exit;")

# Kernel and disk image are given as local paths on the author's machine.
_kernel = CustomResource("/home/kaustavg/kernel/x86/linux-6.7/vmlinux")
_disk_image = DiskImageResource(
    "/home/kaustavg/.cache/gem5/x86-ubuntu-img",
    root_partition="1",
)
board.set_kernel_disk_workload(
    kernel=_kernel,
    disk_image=_disk_image,
    readfile_contents=" ".join(cmd),
)

# The script stops after m5.instantiate() and never starts a simulation loop
# itself. Guest output can be viewed from board.terminal.
board._pre_instantiate()
root = Root(full_system=True, board=board)
board._post_instantiate()
m5.instantiate()
"""We need a class that extends the outgoing bridge from gem5. The goal
of this class to have a MemInterface like class in the future, where we'll
append mem_ranges within this interface."""

import m5
from m5.util import fatal
from m5.objects.XBar import NoncoherentXBar
from m5.objects import OutgoingRequestBridge, AddrRange, Tick
from typing import Optional


class ExternalRemoteMemoryInterface:
    """
    Description of a remote memory that is simulated outside gem5.

    The object owns an OutgoingRequestBridge (`remote_memory`) and records
    how the remote memory was specified (by size or by address range) and
    whether a latency was requested for the link to it.
    """

    def __init__(
        self,
        size: Optional[str] = None,
        addr_range: AddrRange = None,
        remote_memory_latency: Tick = None,
    ):
        """
        :param size: Size of the remote memory. Only used when `addr_range`
            is not given; it is stored as-is.
        :param addr_range: Address range of the remote memory. When given it
            takes precedence over `size`: it is installed on the bridge and
            the stored size is derived from it.
        :param remote_memory_latency: Latency of the link to the remote
            memory. Any value other than None (including 0) marks a crossbar
            as required.

        Calls `fatal` when neither `size` nor `addr_range` is provided.
        """
        # The crossbar itself is not created here; this flag only records
        # that one is wanted (see is_xbar_required).
        self._xbar_required = False
        # Always defined, so readers never hit an AttributeError when no
        # latency was requested (the original only set it in that case).
        self._remote_memory_latency = None
        # The remote memory is set up with either a size or an address range.
        self._size = None
        self._set_using_addr_ranges = False
        self.remote_memory = OutgoingRequestBridge()

        # The user needs to provide either the size of the remote memory or
        # the range of the remote memory.
        if size is None and addr_range is None:
            fatal("External memory needs to either have a size or a range!")

        if addr_range is not None:
            self.remote_memory.physical_address_ranges = [addr_range]
            # Read the size back from the bridge's first range.
            self._size = self.remote_memory.physical_address_ranges[0].size()
            self._set_using_addr_ranges = True
        else:
            # No range given: keep the requested size; per the original
            # comment the board sets up the range later in this case.
            self._size = size

        # A latency (even 0) means the link should get a non-coherent
        # crossbar.
        if remote_memory_latency is not None:
            self._xbar_required = True
            self._remote_memory_latency = remote_memory_latency

    def get_size(self):
        """Return the stored size: the value derived from `addr_range` when
        one was given, otherwise the `size` argument unchanged."""
        return self._size

    def is_xbar_required(self):
        """Return True when a latency was passed to the constructor.

        If an XBar is required, it should be added in the connect_things to
        avoid initializing an orphan node.
        """
        return self._xbar_required

    def get_set_using_addr_ranges(self):
        """Return True when the memory was specified with an address range
        rather than with a size."""
        return self._set_using_addr_ranges
""" Channeled "generic" DDR memory controllers
"""

import m5
from gem5.utils.override import overrides
from m5.objects import AddrRange, DRAMInterface, MemCtrl, Port
from m5.objects.XBar import NoncoherentXBar
from typing import Type, Sequence, Tuple, Optional, Union

from gem5.components.memory.memory import ChanneledMemory


class RemoteChanneledMemory(ChanneledMemory):
    """
    ChanneledMemory variant that places one NoncoherentXBar in front of each
    memory controller; the crossbar's frontend latency is the configured
    remote latency.
    """

    def __init__(
        self,
        dram_interface_class: Type[DRAMInterface],
        num_channels: Union[int, str],
        interleaving_size: Union[int, str],
        size: Optional[str] = None,
        addr_mapping: Optional[str] = None,
        remote_offset_latency: Union[int, str] = 0,
    ) -> None:
        """
        :param remote_offset_latency: Used as `frontend_latency` of the
            per-channel crossbars.

        The remaining arguments are forwarded unchanged, in order, to
        ChanneledMemory.
        """
        # Stored before the parent constructor runs, since it is read in
        # _create_mem_interfaces_controller.
        self._remote_latency = remote_offset_latency
        parent_args = (
            dram_interface_class,
            num_channels,
            interleaving_size,
            size,
            addr_mapping,
        )
        super().__init__(*parent_args)

    @overrides(ChanneledMemory)
    def _create_mem_interfaces_controller(self):
        """Create, per channel, a DRAM interface, a crossbar and a memory
        controller whose port hangs off the crossbar's memory side."""
        channels = range(self._num_channels)
        self._dram = [
            self._dram_class(addr_mapping=self._addr_mapping)
            for _ in channels
        ]
        # Only the frontend latency models the remote link; forward and
        # response latencies are zero.
        self.remote_links = [
            NoncoherentXBar(
                frontend_latency=self._remote_latency,
                forward_latency=0,
                response_latency=0,
                width=64,
            )
            for _ in channels
        ]
        self.mem_ctrl = [
            MemCtrl(
                dram=self._dram[ch],
                port=self.remote_links[ch].mem_side_ports,
            )
            for ch in channels
        ]

    @overrides(ChanneledMemory)
    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
        """Return one (address range, port) pair per channel; the port is the
        CPU side of that channel's crossbar, not the controller port."""
        ports = []
        for ch in range(self._num_channels):
            dram_range = self.mem_ctrl[ch].dram.range
            ports.append((dram_range, self.remote_links[ch].cpu_side_ports))
        return ports

    @overrides(ChanneledMemory)
    def get_memory_controllers(self):
        """Return the CPU-side ports of the per-channel crossbars.

        NOTE(review): despite the method name, crossbar ports are returned
        rather than the MemCtrl objects -- confirm callers expect that.
        """
        links = []
        for ch in range(self._num_channels):
            links.append(self.remote_links[ch].cpu_side_ports)
        return links
make all -j$(nproc) @@ -37,13 +43,16 @@ export PATH=$SST_CORE_HOME/bin:$PATH ```sh wget https://github.com/sstsimulator/sst-elements/releases/download/v13.0.0_Final/sstelements-13.0.0.tar.gz -tar xzf sstelements-13.0.0.tar.gz +tar xvf sstelements-13.0.0.tar.gz +wget https://github.com/sstsimulator/sst-elements/releases/download/v13.0.0_Final/sstelements-13.0.0.tar.gz +tar xvf sstelements-13.0.0.tar.gz ``` ### Installing SST-Elements ```sh cd sst-elements-library-13.0.0 +cd sst-elements-library-13.0.0 ./configure --prefix=$SST_CORE_HOME --with-python=/usr/bin/python3-config \ --with-sst-core=$SST_CORE_HOME make all -j$(nproc) @@ -59,8 +68,9 @@ echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$SST_CORE_HOME/lib/pkgconfig/" >> ### Building gem5 library At the root of the gem5 folder, you need to compile gem5 as a library. This -varies dependent on which OS you are using. If you're using Linux, then -execute the following: +varies depending on which OS you use. If you're using Linux, then type the following: +At the root of the gem5 folder, you need to compile gem5 as a library. This +varies depending on which OS you use. If you're using Linux, then type the following: ```sh scons build/RISCV/libgem5_opt.so -j $(nproc) --without-tcmalloc --duplicate-sources ``` @@ -68,7 +78,15 @@ In case you're using Mac, then type the following: ```sh scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-sources ``` +In case you're using Mac, then type the following: +```sh +scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-sources +``` +**Note:** +* `--without-tcmalloc` is required to avoid a conflict with SST's malloc. +* `--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory. +* The Mac version was tested on a MacBook Air with an M2 processor. **Note:** * `--without-tcmalloc` is required to avoid a conflict with SST's malloc.
* `--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory. @@ -76,18 +94,40 @@ scons build/RISCV/libgem5_opt.dylib -j $(nproc) --without-tcmalloc --duplicate-s ### Compiling the SST integration +Go to the SST directory in the gem5 repo. Go to the SST directory in the gem5 repo. ```sh cd ext/sst ``` -Depending on your OS, you need to copy the correct `Makefile.xxx` file to -`Makefile`. +Depending on the OS that you're using, you need to copy the matching `Makefile.xxx` to `Makefile`. +```sh +cp Makefile.xxx Makefile # linux or mac +make -j4 +``` +If you are compiling this on a Mac, you also need to export `DYLD_LIBRARY_PATH`: +```sh +# go to the base gem5 directory +cd ../.. +export DYLD_LIBRARY_PATH=:`pwd`/build/RISCV/ +``` + +Change `ARCH=RISCV` to `ARCH=ARM` in the `Makefile` in case you're compiling +for ARM. +``` +Depending on the OS that you're using, you need to copy the matching `Makefile.xxx` to `Makefile`. ```sh cp Makefile.xxx Makefile # linux or mac make -j4 ``` -The make file is hardcoded to RISC-V. IN the case you wish to compile to ARM, -edit the Makefile or pass `ARCH=RISCV` to `ARCH=ARM` while compiling. +If you are compiling this on a Mac, you also need to export `DYLD_LIBRARY_PATH`: +```sh +# go to the base gem5 directory +cd ../.. +export DYLD_LIBRARY_PATH=:`pwd`/build/RISCV/ +``` + +Change `ARCH=RISCV` to `ARCH=ARM` in the `Makefile` in case you're compiling +for ARM.
### Running an example simulation See `README.md` diff --git a/ext/sst/Makefile b/ext/sst/Makefile deleted file mode 100644 index 9213d266e9..0000000000 --- a/ext/sst/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -SST_VERSION=SST-11.1.0 # Name of the .pc file in lib/pkgconfig where SST is installed -GEM5_LIB=gem5_opt -ARCH=RISCV -OFLAG=3 - -LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH} -CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext -CPPFLAGS+=-MMD -MP -SRC=$(wildcard *.cc) - -.PHONY: clean all - -all: libgem5.so - -libgem5.so: $(SRC:%.cc=%.o) - ${CXX} ${CPPFLAGS} ${LDFLAGS} $? -o $@ -l${GEM5_LIB} - --include $(SRC:%.cc=%.d) - -clean: - ${RM} *.[do] libgem5.so diff --git a/ext/sst/gem5.cc b/ext/sst/gem5.cc index 3ea6127ecd..f9357937a6 100644 --- a/ext/sst/gem5.cc +++ b/ext/sst/gem5.cc @@ -182,7 +182,6 @@ gem5Component::gem5Component(SST::ComponentId_t id, SST::Params& params): // Split the port names using the util method defined. splitPortNames(ports); for (int i = 0 ; i < sstPortCount ; i++) { - std::cout << sstPortNames[i] << std::endl; sstPorts.push_back( loadUserSubComponent(sstPortNames[i], 0) ); @@ -447,4 +446,4 @@ gem5Component::splitPortNames(std::string port_names) sstPortNames.push_back(strdup(part.c_str())); sstPortCount++; } -} +} \ No newline at end of file diff --git a/ext/sst/gem5.hh b/ext/sst/gem5.hh index f9f00beabd..1941691edd 100644 --- a/ext/sst/gem5.hh +++ b/ext/sst/gem5.hh @@ -143,8 +143,9 @@ class gem5Component: public SST::Component SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( // These are the generally expected ports. 
- {"system_port", "Connection to gem5 system_port", "gem5.gem5Bridge"}, - {"cache_port", "Connection to gem5 CPU", "gem5.gem5Bridge"} + {"ports", + "Connection to gem5's outgoing ports to SST's ports", + "gem5.gem5Bridge"} ) }; diff --git a/ext/sst/sst/arm_example.py b/ext/sst/sst/arm_example.py index 4bc111cb86..b655e80d8b 100644 --- a/ext/sst/sst/arm_example.py +++ b/ext/sst/sst/arm_example.py @@ -35,6 +35,30 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Copyright (c) 2021 Arm Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2021 Arm Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. 
import sst import sys @@ -50,6 +74,10 @@ # memory currently, we do not subtract 0x80000000 from the request's address to # get the "real" address so, the mem_size would always be 2GiB larger than the # desired memory size +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size memory_size_gem5 = "4GiB" memory_size_sst = "16GiB" addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() @@ -76,6 +104,17 @@ "cache_port" : "system.memory_outgoing_bridge" } +# We need a list of ports. +port_list = [] +for port in sst_ports: + port_list.append(port) + +# We keep a track of all the memory ports that we have. +sst_ports = { + "system_port" : "system.system_outgoing_bridge", + "cache_port" : "system.memory_outgoing_bridge" +} + # We need a list of ports. port_list = [] for port in sst_ports: @@ -86,6 +125,8 @@ "cmd": gem5_command, "ports" : " ".join(port_list), "debug_flags" : "" + "ports" : " ".join(port_list), + "debug_flags" : "" } gem5_node = sst.Component("gem5_node", "gem5.gem5Component") @@ -95,13 +136,19 @@ cache_bus.addParams( { "bus_frequency" : cpu_clock_rate } ) # for initialization system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0) +# for initialization +system_port = gem5_node.setSubComponent("system_port", "gem5.gem5Bridge", 0) system_port.addParams({ + "response_receiver_name": sst_ports["system_port"], "response_receiver_name": sst_ports["system_port"], "mem_size": memory_size_sst }) # SST -> gem5 cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0) +# SST -> gem5 +cache_port = gem5_node.setSubComponent("cache_port", "gem5.gem5Bridge", 0) cache_port.addParams({ + "response_receiver_name": sst_ports["cache_port"], "response_receiver_name": sst_ports["cache_port"], "mem_size": memory_size_sst }) @@ 
-113,11 +160,13 @@ # Memory memctrl = sst.Component("memory", "memHierarchy.MemController") # `addr_range_end` should be changed accordingly to memory_size_sst +# `addr_range_end` should be changed accordingly to memory_size_sst memctrl.addParams({ "debug" : "0", "clock" : "1GHz", "request_width" : "64", "addr_range_end" : addr_range_end, + "addr_range_end" : addr_range_end, }) memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") memory.addParams({ diff --git a/ext/sst/sst/example.py b/ext/sst/sst/example.py index 1c35bc3f83..e66de07ab2 100644 --- a/ext/sst/sst/example.py +++ b/ext/sst/sst/example.py @@ -38,6 +38,10 @@ # memory currently, we do not subtract 0x80000000 from the request's address to # get the "real" address so, the mem_size would always be 2GiB larger than the # desired memory size +# gem5 will send requests to physical addresses of range [0x80000000, inf) to +# memory currently, we do not subtract 0x80000000 from the request's address to +# get the "real" address so, the mem_size would always be 2GiB larger than the +# desired memory size memory_size_gem5 = "4GiB" memory_size_sst = "6GiB" addr_range_end = UnitAlgebra(memory_size_sst).getRoundedValue() @@ -59,6 +63,17 @@ "cache_port" : "system.memory_outgoing_bridge" } +# We need a list of ports. +port_list = [] +for port in sst_ports: + port_list.append(port) + +# We keep a track of all the memory ports that we have. +sst_ports = { + "system_port" : "system.system_outgoing_bridge", + "cache_port" : "system.memory_outgoing_bridge" +} + # We need a list of ports. 
port_list = [] for port in sst_ports: @@ -71,6 +86,11 @@ + f" --memory-size {memory_size_gem5}", "debug_flags": "", "ports" : " ".join(port_list) + "cmd": " ../../configs/example/sst/riscv_fs.py" + + f" --cpu-clock-rate {cpu_clock_rate}" + + f" --memory-size {memory_size_gem5}", + "debug_flags": "", + "ports" : " ".join(port_list) } gem5_node = sst.Component("gem5_node", "gem5.gem5Component") @@ -84,6 +104,14 @@ # tell the SubComponent the name of the corresponding SimObject system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) +# SST -> gem5 +cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) +cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) +# for initialization +system_port = gem5_node.setSubComponent(port_list[0], "gem5.gem5Bridge", 0) +# tell the SubComponent the name of the corresponding SimObject +system_port.addParams({ "response_receiver_name": sst_ports["system_port"]}) + # SST -> gem5 cache_port = gem5_node.setSubComponent(port_list[1], "gem5.gem5Bridge", 0) cache_port.addParams({ "response_receiver_name": sst_ports["cache_port"]}) @@ -95,11 +123,13 @@ # Memory memctrl = sst.Component("memory", "memHierarchy.MemController") # `addr_range_end` should be changed accordingly to memory_size_sst +# `addr_range_end` should be changed accordingly to memory_size_sst memctrl.addParams({ "debug" : "0", "clock" : "1GHz", "request_width" : "64", "addr_range_end" : addr_range_end, + "addr_range_end" : addr_range_end, }) memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") memory.addParams({ diff --git a/ext/sst/sst/example_arm_dm_board.py b/ext/sst/sst/example_arm_dm_board.py new file mode 100644 index 0000000000..bbbf2e5172 --- /dev/null +++ b/ext/sst/sst/example_arm_dm_board.py @@ -0,0 +1,171 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. 
# SST configuration: several gem5 ARM nodes, each exposing one outgoing
# "remote memory" port, share a single SST memory controller through one
# memHierarchy bus.
import sst
import sys
import os

from sst import UnitAlgebra

cache_link_latency = "1ps"
cpu_clock_rate = "4.2GHz"


def _size_in_gib(size: str) -> int:
    """Return the whole number of GiB in a string such as ``"2GiB"``.

    Raises ValueError when the string does not end in ``GiB`` or the part
    before the suffix is not an integer.
    """
    suffix = "GiB"
    if not size.endswith(suffix):
        raise ValueError(f"expected a size in GiB, got {size!r}")
    return int(size[: -len(suffix)])


def connect_components(link_name: str,
                       low_port_name, low_port_idx: int,
                       high_port_name, high_port_idx: int,
                       port=False, direct_link=False):
    """Create an SST link named ``link_name`` between two components.

    ``low_port_name`` and ``high_port_name`` are, despite their names, the
    SST component / subcomponent objects to connect (every caller in this
    script passes objects, not strings).

    The low side uses port ``low_network_<low_port_idx>`` unless ``port`` is
    set, in which case it uses ``port``. The high side uses
    ``high_network_<high_port_idx>`` unless ``direct_link`` is set, in which
    case it uses ``direct_link``. Both ends use ``cache_link_latency``.
    """
    low_port = "port" if port else "low_network_" + str(low_port_idx)
    high_port = (
        "direct_link" if direct_link
        else "high_network_" + str(high_port_idx)
    )
    link = sst.Link(link_name)
    link.connect(
        (low_port_name, low_port, cache_link_latency),
        (high_port_name, high_port, cache_link_latency)
    )

# =========================================================================== #

# Define the number of gem5 nodes in the system. Anything more than 1 needs
# mpirun to run the sst binary.
system_nodes = 2

# Define the total number of SST memory nodes.
memory_nodes = 1

# This example uses a fixed node size of 2 GiB.
node_memory_slice = "2GiB"
remote_memory_slice = "2GiB"

# SST memory node size. Each system gets a 2 GiB slice of fixed memory.
# The whole number before "GiB" is parsed (not just its first character), so
# multi-digit sizes such as "16GiB" are handled.
# NOTE(review): the trailing "+ 2" presumably accounts for the 0x80000000
# (2 GiB) base offset of the address map -- confirm.
sst_memory_size = str(
    memory_nodes * _size_in_gib(node_memory_slice) + system_nodes * 2 + 2
) + "GiB"
addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue()

# Value handed to gem5 as --remote-memory-latency: 250 times the clock rate
# in GHz. Computed once, outside the f-string, so that no source indentation
# leaks into the gem5 command line.
# NOTE(review): presumably 250 ns expressed in CPU cycles -- confirm.
remote_memory_latency = int(
    float(cpu_clock_rate[: cpu_clock_rate.find("G")]) * 250
)

# There is one bus connecting all gem5 ports to the remote memory.
mem_bus = sst.Component("membus", "memHierarchy.Bus")
mem_bus.addParams({"bus_frequency": cpu_clock_rate})

memctrl = sst.Component("memory", "memHierarchy.MemController")
memctrl.setRank(0, 0)
# `addr_range_end` must be kept consistent with sst_memory_size.
memctrl.addParams({
    "debug": "0",
    "clock": "1.2GHz",
    "request_width": "64",
    "addr_range_end": addr_range_end,
})
memory = memctrl.setSubComponent("backend", "memHierarchy.timingDRAM")
memory.addParams({
    "id": 0,
    "addrMapper": "memHierarchy.simpleAddrMapper",
    "addrMapper.interleave_size": "64B",
    "addrMapper.row_size": "1KiB",
    "clock": "1.2GHz",
    "mem_size": sst_memory_size,
    "channels": 4,
    "channel.numRanks": 2,
    "channel.rank.numBanks": 16,
    "channel.rank.bank.TRP": 14,
    "printconfig": 1,
})

# All gem5 nodes and their outgoing memory ports are collected here.
gem5_nodes = []
memory_ports = []

# Create each node and connect it to the shared memory bus.
for node in range(system_nodes):
    # Node n is given the remote range
    # [0x80000000 + (n + 1) * 2 GiB, 0x80000000 + (n + 2) * 2 GiB).
    node_range = [0x80000000 + (node + 1) * 0x80000000,
                  0x80000000 + (node + 2) * 0x80000000]
    print(node_range)
    cmd = [
        f"--outdir=m5out_arm_node_{node}",
        "../../disaggregated_memory/configs/arm-sst-numa-nodes.py",
        f"--cpu-clock-rate {cpu_clock_rate}",
        "--cpu-type o3",
        f"--local-memory-size {node_memory_slice}",
        f"--remote-memory-addr-range {node_range[0]},{node_range[1]}",
        f"--remote-memory-latency {remote_memory_latency}",
    ]
    # SST port name -> name of the gem5 SimObject that receives responses.
    ports = {
        "remote_memory_port": "board.remote_memory"
    }
    port_list = list(ports)
    cpu_params = {
        "frequency": cpu_clock_rate,
        "cmd": " ".join(cmd),
        "debug_flags": "",
        "ports": " ".join(port_list)
    }
    # Each gem5 node has to be separately simulated. TODO: Figure out this
    # part on the mpirun side.
    gem5_nodes.append(
        sst.Component("gem5_node_{}".format(node), "gem5.gem5Component")
    )
    gem5_nodes[node].addParams(cpu_params)
    # Rank 0 hosts the memory controller; node n runs on rank n + 1.
    gem5_nodes[node].setRank(node + 1, 0)

    memory_ports.append(
        gem5_nodes[node].setSubComponent(
            "remote_memory_port", "gem5.gem5Bridge", 0
        )
    )
    memory_ports[node].addParams({
        "response_receiver_name": ports["remote_memory_port"]
    })

    # We don't need directory controllers in this example. The start and end
    # ranges do not really matter as the OS is doing this management.
    connect_components(f"node_{node}_mem_port_2_mem_bus",
                       memory_ports[node], 0,
                       mem_bus, node,
                       port=True)

# All system nodes are set up; attach the bus to the single SST memory
# controller created above (timingDRAM backend). This will be updated in the
# future to use the number of SST memory nodes.
connect_components("membus_2_memory",
                   mem_bus, 0,
                   memctrl, 0,
                   direct_link=True)

# Enable statistics.
sst.setStatisticLoadLevel(10)
sst.setStatisticOutput("sst.statOutputTXT", {"filepath": "./arm-board.txt"})
sst.enableAllStatisticsForAllComponents()
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This SST configuration file tests a merlin router. 
# SST configuration: a mix of gem5 RISC-V and ARM nodes, each exposing one
# outgoing "remote memory" port, share a single SST memory controller through
# one memHierarchy bus.
import sst
import sys
import os

from sst import UnitAlgebra

cache_link_latency = "1ps"

cpu_clock_rate = "3GHz"


def _size_in_gib(size: str) -> int:
    """Return the whole number of GiB in a string such as ``"2GiB"``.

    Raises ValueError when the string does not end in ``GiB`` or the part
    before the suffix is not an integer.
    """
    suffix = "GiB"
    if not size.endswith(suffix):
        raise ValueError(f"expected a size in GiB, got {size!r}")
    return int(size[: -len(suffix)])


def connect_components(link_name: str,
                       low_port_name, low_port_idx: int,
                       high_port_name, high_port_idx: int,
                       port=False, direct_link=False):
    """Create an SST link named ``link_name`` between two components.

    ``low_port_name`` and ``high_port_name`` are, despite their names, the
    SST component / subcomponent objects to connect (every caller in this
    script passes objects, not strings).

    The low side uses port ``low_network_<low_port_idx>`` unless ``port`` is
    set, in which case it uses ``port``. The high side uses
    ``high_network_<high_port_idx>`` unless ``direct_link`` is set, in which
    case it uses ``direct_link``. Both ends use ``cache_link_latency``.
    """
    low_port = "port" if port else "low_network_" + str(low_port_idx)
    high_port = (
        "direct_link" if direct_link
        else "high_network_" + str(high_port_idx)
    )
    link = sst.Link(link_name)
    link.connect(
        (low_port_name, low_port, cache_link_latency),
        (high_port_name, high_port, cache_link_latency)
    )

# =========================================================================== #

# Define the number of gem5 nodes in the system.
system_nodes = 2

# Define the total number of SST memory nodes.
memory_nodes = 1

# This example uses a fixed node size of 2 GiB.
# TODO: Fix this in the later version of the script.
node_memory_slice = "2GiB"
remote_memory_slice = "2GiB"

# SST memory node size. Each system gets a 2 GiB slice of fixed memory.
# The whole number before "GiB" is parsed (not just its first character), so
# multi-digit sizes such as "16GiB" are handled.
# NOTE(review): the trailing "+ 2" presumably accounts for the 0x80000000
# (2 GiB) base offset of the address map -- confirm.
sst_memory_size = str(
    memory_nodes * _size_in_gib(node_memory_slice) + system_nodes * 2 + 2
) + "GiB"
addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue()

# Value handed to gem5 as --remote-memory-latency: 250 times the clock rate
# in GHz. Computed once, outside the f-string, so that no source indentation
# leaks into the gem5 command line.
# NOTE(review): presumably 250 ns expressed in CPU cycles -- confirm.
remote_memory_latency = int(
    float(cpu_clock_rate[: cpu_clock_rate.find("G")]) * 250
)

# There is one bus connecting all gem5 ports to the remote memory.
mem_bus = sst.Component("membus", "memHierarchy.Bus")
mem_bus.addParams({"bus_frequency": cpu_clock_rate})

memctrl = sst.Component("memory", "memHierarchy.MemController")
memctrl.setRank(0, 0)
# `addr_range_end` must be kept consistent with sst_memory_size.
memctrl.addParams({
    "debug": "0",
    "clock": "2.4GHz",
    "request_width": "64",
    "addr_range_end": addr_range_end,
})
memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
memory.addParams({
    "access_time": "30ns",
    "mem_size": sst_memory_size
})

# All gem5 nodes and their outgoing memory ports are collected here.
gem5_nodes = []
memory_ports = []

# Create each node and connect it to the shared memory bus.
for node in range(system_nodes):
    # Node n is given the remote range
    # [0x80000000 + (n + 1) * 2 GiB, 0x80000000 + (n + 2) * 2 GiB).
    node_range = [0x80000000 + (node + 1) * 0x80000000,
                  0x80000000 + (node + 2) * 0x80000000]
    if node % 2 == 0:
        # Even-numbered nodes are RISC-V nodes and use the RISC-V script.
        script = [
            f"--outdir=m5out_riscv_node_{node}",
            "../../disaggregated_memory/configs/riscv-sst-numa-nodes.py",
        ]
    else:
        # Odd-numbered nodes are ARM nodes and use the ARM script.
        script = [
            f"--outdir=m5out_arm_node_{node}",
            "../../disaggregated_memory/configs/arm-sst-numa-nodes.py",
        ]

    cmd = script + [
        f"--cpu-clock-rate {cpu_clock_rate}",
        "--cpu-type timing",
        f"--local-memory-size {node_memory_slice}",
        f"--remote-memory-addr-range {node_range[0]},{node_range[1]}",
        f"--remote-memory-latency {remote_memory_latency}",
    ]
    # SST port name -> name of the gem5 SimObject that receives responses.
    ports = {
        "remote_memory_port": "board.remote_memory"
    }
    port_list = list(ports)
    cpu_params = {
        "frequency": cpu_clock_rate,
        "cmd": " ".join(cmd),
        "debug_flags": "",
        "ports": " ".join(port_list)
    }
    # Each gem5 node has to be separately simulated. TODO: Figure out this
    # part on the mpirun side.
    gem5_nodes.append(
        sst.Component("gem5_node_{}".format(node), "gem5.gem5Component")
    )

    gem5_nodes[node].addParams(cpu_params)
    # Rank 0 hosts the memory controller; node n runs on rank n + 1.
    gem5_nodes[node].setRank(node + 1, 0)
    memory_ports.append(
        gem5_nodes[node].setSubComponent(
            "remote_memory_port", "gem5.gem5Bridge", 0
        )
    )

    memory_ports[node].addParams({
        "response_receiver_name": ports["remote_memory_port"]
    })

    # We don't need directory controllers in this example. The start and end
    # ranges do not really matter as the OS is doing this management.
    connect_components(f"node_{node}_mem_port_2_mem_bus",
                       memory_ports[node], 0,
                       mem_bus, node,
                       port=True)

# All system nodes are set up; attach the bus to the single SST memory
# controller created above (simpleMem backend, chosen to avoid extra
# simulation time). This will be updated in the future to use the number of
# SST memory nodes.
connect_components("membus_2_memory",
                   mem_bus, 0,
                   memctrl, 0,
                   direct_link=True)

# Enable statistics.
sst.setStatisticLoadLevel(10)
sst.setStatisticOutput("sst.statOutputTXT",
                       {"filepath": "./multiISA-board.txt"})
sst.enableAllStatisticsForAllComponents()
# SST configuration: several gem5 RISC-V nodes, each exposing one outgoing
# "remote memory" port, share a single SST memory controller through one
# memHierarchy bus.
import sst
import sys
import os

from sst import UnitAlgebra

cache_link_latency = "1ps"
cpu_clock_rate = "4.2GHz"


def _size_in_gib(size: str) -> int:
    """Return the whole number of GiB in a string such as ``"2GiB"``.

    Raises ValueError when the string does not end in ``GiB`` or the part
    before the suffix is not an integer.
    """
    suffix = "GiB"
    if not size.endswith(suffix):
        raise ValueError(f"expected a size in GiB, got {size!r}")
    return int(size[: -len(suffix)])


def connect_components(link_name: str,
                       low_port_name, low_port_idx: int,
                       high_port_name, high_port_idx: int,
                       port=False, direct_link=False):
    """Create an SST link named ``link_name`` between two components.

    ``low_port_name`` and ``high_port_name`` are, despite their names, the
    SST component / subcomponent objects to connect (every caller in this
    script passes objects, not strings).

    The low side uses port ``low_network_<low_port_idx>`` unless ``port`` is
    set, in which case it uses ``port``. The high side uses
    ``high_network_<high_port_idx>`` unless ``direct_link`` is set, in which
    case it uses ``direct_link``. Both ends use ``cache_link_latency``.
    """
    low_port = "port" if port else "low_network_" + str(low_port_idx)
    high_port = (
        "direct_link" if direct_link
        else "high_network_" + str(high_port_idx)
    )
    link = sst.Link(link_name)
    link.connect(
        (low_port_name, low_port, cache_link_latency),
        (high_port_name, high_port, cache_link_latency)
    )

# =========================================================================== #

# Define the number of gem5 nodes in the system. Anything more than 1 needs
# mpirun to run the sst binary.
system_nodes = 2

# Define the total number of SST memory nodes.
memory_nodes = 1

# This example uses a fixed node size of 2 GiB.
node_memory_slice = "2GiB"
remote_memory_slice = "2GiB"

# SST memory node size. Each system gets a 2 GiB slice of fixed memory.
# The whole number before "GiB" is parsed (not just its first character), so
# multi-digit sizes such as "16GiB" are handled.
# NOTE(review): the trailing "+ 2" presumably accounts for the 0x80000000
# (2 GiB) base offset of the address map -- confirm.
sst_memory_size = str(
    memory_nodes * _size_in_gib(node_memory_slice) + system_nodes * 2 + 2
) + "GiB"
addr_range_end = UnitAlgebra(sst_memory_size).getRoundedValue()

# Value handed to gem5 as --remote-memory-latency: 250 times the clock rate
# in GHz. Computed once, outside the f-string, so that no source indentation
# leaks into the gem5 command line.
# NOTE(review): presumably 250 ns expressed in CPU cycles -- confirm.
remote_memory_latency = int(
    float(cpu_clock_rate[: cpu_clock_rate.find("G")]) * 250
)

# There is one bus connecting all gem5 ports to the remote memory.
mem_bus = sst.Component("membus", "memHierarchy.Bus")
mem_bus.addParams({"bus_frequency": cpu_clock_rate})

memctrl = sst.Component("memory", "memHierarchy.MemController")
memctrl.setRank(0, 0)
# `addr_range_end` must be kept consistent with sst_memory_size.
memctrl.addParams({
    "debug": "0",
    "clock": "2.4GHz",
    "request_width": "64",
    "addr_range_end": addr_range_end,
})
memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
memory.addParams({
    "access_time": "50ns",
    "mem_size": sst_memory_size
})

# All gem5 nodes and their outgoing memory ports are collected here.
gem5_nodes = []
memory_ports = []

# Create each node and connect it to the shared memory bus.
for node in range(system_nodes):
    # Node n is given the remote range
    # [0x80000000 + (n + 1) * 2 GiB, 0x80000000 + (n + 2) * 2 GiB).
    node_range = [0x80000000 + (node + 1) * 0x80000000,
                  0x80000000 + (node + 2) * 0x80000000]
    cmd = [
        f"--outdir=m5out_riscv_node_{node}",
        "../../disaggregated_memory/configs/riscv-sst-numa-nodes.py",
        f"--cpu-clock-rate {cpu_clock_rate}",
        "--cpu-type o3",
        f"--local-memory-size {node_memory_slice}",
        f"--remote-memory-addr-range {node_range[0]},{node_range[1]}",
        f"--remote-memory-latency {remote_memory_latency}",
    ]
    # SST port name -> name of the gem5 SimObject that receives responses.
    ports = {
        "remote_memory_port": "board.remote_memory"
    }
    port_list = list(ports)
    cpu_params = {
        "frequency": cpu_clock_rate,
        "cmd": " ".join(cmd),
        "debug_flags": "",
        "ports": " ".join(port_list)
    }
    # Each gem5 node has to be separately simulated. TODO: Figure out this
    # part on the mpirun side.
    gem5_nodes.append(
        sst.Component("gem5_node_{}".format(node), "gem5.gem5Component")
    )
    gem5_nodes[node].addParams(cpu_params)
    # Rank 0 hosts the memory controller; node n runs on rank n + 1.
    gem5_nodes[node].setRank(node + 1, 0)

    memory_ports.append(
        gem5_nodes[node].setSubComponent(
            "remote_memory_port", "gem5.gem5Bridge", 0
        )
    )
    memory_ports[node].addParams({
        "response_receiver_name": ports["remote_memory_port"]
    })

    # We don't need directory controllers in this example. The start and end
    # ranges do not really matter as the OS is doing this management.
    connect_components(f"node_{node}_mem_port_2_mem_bus",
                       memory_ports[node], 0,
                       mem_bus, node,
                       port=True)

# All system nodes are set up; attach the bus to the single SST memory
# controller created above (simpleMem backend, chosen to avoid extra
# simulation time). This will be updated in the future to use the number of
# SST memory nodes.
connect_components("membus_2_memory",
                   mem_bus, 0,
                   memctrl, 0,
                   direct_link=True)

# Enable statistics.
sst.setStatisticLoadLevel(10)
sst.setStatisticOutput("sst.statOutputTXT", {"filepath": "./riscv-board.txt"})
sst.enableAllStatisticsForAllComponents()
SSTResponderSubComponent::portEventHandler( responseQueue.push(pkt); } } else { - // we can handle unexpected invalidates, but nothing else. + // we can handle a few types of requests. if (SST::Interfaces::StandardMem::Read* test = dynamic_cast(request)) { return; } + else if (SST::Interfaces::StandardMem::ReadResp* test = + dynamic_cast( + request)) { + return; + } else if (SST::Interfaces::StandardMem::WriteResp* test = dynamic_cast( request)) { @@ -241,6 +248,43 @@ SSTResponderSubComponent::handleRecvRespRetry() void SSTResponderSubComponent::handleRecvFunctional(gem5::PacketPtr pkt) { + // SST does not understand what a functional access in gem5 is, since SST + // only allows functional accesses at init time. Since it + // has all the data stored in its memory, any functional access made to SST has + // to be correctly handled. The idea here is to convert this functional + // access into a timing access and keep the SST memory consistent. + + gem5::Addr addr = pkt->getAddr(); + uint8_t* ptr = pkt->getPtr(); + uint64_t size = pkt->getSize(); + + // Create a new request to handle this request immediately. + SST::Interfaces::StandardMem::Request* request = nullptr; + + // we need a minimal translator here which does reads and writes. Any other + // command type is unexpected and the program should crash immediately. 
+ switch((gem5::MemCmd::Command)pkt->cmd.toInt()) { + case gem5::MemCmd::WriteReq: { + std::vector data(ptr, ptr+size); + request = new SST::Interfaces::StandardMem::Write( + addr, data.size(), data); + break; + } + case gem5::MemCmd::ReadReq: { + request = new SST::Interfaces::StandardMem::Read(addr, size); + break; + } + default: + panic( + "handleRecvFunctional: Unable to convert gem5 packet: %s\n", + pkt->cmd.toString() + ); + } + if(pkt->req->isUncacheable()) { + request->setFlag( + SST::Interfaces::StandardMem::Request::Flag::F_NONCACHEABLE); + } + memoryInterface->send(request); } bool diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc index 58abfbad46..edff92c923 100644 --- a/src/sst/outgoing_request_bridge.cc +++ b/src/sst/outgoing_request_bridge.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021 The Regents of the University of California +// Copyright (c) 2021-2023 The Regents of the University of California // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -30,6 +30,7 @@ #include #include +#include "sim/stats.hh" #include "base/trace.hh" namespace gem5 @@ -38,11 +39,13 @@ namespace gem5 OutgoingRequestBridge::OutgoingRequestBridge( const OutgoingRequestBridgeParams &params) : SimObject(params), + stats(this), outgoingPort(std::string(name()), this), sstResponder(nullptr), physicalAddressRanges(params.physical_address_ranges.begin(), params.physical_address_ranges.end()) { + this->init_phase_bool = false; } OutgoingRequestBridge::~OutgoingRequestBridge() @@ -62,6 +65,7 @@ OutgoingRequestPort::~OutgoingRequestPort() { } + void OutgoingRequestBridge::init() { @@ -96,7 +100,14 @@ OutgoingRequestBridge::setResponder(SSTResponderInterface* responder) bool OutgoingRequestBridge::sendTimingResp(gem5::PacketPtr pkt) { - return outgoingPort.sendTimingResp(pkt); + // see if the responder responded true or false. if it's true, then we + // increment the stats counters. 
+ bool return_status = outgoingPort.sendTimingResp(pkt); + if (return_status == true) { + ++stats.numIncomingPackets; + stats.sizeIncomingPackets += pkt->getSize(); + } + return return_status; } void @@ -105,19 +116,56 @@ OutgoingRequestBridge::sendTimingSnoopReq(gem5::PacketPtr pkt) outgoingPort.sendTimingSnoopReq(pkt); } +void +OutgoingRequestBridge::initPhaseComplete(bool value) { + init_phase_bool = value; +} +bool +OutgoingRequestBridge::getInitPhaseStatus() { + return init_phase_bool; +} void OutgoingRequestBridge::handleRecvFunctional(PacketPtr pkt) { - uint8_t* ptr = pkt->getPtr(); - uint64_t size = pkt->getSize(); - std::vector data(ptr, ptr+size); - initData.push_back(std::make_pair(pkt->getAddr(), data)); + // This should not receive any functional accesses + // gem5::MemCmd::Command pktCmd = (gem5::MemCmd::Command)pkt->cmd.toInt(); + // std::cout << "Recv Functional : 0x" << std::hex << pkt->getAddr() << + // std::dec << " " << pktCmd << " " << gem5::MemCmd::WriteReq << " " << + // getInitPhaseStatus() << std::endl; + // Check which stage we are at. If we are in the INIT phase, then queue all + // these packets. + if (!getInitPhaseStatus()) + { + // sstResponder->recvAtomic(pkt); + uint8_t* ptr = pkt->getPtr(); + uint64_t size = pkt->getSize(); + std::vector data(ptr, ptr+size); + initData.push_back(std::make_pair(pkt->getAddr(), data)); + } + // This is the RUN phase. SST does not allow any sendUntimedData (AKA + // functional accesses) to its memory. We need to convert these accesses + // to timing to at least store the correct data in the memory. + else { + // These packets have to be translated at runtime. We convert these + // packets to timing as their data has to be stored correctly in SST + // memory. Otherwise reads from the SST memory will fail. To reproduce + // this error, do not handle any functional accesses and the kernel + // boot will fail while reading the correct partition from the vda + // device. 
+ + // we cannot allow any functional reads to go to SST + if (pkt->isRead()) { + assert(false && "Outgoing bridge cannot handle functional reads!"); + } + sstResponder->handleRecvFunctional(pkt); + } } Tick OutgoingRequestBridge:: OutgoingRequestPort::recvAtomic(PacketPtr pkt) { + // return 0; assert(false && "OutgoingRequestPort::recvAtomic not implemented"); return Tick(); } @@ -133,8 +181,19 @@ bool OutgoingRequestBridge:: OutgoingRequestPort::recvTimingReq(PacketPtr pkt) { - owner->sstResponder->handleRecvTimingReq(pkt); - return true; + return owner->handleTiming(pkt); +} + +bool OutgoingRequestBridge::handleTiming(PacketPtr pkt) +{ + // see if the responder responded true or false. if it's true, then we + // increment the stats counters. + bool return_status = sstResponder->handleRecvTimingReq(pkt); + if(return_status == true) { + ++stats.numOutgoingPackets; + stats.sizeOutgoingPackets += pkt->getSize(); + } + return return_status; } void @@ -151,4 +210,16 @@ OutgoingRequestPort::getAddrRanges() const return owner->physicalAddressRanges; } +OutgoingRequestBridge::StatGroup::StatGroup(statistics::Group *parent) + : statistics::Group(parent), + ADD_STAT(numOutgoingPackets, statistics::units::Count::get(), + "Number of packets going out of the gem5 port"), + ADD_STAT(sizeOutgoingPackets, statistics::units::Byte::get(), + "Cumulative size of all the outgoing packets"), + ADD_STAT(numIncomingPackets, statistics::units::Count::get(), + "Number of packets coming into the gem5 port"), + ADD_STAT(sizeIncomingPackets, statistics::units::Byte::get(), + "Cumulative size of all the incoming packets") +{ +} }; // namespace gem5 diff --git a/src/sst/outgoing_request_bridge.hh b/src/sst/outgoing_request_bridge.hh index af049af45a..dfb2a60dbc 100644 --- a/src/sst/outgoing_request_bridge.hh +++ b/src/sst/outgoing_request_bridge.hh @@ -30,6 +30,7 @@ #include #include +#include "base/statistics.hh" #include "mem/port.hh" #include "params/OutgoingRequestBridge.hh" #include 
"sim/sim_object.hh" @@ -53,6 +54,7 @@ namespace gem5 class OutgoingRequestBridge: public SimObject { + public: class OutgoingRequestPort: public ResponsePort { @@ -69,6 +71,36 @@ class OutgoingRequestBridge: public SimObject AddrRangeList getAddrRanges() const; }; + // We need a boolean variable to distinguish between INIT and RUN phases in + // SST. Gem5 does functional accesses to the SST memory when: + // (a) It loads the kernel (at the start of the simulation) + // (b) During VIO/disk accesses. + // While loading the kernel, it is easy to handle all functional accesses + // as SST allows initializing of untimed data during its INIT phase. + // However, functional accesses done to the SST memory during RUN phase have + // to be handled separately. In this implementation, we convert all such + // functional accesses to timing accesses so that it is correctly read from + // the memory. + bool init_phase_bool; + + public: + // we need a statistics counter for this simobject to find out how many + // requests were sent to or received from the outgoing port. + struct StatGroup : public statistics::Group + { + StatGroup(statistics::Group *parent); + /** Count the number of outgoing packets */ + statistics::Scalar numOutgoingPackets; + + + /** Cumulative size of all the outgoing packets */ + statistics::Scalar sizeOutgoingPackets; + + /** Count the number of incoming packets */ + statistics::Scalar numIncomingPackets; + /** Cumulative size of all the incoming packets */ + statistics::Scalar sizeIncomingPackets; + } stats; public: // a gem5 ResponsePort OutgoingRequestPort outgoingPort; @@ -85,7 +117,8 @@ class OutgoingRequestBridge: public SimObject // Required to let the OutgoingRequestPort to send range change request. void init(); - + + bool handleTiming(PacketPtr pkt); // Returns the range of addresses that the ports will handle. // Currently, it will return the range of [0x80000000, inf), which is // specific to RISCV (SiFive's HiFive boards). 
@@ -97,8 +130,18 @@ class OutgoingRequestBridge: public SimObject // Returns the buffered data for initialization. This is necessary as // when gem5 sends functional requests to memory for initialization, // the connection in SST Memory Hierarchy has not been constructed yet. + // This buffer is only used during the INIT phase. std::vector>> getInitData() const; + // We need Set/Get functions to set the init_phase_bool. + // `initPhaseComplete` is used to signal the outgoing bridge that INIT + // phase is completed and RUN phase will start. + void initPhaseComplete(bool value); + + // We read the value of the init_phase_bool using `getInitPhaseStatus` + // method. + + bool getInitPhaseStatus(); // gem5 Component (from SST) will call this function to let set the // bridge's corresponding SSTResponderSubComponent (which implemented // SSTResponderInterface). I.e., this will connect this bridge to the @@ -115,6 +158,8 @@ class OutgoingRequestBridge: public SimObject // to SST. Should only be called during the SST construction phase, i.e. // not at the simulation time. void handleRecvFunctional(PacketPtr pkt); + + }; }; // namespace gem5