diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2ce5ffa13d..492346b086 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.13)
 include(FetchContent)
 set(FETCHCONTENT_QUIET OFF)
-
+set(CMAKE_VERBOSE_MAKEFILE ON)
 # FetchContent_MakeAvailable was introduced in 3.14
 # there also isn't a version that accepts extra args at the end for add_subdirectory
 # hence we write one ourselves
@@ -122,6 +122,8 @@ option(SIMENG_ENABLE_TESTS "Whether to enable testing for SimEng" OFF)
 option(SIMENG_USE_EXTERNAL_LLVM "Use an external LLVM rather than building it as a submodule" OFF)
 option(SIMENG_SANITIZE "Enable compiler sanitizers" OFF)
 option(SIMENG_OPTIMIZE "Enable Extra Compiler Optimizatoins" OFF)
+option(SIMENG_ENABLE_SST "Compile SimEng SST Wrapper" OFF)
+option(SIMENG_ENABLE_SST_TESTS "Enable testing for SST" OFF)
 
 if (SIMENG_OPTIMIZE)
   # Turn on link time optimization for all targets.
@@ -219,3 +221,24 @@ endif()
 # include sources
 add_subdirectory(src)
 add_subdirectory(docs)
+
+# Optional SST wrapper: built only when enabled and SST_INSTALL_DIR is given.
+if (SIMENG_ENABLE_SST)
+  if (SST_INSTALL_DIR)
+    add_subdirectory(sst)
+    if (SIMENG_ENABLE_SST_TESTS)
+      enable_testing()
+      list(APPEND CMAKE_CTEST_ARGUMENTS "--verbose")
+      # test-sst-simeng runs ctest; VERBATIM keeps argument escaping portable.
+      add_custom_target(test-sst-simeng COMMAND ${CMAKE_CTEST_COMMAND}
+        DEPENDS sstsimengtest VERBATIM)
+    endif()
+  else()
+    message(WARNING "SST build was selected but SST install directory was not specified.
+    Please specify -DSST_INSTALL_DIR=<path> for the SST build to proceed.")
+  endif()
+endif()
+
+# Install SimEng model configs in the build directory (paths quoted so they stay single arguments)
+set(SIMENG_CONFIG_INSTALL_DIR "${CMAKE_BINARY_DIR}/simeng-configs")
+install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/configs/" DESTINATION "${SIMENG_CONFIG_INSTALL_DIR}")
\ No newline at end of file
diff --git a/configs/sst-cores/a64fx-sst.yaml b/configs/sst-cores/a64fx-sst.yaml
new file mode 100644
index 0000000000..9ac21daf05
--- /dev/null
+++ b/configs/sst-cores/a64fx-sst.yaml
@@ -0,0 +1,269 @@
+---
+# The following resources were utilised to create the config file and naming schemes:
+# https://github.com/fujitsu/A64FX
+
+Core:
+  Simulation-Mode: outoforder
+  # Clock Frequency is in GHz.
+  Clock-Frequency: 1.8
+  # Timer-Frequency is in MHz.
+  Timer-Frequency: 100
+  Micro-Operations: True
+  Vector-Length: 512
+Fetch:
+  Fetch-Block-Size: 32
+  Loop-Buffer-Size: 48
+  Loop-Detection-Threshold: 4
+Process-Image:
+  Heap-Size: 1073741824
+  Stack-Size: 1048576
+Register-Set:
+  GeneralPurpose-Count: 96
+  FloatingPoint/SVE-Count: 128
+  Predicate-Count: 48
+  Conditional-Count: 128
+Pipeline-Widths:
+  Commit: 4
+  FrontEnd: 4
+  LSQ-Completion: 2
+Queue-Sizes:
+  ROB: 128
+  Load: 40
+  Store: 24
+Branch-Predictor:
+  BTB-Tag-Bits: 11
+  Saturating-Count-Bits: 2
+  Global-History-Length: 11
+  RAS-entries: 8
+  Fallback-Static-Predictor: "Always-Taken"
+L1-Data-Memory:
+  Interface-Type: External
+L1-Instruction-Memory:
+  Interface-Type: Flat
+LSQ-L1-Interface:
+  Access-Latency: 5
+  Exclusive: True
+  Load-Bandwidth: 128
+  Store-Bandwidth: 64
+  Permitted-Requests-Per-Cycle: 2
+  Permitted-Loads-Per-Cycle: 2
+  Permitted-Stores-Per-Cycle: 1
+Ports:
+  0:
+    Portname: FLA
+    Instruction-Support:
+    - FP
+    - SVE
+  1:
+    Portname: PR
+    Instruction-Support:
+    - PREDICATE
+  2:
+    Portname: EXA
+    Instruction-Support:
+    - INT_SIMPLE
+    - INT_MUL
+    - STORE_DATA
+  3:
+    Portname: FLB
+    Instruction-Support:
+    - FP_SIMPLE
+    - FP_MUL
+    - SVE_SIMPLE
+    - SVE_MUL
+  4:
+    Portname: EXB
+    Instruction-Support:
+    - INT_SIMPLE
+    - INT_DIV_OR_SQRT
+  5:
+    Portname: EAGA
+    Instruction-Support:
+    - LOAD
+    - STORE_ADDRESS
+    - INT_SIMPLE_ARTH_NOSHIFT
+    - INT_SIMPLE_LOGICAL_NOSHIFT
+    - INT_SIMPLE_CMP
+  6:
+    Portname: EAGB
+    Instruction-Support:
+    - LOAD
+    - STORE_ADDRESS
+    - INT_SIMPLE_ARTH_NOSHIFT
+    - INT_SIMPLE_LOGICAL_NOSHIFT
+    - INT_SIMPLE_CMP
+  7:
+    Portname: BR
+    Instruction-Support:
+    - BRANCH
+Reservation-Stations:
+  0:
+    Size: 20
+    Dispatch-Rate: 2
+    Ports:
+    - FLA
+    - PR
+    - EXA
+  1:
+    Size: 20
+    Dispatch-Rate: 2
+    Ports:
+    - FLB
+    - EXB
+  2:
+    Size: 10
+    Dispatch-Rate: 2
+    Ports:
+    - EAGA
+  3:
+    Size: 10
+    Dispatch-Rate: 2
+    Ports:
+    - EAGB
+  4:
+    Size: 19
+    Dispatch-Rate: 2
+    Ports:
+    - BR
+Execution-Units:
+  0:
+    Pipelined: True
+    Blocking-Groups:
+    - INT_DIV_OR_SQRT
+    - FP_DIV_OR_SQRT
+    - SVE_DIV_OR_SQRT
+  1:
+    Pipelined: True
+    Blocking-Groups: 
+    - INT_DIV_OR_SQRT
+    - FP_DIV_OR_SQRT
+    - SVE_DIV_OR_SQRT
+  2:
+    Pipelined: True
+    Blocking-Groups: 
+    - INT_DIV_OR_SQRT
+    - FP_DIV_OR_SQRT
+    - SVE_DIV_OR_SQRT
+  3:
+    Pipelined: True
+    Blocking-Groups: 
+    - INT_DIV_OR_SQRT
+    - FP_DIV_OR_SQRT
+    - SVE_DIV_OR_SQRT
+  4:
+    Pipelined: True
+    Blocking-Groups: 
+    - INT_DIV_OR_SQRT
+    - FP_DIV_OR_SQRT
+    - SVE_DIV_OR_SQRT
+  5:
+    Pipelined: True
+    Blocking-Groups: 
+    - INT_DIV_OR_SQRT
+    - FP_DIV_OR_SQRT
+    - SVE_DIV_OR_SQRT
+  6:
+    Pipelined: True
+    Blocking-Groups: 
+    - INT_DIV_OR_SQRT
+    - FP_DIV_OR_SQRT
+    - SVE_DIV_OR_SQRT
+  7:
+    Pipelined: True
+    Blocking-Groups: 
+    - INT_DIV_OR_SQRT
+    - FP_DIV_OR_SQRT
+    - SVE_DIV_OR_SQRT
+Latencies:
+  0:
+    Instruction-Groups: 
+    - INT
+    Execution-Latency: 2
+    Execution-Throughput: 2
+  1:
+    Instruction-Groups: 
+    - INT_SIMPLE_ARTH_NOSHIFT
+    - INT_SIMPLE_LOGICAL_NOSHIFT
+    - INT_SIMPLE_CVT
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  2:
+    Instruction-Groups: 
+    - INT_MUL
+    Execution-Latency: 5
+    Execution-Throughput: 1
+  3:
+    Instruction-Groups: 
+    - INT_DIV_OR_SQRT
+    Execution-Latency: 41
+    Execution-Throughput: 41
+  4:
+    Instruction-Groups: 
+    - SCALAR_SIMPLE
+    - VECTOR_SIMPLE_LOGICAL
+    - SVE_SIMPLE_LOGICAL
+    - VECTOR_SIMPLE_CMP
+    - SVE_SIMPLE_CMP
+    Execution-Latency: 4
+    Execution-Throughput: 1
+  5:
+    Instruction-Groups: 
+    - FP_DIV_OR_SQRT
+    Execution-Latency: 29
+    Execution-Throughput: 29
+  6:
+    Instruction-Groups: 
+    - VECTOR_SIMPLE
+    - SVE_SIMPLE
+    - SCALAR_SIMPLE_CVT
+    - FP_MUL
+    - SVE_MUL
+    Execution-Latency: 9
+    Execution-Throughput: 1
+  7:
+    Instruction-Groups: 
+    - SVE_DIV_OR_SQRT
+    Execution-Latency: 98
+    Execution-Throughput: 98
+  8:
+    Instruction-Groups: 
+    - PREDICATE
+    Execution-Latency: 3
+    Execution-Throughput: 1
+  9:
+    Instruction-Groups: 
+    - LOAD_SCALAR
+    - LOAD_VECTOR
+    - STORE_ADDRESS_SCALAR
+    - STORE_ADDRESS_VECTOR
+    Execution-Latency: 3
+    Execution-Throughput: 1
+  10:
+    Instruction-Groups: 
+    - LOAD_SVE
+    - STORE_ADDRESS_SVE
+    Execution-Latency: 6
+    Execution-Throughput: 1
+# CPU-Info mainly used to generate a replica of the special (or system) file directory 
+# structure
+CPU-Info:
+  # Set Generate-Special-Dir to True to generate the special files directory, or to False to not.
+  # (Not generating the special files directory may require the user to copy over files manually)
+  Generate-Special-Dir: True
+  # Core-Count MUST be 1 as multi-core is not supported at this time. (A64FX true value is 48)
+  Core-Count: 1
+  # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (A64FX true value is 1)
+  Socket-Count: 1
+  # SMT MUST be 1 as Simultaneous-Multi-Threading is not supported at this time. (A64FX true value is 1)
+  SMT: 1
+  # Below are the values needed to generate /proc/cpuinfo
+  BogoMIPS: 200.00
+  Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop sve
+  CPU-Implementer: "0x46"
+  CPU-Architecture: 8
+  CPU-Variant: "0x1"
+  CPU-Part: "0x001"
+  CPU-Revision: 0
+  # Package-Count is used to generate 
+  # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id}
+  Package-Count: 1
\ No newline at end of file
diff --git a/configs/sst-cores/m1_firestorm-sst.yaml b/configs/sst-cores/m1_firestorm-sst.yaml
new file mode 100644
index 0000000000..2c67ce80cc
--- /dev/null
+++ b/configs/sst-cores/m1_firestorm-sst.yaml
@@ -0,0 +1,320 @@
+# M1 Firestorm core
+Core:
+  Simulation-Mode: outoforder
+  Clock-Frequency: 3.2
+  Timer-Frequency: 100
+  Micro-Operations: True
+Fetch:
+  Fetch-Block-Size: 64
+  Loop-Buffer-Size: 48
+  Loop-Detection-Threshold: 4
+Process-Image:
+  Heap-Size: 1073741824 
+  Stack-Size: 1048576 
+Register-Set:
+  GeneralPurpose-Count: 394
+  FloatingPoint/SVE-Count: 432
+  Conditional-Count: 128
+Pipeline-Widths:
+  Commit: 16
+  FrontEnd: 8 
+  LSQ-Completion: 4 
+Queue-Sizes:
+  ROB: 630
+  Load: 130
+  Store: 60
+Branch-Predictor:
+  BTB-Tag-Bits: 11 
+  Saturating-Count-Bits: 2  
+  Global-History-Length: 11 
+  RAS-entries: 8 
+  Fallback-Static-Predictor: "Always-Taken"
+L1-Data-Memory:
+  Interface-Type: External
+L1-Instruction-Memory:
+  Interface-Type: Flat
+LSQ-L1-Interface:
+  Access-Latency: 3
+  Exclusive: False
+  Load-Bandwidth: 48
+  Store-Bandwidth: 48
+  Permitted-Requests-Per-Cycle: 4
+  Permitted-Loads-Per-Cycle: 3
+  Permitted-Stores-Per-Cycle: 2
+Ports:
+  0:
+    Portname: INT1
+    Instruction-Support:
+      - INT_SIMPLE
+      - BRANCH
+  1:
+    Portname: INT2
+    Instruction-Support:
+      - INT_SIMPLE
+      - BRANCH
+  2:
+    Portname: INT3
+    Instruction-Support:
+      - INT_SIMPLE
+  3:
+    Portname: INT4
+    Instruction-Support:
+      - INT_SIMPLE
+  4:
+    Portname: INT5
+    Instruction-Support:
+      - INT_SIMPLE
+      - INT_MUL
+      - INT_DIV_OR_SQRT
+  5:
+    Portname: INT6
+    Instruction-Support:
+      - INT_SIMPLE
+      - INT_MUL
+  6:
+    Portname: LS1
+    Instruction-Support:
+      - STORE
+  7:
+    Portname: LS2
+    Instruction-Support:
+      - LOAD
+      - STORE
+  8:
+    Portname: LS3
+    Instruction-Support:
+      - LOAD
+  9:
+    Portname: LS4
+    Instruction-Support:
+      - LOAD
+  10:
+    Portname: FP_SIMD1
+    Instruction-Support:
+      - FP
+      - VECTOR
+  11:
+    Portname: FP_SIMD2
+    Instruction-Support:
+      - FP
+      - VECTOR
+  12:
+    Portname: FP_SIMD3
+    Instruction-Support:
+      - FP
+      - VECTOR
+  13:
+    Portname: FP_SIMD4
+    Instruction-Support:
+      - FP
+      - VECTOR
+Reservation-Stations:
+  0:
+    Size: 24
+    Dispatch-Rate: 16
+    Ports:
+      - INT1
+  1:
+    Size: 26
+    Dispatch-Rate: 16
+    Ports:
+      - INT2
+  2:
+    Size: 16
+    Dispatch-Rate: 16
+    Ports:
+      - INT3
+  3:
+    Size: 12
+    Dispatch-Rate: 16
+    Ports:
+      - INT4
+  4:
+    Size: 28
+    Dispatch-Rate: 16
+    Ports:
+      - INT5
+  5:
+    Size: 28
+    Dispatch-Rate: 16
+    Ports:
+      - INT6
+  6:
+    Size: 12
+    Dispatch-Rate: 16
+    Ports:
+      - LS1
+  7:
+    Size: 12
+    Dispatch-Rate: 16
+    Ports:
+      - LS2
+  8:
+    Size: 12
+    Dispatch-Rate: 16
+    Ports:
+      - LS3
+  9:
+    Size: 12
+    Dispatch-Rate: 16
+    Ports:
+      - LS4
+  10:
+    Size: 36
+    Dispatch-Rate: 16
+    Ports:
+      - FP_SIMD1
+  11:
+    Size: 36
+    Dispatch-Rate: 16
+    Ports:
+      - FP_SIMD2
+  12:
+    Size: 36
+    Dispatch-Rate: 16
+    Ports:
+      - FP_SIMD3
+  13:
+    Size: 36
+    Dispatch-Rate: 16
+    Ports:
+      - FP_SIMD4
+
+Execution-Units:
+  0:
+    Pipelined: True
+  1:
+    Pipelined: True
+  2:
+    Pipelined: True
+  3:
+    Pipelined: True
+  4:
+    Pipelined: True
+  5:
+    Pipelined: True
+  6:
+    Pipelined: True
+  7:
+    Pipelined: True
+  8:
+    Pipelined: True
+  9:
+    Pipelined: True
+  10:
+    Pipelined: True
+  11:
+    Pipelined: True
+  12:
+    Pipelined: True
+  13:
+    Pipelined: True
+Latencies:
+  0:
+    Instruction-Groups:
+      - INT_SIMPLE
+      - BRANCH
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  1:
+    Instruction-Groups:
+      - INT_SIMPLE
+      - BRANCH
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  2:
+    Instruction-Groups:
+      - INT_SIMPLE
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  3:
+    Instruction-Groups:
+      - INT_SIMPLE
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  4:
+    Instruction-Groups:
+      - INT_SIMPLE
+      - INT_MUL
+      - INT_DIV_OR_SQRT
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  5:
+    Instruction-Groups:
+      - INT_SIMPLE
+      - INT_MUL
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  6:
+    Instruction-Groups:
+      - STORE
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  7:
+    Instruction-Groups:
+      - LOAD
+      - STORE
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  8:
+    Instruction-Groups:
+      - LOAD
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  9:
+    Instruction-Groups:
+      - LOAD
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  10:
+    Instruction-Groups:
+      - FP
+      - VECTOR
+      - SCALAR
+    Execution-Latency: 3
+    Execution-Throughput: 1
+  11:
+    Instruction-Groups:
+      - FP
+      - VECTOR
+      - SCALAR
+    Execution-Latency: 3
+    Execution-Throughput: 1
+  12:
+    Instruction-Groups:
+      - FP
+      - VECTOR
+      - SCALAR
+    Execution-Latency: 3
+    Execution-Throughput: 1
+  13:
+    Instruction-Groups:
+      - FP
+      - VECTOR
+      - SCALAR
+    Execution-Latency: 3
+    Execution-Throughput: 1
+# CPU-Info mainly used to generate a replica of the special (or system) file directory 
+# structure
+CPU-Info:
+  # Set Generate-Special-Dir to True to generate the special files directory, or to False to not.
+  # (Not generating the special files directory may require the user to copy over files manually)
+  Generate-Special-Dir: True
+  # Core-Count MUST be 1 as multi-core is not supported at this time. (A64FX true value is 48)
+  Core-Count: 1
+  # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (A64FX true value is 1)
+  Socket-Count: 1
+  # SMT MUST be 1 as Simultaneous-Multi-Threading is not supported at this time. (A64FX true value is 1)
+  SMT: 1
+  # Below are the values needed to generate /proc/cpuinfo
+  BogoMIPS: 48.00
+  Features: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 asimddp sha512 asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp flagm2 frint
+  CPU-Implementer: "0x46"
+  CPU-Architecture: 8
+  CPU-Variant: "0x1"
+  CPU-Part: "0x001"
+  CPU-Revision: 0
+  # Package-Count is used to generate 
+  # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id}
+  Package-Count: 1
+
diff --git a/configs/sst-cores/tx2-sst.yaml b/configs/sst-cores/tx2-sst.yaml
new file mode 100644
index 0000000000..0b1c6dec2c
--- /dev/null
+++ b/configs/sst-cores/tx2-sst.yaml
@@ -0,0 +1,185 @@
+---
+# The following resources were utilised to create the config file and naming schemes:
+# https://en.wikichip.org/wiki/cavium/microarchitectures/vulcan
+
+Core:
+  Simulation-Mode: outoforder
+  # Clock Frequency is in GHz.
+  Clock-Frequency: 2.5
+  # Timer-Frequency is in MHz.
+  Timer-Frequency: 200
+  Micro-Operations: True
+Fetch:
+  Fetch-Block-Size: 32
+  Loop-Buffer-Size: 64
+  Loop-Detection-Threshold: 4
+Process-Image:
+  Heap-Size: 1073741824
+  Stack-Size: 1048576
+Register-Set:
+  GeneralPurpose-Count: 154
+  FloatingPoint/SVE-Count: 90
+  Conditional-Count: 128
+Pipeline-Widths:
+  Commit: 4
+  FrontEnd: 4
+  LSQ-Completion: 2
+Queue-Sizes:
+  ROB: 180
+  Load: 64
+  Store: 36
+Branch-Predictor:
+  BTB-Tag-Bits: 11
+  Saturating-Count-Bits: 2
+  Global-History-Length: 10
+  RAS-entries: 5
+  Fallback-Static-Predictor: "Always-Taken"
+L1-Data-Memory:
+  Interface-Type: External
+L1-Instruction-Memory:
+  Interface-Type: Flat
+LSQ-L1-Interface:
+  Access-Latency: 4
+  Exclusive: False
+  Load-Bandwidth: 32
+  Store-Bandwidth: 16
+  Permitted-Requests-Per-Cycle: 2
+  Permitted-Loads-Per-Cycle: 2
+  Permitted-Stores-Per-Cycle: 1
+Ports:
+  0:
+    Portname: Port 0
+    Instruction-Support:
+    - INT_SIMPLE
+    - INT_MUL
+    - FP
+  1:
+    Portname: Port 1
+    Instruction-Support:
+    - INT
+    - FP
+  2:
+    Portname: Port 2
+    Instruction-Support:
+    - INT_SIMPLE
+    - INT_MUL
+    - BRANCH
+  3:
+    Portname: Port 4
+    Instruction-Support:
+    - LOAD
+    - STORE_ADDRESS
+  4:
+    Portname: Port 5
+    Instruction-Support:
+    - LOAD
+    - STORE_ADDRESS
+  5:
+    Portname: Port 3
+    Instruction-Support:
+    - STORE_DATA
+Reservation-Stations:
+  0:
+    Size: 60
+    Dispatch-Rate: 4
+    Ports:
+    - Port 0
+    - Port 1
+    - Port 2
+    - Port 4
+    - Port 5
+    - Port 3
+Execution-Units:
+  0:
+    Pipelined: True
+  1:
+    Pipelined: True
+  2:
+    Pipelined: True
+  3:
+    Pipelined: True
+  4:
+    Pipelined: True
+  5:
+    Pipelined: True
+Latencies:
+  0:
+    Instruction-Groups: 
+    - INT_SIMPLE_ARTH
+    - INT_SIMPLE_LOGICAL
+    Execution-Latency: 2
+    Execution-Throughput: 2
+  1:
+    Instruction-Groups: 
+    - INT_SIMPLE_ARTH_NOSHIFT
+    - INT_SIMPLE_LOGICAL_NOSHIFT
+    Execution-Latency: 1
+    Execution-Throughput: 1
+  2:
+    Instruction-Groups: 
+    - INT_MUL
+    Execution-Latency: 5
+    Execution-Throughput: 1
+  3:
+    Instruction-Groups: 
+    - INT_DIV_OR_SQRT
+    Execution-Latency: 39
+    Execution-Throughput: 39
+  4:
+    Instruction-Groups: 
+    - SCALAR_SIMPLE
+    - SCALAR_MUL
+    Execution-Latency: 6
+    Execution-Throughput: 1
+  5:
+    Instruction-Groups: 
+    - SCALAR_SIMPLE_CMP
+    Execution-Latency: 5
+    Execution-Throughput: 1
+  6:
+    Instruction-Groups: 
+    - FP_SIMPLE_CVT
+    Execution-Latency: 7
+    Execution-Throughput: 1
+  7:
+    Instruction-Groups: 
+    - FP_DIV_OR_SQRT
+    Execution-Latency: 16
+    Execution-Throughput: 16
+  8:
+    Instruction-Groups: 
+    - VECTOR_SIMPLE
+    - VECTOR_MUL
+    Execution-Latency: 7
+    Execution-Throughput: 1
+  9:
+    Instruction-Groups: 
+    - SCALAR_SIMPLE_LOGICAL
+    - SCALAR_SIMPLE_LOGICAL_NOSHIFT
+    - VECTOR_SIMPLE_LOGICAL
+    - VECTOR_SIMPLE_LOGICAL_NOSHIFT
+    Execution-Latency: 1
+    Execution-Throughput: 1
+# CPU-Info mainly used to generate a replica of the special (or system) file directory 
+# structure
+CPU-Info:
+  # Set Generate-Special-Dir to True to generate the special files directory, or to False to not.
+  # (Not generating the special files directory may require the user to copy over files manually)
+  Generate-Special-Dir: True
+  # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32)
+  Core-Count: 1
+  # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2)
+  Socket-Count: 1
+  # SMT MUST be 1 as Simultaneous-Multi-Threading is not supported at this time. (TX2 true value is 4)
+  SMT: 1
+  # Below are the values needed to generate /proc/cpuinfo
+  BogoMIPS: 400.00
+  Features: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm
+  CPU-Implementer: "0x43"
+  CPU-Architecture: 8
+  CPU-Variant: "0x1"
+  CPU-Part: "0x0af"
+  CPU-Revision: 2
+  # Package-Count is used to generate 
+  # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id}
+  Package-Count: 1
\ No newline at end of file
diff --git a/docs/sphinx/developer/arch/supported/aarch64.rst b/docs/sphinx/developer/arch/supported/aarch64.rst
index f0e9e5a500..a09022785f 100644
--- a/docs/sphinx/developer/arch/supported/aarch64.rst
+++ b/docs/sphinx/developer/arch/supported/aarch64.rst
@@ -226,4 +226,4 @@ Similar to instructions, system register support is added when they are encounte
 System Counter Timers
 ---------------------
 
-Present in AArch64 are two main system timers; the Counter-timer Virtual Count Register `CNTVCT <https://developer.arm.com/documentation/ddi0601/2022-09/AArch64-Registers/CNTVCT-EL0--Counter-timer-Virtual-Count-register?lang=en>`_, and the Performance Monitors Cycle Count Register `PMCCNTR <https://developer.arm.com/documentation/ddi0601/2022-09/AArch64-Registers/PMCCNTR-EL0--Performance-Monitors-Cycle-Count-Register?lang=en>`_. The CNTVCT system register holds a virtual cycle count, and is incremented at a defined frequency (see :ref:` Configuring SimEng <core>`). The PMCCNTR system register holds the real processor cycle count. Both are supported in SimEng and are accessible to the programmer through the appropriate ``mrs`` instructions. The logic which updates these registers can be found at ``src/lib/arch/aarch64/Architecture.cc:updateSystemTimerRegisters`` and is invoked inside each of the core model's ``tick()`` function.
\ No newline at end of file
+Present in AArch64 are two main system timers; the Counter-timer Virtual Count Register `CNTVCT <https://developer.arm.com/documentation/ddi0601/2022-09/AArch64-Registers/CNTVCT-EL0--Counter-timer-Virtual-Count-register?lang=en>`_, and the Performance Monitors Cycle Count Register `PMCCNTR <https://developer.arm.com/documentation/ddi0601/2022-09/AArch64-Registers/PMCCNTR-EL0--Performance-Monitors-Cycle-Count-Register?lang=en>`_. The CNTVCT system register holds a virtual cycle count, and is incremented at a defined frequency (see :ref:`Configuring SimEng <core>`). The PMCCNTR system register holds the real processor cycle count. Both are supported in SimEng and are accessible to the programmer through the appropriate ``mrs`` instructions. The logic which updates these registers can be found at ``src/lib/arch/aarch64/Architecture.cc:updateSystemTimerRegisters`` and is invoked inside each core model's ``tick()`` function.
diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh
index a8916d816e..3713d58c42 100644
--- a/src/include/simeng/CoreInstance.hh
+++ b/src/include/simeng/CoreInstance.hh
@@ -53,6 +53,11 @@ class CoreInstance {
   CoreInstance(std::string configPath, std::string executablePath,
                std::vector<std::string> executableArgs);
 
+  /** CoreInstance with source code assembled by LLVM and a model configuration.
+   */
+  CoreInstance(char* assembledSource, size_t sourceSize,
+               std::string configPath);
+
   ~CoreInstance();
 
   /** Set the SimEng L1 instruction cache memory. */
@@ -86,6 +91,9 @@ class CoreInstance {
   /** Getter for the size of the created process image. */
   const uint64_t getProcessImageSize() const;
 
+  /** Getter for heap start. */
+  const uint64_t getHeapStart() const;
+
  private:
   /** Generate the appropriate simulation objects as parameterised by the
    * configuration.*/
@@ -113,6 +121,15 @@ class CoreInstance {
   /** Construct the special file directory. */
   void createSpecialFileDirectory();
 
+  /** Whether or not the source has been assembled by LLVM. */
+  bool assembledSource_ = false;
+
+  /** Reference to source assembled by LLVM. */
+  char* source_ = nullptr;
+
+  /** Size of the source code assembled by LLVM. */
+  size_t sourceSize_ = 0;
+
   /** The config file describing the modelled core to be created. */
   YAML::Node config_;
 
diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc
index c150185b60..8bb5a4d6ad 100644
--- a/src/lib/CoreInstance.cc
+++ b/src/lib/CoreInstance.cc
@@ -14,7 +14,22 @@ CoreInstance::CoreInstance(std::string configPath, std::string executablePath,
   generateCoreModel(executablePath, executableArgs);
 }
 
-CoreInstance::~CoreInstance() {}
+// Builds a core from LLVM-assembled code plus a model configuration file.
+CoreInstance::CoreInstance(char* assembledSource, size_t sourceSize,
+                           std::string configPath) {
+  assembledSource_ = true;
+  sourceSize_ = sourceSize;
+  source_ = assembledSource;
+  config_ = simeng::ModelConfig(configPath).getConfigFile();
+  // No executable is involved here: pass an empty path and no arguments.
+  generateCoreModel(std::string(), std::vector<std::string>());
+}
+
+CoreInstance::~CoreInstance() {
+  // Releases the buffer stored in source_ (set by the assembled-source
+  // constructor). delete[] on a null pointer is a no-op, so no guard is needed.
+  delete[] source_;
+}
 
 void CoreInstance::generateCoreModel(std::string executablePath,
                                      std::vector<std::string> executableArgs) {
@@ -97,6 +112,17 @@ void CoreInstance::createProcess(std::string executablePath,
                 << commandLine[0] << std::endl;
       exit(1);
     }
+  } else if (assembledSource_) {
+    // Create a process image from the source code assembled by LLVM.
+    process_ = std::make_unique<simeng::kernel::LinuxProcess>(
+        simeng::span<char>(source_, sourceSize_), config_);
+    // Raise error if created process is not valid
+    if (!process_->isValid()) {
+      std::cerr << "[SimEng:CoreInstance] Could not create process based on "
+                   "source assembled by LLVM"
+                << std::endl;
+      exit(1);
+    }
   } else {
     // Create a process image from the set of instructions held in hex_
     process_ = std::make_unique<simeng::kernel::LinuxProcess>(
@@ -307,4 +333,8 @@ const uint64_t CoreInstance::getProcessImageSize() const {
   return processMemorySize_;
 }
 
+const uint64_t CoreInstance::getHeapStart() const {
+  return process_->getHeapStart();  // forwarded from the created process
+}
+
 }  // namespace simeng
diff --git a/src/lib/pipeline/DispatchIssueUnit.cc b/src/lib/pipeline/DispatchIssueUnit.cc
index d79675e09d..dfa0d4731d 100644
--- a/src/lib/pipeline/DispatchIssueUnit.cc
+++ b/src/lib/pipeline/DispatchIssueUnit.cc
@@ -56,8 +56,8 @@ void DispatchIssueUnit::tick() {
 
   /** Stores the number of instructions dispatched for each
    * reservation station. */
-  std::vector<uint16_t> dispatches = {
-      0, static_cast<unsigned short>(reservationStations_.size())};
+  std::vector<uint16_t> dispatches(
+      static_cast<unsigned short>(reservationStations_.size()), 0);
 
   for (size_t slot = 0; slot < input_.getWidth(); slot++) {
     auto& uop = input_.getHeadSlots()[slot];
diff --git a/sst/Assemble.cc b/sst/Assemble.cc
new file mode 100644
index 0000000000..f8b85f9101
--- /dev/null
+++ b/sst/Assemble.cc
@@ -0,0 +1,162 @@
+#include "Assemble.hh"
+
+#ifdef SIMENG_ENABLE_SST_TESTS
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#endif
+
+using namespace SST::SSTSimEng;
+
+#define ASSERT(expr, errStr)                                                 \
+  if (!(expr)) {                                                             \
+    std::cerr << "[SSTSimEngTest:Assembler] Error occured while assembling " \
+                 "source through LLVM:\n"                                    \
+              << errStr << std::endl;                                        \
+    exit(1);                                                                 \
+  }
+
+// Assembles `source` for AArch64 with a trailing `.word 0` terminator appended.
+Assembler::Assembler(std::string source) {
+  assemble((source + "\n.word 0").c_str(), "aarch64");
+}
+
+Assembler::~Assembler() {}  // code_ is not released here
+
+#ifdef SIMENG_ENABLE_SST_TESTS
+void Assembler::assemble(const char* source, const char* triple) {
+  // Initialise the LLVM AArch64 components needed to assemble `source`
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64TargetMC();
+  LLVMInitializeAArch64AsmParser();
+
+  // Get LLVM target
+  std::string errStr;
+  const llvm::Target* target =
+      llvm::TargetRegistry::lookupTarget(triple, errStr);
+  ASSERT(target != nullptr, errStr);
+
+  // Create source buffer from assembly
+  llvm::SourceMgr srcMgr;
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> srcBuffer =
+      llvm::MemoryBuffer::getMemBuffer(source);
+  ASSERT(srcBuffer, "Failed to create LLVM source buffer");
+  srcMgr.AddNewSourceBuffer(std::move(*srcBuffer), llvm::SMLoc());
+
+  // Create MC register info
+  std::unique_ptr<llvm::MCRegisterInfo> regInfo(
+      target->createMCRegInfo(triple));
+  ASSERT(regInfo != nullptr, "Failed to create LLVM register info");
+
+  // Create MC asm info (the factory takes MCTargetOptions from LLVM 10 onwards)
+  llvm::MCTargetOptions options;
+#if SIMENG_LLVM_VERSION < 10
+  std::unique_ptr<llvm::MCAsmInfo> asmInfo(
+      target->createMCAsmInfo(*regInfo, triple));
+#else
+  std::unique_ptr<llvm::MCAsmInfo> asmInfo(
+      target->createMCAsmInfo(*regInfo, triple, options));
+#endif
+  ASSERT(asmInfo != nullptr, "Failed to create LLVM asm info");
+
+  // Create MC context and object file info
+  llvm::MCObjectFileInfo objectFileInfo;
+  llvm::MCContext context(asmInfo.get(), regInfo.get(), &objectFileInfo,
+                          &srcMgr);
+  objectFileInfo.InitMCObjectFileInfo(llvm::Triple(triple), false, context,
+                                      false);
+
+  // Create MC subtarget info with the SVE and LSE extensions enabled
+  std::unique_ptr<llvm::MCSubtargetInfo> subtargetInfo(
+      target->createMCSubtargetInfo(triple, "", "+sve,+lse"));
+  ASSERT(subtargetInfo != nullptr, "Failed to create LLVM subtarget info");
+
+  // Create MC instruction info
+  std::unique_ptr<llvm::MCInstrInfo> instrInfo(target->createMCInstrInfo());
+  ASSERT(instrInfo != nullptr, "Failed to create LLVM instruction info");
+
+  // Create MC asm backend
+  std::unique_ptr<llvm::MCAsmBackend> asmBackend(
+      target->createMCAsmBackend(*subtargetInfo, *regInfo, options));
+  ASSERT(asmBackend != nullptr, "Failed to create LLVM asm backend");
+
+  // Create MC code emitter
+  std::unique_ptr<llvm::MCCodeEmitter> codeEmitter(
+      target->createMCCodeEmitter(*instrInfo, *regInfo, context));
+  ASSERT(codeEmitter != nullptr, "Failed to create LLVM code emitter");
+
+  // Create MC object writer (output is collected in objectStreamData)
+  llvm::SmallVector<char, 1024> objectStreamData;
+  llvm::raw_svector_ostream objectStream(objectStreamData);
+  std::unique_ptr<llvm::MCObjectWriter> objectWriter =
+      asmBackend->createObjectWriter(objectStream);
+  ASSERT(objectWriter != nullptr, "Failed to create LLVM object writer");
+
+  // Create MC object streamer (takes over backend, writer and emitter)
+  std::unique_ptr<llvm::MCStreamer> objectStreamer(
+      target->createMCObjectStreamer(
+          llvm::Triple(triple), context, std::move(asmBackend),
+          std::move(objectWriter), std::move(codeEmitter), *subtargetInfo,
+          options.MCRelaxAll, options.MCIncrementalLinkerCompatible, false));
+  ASSERT(objectStreamer != nullptr, "Failed to create LLVM object streamer");
+
+  // Create MC asm parser
+  std::unique_ptr<llvm::MCAsmParser> asmParser(
+      llvm::createMCAsmParser(srcMgr, context, *objectStreamer, *asmInfo));
+  ASSERT(asmParser != nullptr, "Failed to create LLVM asm parser");
+
+  // Create MC target asm parser
+  std::unique_ptr<llvm::MCTargetAsmParser> targetAsmParser(
+      target->createMCAsmParser(*subtargetInfo, *asmParser, *instrInfo,
+                                options));
+  ASSERT(targetAsmParser != nullptr, "Failed to create LLVM target asm parser");
+  asmParser->setTargetParser(*targetAsmParser);
+
+  // Run asm parser to generate assembled object code
+  ASSERT(!asmParser->Run(false), "");
+
+  // Create ELF object from output
+  llvm::StringRef objectData = objectStream.str();
+  auto elfOrErr = llvm::object::ELFFile<
+      llvm::object::ELFType<llvm::support::little, true>>::create(objectData);
+  ASSERT(!elfOrErr.takeError(), "Failed to load ELF object");
+  auto& elf = *elfOrErr;
+
+  // Get handle to .text section (assumed to be section index 2 - TODO confirm)
+  auto textOrErr = elf.getSection(2);
+  ASSERT(!textOrErr.takeError(), "Failed to find .text section");
+  auto& text = *textOrErr;
+
+  // Get reference to .text section data
+#if SIMENG_LLVM_VERSION < 12
+  auto textDataOrErr = elf.getSectionContents(text);
+#else
+  auto textDataOrErr = elf.getSectionContents(*text);
+#endif
+  ASSERT(!textDataOrErr.takeError(), "Failed to get .text contents");
+  llvm::ArrayRef<uint8_t> textData = *textDataOrErr;
+
+  // Make copy of .text section data (code_ is allocated with new[] here)
+  codeSize_ = textData.size();
+  code_ = new uint8_t[codeSize_];
+  std::copy(textData.begin(), textData.end(), code_);
+}
+#else
+void Assembler::assemble(const char* source, const char* triple) {}  // no-op stub used when SIMENG_ENABLE_SST_TESTS is not defined
+#endif
+
+char* Assembler::getAssembledSource() { return reinterpret_cast<char*>(code_); }
+size_t Assembler::getAssembledSourceSize() { return codeSize_; }
diff --git a/sst/CMakeLists.txt b/sst/CMakeLists.txt
new file mode 100644
index 0000000000..a6603dac3b
--- /dev/null
+++ b/sst/CMakeLists.txt
@@ -0,0 +1,62 @@
+# Sources compiled into the SimEng SST element library.
+set(SIMENG_SST_SOURCES
+    SimEngCoreWrapper.cc
+    SimEngMemInterface.cc
+    Assemble.cc
+)
+
+add_library(sstsimeng SHARED ${SIMENG_SST_SOURCES})
+
+# Header search paths.
+#   PUBLIC:  the wrapper's own headers, the SST installation headers and the
+#            SimEng library sources.
+#   PRIVATE: this directory and its build directory.
+target_include_directories(sstsimeng
+  PUBLIC
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+    ${SST_INSTALL_DIR}/include
+    ${PROJECT_SOURCE_DIR}/src/lib
+  PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_BINARY_DIR}
+)
+
+# Compile flags that stand in for `sst-config --ELEMENT_CXXFLAGS`; the rest of
+# that command's output is already covered by the include directories above
+# and the compile definitions below.
+#   -fPIC   position independent code
+#   -frtti  avoids "cannot use typeid with -fno-rtti" errors in SST sources
+#   -g      debug information
+target_compile_options(sstsimeng PUBLIC -fPIC -frtti -g)
+
+# Definitions reported by `sst-config --ELEMENT_CXXFLAGS`, plus
+# SIMENG_ENABLE_SST, which is our own preprocessor switch: files in simeng/sst
+# do not compile without it.
+target_compile_definitions(sstsimeng
+  PUBLIC
+    SIMENG_ENABLE_SST
+    __STDC_FORMAT_MACROS
+    HAVE_CONFIG_H
+)
+
+# Link flags that stand in for `sst-config --ELEMENT_LDFLAGS`.
+target_link_options(sstsimeng
+  PUBLIC
+    -fno-common
+    LINKER:-undefined,dynamic_lookup
+)
+
+# Link the SimEng library into sstsimeng.
+target_link_libraries(sstsimeng PUBLIC libsimeng)
+
+# The SST test suite is only built when both the general SimEng tests and the
+# SST tests are enabled.
+if (SIMENG_ENABLE_TESTS AND SIMENG_ENABLE_SST_TESTS)
+  target_compile_definitions(sstsimeng
+    PUBLIC
+      SIMENG_ENABLE_TESTS
+      SIMENG_ENABLE_SST_TESTS
+  )
+  # LLVM headers and libraries used to assemble test sources
+  target_include_directories(sstsimeng PUBLIC ${LLVM_INCLUDE_DIRS})
+  llvm_map_components_to_libnames(LLVM_LIBS aarch64asmparser object)
+  target_link_libraries(sstsimeng PUBLIC ${LLVM_LIBS})
+  add_subdirectory(test)
+endif()
+
+
+# Everything SST-related is installed below <install prefix>/sst.
+set(SST_SIMENG_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/sst")
+# Argument strings handed to `sst-register` at install time.
+set(SST_REGISTER_BASE_COMMAND "sstsimeng sstsimeng_LIBDIR=${SST_SIMENG_INSTALL_DIR}")
+set(SST_REGISTER_ELEMENT_COMMAND "SST_ELEMENT_SOURCE sstsimeng=${SST_SIMENG_INSTALL_DIR}")
+
+# Install the sstsimeng library into <install prefix>/sst
+install(TARGETS sstsimeng DESTINATION sst)
+# Install the wrapper headers into <install prefix>/sst/include
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include
+        DESTINATION ${SST_SIMENG_INSTALL_DIR})
+# Install the SST python configs into <install prefix>/sst/config
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/config
+        DESTINATION ${SST_SIMENG_INSTALL_DIR})
+
+# Register the sstsimeng library with SST by running `sst-register` at install
+# time. The command line is echoed with message(STATUS) rather than
+# execute_process(COMMAND_ECHO), because COMMAND_ECHO needs CMake >= 3.15 while
+# this project only requires 3.13. A failing registration is reported as a
+# warning so that the file installation itself is not aborted.
+foreach(sst_register_args IN ITEMS
+        "${SST_REGISTER_BASE_COMMAND}"
+        "${SST_REGISTER_ELEMENT_COMMAND}")
+  install(CODE "
+    message(STATUS \"Running: sst-register ${sst_register_args}\")
+    execute_process(COMMAND sst-register ${sst_register_args}
+                    RESULT_VARIABLE sst_register_result)
+    if(NOT \"\${sst_register_result}\" STREQUAL \"0\")
+      message(WARNING \"sst-register ${sst_register_args} failed: \${sst_register_result}\")
+    endif()
+  ")
+endforeach()
diff --git a/sst/SimEngCoreWrapper.cc b/sst/SimEngCoreWrapper.cc
new file mode 100644
index 0000000000..b4fd6547ea
--- /dev/null
+++ b/sst/SimEngCoreWrapper.cc
@@ -0,0 +1,368 @@
+// clang-format off
+// DO NOT MOVE FROM TOP OF FILE - https://github.com/sstsimulator/sst-core/issues/865
+#include <sst/core/sst_config.h>
+// clang-format on
+
+#include "SimEngCoreWrapper.hh"
+
+#include <cstdlib>
+#include <iostream>
+
+#include "Assemble.hh"
+
+using namespace SST::SSTSimEng;
+using namespace SST::Interfaces;
+
+// Constructs the SST component that wraps a SimEng core.
+//  - id:     component id forwarded to SST::Component.
+//  - params: component parameters; the keys read are "clock",
+//            "executable_path", "executable_args", "simeng_config_path",
+//            "cache_line_width", "max_addr_memory", "source",
+//            "assemble_with_source", "heap" and "debug".
+// Terminates the process with EXIT_FAILURE when no executable path is given
+// (unless assembling from source), when "max_addr_memory" is 0, or when no
+// subcomponent could be loaded into the "memory" slot.
+SimEngCoreWrapper::SimEngCoreWrapper(SST::ComponentId_t id, SST::Params& params)
+    : SST::Component(id) {
+  output_.init("[SSTSimEng:SimEngCoreWrapper] " + getName() + "@p:@l ", 999, 0,
+               SST::Output::STDOUT);
+  clock_ = registerClock(params.find<std::string>("clock", "1GHz"),
+                         new SST::Clock::Handler<SimEngCoreWrapper>(
+                             this, &SimEngCoreWrapper::clockTick));
+
+  // Extract variables from config.py
+  executablePath_ = params.find<std::string>("executable_path", "");
+  executableArgs_ = splitArgs(params.find<std::string>("executable_args", ""));
+  simengConfigPath_ = params.find<std::string>("simeng_config_path", "");
+  cacheLineWidth_ = params.find<uint64_t>("cache_line_width", "64");
+  maxAddrMemory_ = params.find<uint64_t>("max_addr_memory", "0");
+  source_ = params.find<std::string>("source", "");
+  assembleWithSource_ = params.find<bool>("assemble_with_source", false);
+  heapStr_ = params.find<std::string>("heap", "");
+  debug_ = params.find<bool>("debug", false);
+
+  if (executablePath_.length() == 0 && !assembleWithSource_) {
+    output_.verbose(CALL_INFO, 10, 0,
+                    "SimEng executable binary filepath not provided.");
+    std::exit(EXIT_FAILURE);
+  }
+  if (maxAddrMemory_ == 0) {
+    output_.verbose(CALL_INFO, 10, 0,
+                    "Maximum address range for memory not provided");
+    std::exit(EXIT_FAILURE);
+  }
+
+  iterations_ = 0;
+
+  // Instantiate the StandardMem Interface defined in config.py
+  sstMem_ = loadUserSubComponent<SST::Interfaces::StandardMem>(
+      "memory", ComponentInfo::SHARE_NONE, clock_,
+      new StandardMem::Handler<SimEngCoreWrapper>(
+          this, &SimEngCoreWrapper::handleMemoryEvent));
+
+  // Without a memory interface every later call through sstMem_ would
+  // dereference a null pointer, so stop here with a clear message instead.
+  if (sstMem_ == nullptr) {
+    output_.verbose(CALL_INFO, 10, 0,
+                    "Unable to load a StandardMem subcomponent into the "
+                    "\"memory\" slot.\n");
+    std::exit(EXIT_FAILURE);
+  }
+
+  dataMemory_ = std::make_shared<SimEngMemInterface>(sstMem_, cacheLineWidth_,
+                                                     maxAddrMemory_, debug_);
+
+  handlers_ = new SimEngMemInterface::SimEngMemHandlers(*dataMemory_, &output_);
+
+  // Protected methods from SST::Component used to start simulation
+  registerAsPrimaryComponent();
+  primaryComponentDoNotEndSim();
+}
+
+// Frees the memory-response handler that the constructor allocates with `new`;
+// no other code in this file releases it.
+SimEngCoreWrapper::~SimEngCoreWrapper() { delete handlers_; }
+
+// Runs the memory interface's own setup(), then prints the start banner and
+// records the wall-clock start time that finish() later measures against.
+void SimEngCoreWrapper::setup() {
+  sstMem_->setup();
+  output_.verbose(CALL_INFO, 1, 0, "Memory setup complete\n");
+  // Run Simulation
+  std::cout << "[SimEng] Starting...\n" << std::endl;
+  startTime_ = std::chrono::high_resolution_clock::now();
+}
+
+// Callback registered with the StandardMem interface in the constructor; it
+// dispatches the incoming request to the handlers_ object.
+void SimEngCoreWrapper::handleMemoryEvent(StandardMem::Request* memEvent) {
+  memEvent->handle(handlers_);
+}
+
+// Prints the core statistics followed by a one-line summary of the run: ticks
+// executed, wall-clock duration since setup(), tick rate in kHz and retired
+// instructions in MIPS.
+void SimEngCoreWrapper::finish() {
+  output_.verbose(CALL_INFO, 1, 0,
+                  "Simulation complete. Finalising stats....\n");
+
+  auto endTime = std::chrono::high_resolution_clock::now();
+  auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(
+                      endTime - startTime_)
+                      .count();
+  const double seconds = static_cast<double>(duration) / 1000.0;
+  uint64_t retired = core_->getInstructionsRetiredCount();
+
+  // Runs shorter than the 1ms timer resolution report a rate of 0 instead of
+  // dividing by zero.
+  double khz = 0.0;
+  double mips = 0.0;
+  if (seconds > 0.0) {
+    // ticks per second -> kHz
+    khz = (iterations_ / seconds) / 1000.0;
+    // instructions per second -> millions of instructions per second
+    mips = (retired / seconds) / 1.0e6;
+  }
+
+  // Print stats
+  std::cout << "\n";
+  auto stats = core_->getStats();
+  for (const auto& [key, value] : stats) {
+    std::cout << "[SimEng] " << key << ": " << value << "\n";
+  }
+
+  std::cout << "\n[SimEng] Finished " << iterations_ << " ticks in " << duration
+            << "ms (" << std::round(khz) << " kHz, " << std::setprecision(2)
+            << mips << " MIPS)" << std::endl;
+}
+
+// Forwards every init phase to the memory interface and builds the SimEng core
+// exactly once, during phase 0.
+void SimEngCoreWrapper::init(unsigned int phase) {
+  sstMem_->init(phase);
+
+  // Later phases have nothing left to construct.
+  if (phase != 0) {
+    return;
+  }
+  fabricateSimEngCore();
+}
+
+// Clock handler. Advances the data memory, the core and the instruction memory
+// by one tick and returns false, until the core has halted and no memory
+// requests are pending; at that point it tells SST the simulation may end and
+// returns true.
+bool SimEngCoreWrapper::clockTick(SST::Cycle_t current_cycle) {
+  const bool simulationDone =
+      core_->hasHalted() && !dataMemory_->hasPendingRequests();
+
+  if (simulationDone) {
+    // Protected method from SST::Component used to end SST simulation
+    primaryComponentOKToEndSim();
+    return true;
+  }
+
+  // Tick order: data memory, then core, then instruction memory.
+  dataMemory_->tick();
+  core_->tick();
+  instructionMemory_->tick();
+
+  iterations_++;
+  return false;
+}
+// Returns strArgs without its leading and trailing space (' ') characters.
+// Only the space character is stripped; tabs and newlines are left untouched.
+// A string that is empty or consists solely of spaces yields an empty string.
+std::string SimEngCoreWrapper::trimSpaces(std::string strArgs) {
+  const std::size_t firstKept = strArgs.find_first_not_of(' ');
+  if (firstKept == std::string::npos) {
+    // Nothing but spaces (or nothing at all).
+    return "";
+  }
+  const std::size_t lastKept = strArgs.find_last_not_of(' ');
+  return strArgs.substr(firstKept, lastKept - firstKept + 1);
+}
+
+// Splits a command-line style argument string into individual arguments.
+//  - Arguments are separated by one or more spaces.
+//  - Text enclosed in single or double quotes is kept as part of one argument;
+//    the enclosing quotes are dropped and the other quote kind is treated as a
+//    literal character inside them.
+//  - A backslash makes the next character literal, inside or outside quotes.
+// An empty or whitespace-only string yields an empty vector. If a quote is
+// still open at the end of the string an error is printed and the process
+// exits with EXIT_FAILURE.
+std::vector<std::string> SimEngCoreWrapper::splitArgs(std::string strArgs) {
+  std::string trimmedStrArgs = trimSpaces(strArgs);
+  std::string str = "";
+  std::vector<std::string> args;
+  std::size_t argSize = trimmedStrArgs.size();
+  bool escapeSingle = false;
+  bool escapeDouble = false;
+  bool captureEscape = false;
+  uint64_t index = 0;
+  // Nothing to parse: the string is empty or holds only spaces.
+  if (trimmedStrArgs.find_first_not_of(' ') == std::string::npos) {
+    return args;
+  }
+
+  for (std::size_t x = 0; x < argSize; x++) {
+    index = x;
+    const bool escaped = escapeDouble || escapeSingle;
+    const char currChar = trimmedStrArgs.at(x);
+    if (captureEscape) {
+      // The previous character was a '\': take this one literally.
+      captureEscape = false;
+      str += currChar;
+    }
+    // A '\' is not stored itself; it marks the following character as literal,
+    // without any delimiting or quoting behaviour.
+    else if (currChar == '\\') {
+      captureEscape = true;
+    } else if (escaped) {
+      // Inside a single-quoted section a second single quote (') closes it.
+      // Everything in between, double quotes included, is captured verbatim.
+      // e.g. "arg1=1 arg2='"Hi"' arg3=2" will be parsed as
+      // std::vector<std::string>{arg1=1, arg2="Hi", arg3=2}
+      if (currChar == '\'' && escapeSingle) {
+        escapeSingle = false;
+      }
+      // Inside a double-quoted section a second double quote (") closes it.
+      // Everything in between, single quotes included, is captured verbatim.
+      // e.g. "arg1=1 arg2="James' Car" arg3=2" will be parsed as
+      // std::vector<std::string>{arg1=1, arg2=James' Car, arg3=2}
+      else if (currChar == '\"' && escapeDouble) {
+        escapeDouble = false;
+      } else {
+        str += currChar;
+      }
+    } else {
+      if (currChar == ' ') {
+        // An unquoted space ends the current argument; repeated spaces do not
+        // produce empty arguments.
+        if (str != "") {
+          args.push_back(str);
+          str = "";
+        }
+      }
+      // An unquoted (") opens a double-quoted section.
+      else if (currChar == '\"') {
+        escapeDouble = true;
+      }
+      // An unquoted (') opens a single-quoted section.
+      else if (currChar == '\'') {
+        escapeSingle = true;
+      } else {
+        str += currChar;
+      }
+    }
+  }
+  // A quote that was opened but never closed makes the input invalid.
+  if (escapeSingle || escapeDouble) {
+    output_.verbose(CALL_INFO, 1, 0, R"(
+           Parsing failed: Invalid format - Please make sure all
+           characters/strings are escaped properly within a set single or 
+           double quotes. To escape quotes use (\\\) instead of (\).\n
+           )");
+    std::cerr << "Error occured at index " << index
+              << " of the argument string - substring: "
+              << "[ " << str << " ]" << std::endl;
+    std::exit(EXIT_FAILURE);
+  }
+  // The last argument is not followed by a space, so store it here.
+  args.push_back(str);
+  return args;
+}
+
+// Builds the SimEng side of the simulation. In order, it:
+//  1. constructs coreInstance_ from the config path, the executable and its
+//     arguments (or, in SST test builds, from LLVM-assembled source);
+//  2. exits unless the simulation mode is OutOfOrder;
+//  3. attaches the SST-backed data memory and creates the core;
+//  4. exits if maxAddrMemory_ is smaller than the process image;
+//  5. in SST test builds, copies any configured heap values into the image;
+//  6. sends the process image to SST memory and prints build/run metadata.
+void SimEngCoreWrapper::fabricateSimEngCore() {
+  output_.verbose(CALL_INFO, 1, 0, "Setting up SimEng Core\n");
+  if (simengConfigPath_ != "") {
+    // A SimEng config file was supplied through the component parameters.
+#ifdef SIMENG_ENABLE_SST_TESTS
+    if (assembleWithSource_) {
+      output_.verbose(CALL_INFO, 1, 0,
+                      "Assembling source instructions using LLVM\n");
+      Assembler assemble = Assembler(source_);
+      coreInstance_ = std::make_unique<simeng::CoreInstance>(
+          assemble.getAssembledSource(), assemble.getAssembledSourceSize(),
+          simengConfigPath_);
+    } else {
+      coreInstance_ = std::make_unique<simeng::CoreInstance>(
+          simengConfigPath_, executablePath_, executableArgs_);
+    }
+#else
+    coreInstance_ = std::make_unique<simeng::CoreInstance>(
+        simengConfigPath_, executablePath_, executableArgs_);
+#endif
+  } else {
+    // No config file supplied. Test builds fall back to the a64fx-sst.yaml
+    // found under SIMENG_BUILD_DIR; other builds use the CoreInstance
+    // constructor that takes no config path.
+#ifdef SIMENG_ENABLE_SST_TESTS
+    std::string a64fxConfigPath = std::string(SIMENG_BUILD_DIR) +
+                                  "/simeng-configs/sst-cores/a64fx-sst.yaml";
+    output_.verbose(
+        CALL_INFO, 1, 0,
+        "No config path provided so defaulting to a64fx-sst.yaml\n");
+    if (assembleWithSource_) {
+      output_.verbose(CALL_INFO, 1, 0,
+                      "Assembling source instructions using LLVM\n");
+      Assembler assemble = Assembler(source_);
+      coreInstance_ = std::make_unique<simeng::CoreInstance>(
+          assemble.getAssembledSource(), assemble.getAssembledSourceSize(),
+          a64fxConfigPath);
+    } else {
+      coreInstance_ = std::make_unique<simeng::CoreInstance>(
+          a64fxConfigPath, executablePath_, executableArgs_);
+    }
+#else
+    coreInstance_ = std::make_unique<simeng::CoreInstance>(executablePath_,
+                                                           executableArgs_);
+#endif
+  }
+  if (coreInstance_->getSimulationMode() !=
+      simeng::SimulationMode::OutOfOrder) {
+    output_.verbose(CALL_INFO, 1, 0,
+                    "SimEng currently only supports Out-of-Order "
+                    "archetypes with SST.");
+    std::exit(EXIT_FAILURE);
+  }
+  // Set the SST data memory SimEng should use
+  coreInstance_->setL1DataMemory(dataMemory_);
+
+  // Construct core
+  coreInstance_->createCore();
+
+  // Get remaining simulation objects needed to forward simulation
+  core_ = coreInstance_->getCore();
+  instructionMemory_ = coreInstance_->getInstructionMemory();
+
+  // This check ensures that SST has enough memory to store the entire
+  // processImage constructed by SimEng.
+  if (maxAddrMemory_ < coreInstance_->getProcessImageSize()) {
+    output_.verbose(
+        CALL_INFO, 1, 0,
+        "Error: SST backend memory is less than processImage size. "
+        "Please increase the memory allocated to memHierarchy.memBackend and "
+        "ensure it is consistent with \'max_addr_memory\' and "
+        "\'addr_range_end\'. \n");
+    primaryComponentOKToEndSim();
+    std::exit(EXIT_FAILURE);
+  }
+// If testing is enabled populate heap if heap values have been specified.
+#ifdef SIMENG_ENABLE_SST_TESTS
+  if (heapStr_ != "") {
+    // Each parsed heap value is stored as 8 bytes and the resulting buffer is
+    // copied into the process image starting at the heap start offset.
+    // NOTE(review): no check is visible here that heapStart + heapSize stays
+    // within the process image - confirm this is guaranteed elsewhere.
+    std::vector<uint8_t> initialHeapData;
+    std::vector<uint64_t> heapVals = splitHeapStr();
+    uint64_t heapSize = heapVals.size() * 8;
+    initialHeapData.resize(heapSize);
+    uint64_t* heap = reinterpret_cast<uint64_t*>(initialHeapData.data());
+    for (size_t x = 0; x < heapVals.size(); x++) {
+      heap[x] = heapVals[x];
+    }
+    uint64_t heapStart = coreInstance_->getHeapStart();
+    std::copy(initialHeapData.begin(), initialHeapData.end(),
+              coreInstance_->getProcessImage().get() + heapStart);
+  }
+#endif
+  // Send the process image data over to the SST memory
+  dataMemory_->sendProcessImageToSST(coreInstance_->getProcessImage().get(),
+                                     coreInstance_->getProcessImageSize());
+
+  output_.verbose(CALL_INFO, 1, 0, "SimEng core setup successfully.\n");
+  // Print out build metadata
+  std::cout << "[SimEng] Build metadata:" << std::endl;
+  std::cout << "[SimEng] \tVersion: " SIMENG_VERSION << std::endl;
+  std::cout << "[SimEng] \tCompile Time - Date: " __TIME__ " - " __DATE__
+            << std::endl;
+  std::cout << "[SimEng] \tBuild type: " SIMENG_BUILD_TYPE << std::endl;
+  std::cout << "[SimEng] \tCompile options: " SIMENG_COMPILE_OPTIONS
+            << std::endl;
+  std::cout << "[SimEng] \tTest suite: " SIMENG_ENABLE_TESTS << std::endl;
+  std::cout << std::endl;
+
+  std::cout << "[SimEng] Running in "
+            << coreInstance_->getSimulationModeString() << " mode" << std::endl;
+  std::cout << "[SimEng] Workload: " << executablePath_;
+  for (const auto& arg : executableArgs_) std::cout << " " << arg;
+  std::cout << std::endl;
+  // NOTE(review): this prints simengConfigPath_, which is empty whenever the
+  // fallback branch above was taken.
+  std::cout << "[SimEng] Config file: " << simengConfigPath_ << std::endl;
+}
+
+// Parses heapStr_, a comma separated list of unsigned integers, into a vector
+// of 64-bit values in the order they appear.
+// Empty tokens (an empty string, a leading/trailing comma or two consecutive
+// commas) are skipped instead of being passed to std::stoull, which would
+// throw std::invalid_argument for them. Tokens that are not numbers still make
+// std::stoull throw.
+std::vector<uint64_t> SimEngCoreWrapper::splitHeapStr() {
+  std::vector<uint64_t> out;
+  std::string acc = "";
+
+  // Converts the characters gathered so far, if any, and resets the token.
+  auto flushToken = [&out, &acc]() {
+    if (!acc.empty()) {
+      out.push_back(static_cast<uint64_t>(std::stoull(acc)));
+      acc.clear();
+    }
+  };
+
+  for (const char c : heapStr_) {
+    if (c == ',') {
+      flushToken();
+    } else {
+      acc += c;
+    }
+  }
+  // The final value is not followed by a comma.
+  flushToken();
+  return out;
+}
\ No newline at end of file
diff --git a/sst/SimEngMemInterface.cc b/sst/SimEngMemInterface.cc
new file mode 100644
index 0000000000..4e07801f21
--- /dev/null
+++ b/sst/SimEngMemInterface.cc
@@ -0,0 +1,307 @@
+// clang-format off
+// DO NOT MOVE FROM TOP OF FILE - https://github.com/sstsimulator/sst-core/issues/865
+#include <sst/core/sst_config.h>
+// clang-format on
+
+#include "SimEngMemInterface.hh"
+
+#include <iostream>
+
+using namespace SST::SSTSimEng;
+
+// Stores the SST memory interface together with the cache line width (cl), the
+// highest valid memory address (max_addr) and the debug-output switch.
+SimEngMemInterface::SimEngMemInterface(StandardMem* mem, uint64_t cl,
+                                       uint64_t max_addr, bool debug)
+    : simeng::MemoryInterface() {
+  sstMem_ = mem;
+  cacheLineWidth_ = cl;
+  maxAddrMemory_ = max_addr;
+  debug_ = debug;
+}
+
+// Copies `size` bytes of the process image into a single write request at
+// address 0 and hands it to SST through sendUntimedData().
+void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size) {
+  // Each char of the image is converted to a uint8_t payload byte.
+  std::vector<uint8_t> imageBytes(image, image + size);
+
+  StandardMem::Request* imageWrite =
+      new StandardMem::Write(0, imageBytes.size(), imageBytes);
+  sstMem_->sendUntimedData(imageWrite);
+}
+
+// Converts one SimEng memory request into the SST requests needed to serve it.
+//
+// When addrStart and addrEnd lie in the same cache line the request is passed
+// to splitAggregatedRequest unchanged. Otherwise it is cut at the first cache
+// line boundary after addrStart:
+//   1) addrStart up to the end of its cache line, and
+//   2) the start of the next cache line up to addrEnd (this part may itself
+//      cover several cache lines).
+// Both parts are split separately and returned in address order.
+template <typename T,
+          typename std::enable_if<std::is_base_of<
+              SimEngMemInterface::SimEngMemoryRequest, T>::value>::type*>
+std::vector<StandardMem::Request*> SimEngMemInterface::makeSSTRequests(
+    T* aggrReq, uint64_t addrStart, uint64_t addrEnd, uint64_t size) {
+  if (!requestSpansMultipleCacheLines(addrStart, addrEnd)) {
+    return splitAggregatedRequest(aggrReq, addrStart, size);
+  }
+
+  // Address of the first byte of the cache line following addrStart's line.
+  const uint64_t nextLineAddr =
+      nearestCacheLineEnd(addrStart) * cacheLineWidth_;
+  const uint64_t headSize = nextLineAddr - addrStart;
+  const uint64_t tailSize = size - headSize;
+
+  std::vector<StandardMem::Request*> reqs =
+      splitAggregatedRequest(aggrReq, addrStart, headSize);
+  std::vector<StandardMem::Request*> tailReqs =
+      splitAggregatedRequest(aggrReq, nextLineAddr, tailSize);
+  reqs.insert(reqs.end(), tailReqs.begin(), tailReqs.end());
+  return reqs;
+}
+
+// Write overload: turns `size` bytes of a SimEng write, starting at addrStart,
+// into SST write requests that each carry at most one cache line of data.
+std::vector<StandardMem::Request*> SimEngMemInterface::splitAggregatedRequest(
+    AggregateWriteRequest* aggrReq, uint64_t addrStart, uint64_t size) {
+  std::vector<StandardMem::Request*> requests;
+  // Number of cache-line sized pieces needed to hold `size` bytes.
+  const int lineCount = getNumCacheLinesNeeded(size);
+
+  // Offset into the write data at which this fragment begins. It is non-zero
+  // only when addrStart lies past the address the write originally targeted,
+  // i.e. for the second part of a request that was cut at a cache line
+  // boundary.
+  uint64_t dataOffset = 0;
+  if (addrStart > aggrReq->target.address) {
+    dataOffset = addrStart - aggrReq->target.address;
+  }
+
+  for (int line = 0; line < lineCount; line++) {
+    // Take a full cache line while more than one line remains, otherwise take
+    // whatever is left.
+    uint64_t chunkSize = size;
+    if (size > cacheLineWidth_) {
+      size -= cacheLineWidth_;
+      chunkSize = cacheLineWidth_;
+    }
+
+    // SST write requests accept uint8_t vectors as data.
+    std::vector<uint8_t> payload(chunkSize);
+    const char* writeData = aggrReq->data.getAsVector<char>();
+    memcpy((void*)&payload[0], &(writeData[dataOffset]), chunkSize);
+
+    requests.push_back(new StandardMem::Write(addrStart, chunkSize, payload));
+
+    dataOffset += chunkSize;
+    addrStart += chunkSize;
+  }
+  return requests;
+}
+
+// Read overload: turns `size` bytes of a SimEng read, starting at addrStart,
+// into SST read requests of at most one cache line each. Every SST request is
+// counted in aggrReq->aggregateCount_ and registered in aggregationMap_ under
+// its SST request id, so that responses can later be matched back to the
+// SimEng read they belong to (many SST requests to one SimEng request).
+std::vector<StandardMem::Request*> SimEngMemInterface::splitAggregatedRequest(
+    AggregateReadRequest* aggrReq, uint64_t addrStart, uint64_t size) {
+  std::vector<StandardMem::Request*> requests;
+  // Number of cache-line sized pieces needed to cover `size` bytes.
+  const int lineCount = getNumCacheLinesNeeded(size);
+
+  for (int line = 0; line < lineCount; line++) {
+    // Take a full cache line while more than one line remains, otherwise take
+    // whatever is left.
+    uint64_t chunkSize = size;
+    if (size > cacheLineWidth_) {
+      size -= cacheLineWidth_;
+      chunkSize = cacheLineWidth_;
+    }
+
+    StandardMem::Request* readReq = new StandardMem::Read(addrStart, chunkSize);
+    requests.push_back(readReq);
+    addrStart += chunkSize;
+
+    // One more outstanding SST request for this SimEng read.
+    aggrReq->aggregateCount_++;
+    aggregationMap_.insert({readReq->getID(), aggrReq});
+  }
+  return requests;
+}
+
+// Issues a SimEng read to SST memory.
+// A read whose last byte lies beyond maxAddrMemory_, or whose address range
+// overflows, is not sent: an empty RegisterValue is queued as its result
+// instead, which signals an exception. Such addresses often come from wrongly
+// speculated branches, which are flushed from the pipeline afterwards.
+void SimEngMemInterface::requestRead(const MemoryAccessTarget& target,
+                                     uint64_t requestId) {
+  const uint64_t addrStart = target.address;
+  const uint64_t size = unsigned(target.size);
+  const uint64_t addrEnd = addrStart + size - 1;
+
+  const bool outOfRange =
+      addrEnd > maxAddrMemory_ || unsignedOverflow_(addrStart, size);
+  if (outOfRange) {
+    completedReadRequests_.push_back({target, RegisterValue(), requestId});
+    return;
+  }
+
+  AggregateReadRequest* aggrReq = new AggregateReadRequest(target, requestId);
+  std::vector<StandardMem::Request*> sstRequests =
+      makeSSTRequests<AggregateReadRequest>(aggrReq, addrStart, addrEnd, size);
+
+  // SST output data parsed by the testing framework.
+  // Format:
+  // [SSTSimEng:SSTDebug] MemRead-read-<type=request|response>-<request ID>
+  // -cycle-<cycle count>-split-<number of requests>
+  if (debug_) {
+    std::cout << "[SSTSimEng:SSTDebug] MemRead"
+              << "-read-request-" << requestId << "-cycle-" << tickCounter_
+              << "-split-" << sstRequests.size() << std::endl;
+  }
+
+  for (auto* sstRequest : sstRequests) {
+    sstMem_->send(sstRequest);
+  }
+}
+
+// Issues a SimEng write to SST memory, split into SST write requests of at
+// most one cache line each.
+void SimEngMemInterface::requestWrite(const MemoryAccessTarget& target,
+                                      const RegisterValue& data) {
+  uint64_t addrStart = target.address;
+  uint64_t size = unsigned(target.size);
+  uint64_t addrEnd = addrStart + size - 1;
+
+  // The aggregate request is only read while the SST requests are being built
+  // (its data is copied into each request's payload), so it lives on the stack
+  // rather than being heap-allocated and never freed.
+  AggregateWriteRequest aggrReq(target, data);
+  std::vector<StandardMem::Request*> requests =
+      makeSSTRequests<AggregateWriteRequest>(&aggrReq, addrStart, addrEnd,
+                                             size);
+
+  for (StandardMem::Request* req : requests) {
+    sstMem_->send(req);
+  }
+}
+
+// Advances tickCounter_ by one; within this file the counter is only used to
+// stamp the debug output of reads.
+void SimEngMemInterface::tick() { tickCounter_++; }
+
+// Discards every entry currently held in completedReadRequests_.
+void SimEngMemInterface::clearCompletedReads() {
+  completedReadRequests_.clear();
+}
+
+// True while at least one SST read request is still awaiting its response.
+// Only reads are tracked in aggregationMap_; writes are never inserted.
+bool SimEngMemInterface::hasPendingRequests() const {
+  return !aggregationMap_.empty();
+}
+
+// Returns a span (pointer + length) over the contents of
+// completedReadRequests_. The const_cast is needed because this method is
+// const while the span's element type is the non-const MemoryReadResult.
+const span<MemoryReadResult> SimEngMemInterface::getCompletedReads() const {
+  return {const_cast<MemoryReadResult*>(completedReadRequests_.data()),
+          completedReadRequests_.size()};
+};
+
+// Completes a SimEng read once all of its SST responses have arrived: merges
+// the response fragments in request-id order, queues the merged data in
+// completedReadRequests_ and frees the aggregate request. Does nothing while
+// aggregateCount_ is non-zero.
+void SimEngMemInterface::aggregatedReadResponses(
+    AggregateReadRequest* aggrReq) {
+  if (aggrReq->aggregateCount_ != 0) return;
+
+  // Concatenate the fragments in the iteration order of responseMap_, and drop
+  // each SST request id from the aggregation map along the way.
+  std::vector<uint8_t> mergedData;
+  for (const auto& response : aggrReq->responseMap_) {
+    mergedData.insert(mergedData.end(), response.second.begin(),
+                      response.second.end());
+    aggregationMap_.erase(response.first);
+  }
+
+  // Fold the bytes into an integer for the debug output, last byte first so
+  // that byte 0 ends up least significant.
+  uint64_t resp = 0;
+  for (auto byteIt = mergedData.rbegin(); byteIt != mergedData.rend();
+       ++byteIt) {
+    resp = (resp << 8) | *byteIt;
+  }
+
+  // SST output data parsed by the testing framework.
+  // Format:
+  // [SSTSimEng:SSTDebug] MemRead-read-<type=request|response>-<request ID>
+  // -cycle-<cycle count>-data-<value>
+  const uint64_t id = aggrReq->id_;
+  if (debug_) {
+    std::cout << "[SSTSimEng:SSTDebug] MemRead"
+              << "-read-response-" << id << "-cycle-" << tickCounter_
+              << "-data-" << resp << std::endl;
+  }
+
+  // Hand the completed read back to SimEng via the completed reads queue.
+  const char* mergedBytes = reinterpret_cast<const char*>(mergedData.data());
+  completedReadRequests_.push_back(
+      {aggrReq->target,
+       RegisterValue(mergedBytes, uint16_t(unsigned(aggrReq->target.size))),
+       aggrReq->id_});
+
+  // Cleanup
+  aggrReq->responseMap_.clear();
+  delete aggrReq;
+}
+
+// Write responses carry nothing that is used here, so the response object is
+// simply freed.
+void SimEngMemInterface::SimEngMemHandlers::handle(
+    StandardMem::WriteResp* rsp) {
+  delete rsp;
+}
+
+// Handles one SST read response: stores its data with the SimEng read request
+// it belongs to and, once that request has no responses outstanding, merges
+// them and returns the result to SimEng.
+void SimEngMemInterface::SimEngMemHandlers::handle(StandardMem::ReadResp* rsp) {
+  // Take what is needed from the response, then free it.
+  const uint64_t id = rsp->getID();
+  auto data = rsp->data;
+  delete rsp;
+
+  // Look up the aggregated read this response is a part of; responses with an
+  // unknown id are ignored.
+  auto entry = memInterface_.aggregationMap_.find(id);
+  if (entry != memInterface_.aggregationMap_.end()) {
+    SimEngMemInterface::AggregateReadRequest* aggrReq = entry->second;
+
+    // Responses are keyed by SST request id. The fragments must be merged in
+    // the order the original read was split, otherwise the merged value is
+    // garbage; the ids come from an atomically incremented counter
+    // (see "interfaces/stdMem.(hh/cc)" in SST-Core), so an ordered map keeps
+    // them in that order.
+    aggrReq->responseMap_.insert({id, data});
+
+    // One response fewer outstanding; merge once none remain.
+    aggrReq->aggregateCount_--;
+    if (aggrReq->aggregateCount_ <= 0) {
+      memInterface_.aggregatedReadResponses(aggrReq);
+    }
+  }
+}
+
+// Number of cache-line sized pieces needed for `size` bytes: size divided by
+// the cache line width, rounded up, and never less than 1.
+int SimEngMemInterface::getNumCacheLinesNeeded(uint64_t size) const {
+  if (size < cacheLineWidth_) {
+    return 1;
+  }
+  const uint64_t fullLines = size / cacheLineWidth_;
+  const bool hasPartialLine = (size % cacheLineWidth_) != 0;
+  return hasPartialLine ? fullLines + 1 : fullLines;
+}
+// True when a + b wraps around in 64-bit unsigned arithmetic, detected by the
+// sum being smaller than one of its operands.
+bool SimEngMemInterface::unsignedOverflow_(uint64_t a, uint64_t b) const {
+  const uint64_t sum = a + b;
+  return sum < a || sum < b;
+}
+// True when addrStart and addrEnd fall into different cache lines.
+bool SimEngMemInterface::requestSpansMultipleCacheLines(
+    uint64_t addrStart, uint64_t addrEnd) const {
+  const uint64_t firstLine = addrStart / cacheLineWidth_;
+  const uint64_t lastLine = addrEnd / cacheLineWidth_;
+  return lastLine != firstLine;
+}
+// Returns the index (not the address) of the cache line that follows the one
+// containing addrStart; multiplying it by the cache line width gives the
+// address at which that next line begins.
+uint64_t SimEngMemInterface::nearestCacheLineEnd(uint64_t addrStart) const {
+  const uint64_t currentLine = addrStart / cacheLineWidth_;
+  return currentLine + 1;
+}
\ No newline at end of file
diff --git a/sst/config/L1L2_config.py b/sst/config/L1L2_config.py
new file mode 100644
index 0000000000..ad2e79a233
--- /dev/null
+++ b/sst/config/L1L2_config.py
@@ -0,0 +1,75 @@
+import sst
+import sys
+
+DEBUG_L1 = 0
+DEBUG_MEM = 0
+DEBUG_LEVEL = 10
+
+clw = "64"
+
+# Define the simulation components
+cpu = sst.Component("core", "sstsimeng.simengcore")
+cpu.addParams({
+    "simeng_config_path": "<PATH TO SIMENG MODEL CONFIG .YAML FILE>",
+    "executable_path": "<PATH TO EXECUTABLE BINARY>",
+    "executable_args": "",
+    "clock" : "2GHz",
+    "max_addr_memory": 2*1024*1024*1024-1,
+    "cache_line_width": clw,
+    "source": "",
+    "assemble_with_source": False,
+    "heap": "",
+    "debug": False
+})
+
+iface = cpu.setSubComponent("memory", "memHierarchy.standardInterface")
+
+l1cache = sst.Component("l1cache.msi", "memHierarchy.Cache")
+l1cache.addParams({
+    "access_latency_cycles" : "4",
+    "cache_frequency" : "2Ghz",
+    "replacement_policy" : "lru",
+    "coherence_protocol" : "MSI",
+    "associativity" : "4",
+    "cache_line_size" : clw,
+    "cache_size" : "1KiB",
+    "L1" : "1",
+    "debug" : DEBUG_L1,
+    "debug_level" : DEBUG_LEVEL,
+    "verbose": "2"
+})
+l2cache = sst.Component("l2cache.msi.inclus", "memHierarchy.Cache")
+l2cache.addParams({
+    "access_latency_cycles" : "10",
+    "cache_frequency" : "1.8Ghz",
+    "replacement_policy" : "lru",
+    "coherence_protocol" : "MSI",
+    "associativity" : "8",
+    "cache_line_size" : clw,
+    "cache_size" : "16 KiB",
+    "debug_level" : "10",
+    "debug": "1"
+})
+memctrl = sst.Component("memory", "memHierarchy.MemController")
+memctrl.addParams({
+    "clock" : "1GHz",
+    "backend.access_time" : "100 ns",
+    "debug" : DEBUG_MEM,
+    "debug_level" : DEBUG_LEVEL,
+    "addr_range_end" : 2*1024*1024*1024-1,
+})
+    
+memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
+memory.addParams({
+    "access_time" : "100 ns",
+    "mem_size" : "2GiB",
+})
+
+
+# Define the simulation links
+link_cpu_l1cache = sst.Link("link_cpu_l1cache_link")
+link_cpu_l1cache.connect( (iface, "port", "10ps"), (l1cache, "high_network_0", "10ps") )
+link_l1cache_l2cache = sst.Link("link_l1cache_l2cache_link")
+link_l1cache_l2cache.connect( (l1cache, "low_network_0", "100ps"), (l2cache, "high_network_0", "100ps") )
+link_mem_bus = sst.Link("link_mem_bus_link")
+link_mem_bus.connect( (l2cache, "low_network_0", "100ps"), (memctrl, "direct_link", "100ps") )
diff --git a/sst/config/a64fx-config.py b/sst/config/a64fx-config.py
new file mode 100644
index 0000000000..f97d524071
--- /dev/null
+++ b/sst/config/a64fx-config.py
@@ -0,0 +1,187 @@
+import sst
+
+DEBUG_L1 = 0
+DEBUG_L2 = 0
+DEBUG_MEM = 0
+DEBUG_LEVEL = 0
+
+
+# ------------------------------------------------ Utility -------------------------------------------
+
+def getMemoryProps(memory_size: int, si: str):
+    """Describe a memory region of `memory_size` units that starts at address 0.
+
+    Args:
+        memory_size: Number of `si` units in the region; must be positive.
+        si: Unit of `memory_size`; one of "GiB", "MiB", "KiB" or "B".
+
+    Returns:
+        A dict with three keys:
+          "start_addr": always 0,
+          "end_addr":   last valid byte address (size in bytes minus one),
+          "size":       `memory_size` and `si` concatenated, e.g. "8GiB".
+
+    Raises:
+        ValueError: if `si` is not a supported unit or `memory_size` is not
+            positive. ValueError derives from Exception, so callers catching
+            Exception keep working.
+    """
+    bytes_per_unit = {
+        "GiB": 1024 ** 3,
+        "MiB": 1024 ** 2,
+        "KiB": 1024,
+        "B": 1,
+    }
+    if si not in bytes_per_unit:
+        raise ValueError(
+            "Unknown SI units provided to getMemoryProps: %r (expected one of %s)"
+            % (si, ", ".join(bytes_per_unit)))
+    # A non-positive size would otherwise produce a negative end address.
+    if memory_size <= 0:
+        raise ValueError(
+            "Memory size provided to getMemoryProps must be positive, got %r"
+            % (memory_size,))
+    return {
+        "start_addr": 0,
+        "end_addr": memory_size * bytes_per_unit[si] - 1,
+        "size": "%s%s" % (memory_size, si),
+    }
+
+# ------------------------------------------------ Utility -------------------------------------------
+
+
+
+# ------------------------------------------- A64FX Properties ---------------------------------------
+
+# This SST configuration file represents the memory model for the Fujitsu A64FX processor.
+# Reference: https://github.com/fujitsu/A64FX/blob/master/doc/A64FX_Microarchitecture_Manual_en_1.8.pdf
+
+# Cache line size of L1 & L2 in A64FX in bytes.
+A64FX_CLW = 256
+# Clock frequency of A64FX. Used for the core, both caches and the memory controller.
+A64FX_CLOCK = "2GHz"
+# Size of L1 cache in A64FX.
+A64FX_L1_SIZE = "64KiB"
+# Size of L2 cache in A64FX.
+A64FX_L2_SIZE = "8MiB"
+# Set associativity of A64FX L1.
+A64FX_SA_L1 = 4
+# Set associativity of A64FX L2.
+A64FX_SA_L2 = 16
+# Hit latency of A64FX L1 cache (cycles).
+A64FX_HL_L1 = 5
+# Hit latency of A64FX L2 cache (cycles).
+A64FX_HL_L2 = 56
+# Coherence protocol of A64FX caches.
+A64FX_COHP = "MESI"
+# L1 & L2 cache type of A64FX.
+A64FX_CACHE_TYPE = "inclusive"
+# Throughput of L1 to L2 per core in A64FX (bytes per cycle).
+# Passed as the L1 cache's "request_link_width".
+A64FX_L1TOL2_PC_TPUT = "32B"
+# Throughput of L1 to CPU per core in A64FX (bytes per cycle). Value of 0 indicates infinity.
+# Passed as the L1 cache's "response_link_width".
+A64FX_L1TOCPU_PC_TPUT = "128B"
+# Throughput of L2 to Memory per CMG in A64FX (bytes per cycle).
+# Passed as the L2 cache's "request_link_width".
+A64FX_L2TOMEM_PCMG_TPUT = "64B"
+# Throughput of L2 to L1 per core in A64FX (bytes per cycle).
+# Passed as the L2 cache's "response_link_width".
+A64FX_L2TOL1_PC_TPUT = "64B"
+# Throughput of Memory to L2 per CMG in A64FX (bytes per cycle).
+# Unlike the other throughputs this is a plain integer; it is passed as the
+# memory controller's "request_width".
+A64FX_MEMTOL2_PCMG_TPUT = 128
+# A64FX memory access time.
+A64FX_MEM_ACCESS = "144.5ns"
+
+# ------------------------------------------- A64FX Properties ---------------------------------------
+
+
+# ---------------------------------------------- Variables -------------------------------------------
+
+# Address range and size string for the simulated 8 GiB of memory; shared by
+# the core ("max_addr_memory"), the memory controller and its backend.
+memprops = getMemoryProps(8, "GiB")
+
+# ---------------------------------------------- Variables -------------------------------------------
+
+
+# --------------------------------------------- SSTSimEng Core ---------------------------------------
+
+# Run `sst-info sstsimeng.simengcore` to list the parameters, ports and
+# subcomponent slots of the SimEng core component.
+_core_params = {
+    "clock" : A64FX_CLOCK,
+    "cache_line_width": A64FX_CLW,
+    "max_addr_memory": memprops["end_addr"],
+    # Placeholders: replace with real paths before running the simulation.
+    "simeng_config_path": "<PATH TO A64FX SIMENG MODEL CONFIG>",
+    "executable_path": "<PATH TO EXECUTABLE BINARY>",
+    "executable_args": "",
+}
+cpu = sst.Component("core", "sstsimeng.simengcore")
+cpu.addParams(_core_params)
+
+# The StandardInterface in the core's "memory" slot is what talks to the SST
+# memory model.
+interface = cpu.setSubComponent("memory", "memHierarchy.standardInterface")
+
+# --------------------------------------------- SSTSimEng Core ---------------------------------------
+
+
+# --------------------------------------------- L1 Cache ---------------------------------------------
+
+# Run `sst-info memHierarchy.Cache` to list all cache parameters, ports and
+# subcomponent slots.
+_l1_params = {
+    "L1" : 1,
+    "cache_type": A64FX_CACHE_TYPE,
+    "coherence_protocol": A64FX_COHP,
+    "cache_size" : A64FX_L1_SIZE,
+    "cache_line_size" : A64FX_CLW,
+    "associativity" : A64FX_SA_L1,
+    "access_latency_cycles" : A64FX_HL_L1,
+    "cache_frequency" : A64FX_CLOCK,
+    "request_link_width": A64FX_L1TOL2_PC_TPUT,
+    "response_link_width": A64FX_L1TOCPU_PC_TPUT,
+    "debug" : DEBUG_L1,
+    "debug_level" : DEBUG_LEVEL,
+}
+l1cache = sst.Component("a64fx.l1cache", "memHierarchy.Cache")
+l1cache.addParams(_l1_params)
+
+# MESI L1 coherence controller goes in the "coherence" slot.
+coherence_controller_l1 = l1cache.setSubComponent(
+    "coherence", "memHierarchy.coherence.mesi_l1")
+# LRU replacement policy goes in the "replacement" slot; index 0 selects the
+# policy used for the cache itself.
+replacement_policy_l1 = l1cache.setSubComponent(
+    "replacement", "memHierarchy.replacement.lru", 0)
+
+# --------------------------------------------- L1 Cache ---------------------------------------------
+
+
+# --------------------------------------------- L2 Cache ---------------------------------------------
+
+# Run `sst-info memHierarchy.Cache` to list all cache parameters, ports and
+# subcomponent slots.
+_l2_params = {
+    "L1" : 0,
+    "cache_type": A64FX_CACHE_TYPE,
+    "coherence_protocol": A64FX_COHP,
+    "cache_size" : A64FX_L2_SIZE,
+    "cache_line_size" : A64FX_CLW,
+    "associativity" : A64FX_SA_L2,
+    "access_latency_cycles" : A64FX_HL_L2,
+    "cache_frequency" : A64FX_CLOCK,
+    "request_link_width": A64FX_L2TOMEM_PCMG_TPUT,
+    "response_link_width": A64FX_L2TOL1_PC_TPUT,
+    "debug" : DEBUG_L2,
+    "debug_level" : DEBUG_LEVEL,
+}
+l2cache = sst.Component("a64fx.l2cache", "memHierarchy.Cache")
+l2cache.addParams(_l2_params)
+
+# MESI inclusive coherence controller goes in the "coherence" slot.
+coherence_controller_l2 = l2cache.setSubComponent(
+    "coherence", "memHierarchy.coherence.mesi_inclusive")
+# LRU replacement policy goes in the "replacement" slot; index 0 selects the
+# policy used for the cache itself.
+replacement_policy_l2 = l2cache.setSubComponent(
+    "replacement", "memHierarchy.replacement.lru", 0)
+
+# --------------------------------------------- L2 Cache ---------------------------------------------
+
+
+# ----------------------------------- Memory Backend & Controller -------------------------------------
+
+# Memory controller for the address range described by `memprops`.
+memory_controller = sst.Component("a64fx.memorycontroller", "memHierarchy.MemController")
+memory_controller.addParams({
+      "clock": A64FX_CLOCK,
+      "backend.access_time": A64FX_MEM_ACCESS,
+      "request_width": A64FX_MEMTOL2_PCMG_TPUT,
+      "debug": DEBUG_MEM,
+      "debug_level": DEBUG_LEVEL,
+      "addr_range_start": memprops["start_addr"],
+      "addr_range_end": memprops["end_addr"]
+})
+
+# simpleMem backend placed in the controller's "backend" slot.
+memory_backend = memory_controller.setSubComponent("backend", "memHierarchy.simpleMem")
+memory_backend.addParams({
+      "access_time": A64FX_MEM_ACCESS,
+      "mem_size": memprops["size"],
+      # Same constant as the controller's request_width, so the two values
+      # cannot drift apart (previously a hard-coded 128).
+      "request_width": A64FX_MEMTOL2_PCMG_TPUT,
+})
+
+# ----------------------------------- Memory Backend & Controller -------------------------------------
+
+
+# ---------------------------------------------- Links ------------------------------------------------
+
+# Every link in this model uses the same latency on both endpoints.
+_LINK_LATENCY = "0ps"
+
+# core interface <-> L1
+link_cpu_l1cache = sst.Link("link_cpu_l1cache_link")
+link_cpu_l1cache.connect(
+    (interface, "port", _LINK_LATENCY),
+    (l1cache, "high_network_0", _LINK_LATENCY),
+)
+# L1 <-> L2
+link_l1cache_l2cache = sst.Link("link_l1cache_l2cache_link")
+link_l1cache_l2cache.connect(
+    (l1cache, "low_network_0", _LINK_LATENCY),
+    (l2cache, "high_network_0", _LINK_LATENCY),
+)
+# L2 <-> memory controller
+link_mem_bus = sst.Link("link_mem_bus_link")
+link_mem_bus.connect(
+    (l2cache, "low_network_0", _LINK_LATENCY),
+    (memory_controller, "direct_link", _LINK_LATENCY),
+)
+
+# ---------------------------------------------- Links ------------------------------------------------
diff --git a/sst/config/config.py b/sst/config/config.py
new file mode 100644
index 0000000000..743a9908ce
--- /dev/null
+++ b/sst/config/config.py
@@ -0,0 +1,68 @@
+import sst
+
+# Debug switches forwarded to the memHierarchy components below.
+DEBUG_L1 = 0
+DEBUG_MEM = 0
+DEBUG_LEVEL = 0
+
+# Cache line width in bytes, shared by the core and the L1 cache.
+clw = "64"
+
+# Highest address of the 2 GiB of simulated memory.
+_MAX_ADDR = 2 * 1024 ** 3 - 1
+
+# ---- SimEng core ----
+_core_params = {
+    "clock" : "1GHz",
+    "cache_line_width": clw,
+    "max_addr_memory": _MAX_ADDR,
+    # Placeholders: replace with real paths before running the simulation.
+    "simeng_config_path": "<PATH TO SIMENG MODEL CONFIG .YAML FILE>",
+    "executable_path": "<PATH TO EXECUTABLE BINARY>",
+    "executable_args": "",
+    "source": "",
+    "assemble_with_source": False,
+    "heap": "",
+    "debug": False,
+}
+cpu = sst.Component("core", "sstsimeng.simengcore")
+cpu.addParams(_core_params)
+
+iface = cpu.setSubComponent("memory", "memHierarchy.standardInterface")
+
+# ---- L1 cache (MESI, nmru replacement) ----
+_l1_params = {
+    "L1" : "1",
+    "cache_size" : "200KiB",
+    "cache_line_size" : clw,
+    "associativity" : "4",
+    "access_latency_cycles" : "2",
+    "cache_frequency" : "2Ghz",
+    "replacement_policy" : "nmru",
+    "coherence_protocol" : "MESI",
+    "debug" : DEBUG_L1,
+    "debug_level" : DEBUG_LEVEL,
+}
+l1cache = sst.Component("l1cache.mesi", "memHierarchy.Cache")
+l1cache.addParams(_l1_params)
+
+# The link subcomponents are set explicitly so the cache does not have to
+# infer them from the names of the connected ports.
+l1toC = l1cache.setSubComponent("cpulink", "memHierarchy.MemLink")
+l1toM = l1cache.setSubComponent("memlink", "memHierarchy.MemLink")
+
+# ---- Memory controller ----
+_memctrl_params = {
+    "clock" : "1GHz",
+    "request_width" : "64",
+    "addr_range_end" : _MAX_ADDR,
+    "debug" : DEBUG_MEM,
+    "debug_level" : DEBUG_LEVEL,
+}
+memctrl = sst.Component("memory", "memHierarchy.MemController")
+memctrl.addParams(_memctrl_params)
+Mtol1 = memctrl.setSubComponent("cpulink", "memHierarchy.MemLink")
+
+# ---- Memory model (simpleMem backend) ----
+_backend_params = {
+    "mem_size" : "2GiB",
+    "access_time" : "1ns",
+    "request_width": "64",
+}
+memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
+memory.addParams(_backend_params)
+
+# ---- Simulation links ----
+# core interface <-> L1 cpulink
+link_cpu_cache_link = sst.Link("link_cpu_cache_link")
+link_cpu_cache_link.connect(
+    (iface, "port", "100ps"),
+    (l1toC, "port", "100ps"),
+)
+# L1 memlink <-> memory controller cpulink
+link_mem_bus_link = sst.Link("link_mem_bus_link")
+link_mem_bus_link.connect(
+    (l1toM, "port", "50ps"),
+    (Mtol1, "port", "50ps"),
+)
+
diff --git a/sst/include/Assemble.hh b/sst/include/Assemble.hh
new file mode 100644
index 0000000000..93b7a999ae
--- /dev/null
+++ b/sst/include/Assemble.hh
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+#include "simeng/version.hh"
+
+namespace SST {
+
+namespace SSTSimEng {
+
+/**
+ * Assembles a string of source instructions into a flat binary and exposes
+ * the resulting bytes and their size.
+ */
+class Assembler {
+ private:
+  /** The flat binary produced by assembling the test source. */
+  uint8_t* code_ = nullptr;
+
+  /** The size of the assembled flat binary in bytes. */
+  size_t codeSize_ = 0;
+
+  /** Assemble test source to a flat binary for the given triple. */
+  void assemble(const char* source, const char* triple);
+
+ public:
+  /** Constructor for Assembler class which takes in source code. */
+  Assembler(std::string source);
+  ~Assembler();
+
+  /**
+   * Copying is disabled: the class holds a raw pointer to the assembled
+   * binary and declares a destructor, so an implicit member-wise copy would
+   * leave two objects sharing `code_`.
+   * NOTE(review): assumes ~Assembler releases `code_` - confirm in the
+   * implementation file.
+   */
+  Assembler(const Assembler&) = delete;
+  Assembler& operator=(const Assembler&) = delete;
+
+  /** Returns the assembled source as a char array. */
+  char* getAssembledSource();
+
+  /** Returns the size of the assembled source. */
+  size_t getAssembledSourceSize();
+};
+
+}  // namespace SSTSimEng
+}  // namespace SST
\ No newline at end of file
diff --git a/sst/include/SimEngCoreWrapper.hh b/sst/include/SimEngCoreWrapper.hh
new file mode 100644
index 0000000000..a9a2c9bad3
--- /dev/null
+++ b/sst/include/SimEngCoreWrapper.hh
@@ -0,0 +1,234 @@
+#pragma once
+
+// clang-format off
+// DO NOT MOVE FROM TOP OF FILE - https://github.com/sstsimulator/sst-core/issues/865
+#include <sst/core/sst_config.h>
+// clang-format on
+#include <sst/core/component.h>
+#include <sst/core/eli/elementinfo.h>
+#include <sst/core/interfaces/stdMem.h>
+
+#include <chrono>
+#include <cmath>
+#include <cstring>
+#include <iomanip>
+#include <iostream>
+#include <map>
+// std::unique_ptr / std::shared_ptr members of SimEngCoreWrapper.
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "SimEngMemInterface.hh"
+#include "simeng/Core.hh"
+#include "simeng/CoreInstance.hh"
+#include "simeng/MemoryInterface.hh"
+#include "simeng/SpecialFileDirGen.hh"
+#include "simeng/version.hh"
+
+using namespace SST;
+using namespace SST::Interfaces;
+using namespace SST::SSTSimEng;
+using namespace simeng;
+
+namespace SST {
+
+namespace SSTSimEng {
+
+/**
+ * A wrapper class registered as a custom SST::Component to participate in an
+ * SST simulation. The SimEng core as well as components/interfaces from SST
+ * required to ensure a successful integration are instantiated and configured
+ * in this class as well. This class acts as the main point of contact for
+ * clock ticks received from SST and hence is also responsible for ticking the
+ * SimEng core and other classes associated with it.
+ */
+class SimEngCoreWrapper : public SST::Component {
+ public:
+  SimEngCoreWrapper(SST::ComponentId_t id, SST::Params& params);
+  ~SimEngCoreWrapper();
+
+  /** SST lifecycle methods (in order of invocation) overridden from
+   * SST::Component. They are marked `override` so a signature mismatch with
+   * the base class becomes a compile error instead of a silently unused
+   * method. */
+
+  /**
+   * This is the init lifecycle method present in all SST::Components.
+   * Here it is overridden to include init calls to all other SST::Components
+   * which are contained inside SimEngCoreWrapper. It is necessary to call all
+   * lifecycle methods for SST::Component(s).
+   */
+  void init(unsigned int phase) override;
+
+  /**
+   * This is the setup lifecycle method present in all SST::Components.
+   * Here it is overridden to include setup calls to all other SST::Components
+   * which are contained inside SimEngCoreWrapper. It is necessary to call all
+   * lifecycle methods for SST::Component(s).
+   */
+  void setup() override;
+
+  /**
+   * This is the finish lifecycle method present in all SST::Components.
+   * Here it is overridden to finish statistics about the SimEng simulation.
+   */
+  void finish() override;
+
+  /**
+   * The clockTick is a method present in all SST::Components. This function
+   * is called every time the SST clock ticks. The current clock cycle is
+   * passed as an argument by SST. The SimEng core ticks in this method.
+   */
+  bool clockTick(SST::Cycle_t currentCycle);
+
+  /**
+   * This handle event method is registered to StandardMem interface. This
+   * method is called every time a memory request is forwarded by the
+   * interface. This function acts as a callback and invokes SimEngMemHandler
+   * on the memory requests.
+   */
+  void handleMemoryEvent(StandardMem::Request* memEvent);
+
+  /**
+   * SST supplied MACRO used to register custom SST:Components with
+   * the SST Core.
+   */
+  SST_ELI_REGISTER_COMPONENT(SimEngCoreWrapper, "sstsimeng", "simengcore",
+                             SST_ELI_ELEMENT_VERSION(1, 0, 0),
+                             "SimEng core wrapper for SST",
+                             COMPONENT_CATEGORY_PROCESSOR)
+
+  /**
+   * SST supplied MACRO used to document all parameters needed by
+   * a custom SST:Component.
+   */
+  SST_ELI_DOCUMENT_PARAMS(
+      {"simeng_config_path",
+       "Value which specifies the path to SimEng YAML model config file. "
+       "(string)",
+       ""},
+      {"executable_path",
+       "Value which specifies the path to executable binary to be run by "
+       "SimEng. (string)",
+       ""},
+      {"executable_args",
+       "Value which specifies the argument to be passed to the executable "
+       "binary. (string)",
+       ""},
+      {"clock", "Value which specifies clock rate of the SST clock. (string)",
+       ""},
+      {"max_addr_memory",
+       "Value which specifies the maximum address that memory can access. "
+       "(int)",
+       ""},
+      {"cache_line_width",
+       "Value which specifies the width of the cache line in bytes. (int)", ""},
+      {"source",
+       "Value which specifies the string of instructions to be assembled by "
+       "LLVM and executed by SimEng (if any). (string)",
+       ""},
+      {"assemble_with_source",
+       "Value which indicates whether to assemble the instructions supplied "
+       "through the source parameter using LLVM. (boolean)",
+       "false"},
+      {"heap",
+       "Value which specifies comma separated uint64_t values used to populate "
+       "the heap. This parameter will only be used if "
+       "assemble_with_source=true. (string)",
+       ""},
+      {"debug",
+       "Value which enables output statistics that can be parsed by the "
+       "testing framework. (boolean)",
+       "false"})
+
+ private:
+  /** Method used to assemble SimEng core. */
+  void fabricateSimEngCore();
+
+  /** Method to split the passed executable argument's string into a vector of
+   * individual arguments. */
+  std::vector<std::string> splitArgs(std::string argString);
+
+  /** This method trims any leading or trailing spaces in a string. */
+  std::string trimSpaces(std::string argsStr);
+
+  /** This method splits the comma separated heap string into a vector of
+   * uint64_t values. */
+  std::vector<uint64_t> splitHeapStr();
+
+  // SST properties
+  /**
+   * SST defined output class used to output information to standard output.
+   * This class has in-built method for different levels of severity and can
+   * also be configured to output information like line-number and filename.
+   */
+  SST::Output output_;
+
+  /**
+   * SST clock for the component register with the custom component
+   * during instantiation using the registerClock method provided
+   * by SST.
+   */
+  TimeConverter* clock_ = nullptr;
+
+  /**
+   * SST::Interfaces::StandardMem interface responsible for converting
+   * SST::StandardMem::Request(s) into SST memory events to be passed
+   * down the memory hierarchy.
+   */
+  StandardMem* sstMem_ = nullptr;
+
+  // SimEng properties
+  /** Reference to the CoreInstance class responsible for creating the core to
+   * be simulated. */
+  std::unique_ptr<simeng::CoreInstance> coreInstance_;
+
+  /** Reference to SimEng core. */
+  std::shared_ptr<simeng::Core> core_;
+
+  /** Path to the YAML configuration file for SimEng. */
+  std::string simengConfigPath_;
+
+  /** Path to the executable binary to be simulated by SimEng. */
+  std::string executablePath_;
+
+  /** Arguments to be passed to executable binary. */
+  std::vector<std::string> executableArgs_;
+
+  /** The cache line width for SST. */
+  uint64_t cacheLineWidth_ = 0;
+
+  /** Maximum address available to SimEng for memory purposes. */
+  uint64_t maxAddrMemory_ = 0;
+
+  /** Reference to the process memory used in SimEng. */
+  std::shared_ptr<char> processMemory_;
+
+  /** Reference to SimEng instruction memory. */
+  std::shared_ptr<simeng::MemoryInterface> instructionMemory_;
+
+  /** Reference to SimEngMemInterface used for interfacing with SST. */
+  std::shared_ptr<SimEngMemInterface> dataMemory_;
+
+  /** Number of clock iterations. */
+  int iterations_ = 0;
+
+  /** Start time of simulation. */
+  std::chrono::high_resolution_clock::time_point startTime_;
+
+  /** Reference to memory request handler class defined in SimEngMemInterface.
+   */
+  SimEngMemInterface::SimEngMemHandlers* handlers_ = nullptr;
+
+  /** String which holds source instructions to be assembled. (if any)*/
+  std::string source_;
+
+  /** Boolean which indicates whether or not to assemble by source. */
+  bool assembleWithSource_ = false;
+
+  /** Heap contents as string. */
+  std::string heapStr_;
+
+  /** Variable to enable parseable print debug statements in test mode. */
+  bool debug_ = false;
+};
+
+}  // namespace SSTSimEng
+
+}  // namespace SST
diff --git a/sst/include/SimEngMemInterface.hh b/sst/include/SimEngMemInterface.hh
new file mode 100644
index 0000000000..79789a9f39
--- /dev/null
+++ b/sst/include/SimEngMemInterface.hh
@@ -0,0 +1,244 @@
+#pragma once
+
+// clang-format off
+// DO NOT MOVE FROM TOP OF FILE - https://github.com/sstsimulator/sst-core/issues/865
+#include <sst/core/sst_config.h>
+// clang-format on
+#include <sst/core/eli/elementinfo.h>
+#include <sst/core/interfaces/stdMem.h>
+
+#include <chrono>
+#include <cmath>
+#include <cstring>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <type_traits>
+// std::unordered_map is the type of SimEngMemInterface::aggregationMap_.
+#include <unordered_map>
+#include <vector>
+
+#include "simeng/MemoryInterface.hh"
+#include "simeng/span.hh"
+
+using namespace simeng;
+using namespace SST::Interfaces;
+
+namespace SST {
+
+namespace SSTSimEng {
+
+/** A memory interface used by SimEng to communicate with SST's memory model. */
+class SimEngMemInterface : public MemoryInterface {
+ public:
+  SimEngMemInterface(StandardMem* mem, uint64_t cl, uint64_t max_addr,
+                     bool debug);
+  /** Send SimEng's processImage to SST memory backend during `init` lifecycle
+   * phase of SST. */
+  void sendProcessImageToSST(char* image, uint64_t size);
+
+  /**
+   * Construct an AggregateReadRequest and use it to generate
+   * SST::StandardMem::Read request(s). These request(s) are then sent to SST.
+   */
+  void requestRead(const MemoryAccessTarget& target,
+                   uint64_t requestId = 0) override;
+
+  /**
+   * Construct an AggregateWriteRequest and use it to generate
+   * SST::StandardMem::Write request(s). These request(s) are then sent to SST.
+   */
+  void requestWrite(const MemoryAccessTarget& target,
+                    const RegisterValue& data) override;
+
+  /** Retrieve all completed read requests. */
+  const span<MemoryReadResult> getCompletedReads() const override;
+
+  /** Clear the completed reads. */
+  void clearCompletedReads() override;
+
+  /** Returns true if there are any outstanding memory requests. */
+  bool hasPendingRequests() const override;
+
+  /**
+   * Tick the memory interface to process SimEng related tasks. Since all memory
+   * operations are handled by SST this method is only used to increment
+   * `tickCounter_`.
+   */
+  void tick() override;
+
+  /**
+   * An instance of `SimEngMemHandlers` is registered to an instance of
+   * SST::StandardMem and is used to handle Read and Write responses. A
+   * reference to the owning `SimEngMemInterface` is passed to the constructor
+   * and stored in `memInterface_` so the handlers can reach the state needed
+   * to process responses. `SimEngMemInterface` is additionally declared a
+   * friend of this class.
+   */
+  class SimEngMemHandlers : public StandardMem::RequestHandler {
+    friend class SimEngMemInterface;
+
+   public:
+    SimEngMemHandlers(SimEngMemInterface& interface, SST::Output* out)
+        : StandardMem::RequestHandler(out), memInterface_(interface) {}
+
+    ~SimEngMemHandlers() {}
+
+    /**
+     * Overloaded instance of handle method to handle read request responses
+     * overridden to aggregate responses and send them back to SimEng.
+     */
+    void handle(StandardMem::ReadResp* resp) override;
+
+    /**
+     * Overloaded instance of handle method to handle write request responses
+     * overridden to delete the incoming responses as SimEng does not have any
+     * use for it.
+     */
+    void handle(StandardMem::WriteResp* resp) override;
+
+    /** Reference to SimEngMemInterface used for interfacing with SST. */
+    SimEngMemInterface& memInterface_;
+  };
+
+  /**
+   * This struct represents a memory request from SimEng. It is used as base
+   * struct for AggregateWriteRequest and AggregateReadRequest.
+   */
+  struct SimEngMemoryRequest {
+    /** MemoryAccessTarget from SimEng memory instruction. */
+    const MemoryAccessTarget target;
+
+    SimEngMemoryRequest() : target(MemoryAccessTarget()){};
+    SimEngMemoryRequest(const MemoryAccessTarget& target) : target(target){};
+  };
+
+  /**
+   * Struct AggregateWriteRequest is used to store information regarding
+   * the multiple SST::StandardMem::Request (Write) a memory request from SimEng
+   * is split into. This happens if its size is greater than the cache line
+   * width. These structs are also used to represent SimEng write requests which
+   * aren't split for ease of implementation.
+   */
+  struct AggregateWriteRequest : public SimEngMemoryRequest {
+    /** RegisterValue (write data) from SimEng memory instruction. */
+    const RegisterValue data;
+
+    AggregateWriteRequest() : SimEngMemoryRequest(), data(RegisterValue()){};
+    AggregateWriteRequest(const MemoryAccessTarget& target,
+                          const RegisterValue& data)
+        : SimEngMemoryRequest(target), data(data){};
+  };
+
+  /**
+   * Struct AggregateReadRequest is used to store information regarding
+   * the multiple SST::StandardMem::Request (Read) a memory request from SimEng
+   * is split into. This happens if its size is greater than the cache line
+   * width. These structs are also used to represent SimEng read requests which
+   * aren't split for ease of implementation.
+   */
+  struct AggregateReadRequest : public SimEngMemoryRequest {
+    /** Unique identifier of each AggregateReadRequest copied from SimEng read
+     * request. */
+    const uint64_t id_;
+    /**
+     * This response map is used to store all responses of SST read request,
+     * this aggregated read request was split into. An ordered map is used to
+     * record and maintain the order of split responses.
+     */
+    std::map<uint64_t, std::vector<uint8_t>> responseMap_;
+    /** Total number of SST request the SimEng memory request was split into. */
+    int aggregateCount_ = 0;
+
+    AggregateReadRequest() : SimEngMemoryRequest(), id_(0){};
+    AggregateReadRequest(const MemoryAccessTarget& target, const uint64_t id)
+        : SimEngMemoryRequest(target), id_(id) {}
+  };
+
+ private:
+  /**
+   * SST::Interfaces::StandardMem interface responsible for converting
+   * SST::StandardMem::Request(s) into SST memory events to be passed
+   * down the memory hierarchy.
+   */
+  StandardMem* sstMem_;
+
+  /** Counter for clock ticks. */
+  uint64_t tickCounter_ = 0;
+
+  /** The cache line width specified by SST config.py. */
+  uint64_t cacheLineWidth_;
+
+  /** Maximum address available for memory purposes. */
+  uint64_t maxAddrMemory_;
+
+  /** A vector containing all completed read requests. */
+  std::vector<MemoryReadResult> completedReadRequests_;
+
+  /**
+   * This map is used to store unique ids of SST::StandardMem::Read requests and
+   * their corresponding AggregateReadRequest as key-value pairs (In some cases
+   * SimEngMemoryRequest has to be divided into multiple
+   * SST::StandardMem::Request(s) if the SimEngMemoryRequest size > cache line
+   * width). That is, the unique ids of multiple read requests and their
+   * corresponding AggregateReadRequest are stored in a many-to-one fashion.
+   * An entry from this map is removed when a response for
+   * SST::StandardMem::Read request is received and recorded. The response holds
+   * the same unique id as the request. No such key-value pairs are maintained
+   * for AggregateWriteRequest(s) even if they are split into multiple
+   * SST::StandardMem::Write requests as their responses do not need to be
+   * aggregated.
+   */
+  std::unordered_map<uint64_t, AggregateReadRequest*> aggregationMap_;
+
+  /** This method only accepts structs derived from the SimEngMemoryRequest
+   * struct as the value for aggrReq. */
+  template <typename T, typename std::enable_if<std::is_base_of<
+                            SimEngMemoryRequest, T>::value>::type* = nullptr>
+  std::vector<StandardMem::Request*> makeSSTRequests(T* aggrReq,
+                                                     uint64_t addrStart,
+                                                     uint64_t addrEnd,
+                                                     uint64_t size);
+
+  /** The overloaded instance of splitAggregatedRequest is used to split an
+   * AggregateWriteRequest into multiple SST write requests.
+   */
+  std::vector<StandardMem::Request*> splitAggregatedRequest(
+      AggregateWriteRequest* aggrReq, uint64_t addrStart, uint64_t size);
+
+  /** The overloaded instance of splitAggregatedRequest is used to split an
+   * AggregateReadRequest into multiple SST read requests.
+   */
+  std::vector<StandardMem::Request*> splitAggregatedRequest(
+      AggregateReadRequest* aggrReq, uint64_t addrStart, uint64_t size);
+
+  /** This method is used to aggregate responses from multiple read request into
+   * one response. */
+  void aggregatedReadResponses(AggregateReadRequest* aggrReq);
+
+  /** Get the number of cache lines needed in case the size of a memory request
+   * is larger than cache line width.
+   */
+  int getNumCacheLinesNeeded(uint64_t size) const;
+
+  /** NOTE(review): presumably reports whether `a + b` would wrap around
+   * uint64_t - confirm against the definition. */
+  bool unsignedOverflow_(uint64_t a, uint64_t b) const;
+
+  /**
+   * Check to see if a request spans multiple cache lines. This method
+   * identifies the case when the start and end address of the request do not
+   * lie on the same cache line. This can even happen if the size of the memory
+   * request is less than cache line width.
+   */
+  bool requestSpansMultipleCacheLines(uint64_t addrStart,
+                                      uint64_t addrEnd) const;
+
+  /**
+   * This method is used to find the end address of the cache line specified by
+   * the start address of the memory request. This method is used when a memory
+   * request spans multiple cache lines.
+   */
+  uint64_t nearestCacheLineEnd(uint64_t addrStart) const;
+
+  /** Variable to enable parseable print debug statements in test mode. */
+  bool debug_ = false;
+};
+
+};  // namespace SSTSimEng
+
+};  // namespace SST
diff --git a/sst/test/CMakeLists.txt b/sst/test/CMakeLists.txt
new file mode 100644
index 0000000000..06f0e02278
--- /dev/null
+++ b/sst/test/CMakeLists.txt
@@ -0,0 +1,37 @@
+# Sources of the SST integration test runner.
+set(SIMENG_SST_TEST_SOURCES
+    main.cc
+    src/tg0_llvm_assemble.cc
+    src/tg1_load_store.cc
+    src/tg2_cache_access.cc
+    src/tg3_request_split.cc
+    src/tg4_request_misaligned.cc
+    src/tg5_benchmarks.cc
+)
+add_executable(sstsimengtest ${SIMENG_SST_TEST_SOURCES})
+
+# Enable warnings on the test runner itself. A directory-scoped
+# add_compile_options() issued after add_executable() only affects targets
+# created later, so it never reached sstsimengtest.
+target_compile_options(sstsimengtest PRIVATE -Wall)
+
+set(SSTTESTDIR "${CMAKE_CURRENT_SOURCE_DIR}")
+
+# Paths baked into the test binary. PRIVATE is sufficient: nothing links
+# against an executable.
+target_compile_definitions(sstsimengtest PRIVATE
+  SST_INSTALL_DIR="${SST_INSTALL_DIR}"
+  SST_TEST_DIR="${SSTTESTDIR}"
+  SST_TESTS_MODEL_CONFIG_PATH="${SST_TESTS_MODEL_CONFIG_PATH}"
+)
+
+# SST_TEST_CMD is only defined for the test binary when it was supplied at
+# configure time.
+if(SST_TEST_CMD)
+  target_compile_definitions(sstsimengtest PRIVATE
+    SST_TEST_CMD="${SST_TEST_CMD}"
+  )
+endif()
+
+# NOTE(review): this modifies libsimeng from the SST test directory; confirm
+# whether sstsimengtest was the intended target.
+target_include_directories(libsimeng PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
+target_include_directories(sstsimengtest PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+
+add_test(NAME simeng_sst_tests COMMAND sstsimengtest)
diff --git a/sst/test/include/framework/context.hh b/sst/test/include/framework/context.hh
new file mode 100644
index 0000000000..8759566d55
--- /dev/null
+++ b/sst/test/include/framework/context.hh
@@ -0,0 +1,59 @@
+#pragma once
+
+// uint64_t is used by SourceInfo and TestContext below.
+#include <cstdint>
+#include <functional>
+#include <string>
+
+/** Signature of the callable that holds the body of a test case. */
+using TestFunc = std::function<void(std::string)>;
+
+/**
+ * Location of a TEST_CASE in the sources: the file it is written in and the
+ * line it starts on.
+ */
+struct SourceInfo {
+  /** File name; null when the location is unknown. */
+  const char* fname_ = nullptr;
+  /** Line number; 0 when the location is unknown. */
+  uint64_t lnum_ = 0;
+
+  /** Creates an empty location (null file name, line 0). */
+  SourceInfo() = default;
+
+  /** Creates a location from a file name and a line number. */
+  SourceInfo(const char* fl_name, uint64_t ln_num)
+      : fname_(fl_name), lnum_(ln_num) {}
+};
+
+/**
+ * This class represents a test case, it stores all contextual information
+ * regarding the test case. The TEST_CASE macro ultimately leads to the creation
+ * of a TestContext. TestContext(s) are run inside Runner(s).
+ */
+class TestContext {
+ private:
+  /** The name of the test case passed using the TEST_CASE macro. */
+  std::string tname_;
+  /** The function which contains all the testable logic. */
+  TestFunc tfn_;
+  /** The source of the test case. */
+  SourceInfo tsinfo_;
+
+ public:
+  /** Constructor used to create a TestContext by the TEST_CASE macro. */
+  TestContext(TestFunc fn, const SourceInfo& info, std::string tname)
+      : tname_(tname), tfn_(fn), tsinfo_(info) {}
+
+  /** Constructor used to create an empty TestContext: empty name, empty
+   * TestFunc and a default-constructed SourceInfo. */
+  TestContext() : tname_(""), tfn_(), tsinfo_(SourceInfo{}) {}
+
+  /** Returns the name of the test case. */
+  std::string getTestCaseName() const { return tname_; }
+
+  /**
+   * Returns the name of the file the test is written in, or an empty string
+   * when no file name was recorded. A default-constructed SourceInfo holds a
+   * null file name, and building a std::string from a null pointer is
+   * undefined behaviour, hence the guard.
+   */
+  std::string getTestCaseSrcFile() const {
+    return tsinfo_.fname_ != nullptr ? std::string(tsinfo_.fname_)
+                                     : std::string();
+  }
+
+  /** Returns the TestFunc of the test case. */
+  TestFunc getTestCaseFn() const { return tfn_; }
+
+  /** Returns the line where the test exists. */
+  uint64_t getTestCaseLineNum() const { return tsinfo_.lnum_; }
+};
diff --git a/sst/test/include/framework/expression.hh b/sst/test/include/framework/expression.hh
new file mode 100644
index 0000000000..fa0cdae03e
--- /dev/null
+++ b/sst/test/include/framework/expression.hh
@@ -0,0 +1,255 @@
+#pragma once
+
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+/**
+ * Substitution failure is not an error (SFINAE) refers to a
+ * situation in C++ where an invalid substitution of template parameters is not
+ * in itself an error.
+ * https://en.cppreference.com/w/cpp/language/sfinae
+ */
+
+/**
+ * SFINAE type trait which only allows the template arguments to be a
+ * fundamental type.
+ */
+template <typename T>
+using IsFundamentalType =
+    std::enable_if_t<std::is_fundamental<T>::value, T>;
+
+/**
+ * SFINAE type trait which checks if the template argument is char*, const char*
+ * or std::string.
+ */
+template <typename... Ts>
+using IsString = std::enable_if_t<
+    (... && (std::is_same_v<char*, std::decay_t<Ts>> ||
+             std::is_same_v<const char*, std::decay_t<Ts>> ||
+             std::is_same_v<std::string, std::decay_t<Ts>>))>;
+
+/**
+ * This struct represents the evaluation of an expression and all details
+ * related to it.
+ */
+struct ExprEval {
+  /** Outcome of evaluating the expression; fixed at construction. */
+  const bool result_;
+  /** True for a binary expression, false for a unary one. */
+  const bool is_binary_;
+  /** Source text of the expression. */
+  std::string exprString_;
+  /** Line number at which the expression appears in the source. */
+  uint64_t exprLineNum_;
+
+  /** Fully specified evaluation; the other constructors delegate here. */
+  ExprEval(bool result, bool is_binary, std::string exprString,
+           uint64_t exprLineNum)
+      : result_(result),
+        is_binary_(is_binary),
+        exprString_(exprString),
+        exprLineNum_(exprLineNum) {}
+  /** Evaluation without source details: empty string and line number 0. */
+  ExprEval(bool result, bool is_binary) : ExprEval(result, is_binary, "", 0) {}
+  /** Default evaluation: failed, unary, no source details. */
+  ExprEval() : ExprEval(false, false) {}
+};
+
+/**
+ * This class represents an expression which evaluates to a boolean result.
+ */
+class BaseExpr {
+ public:
+  /** Virtual destructor: derived expressions may be destroyed via BaseExpr. */
+  virtual ~BaseExpr() = default;
+  /** Evaluates this expression as a unary expression; derived classes
+   * override it. The base version yields a failed, non-binary ExprEval. */
+  virtual auto makeUnaryExprEval() -> ExprEval {
+    return ExprEval{false, false, "", 0};
+  }
+};
+
+/**
+ * This class represent the LHS of any expression which can be evaluated to a
+ * boolean result. For Unary expression the value evaluated is still stored
+ * as LHS, however during evaluation the makeUnaryExprEval method uses the
+ * static_cast method for evaluation.
+ */
+template <typename Lhs, typename = IsFundamentalType<Lhs>>
+class LhsExpr : public BaseExpr {
+ private:
+  /** The left hand side of an expression. */
+  Lhs lhs_;
+
+ public:
+  explicit LhsExpr(Lhs lhs) : lhs_(lhs) {}
+
+  /**
+   * Evaluates 'lhs > rhs' as a binary expression. Rhs is deduced from the
+   * argument, so it never has to be spelled out; the IsFundamentalType SFINAE
+   * filter only admits fundamental types. The operator is const because the
+   * comparison never modifies the stored left hand side.
+   */
+  template <typename Rhs, typename = IsFundamentalType<Rhs>>
+  auto operator>(Rhs const& rhs) const -> ExprEval {
+    return ExprEval{static_cast<bool>(lhs_ > rhs), true};
+  }
+
+  /**
+   * Evaluates 'lhs < rhs' as a binary expression. Rhs is deduced from the
+   * argument, so it never has to be spelled out; the IsFundamentalType SFINAE
+   * filter only admits fundamental types. The operator is const because the
+   * comparison never modifies the stored left hand side.
+   */
+  template <typename Rhs, typename = IsFundamentalType<Rhs>>
+  auto operator<(Rhs const& rhs) const -> ExprEval {
+    return ExprEval{static_cast<bool>(lhs_ < rhs), true};
+  }
+
+  /**
+   * Evaluates 'lhs >= rhs' as a binary expression. Rhs is deduced from the
+   * argument, so it never has to be spelled out; the IsFundamentalType SFINAE
+   * filter only admits fundamental types. The operator is const because the
+   * comparison never modifies the stored left hand side.
+   */
+  template <typename Rhs, typename = IsFundamentalType<Rhs>>
+  auto operator>=(Rhs const& rhs) const -> ExprEval {
+    return ExprEval{static_cast<bool>(lhs_ >= rhs), true};
+  }
+
+  /**
+   * Evaluates 'lhs <= rhs' as a binary expression. Rhs is deduced from the
+   * argument, so it never has to be spelled out; the IsFundamentalType SFINAE
+   * filter only admits fundamental types. The operator is const because the
+   * comparison never modifies the stored left hand side.
+   */
+  template <typename Rhs, typename = IsFundamentalType<Rhs>>
+  auto operator<=(Rhs const& rhs) const -> ExprEval {
+    return ExprEval{static_cast<bool>(lhs_ <= rhs), true};
+  }
+
+  /**
+   * Evaluates 'lhs == rhs' as a binary expression. Rhs is deduced from the
+   * argument, so it never has to be spelled out; the IsFundamentalType SFINAE
+   * filter only admits fundamental types. The operator is const because the
+   * comparison never modifies the stored left hand side.
+   */
+  template <typename Rhs, typename = IsFundamentalType<Rhs>>
+  auto operator==(Rhs const& rhs) const -> ExprEval {
+    return ExprEval{static_cast<bool>(lhs_ == rhs), true};
+  }
+
+  /**
+   * Evaluates 'lhs != rhs' as a binary expression. Rhs is deduced from the
+   * argument, so it never has to be spelled out; the IsFundamentalType SFINAE
+   * filter only admits fundamental types. The operator is const because the
+   * comparison never modifies the stored left hand side.
+   */
+  template <typename Rhs, typename = IsFundamentalType<Rhs>>
+  auto operator!=(Rhs const& rhs) const -> ExprEval {
+    return ExprEval{static_cast<bool>(lhs_ != rhs), true};
+  }
+
+  /** Evaluates the stored value on its own, converted to bool (unary). */
+  auto makeUnaryExprEval() -> ExprEval override {
+    return ExprEval{static_cast<bool>(lhs_), false};
+  }
+};
+
+/**
+ * ExprBuilder struct exposes an operator which is used to instantiate an
+ * LhsExpr of type T without having to explicitly define the template argument.
+ */
+struct ExprBuilder {
+  /**
+   * Wraps a fundamental-type value in an LhsExpr so that a comparison
+   * operator can be applied to it next. T is deduced from the argument.
+   */
+  template <typename T, typename = IsFundamentalType<T>>
+  auto operator<<(T const& lhs) -> LhsExpr<T> {
+    return LhsExpr<T>(lhs);
+  }
+  /**
+   * Rejects string operands (char*, const char*, std::string): always throws
+   * std::invalid_argument and therefore never produces a value.
+   */
+  template <typename Ts, typename = IsString<Ts>>
+  auto operator<<(Ts const&) -> bool {
+    throw std::invalid_argument(
+        "String comparisons should be done with STR macros.");
+  }
+};
+
+/**
+ * This class represent the LHS and RHS of any expression containing strings
+ * which can be evaluated to a boolean result.
+ */
+class StrExpr : public BaseExpr {
+  std::string lhs_;
+  std::string rhs_;
+
+ public:
+  explicit StrExpr(std::string lhs) : lhs_(lhs) {}
+
+  /**
+   * Stores the argument as the RHS string and returns a copy of this
+   * expression. T is deduced from the argument and filtered by the IsString
+   * SFINAE type trait.
+   */
+  template <typename T, typename = IsString<T>>
+  auto operator<<(T const& arg) -> StrExpr {
+    rhs_ = std::string(arg);
+    return *this;
+  }
+  /**
+   * This method checks if the LHS string is equal to the RHS string and returns
+   * the result as a binary ExprEval struct.
+   */
+  auto compareEqual() -> ExprEval {
+    return ExprEval{lhs_ == rhs_, true};
+  }
+  /**
+   * This method checks if the LHS string is not equal to the RHS string and
+   * returns the result as a binary ExprEval struct.
+   */
+  auto compareNotEqual() -> ExprEval {
+    return ExprEval{lhs_ != rhs_, true};
+  }
+  /**
+   * This method checks if the LHS string starts with the RHS string. Only the
+   * leading rhs_.size() characters of LHS are compared; an empty RHS matches.
+   */
+  auto startsWith() -> ExprEval {
+    return ExprEval{lhs_.compare(0, rhs_.size(), rhs_) == 0, true};
+  }
+  /** Alias of startsWith(), kept for callers using the original spelling. */
+  auto startWith() -> ExprEval { return startsWith(); }
+  /**
+   * This method checks if the LHS string contains the RHS string and returns
+   * the result as a binary ExprEval struct.
+   */
+  auto contains() -> ExprEval {
+    return ExprEval{lhs_.find(rhs_) != std::string::npos, true};
+  }
+  /**
+   * This method always throws std::domain_error, as a string expression
+   * cannot be treated as a Unary expression.
+   */
+  auto makeUnaryExprEval() -> ExprEval override {
+    throw std::domain_error("String Expressions cannot be Unary");
+  }
+};
+
+/**
+ * StrExprBuilder struct exposes a operator which is used to instantiate an
+ * StrExpr of type T (filtered by the SFINAE expression) without having to
+ * explicitly define the template argument of type char*, const char* or
+ * std::string.
+ */
+struct StrExprBuilder {
+  /** Starts a string expression: converts arg to std::string as the LHS. */
+  template <typename T, typename = IsString<T>>
+  auto operator<<(T const& arg) -> StrExpr {
+    return StrExpr{std::string(arg)};
+  }
+};
diff --git a/sst/test/include/framework/handlers.hh b/sst/test/include/framework/handlers.hh
new file mode 100644
index 0000000000..a6002ca96e
--- /dev/null
+++ b/sst/test/include/framework/handlers.hh
@@ -0,0 +1,98 @@
+#pragma once
+
+#include <memory>
+
+#include "framework/context.hh"
+#include "framework/expression.hh"
+#include "framework/output.hh"
+#include "framework/process.hh"
+
+/**
+ * This Singleton class is used to handle all exceptions that could happen
+ * within a TEST_CASE.
+ */
+class ExceptionHandler {
+ private:
+  /** TestContext of the currently running TEST_CASE; null until registered. */
+  TestContext* ctx_ = nullptr;
+  ExceptionHandler() {}
+
+ public:
+  /** Returns the singleton instance, creating it when none exists yet. */
+  static std::unique_ptr<ExceptionHandler>& getInstance() {
+    static std::unique_ptr<ExceptionHandler> ptr;
+    if (ptr == nullptr) {
+      ptr = std::unique_ptr<ExceptionHandler>(new ExceptionHandler());
+    }
+    return ptr;
+  }
+  /**
+   * Handles any runtime error raised while the logic inside a TEST_CASE
+   * runs. The failing test is reported only when a TestContext has been
+   * registered. Calls exit(EXIT_FAILURE), terminating the test suite.
+   */
+  void handleTestRuntimeException(const std::exception& e,
+                                  const Output& output = Output()) {
+    std::string excpStr = e.what();
+    if (ctx_ != nullptr) output.fail(ctx_);
+    output.output("", 8, Formatter::bold_bright_red("Runtime error: "));
+    output.output("", 8, excpStr);
+    output.printCapturedStdCout();
+    std::exit(EXIT_FAILURE);
+  }
+
+  /**
+   * This method handles any exception raised by an Expression inside a
+   * TEST_CASE. If no TestContext is registered, the failing test is not
+   * named and the source file is printed as an empty string.
+   */
+  void handleExpressionException(const ExprEval& expr,
+                                 const Output& output = Output()) {
+    std::string srcFile = "";
+    if (ctx_ != nullptr) {
+      output.fail(ctx_);
+      srcFile = ctx_->getTestCaseSrcFile();
+    }
+    output.output("", 8, Formatter::bold("Expression failed: "), srcFile, ":",
+                  expr.exprLineNum_);
+    output.output("", 8, Formatter::blue(expr.exprString_));
+    output.printCapturedStdCout();
+  }
+
+  /**
+   * This method handles ProcessException thrown by the Process class upon
+   * encountering errors/exceptions inside the child process.
+   */
+  void handleProcessException(const ProcessException& procExcp,
+                              const Output& output = Output()) {
+    output.output("", 0, procExcp.errString_);
+    if (procExcp.stdoutStr_ != "") output.output("", 4, procExcp.stdoutStr_);
+    if (procExcp.stderrStr_ != "") output.output("", 4, procExcp.stderrStr_);
+  }
+  /** Registers the TestContext of the currently running test case. */
+  void registerContext(TestContext* ctx) { ctx_ = ctx; }
+};
+
+/**
+ * This class is used to throw ExprEval exceptions upon encountering a failing
+ * expression.
+ */
+class ExpressionHandler {
+ public:
+  ExpressionHandler() {}
+  /** Handles all Unary Expressions. The expression keeps its own type (no
+   * copy into BaseExpr), otherwise it would be sliced and the derived
+   * makeUnaryExprEval would never run. */
+  template <typename Expr, typename = typename std::enable_if<
+                               std::is_base_of<BaseExpr, Expr>::value>::type>
+  void handleExpression(Expr expr, std::string exprSource, uint64_t lineNum) {
+    handleExpression(expr.makeUnaryExprEval(), exprSource, lineNum);
+  }
+  /** Records where the expression came from and throws it if it failed. */
+  void handleExpression(ExprEval exprRes, std::string exprSource,
+                        uint64_t lineNum) {
+    exprRes.exprString_ = exprSource;
+    exprRes.exprLineNum_ = lineNum;
+    if (!exprRes.result_) throw exprRes;
+  }
+};
\ No newline at end of file
diff --git a/sst/test/include/framework/macros/eval.hh b/sst/test/include/framework/macros/eval.hh
new file mode 100644
index 0000000000..2c47629c86
--- /dev/null
+++ b/sst/test/include/framework/macros/eval.hh
@@ -0,0 +1,70 @@
+#pragma once
+
+#include "framework/expression.hh"
+#include "framework/handlers.hh"
+#include "framework/macros/util.hh"
+
+// Expands to the code evaluating `A OP B` and reporting SRC with line `line`.
+// A and B are parenthesised so compound operands keep their own precedence.
+#define GENERIC_EXPECT_WITH_LINE(A, OP, B, line, SRC)          \
+  {                                                            \
+    ExpressionHandler handler = ExpressionHandler();           \
+    handler.handleExpression(ExprBuilder() << (A) OP (B), SRC, \
+                             static_cast<uint64_t>(line));     \
+  }
+
+// Expands to the code evaluating the string matcher OP on A (LHS) and B (RHS)
+// and reporting SRC with line `line`. A and B are parenthesised so compound
+// operands (e.g. a conditional expression) are streamed as a single value.
+#define GENERIC_STR_MATCHER_WITH_LINE(A, OP, B, line, SRC)             \
+  {                                                                    \
+    ExpressionHandler handler = ExpressionHandler();                   \
+    handler.handleExpression((StrExprBuilder() << (A) << (B)).OP, SRC, \
+                             static_cast<uint64_t>(line));             \
+  }
+
+// This MACRO is used to invoke the GENERIC_STR_MATCHER_WITH_LINE MACRO with
+// the __LINE__ MACRO as its `line` argument.
+#define GENERIC_STR_MATCHER(A, OP, B, SRC) \
+  GENERIC_STR_MATCHER_WITH_LINE(A, OP, B, __LINE__, SRC)
+
+// This MACRO is used to invoke the GENERIC_EXPECT_WITH_LINE MACRO with the
+// __LINE__ MACRO as its `line` argument.
+#define GENERIC_EXPECT(A, OP, B, SRC) \
+  GENERIC_EXPECT_WITH_LINE(A, OP, B, __LINE__, SRC)
+
+// This MACRO expands with the '==' operator on the LHS and RHS of the
+// expression.
+#define EXPECT_EQ(A, B) GENERIC_EXPECT(A, ==, B, STRINGIFY(EXPECT_EQ(A, B)))
+// This MACRO expands with the '>' operator on the LHS and RHS of the
+// expression.
+#define EXPECT_GT(A, B) GENERIC_EXPECT(A, >, B, STRINGIFY(EXPECT_GT(A, B)))
+// This MACRO expands with the '<' operator on the LHS and RHS of the
+// expression.
+#define EXPECT_LT(A, B) GENERIC_EXPECT(A, <, B, STRINGIFY(EXPECT_LT(A, B)))
+// This MACRO expands with the '>=' operator on the LHS and RHS of the
+// expression.
+#define EXPECT_GTE(A, B) GENERIC_EXPECT(A, >=, B, STRINGIFY(EXPECT_GTE(A, B)))
+// This MACRO expands with the '<=' operator on the LHS and RHS of the
+// expression.
+#define EXPECT_LTE(A, B) GENERIC_EXPECT(A, <=, B, STRINGIFY(EXPECT_LTE(A, B)))
+// This MACRO expands with the '!=' operator on the LHS and RHS of the
+// expression.
+#define EXPECT_NEQ(A, B) GENERIC_EXPECT(A, !=, B, STRINGIFY(EXPECT_NEQ(A, B)))
+
+// This MACRO expands to invoke the compareEqual() method on the LHS and RHS
+// strings.
+#define STR_EQ(A, B) \
+  GENERIC_STR_MATCHER(A, compareEqual(), B, STRINGIFY(STR_EQ(A, B)))
+// This MACRO expands to invoke the compareNotEqual() method on the LHS and RHS
+// strings.
+#define STR_NOT_EQ(A, B) \
+  GENERIC_STR_MATCHER(A, compareNotEqual(), B, STRINGIFY(STR_NOT_EQ(A, B)))
+// This MACRO expands to invoke the prefix check which StrExpr exposes as
+// startWith() on the LHS and RHS strings.
+#define STR_STARTS_WITH(A, B) \
+  GENERIC_STR_MATCHER(A, startWith(), B, STRINGIFY(STR_STARTS_WITH(A, B)))
+// This MACRO expands to invoke the contains() method on the LHS and RHS
+// strings.
+#define STR_CONTAINS(A, B) \
+  GENERIC_STR_MATCHER(A, contains(), B, STRINGIFY(STR_CONTAINS(A, B)))
diff --git a/sst/test/include/framework/macros/group.hh b/sst/test/include/framework/macros/group.hh
new file mode 100644
index 0000000000..40b9c0e861
--- /dev/null
+++ b/sst/test/include/framework/macros/group.hh
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <algorithm>
+
+#include "framework/context.hh"
+#include "framework/macros/util.hh"
+#include "framework/registry.hh"
+#include "framework/runner.hh"
+
+// This MACRO expands to a SourceInfo built from __FILE__ and __LINE__.
+#define MAKE_TEST_SOURCE \
+  SourceInfo { __FILE__, static_cast<uint64_t>(__LINE__) }
+
+// This MACRO pastes `counter` onto the Test_Group_ prefix. It wraps CONCAT as
+// calling CONCAT inside CONCAT leads to preprocessing errors.
+#define CONSTRUCT_UNQIUE_GROUP_NAME(counter) CONCAT(Test_Group_, counter)
+
+// This internal MACRO expands to the class of a TEST_GROUP, whose GroupConfig
+// holds groupName, SST_INSTALL_DIR, sstConfigFile and the variadic arguments.
+#define CONSTRUCT_GROUP_COMPLETE(ClassName, groupName, sstConfigFile, ...)  \
+  class ClassName : public Group<ClassName> {                               \
+    static const bool registered_;                                          \
+                                                                            \
+   private:                                                                 \
+    const GroupConfig config_ = GroupConfig{                                \
+        groupName, std::vector<std::string>{SST_INSTALL_DIR, sstConfigFile, \
+                                            __VA_ARGS__}};                  \
+                                                                            \
+   public:                                                                  \
+    const GroupConfig& getGroupConfig() { return config_; }                 \
+    static std::string getGroupName() { return groupName; }                 \
+    FACTORY(ClassName)                                                      \
+  };                                                                        \
+  REGISTER(ClassName, groupName, __FILE__)
+
+// Entry point for a TEST_GROUP; forwards to CONSTRUCT_GROUP_COMPLETE.
+#define TEST_GROUP(ClassName, groupName, sstConfigFile, ...) \
+  CONSTRUCT_GROUP_COMPLETE(ClassName, groupName, sstConfigFile, __VA_ARGS__)
+
+// This MACRO pastes `counter` onto `tname`. It wraps CONCAT as calling CONCAT
+// inside CONCAT leads to preprocessing errors.
+#define CREATE_UNIQUE_TEST_NAME_G(tname, counter) CONCAT(tname, counter)
+
+// This MACRO creates a unique test name from the class name and __COUNTER__.
+#define CREATE_TEST_NAME_G(ClassName) \
+  CREATE_UNIQUE_TEST_NAME_G(CONCAT(ClassName, TEST_CASE), __COUNTER__)
+
+// This MACRO expands to a TestContext for the TEST_CASE function and a call to
+// ClassName::registerTest, which registers that TestContext to the TEST_GROUP.
+#define REGISTER_TC_G(ClassName, TestName, ptr, counter, TestCaseName, ...) \
+  std::unique_ptr<TestContext> CREATE_UNIQUE_TEST_NAME_G(                   \
+      ClassName, CONCAT(counter, ptr)) =                                    \
+      std::make_unique<TestContext>(&TestName, MAKE_TEST_SOURCE,            \
+                                    TestCaseName);                          \
+  const bool CONCAT(TestName, _registered_) = ClassName::registerTest(      \
+      CREATE_UNIQUE_TEST_NAME_G(ClassName, CONCAT(counter, ptr)),           \
+      ClassName::getGroupName(), std::vector<std::string>{__VA_ARGS__});
+
+// Internal MACRO called inside the TEST_CASE MACRO: it declares the test
+// function, registers it and opens its definition (the body follows the MACRO).
+#define CREATE_TEST_CASE_G(ClassName, TestName, TestCaseName, ...)   \
+  void TestName(std::string capturedStdout);                         \
+  REGISTER_TC_G(ClassName, TestName, ptr, __COUNTER__, TestCaseName, \
+                __VA_ARGS__)                                         \
+  void TestName(std::string capturedStdout)
+
+// This MACRO expands to define all source code required for the TEST_CASE.
+#define TEST_CASE(ClassName, TestCaseName, ...)                              \
+  CREATE_TEST_CASE_G(ClassName, CREATE_TEST_NAME_G(ClassName), TestCaseName, \
+                     __VA_ARGS__)
diff --git a/sst/test/include/framework/macros/util.hh b/sst/test/include/framework/macros/util.hh
new file mode 100644
index 0000000000..ffb940d7ef
--- /dev/null
+++ b/sst/test/include/framework/macros/util.hh
@@ -0,0 +1,6 @@
+#pragma once
+
+// The MACRO concatenates any two arguments into one token (via ##).
+#define CONCAT(A, B) A##B
+// The MACRO converts any argument into a string literal (via #).
+#define STRINGIFY(STR) #STR
diff --git a/sst/test/include/framework/output.hh b/sst/test/include/framework/output.hh
new file mode 100644
index 0000000000..e817bbc950
--- /dev/null
+++ b/sst/test/include/framework/output.hh
@@ -0,0 +1,192 @@
+#pragma once
+
+#include <iostream>
+#include <sstream>
+#include <string>
+
+#include "framework/context.hh"
+
+/** Static class which exposes methods to color strings. */
+class Formatter {
+ private:
+  /**
+   * Escape sequence which resets the terminal attributes, so that characters
+   * following a formatted string don't inherit its formatting.
+   */
+  static std::string reset() { return "\033[00m"; }
+  /**
+   * Generic method which takes a target string and an escape sequence, and
+   * returns the target string enclosed by that sequence and the reset one.
+   */
+  static std::string colour(const std::string& str, const std::string& code) {
+    return code + str + reset();
+  }
+
+ public:
+  /** Method which returns a blue string. */
+  static std::string blue(std::string str) {
+    return colour(str, "\033[34m");
+  }
+  /** Method which returns a grey string. */
+  static std::string grey(std::string str) {
+    return colour(str, "\033[30m");
+  }
+  /** Method which returns a bright green string. */
+  static std::string bright_green(std::string str) {
+    return colour(str, "\033[92m");
+  }
+  /** Method which returns a bright red string. */
+  static std::string bright_red(std::string str) {
+    return colour(str, "\033[91m");
+  }
+  /** Method which returns a bright grey string. */
+  static std::string bright_grey(std::string str) {
+    return colour(str, "\033[90m");
+  }
+  /** Method which returns a bold string. */
+  static std::string bold(std::string str) {
+    return colour(str, "\033[1m");
+  }
+  /** Method which returns a bold bright green string. */
+  static std::string bold_bright_green(std::string str) {
+    return bold(bright_green(str));
+  }
+  /** Method which returns a bold grey string. */
+  static std::string bold_grey(std::string str) {
+    return bold(grey(str));
+  }
+  /** Method which returns a bold bright grey string. */
+  static std::string bold_bright_grey(std::string str) {
+    return bold(bright_grey(str));
+  }
+  /** Method which returns a bold blue string. */
+  static std::string bold_blue(std::string str) {
+    return bold(blue(str));
+  }
+  /** Method which returns a bold bright red string. */
+  static std::string bold_bright_red(std::string str) {
+    return bold(bright_red(str));
+  }
+};
+
+/**
+ * Output class which implements string building and output methods. This class
+ * is also used to capture the stdout of logic running inside a TEST_CASE.
+ */
+class Output {
+ private:
+  /** buffer used to capture stdout. */
+  std::ostringstream buffer_;
+  /** stream buffer std::cout had before capturing; null when not capturing. */
+  std::streambuf* prevcoutbuf_ = nullptr;
+  /** default indent value set for all string builders and output methods. */
+  int indent_ = 0;
+  /** This method returns an indent string made of t spaces. */
+  std::string indent(uint32_t t) const { return std::string(t, ' '); }
+
+ public:
+  /**
+   * Type trait SFINAE expression which filters the template argument to only
+   * allow fundamental types and strings to pass argument types.
+   */
+  template <typename... Ts>
+  using IsAllowed = typename std::enable_if<std::conjunction<
+      std::disjunction<std::is_same<char*, typename std::decay_t<Ts>>,
+                       std::is_same<const char*, typename std::decay_t<Ts>>,
+                       std::is_same<std::string, typename std::decay_t<Ts>>,
+                       std::is_fundamental<Ts>>...>::value>::type;
+
+  /**
+   * output method which writes any fundamental type or string arguments to
+   * stdout, each followed by the delimiter. The text is printed line by line,
+   * every line prefixed with (ind + default indent) spaces.
+   */
+  template <typename... Ts, typename = IsAllowed<Ts...>,
+            std::size_t N = sizeof...(Ts)>
+  void output(std::string delimiter, int ind, Ts const&... xs) const {
+    std::ostringstream oss;
+    ([&] { oss << xs << delimiter; }(), ...);
+
+    // Build the text and the indent once instead of on every character.
+    const std::string str = oss.str();
+    const std::string prefix = indent(ind + indent_);
+    std::string builder = "";
+    for (char ch : str) {
+      if (ch == '\n') {
+        std::cout << prefix << builder << std::endl;
+        builder = "";
+      } else {
+        builder += ch;
+      }
+    }
+    if (builder != "") {
+      std::cout << prefix << builder << std::endl;
+    }
+  }
+
+  /**
+   * String builder method which builds a string out of any fundamental type
+   * or string arguments, each followed by the delimiter.
+   */
+  template <typename... Ts, typename = IsAllowed<Ts...>,
+            std::size_t N = sizeof...(Ts)>
+  std::string strBuilder(std::string delimiter, Ts const&... xs) const {
+    std::ostringstream oss;
+    ([&] { oss << xs << delimiter; }(), ...);
+    return oss.str();
+  }
+
+  /** Method which prints a passing test case given a TestContext. */
+  void pass(TestContext* ctx) const {
+    output(" ", 0, Formatter::bold_bright_green("[PASS]:"),
+           ctx->getTestCaseName());
+  }
+
+  /** Method which prints a failing test case given a TestContext. */
+  void fail(TestContext* ctx) const {
+    output(" ", 0, Formatter::bold_bright_red("[FAIL]:"),
+           ctx->getTestCaseName());
+    output("", 4, Formatter::bold("Source: "), ctx->getTestCaseSrcFile(), ":",
+           ctx->getTestCaseLineNum());
+  }
+
+  /** Method which prints a group name. */
+  void group(std::string groupName) const {
+    output(" ", 0, Formatter::bold_blue("[TEST GROUP]:"),
+           Formatter::bold(groupName));
+  }
+
+  /**
+   * This method captures the stdout: it replaces the stream buffer of
+   * std::cout with that of buffer_, so later writes to std::cout land in
+   * buffer_. The replaced buffer is kept unless one is already kept.
+   */
+  void captureStdCout() {
+    buffer_ = std::ostringstream();
+    std::streambuf* replaced = std::cout.rdbuf(buffer_.rdbuf());
+    if (prevcoutbuf_ == nullptr) prevcoutbuf_ = replaced;
+  }
+
+  /** Prints the captured stdout without its final newline, if not empty. */
+  void printCapturedStdCout(int indent = 0) const {
+    std::string str = buffer_.str();
+    if (!str.empty() && str.back() == '\n') str.pop_back();
+    if (!str.empty()) {
+      output("", 0, Formatter::bold_bright_grey("Captured output: "));
+      output("", indent, Formatter::grey(str));
+    }
+  }
+
+  /**
+   * This method resets the stream buffer of stdout so that it prints back to
+   * the terminal. It does nothing unless captureStdCout() was called before.
+   */
+  void resetStdCoutBuffer() {
+    if (prevcoutbuf_ == nullptr) return;
+    std::cout.rdbuf(prevcoutbuf_);
+    prevcoutbuf_ = nullptr;
+  }
+
+  /** Sets the default indent for any output or string building methods. */
+  void setIndent(int indent) { indent_ = indent; }
+};
\ No newline at end of file
diff --git a/sst/test/include/framework/parser.hh b/sst/test/include/framework/parser.hh
new file mode 100644
index 0000000000..1bf7287088
--- /dev/null
+++ b/sst/test/include/framework/parser.hh
@@ -0,0 +1,119 @@
+#pragma once
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+struct ParsedMemRead {
+  uint64_t reqId_ = 0;       // Request id (field 3 of a MemRead line).
+  uint64_t startCycle_ = 0;  // Field 5 of the first MemRead line of the id.
+  uint64_t endCycle_ = 0;    // Field 5 of a later MemRead line of the id.
+  uint64_t data_ = 0;        // Field 7 of a later MemRead line of the id.
+  uint64_t numReqs_ = 0;     // Field 7 of the first MemRead line of the id.
+};
+
+/**
+ * Parser class used to parse captured stdout into meaningful and comparable
+ * data.
+ */
+class Parser {
+ private:
+  /** Captured Stdout split into lines. */
+  std::vector<std::string> splitStdout_;
+
+  /** vector of ParsedMemReads. */
+  std::vector<ParsedMemRead*> parsedMemReads_;
+
+  /** Text of stdout lines with the [SSTSimEng:SSTDebug] OutputLine prefix. */
+  std::vector<std::string> outputLines;
+
+ public:
+  Parser(std::string capturedStdOut) {
+    splitStdout_ = splitStdoutIntoLines(capturedStdOut);
+    parsedMemReads_ = parseOutput();
+  };
+  ~Parser(){};
+  /** Returns the parsed stdout as ParsedMemReads. */
+  std::vector<ParsedMemRead*> getParsedMemReads() { return parsedMemReads_; }
+
+  /**
+   * Returns the text of the stdout lines having the [SSTSimEng:SSTDebug]
+   * OutputLine prefix.
+   */
+  std::vector<std::string> getOutputLines() { return outputLines; }
+
+ private:
+  /** Parses the captured stdout lines into MemReads and output lines. */
+  std::vector<ParsedMemRead*> parseOutput() {
+    std::map<uint64_t, ParsedMemRead*> pending;
+    std::vector<ParsedMemRead*> completed;
+    for (const std::string& line : splitStdout_) {
+      if (line.find("SSTSimEng:SSTDebug") == std::string::npos) continue;
+      std::vector<std::string> fields = split(line, "-");
+      if (fields[0] == "[SSTSimEng:SSTDebug] MemRead") {
+        // Fields 3, 5 and 7 are read below; skip lines too short to hold them.
+        if (fields.size() < 8) continue;
+        uint64_t id = std::stoull(fields[3]);
+        auto itr = pending.find(id);
+        if (itr != pending.end()) {
+          itr->second->endCycle_ = std::stoull(fields[5]);
+          itr->second->data_ = std::stoull(fields[7]);
+          completed.push_back(itr->second);
+        } else {
+          // NOTE(review): allocated here and never deleted by this class.
+          ParsedMemRead* p = new ParsedMemRead();
+          p->reqId_ = id;
+          p->startCycle_ = std::stoull(fields[5]);
+          p->numReqs_ = std::stoull(fields[7]);
+          pending.insert(std::pair<uint64_t, ParsedMemRead*>(id, p));
+        }
+      } else if (fields[0] == "[SSTSimEng:SSTDebug] OutputLine") {
+        // The fields after the prefix are joined without the '-' delimiter.
+        std::string str = "";
+        for (size_t y = 1; y < fields.size(); y++) {
+          str += fields[y];
+        }
+        outputLines.push_back(str);
+      }
+    }
+    return completed;
+  }
+
+  /** Returns the lines found between the start and completion markers. */
+  std::vector<std::string> splitStdoutIntoLines(std::string capturedStdout) {
+    std::vector<std::string> lines;
+    bool start = false;
+    size_t lineStart = 0;
+    // Scan for '\n' directly, so no string stream (and no <sstream>) is needed.
+    while (lineStart < capturedStdout.size()) {
+      size_t lineEnd = capturedStdout.find('\n', lineStart);
+      if (lineEnd == std::string::npos) lineEnd = capturedStdout.size();
+      std::string line = capturedStdout.substr(lineStart, lineEnd - lineStart);
+      lineStart = lineEnd + 1;
+      if (!start) {
+        start = line.find("[SimEng] Starting...") != std::string::npos;
+      } else if (line.find("Simulation complete. Finalising stats....") !=
+                 std::string::npos) {
+        break;
+      } else {
+        lines.push_back(line);
+      }
+    }
+    return lines;
+  };
+
+  /** This method splits a string into a vector of strings given a delimiter. */
+  std::vector<std::string> split(std::string s, std::string delimiter) {
+    std::vector<std::string> res;
+    size_t pos_start = 0;
+    size_t pos_end = s.find(delimiter, pos_start);
+    while (pos_end != std::string::npos) {
+      res.push_back(s.substr(pos_start, pos_end - pos_start));
+      pos_start = pos_end + delimiter.length();
+      pos_end = s.find(delimiter, pos_start);
+    }
+    // Whatever follows the last delimiter (possibly nothing) is the last token.
+    res.push_back(s.substr(pos_start));
+    return res;
+  }
+};
\ No newline at end of file
diff --git a/sst/test/include/framework/process.hh b/sst/test/include/framework/process.hh
new file mode 100644
index 0000000000..b9b624085a
--- /dev/null
+++ b/sst/test/include/framework/process.hh
@@ -0,0 +1,271 @@
+#pragma once
+#include <errno.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "framework/output.hh"
+
+/**
+ * This struct holds all information related to any
+ * errors/exceptions that could happen during execution of the binary specified
+ * by a TEST_GROUP. A process exception is thrown upon failure and handled
+ * inside the exception handler.
+ */
+struct ProcessException {
+  /** Custom error string specifying the type of error. */
+  std::string errString_;
+  /** Captured stderr of the child process. */
+  std::string stderrStr_;
+  /** Captured stdout of the child process. */
+  std::string stdoutStr_;
+  ProcessException() : errString_(""), stderrStr_(""), stdoutStr_("") {}
+  ProcessException(std::string errString)
+      : errString_(errString), stderrStr_(""), stdoutStr_("") {}
+  ProcessException(std::string errString, std::string stderrStr,
+                   std::string stdoutStr)
+      : errString_(errString), stderrStr_(stderrStr), stdoutStr_(stdoutStr) {}
+};
+
+/**
+ * Process class responsible for creating a child process which executes
+ * SST with the configuration file and CLI arguments specified by TEST_GROUP(s)
+ * and TEST_CASE(s). This class also captures the stdout and stderr of the child
+ * process and passes it back to the main process.
+ */
+class Process {
+ private:
+  /** Captured stdout of the child process. */
+  std::string stdoutCapture_;
+  /** Captured stderr of the child process.*/
+  std::string stderrCapture_;
+  /** Output instance used to build the error strings of exceptions. */
+  Output output_;
+  /** Default CLI arguments specified by a TEST_GROUP. */
+  std::vector<std::string> defaultCliArgs_;
+  /** Path to the SST binary i.e. SST_INSTALL_DIR/bin/<sstCmd_>. */
+  std::string sstBinPath_;
+  /** Path to the SST config file i.e. SST_TEST_DIR/sstconfigs/<args[1]>. */
+  std::string sstSimConfigFile_;
+  /** Name of the SST binary: SST_TEST_CMD if defined, otherwise "sst". */
+  std::string sstCmd_;
+
+ public:
+  // args[1] names the SST config file and the entries from args[2] onwards
+  // become the default CLI arguments; args[0] is not used here.
+  Process(std::vector<std::string> args) {
+#ifdef SST_TEST_CMD
+    sstCmd_ = SST_TEST_CMD;
+#else
+    sstCmd_ = "sst";
+#endif
+    sstSimConfigFile_ = std::string(SST_TEST_DIR) + "/sstconfigs/" + args.at(1);
+    sstBinPath_ = std::string(SST_INSTALL_DIR) + "/bin/" + sstCmd_;
+    defaultCliArgs_.assign(args.begin() + 2, args.end());
+  }
+  Process() {}
+
+  /**
+   * This method is used to run the SST executable with a config file and
+   * command line arguments. This method takes in additional command line
+   * arguments which replace default ones. If no additional command line
+   * arguments are given the default ones are used. Throws ProcessException
+   * if the parent fails to handle the pipes or the child, or if the child is
+   * terminated by a signal or exits with EXIT_FAILURE.
+   */
+  void runExecAndCaptureStdout(
+      std::vector<std::string> newArgs = std::vector<std::string>()) {
+    stderrCapture_ = "";
+    stdoutCapture_ = "";
+
+    // variable used to capture the status of the child process.
+    int status = 0;
+    // int arrays defined for stdout and stderr pipes.
+    int stdout_pipes[2];
+    int stderr_pipes[2];
+
+    // Pipe syscall which converts the int arrays defined above into pipes.
+    if (pipe(stdout_pipes) < 0) {
+      perror("Error occured while creating stdout pipes.");
+      exit(EXIT_FAILURE);
+    };
+    if (pipe(stderr_pipes) < 0) {
+      perror("Error occured while creating stderr pipes.");
+      exit(EXIT_FAILURE);
+    };
+
+    // forking into a child process.
+    pid_t pid = fork();
+
+    // If fork fails, report the error and terminate the test binary.
+    if (pid < 0) {
+      perror("Could not fork process");
+      exit(EXIT_FAILURE);
+    }
+
+    // If pid == 0, then we are in the forked child process. Its error paths
+    // leave through _exit(): exit() would run the atexit handlers and flush
+    // the stdio buffers inherited from the parent a second time.
+    if (pid == 0) {
+      // Redirect STDOUT and STDERR to the write ends of the pipes i.e.
+      // anything the child writes to them ends up in the stdout/stderr pipes.
+      if (dup2(stdout_pipes[1], STDOUT_FILENO) < 0 ||
+          dup2(stderr_pipes[1], STDERR_FILENO) < 0) {
+        perror("Error occured while redirecting output in child process");
+        _exit(EXIT_FAILURE);
+      }
+
+      // Close the local copies of the pipe ends; the child has no use for
+      // them once STDOUT and STDERR are redirected.
+      if (close(stdout_pipes[0]) < 0) {
+        perror(
+            "Error occured while closing the read end of the stdout pipe in "
+            "child process");
+        _exit(EXIT_FAILURE);
+      };
+      if (close(stdout_pipes[1]) < 0) {
+        perror(
+            "Error occured while closing the old write end of the stdout pipe "
+            "in child process");
+        _exit(EXIT_FAILURE);
+      };
+      if (close(stderr_pipes[0]) < 0) {
+        perror(
+            "Error occured while closing the read end of the stderr pipe in "
+            "child process");
+        _exit(EXIT_FAILURE);
+      };
+      if (close(stderr_pipes[1]) < 0) {
+        perror(
+            "Error occured while closing the old write end of the stderr pipe "
+            "in child process");
+        _exit(EXIT_FAILURE);
+      };
+
+#ifdef SST_TESTS_MODEL_CONFIG_PATH
+      std::string modelConfigPath =
+          "model=" + std::string(SST_TESTS_MODEL_CONFIG_PATH);
+#else
+      std::string modelConfigPath = R"(model="")";
+#endif
+      std::vector<std::string> argsToCpy =
+          newArgs.size() ? newArgs : defaultCliArgs_;
+      argsToCpy.push_back(modelConfigPath);
+      // execv takes the path to the binary and a null terminated char* argv[]
+      // which mimics the invocation format of SST i.e. [sstCmd_]
+      // [sstSimConfigFile] -- [cliArg1] [cliArg2] ... nullptr. Anything
+      // following '--' is passed to the config.py file by SST. The additional
+      // 4 entries are reserved for sstCmd_, sstSimConfigFile, '--' and nullptr.
+      std::vector<char*> cliArgs;
+      char* sstDelim = (char*)"--";
+      cliArgs.resize(argsToCpy.size() + 4, nullptr);
+      cliArgs[0] = strToCharPtr(sstCmd_);
+      cliArgs[1] = strToCharPtr(sstSimConfigFile_);
+      cliArgs[2] = sstDelim;
+      std::transform(argsToCpy.begin(), argsToCpy.end(), cliArgs.begin() + 3,
+                     [&](const std::string& str) { return strToCharPtr(str); });
+      // execv only returns if the invocation itself failed. stderr is the
+      // pipe by now, so the parent captures the message.
+      execv(sstBinPath_.c_str(), &cliArgs[0]);
+      perror("Could not execute the SST binary");
+      _exit(EXIT_FAILURE);
+    };
+
+    // Parent process: close the write ends of both pipes first, otherwise
+    // read() never encounters an EOF and the drain loop never terminates.
+    const bool outWrOk = close(stdout_pipes[1]) == 0;
+    const bool errWrOk = close(stderr_pipes[1]) == 0;
+
+    // Drain both pipes while the child runs: waiting for the child first
+    // deadlocks once it writes more than a pipe can buffer, and reading one
+    // pipe to EOF before the other deadlocks when the other one fills up.
+    std::string std_out;
+    std::string std_err;
+    if (outWrOk && errWrOk) {
+      struct pollfd fds[2] = {};
+      fds[0].fd = stdout_pipes[0];
+      fds[1].fd = stderr_pipes[0];
+      fds[0].events = fds[1].events = POLLIN;
+      std::string* sinks[2] = {&std_out, &std_err};
+      char buf[4096];
+      int remaining = 2;
+      while (remaining > 0) {
+        if (poll(fds, 2, -1) < 0) {
+          if (errno == EINTR) continue;
+          break;
+        }
+        for (int i = 0; i < 2; i++) {
+          if (fds[i].fd < 0 || fds[i].revents == 0) continue;
+          ssize_t n = read(fds[i].fd, buf, sizeof(buf));
+          if (n > 0) {
+            sinks[i]->append(buf, static_cast<size_t>(n));
+          } else if (n == 0 || errno != EINTR) {
+            // EOF or read error: poll() ignores negative descriptors.
+            fds[i].fd = -1;
+            remaining--;
+          }
+        }
+      }
+    }
+
+    // Close the read ends before anything is thrown so that failing runs do
+    // not leak descriptors, then reap exactly the child forked above.
+    const bool errRdOk = close(stderr_pipes[0]) == 0;
+    const bool outRdOk = close(stdout_pipes[0]) == 0;
+    pid_t reaped;
+    do {
+      reaped = waitpid(pid, &status, 0);
+    } while (reaped < 0 && errno == EINTR);
+
+    // Failures of the parent itself carry no captured output.
+    auto parentErr = [this](const char* what) {
+      return ProcessException{output_.strBuilder(
+          " ", Formatter::bold_bright_red("Error in parent process:"), what)};
+    };
+    // Failures of the executable carry everything captured from the child.
+    auto execErr = [&](const char* what) {
+      return ProcessException{
+          output_.strBuilder(
+              " ", Formatter::bold_bright_red("Error in executable:"), what),
+          std_err, std_out};
+    };
+    if (reaped < 0) throw parentErr("Failed to wait for the child process.");
+    if (!outWrOk) throw parentErr("Failed to close stdout output pipe.");
+    if (!errWrOk) throw parentErr("Failed to close stderr output pipe.");
+    // Check if child process running the executable terminated normally.
+    if (!WIFEXITED(status)) {
+      // check if failure was caused by a segfault.
+      const bool segfault = WIFSIGNALED(status) && WTERMSIG(status) == SIGSEGV;
+      throw execErr(segfault ? "Process terminated with a segfault"
+                             : "Process terminated with an error");
+    }
+    // Calling exit(EXIT_FAILURE) is still a successful exit albeit with a
+    // failing status.
+    if (WEXITSTATUS(status) == EXIT_FAILURE) {
+      throw execErr("Process exited with an error");
+    }
+    if (!errRdOk) throw parentErr("Failed to close stderr input pipe.");
+    if (!outRdOk) throw parentErr("Failed to close stdout input pipe.");
+
+    stdoutCapture_ = std_out;
+    stderrCapture_ = std_err;
+  };
+  /** Returns a new[]-allocated, null terminated copy of str. */
+  char* strToCharPtr(const std::string& str) {
+    const size_t len = str.size();
+    char* duplicate = new char[len + 1];
+    // std::string::copy returns the number of characters it copied i.e. len.
+    duplicate[str.copy(duplicate, len)] = '\0';
+    return duplicate;
+  }
+  /** Returns a copy of stdoutCapture_ (empty until a run has completed). */
+  std::string getStdOutCapture() { return stdoutCapture_; }
+  /** Returns a copy of stderrCapture_ (empty until a run has completed). */
+  std::string getStdErrCapture() { return stderrCapture_; }
+};
\ No newline at end of file
diff --git a/sst/test/include/framework/registry.hh b/sst/test/include/framework/registry.hh
new file mode 100644
index 0000000000..820312e568
--- /dev/null
+++ b/sst/test/include/framework/registry.hh
@@ -0,0 +1,102 @@
+#pragma once
+
+#include <functional>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "framework/context.hh"
+#include "framework/runner.hh"
+#include "framework/uid.hh"
+
+/**
+ * The Registry class is used to register Factory(s) which are able
+ * to create Runner(s) associated with TEST_GROUP(s). Registry is a singleton
+ * and can only be instantiated once.
+ */
+class Registry {
+ public:
+  /** Callable which creates the Runner of one TEST_GROUP. */
+  using Factory = std::function<std::unique_ptr<Runner>()>;
+  /** Maps a key (see registerGroup) to the Factory(s) stored under it. */
+  using RegistryMap = std::map<std::string, std::vector<Factory>>;
+
+  /**
+   * Returns the singleton instance of Registry, which is allocated on the
+   * first call.
+   */
+  static Registry* getInstance() {
+    /** Static instance of the singleton Registry class. */
+    static Registry* instance_;
+    if (!instance_) {
+      instance_ = new Registry();
+    }
+    return instance_;
+  }
+
+  /**
+   * Default-constructs a T and returns it as a polymorphic Runner, hence T
+   * must derive from Runner (T is expected to be a class extending Group).
+   * This is the function invoked by the Factory(s) the FACTORY macro builds.
+   */
+  template <typename T>
+  static std::unique_ptr<Runner> createDerived() {
+    return std::unique_ptr<Runner>(std::make_unique<T>());
+  }
+
+  /**
+   * Checks that gname is a unique TEST_GROUP name (the check terminates the
+   * program otherwise) and then stores the Factory f under the key fname.
+   * Always returns true.
+   */
+  static bool registerGroup(std::string fname, uint64_t line, std::string gname,
+                            Factory f) {
+    UidRegistry::validateGroupName(gname, fname, line);
+    return addRunner(fname, f);
+  }
+
+  /**
+   * Returns the singleton RegistryMap used to store all Factory(s), which is
+   * allocated on the first call.
+   */
+  static RegistryMap* getMap() {
+    /** Static instance of the map used to store all Factory(s). */
+    static RegistryMap* map;
+    if (map == nullptr) {
+      map = new RegistryMap;
+    }
+    return map;
+  }
+
+ private:
+  /** Private: instances are only created by getInstance(). */
+  Registry() {}
+  /** Appends the Factory f to the Factory(s) stored under fname. */
+  static bool addRunner(std::string fname, Factory f) {
+    // operator[] creates the empty vector for a key seen for the first time.
+    std::vector<Factory>& bucket = (*getMap())[fname];
+    bucket.push_back(f);
+    return true;
+  }
+};
+
+/**
+ * This macro registers Factory(s) of derived instances of Group(s) into the
+ * Registry. This macro is automatically called inside the TEST_GROUP macro.
+ */
+#define REGISTER(X, gname, cname) \
+  const bool X::registered_ =     \
+      Registry::registerGroup(cname, __LINE__, gname, X::factory());
+
+/**
+ * This macro defines a static factory() method for the class X. The returned
+ * Factory calls Registry::createDerived<X>() i.e. it creates an X and returns
+ * it as a Runner. This macro is automatically called in the TEST_GROUP macro.
+ */
+#define FACTORY(X)                                                       \
+  static Registry::Factory factory() {                                   \
+    Registry::Factory f = []() { return Registry::createDerived<X>(); }; \
+    return f;                                                            \
+  }
diff --git a/sst/test/include/framework/runner.hh b/sst/test/include/framework/runner.hh
new file mode 100644
index 0000000000..2ec182c4af
--- /dev/null
+++ b/sst/test/include/framework/runner.hh
@@ -0,0 +1,189 @@
+#pragma once
+
+#include <functional>
+#include <iostream>
+#include <string>
+
+#include "framework/context.hh"
+#include "framework/handlers.hh"
+#include "framework/process.hh"
+#include "framework/registry.hh"
+#include "framework/stats.hh"
+#include "framework/uid.hh"
+
+/**
+ * A Runner is an interface which is able to run a test case. Runner(s) are
+ * registered in the registry and invoked during runtime.
+ */
+class Runner {
+ public:
+  Runner() {}
+  virtual ~Runner() = default;  // deleted via std::unique_ptr<Runner>
+  /** Method used to run the test(s) inside a runner. */
+  virtual void run() {}
+  /**
+   * Returns the current executing TestContext. The base version returns one
+   * shared placeholder which outlives the call; Group overrides this.
+   */
+  virtual std::unique_ptr<TestContext>& getCurrContext() {
+    static std::unique_ptr<TestContext> ptr = std::make_unique<TestContext>();
+    return ptr;
+  }
+};
+
+/** GroupConfig used to provide configuration options to a Group. */
+struct GroupConfig {
+  /** String used to describe the test group. */
+  std::string groupStr_;
+  /** Arguments which Group::run() passes on to the Process constructor. */
+  std::vector<std::string> execArgs_;
+  GroupConfig() : groupStr_(), execArgs_() {}
+  GroupConfig(std::string groupStr, std::vector<std::string> execArgs)
+      : groupStr_(groupStr), execArgs_(execArgs) {}
+};
+
+/**
+ * A Group represents a collection of test cases. In a group a test case is
+ * referenced through its context. The Group template is instantiated with the
+ * class extending Group. This is called the "Curiously recurring template
+ * pattern" and in this use case it allows the polymorphic children of Group
+ * to have individual and different static contexts.
+ *
+ * For example:
+ * class A : public Group<A> and class B : public Group<B>
+ * will have different static contexts even if the static contexts are invoked
+ * through polymorphic parent references i.e.
+ * Runner runner1 = myClassA();
+ * Runner runner2 = myClassB();
+ * runner1.run() will use a different static context and runner2.run() will use
+ * a different static context. This enables us to register TEST_CASE(s) to
+ * different TEST_GROUP(s) while maintaining isolation among TEST_GROUP(s).
+ */
+
+template <typename T>
+class Group : public Runner {
+  using TestCtxs = std::vector<std::unique_ptr<TestContext>>;
+  using Args = std::vector<std::vector<std::string>>;
+
+ private:
+  /**
+   * Empty GroupConfig which the base implementation of getGroupConfig()
+   * returns by reference.
+   */
+  const GroupConfig emptyConfig_ = GroupConfig{};
+
+  /** TestContext of the currently executing test. */
+  std::unique_ptr<TestContext> ctx_;
+  /** Output class used to capture stdout of testcases and output to stdout. */
+  Output output_;
+
+  /** Method which returns all TestContext(s) registered to a Group. */
+  static std::unique_ptr<TestCtxs>& getTestCtxs() {
+    // One vector per Group<T> instantiation, allocated on first use.
+    static std::unique_ptr<TestCtxs> registered;
+    if (!registered) {
+      registered.reset(new TestCtxs);
+    }
+    return registered;
+  }
+
+  /**
+   * Method which returns the arguments of all test cases registered to a
+   * Group; entry i belongs to the TestContext stored at index i.
+   */
+  static std::unique_ptr<Args>& getArgs() {
+    static std::unique_ptr<Args> perTestArgs;
+    if (!perTestArgs) {
+      perTestArgs.reset(new Args);
+    }
+    return perTestArgs;
+  }
+
+ public:
+  /**
+   * Executes all test cases in a Group. A test case is counted before its SST
+   * run starts, so a failing run is part of the total as well as the failures.
+   */
+  void run() {
+    std::unique_ptr<TestCtxs>& tctxs_ = getTestCtxs();
+    std::unique_ptr<Args>& args = getArgs();
+    const GroupConfig& config = getGroupConfig();
+    std::unique_ptr<ExceptionHandler>& handler =
+        ExceptionHandler::getInstance();
+    std::unique_ptr<Stats>& stats = Stats::getInstance();
+    Process process = Process(config.execArgs_);
+
+    output_.group(config.groupStr_);
+    output_.setIndent(4);
+    for (size_t x = 0; x < tctxs_->size(); x++) {
+      bool fail = true;
+      stats->recordTest();
+      try {
+        auto arg = args->at(x);
+        // Ownership of the context moves from the registered list to ctx_.
+        ctx_ = std::move(tctxs_->at(x));
+        // Register context with the exception handler before the SST run.
+        handler->registerContext(ctx_.get());
+        // Run the SST binary with additional args (if any).
+        process.runExecAndCaptureStdout(arg);
+        // Start capturing stdout.
+        output_.captureStdCout();
+        // Pass the captured output from SST to the TEST_CASE.
+        ctx_->getTestCaseFn()(process.getStdOutCapture());
+        fail = false;
+      } catch (const std::exception& e) {
+        // On exception reset the stdout buffer first and then handle the
+        // exception.
+        output_.resetStdCoutBuffer();
+        handler->handleTestRuntimeException(e, output_);
+      } catch (const ExprEval& expr) {
+        // On exception reset the stdout buffer first and then handle the
+        // exception.
+        output_.resetStdCoutBuffer();
+        handler->handleExpressionException(expr, output_);
+      } catch (const ProcessException& procExcp) {
+        // Thrown by the SST run i.e. before stdout capturing started, hence
+        // there is no buffer to reset before handling the exception.
+        handler->handleProcessException(procExcp);
+      }
+      // reset the stdout buffer.
+      output_.resetStdCoutBuffer();
+      if (fail) {
+        stats->recordFailure();
+        continue;
+      }
+      stats->recordSuccess();
+      // Output passing testcase.
+      output_.pass(ctx_.get());
+      // print captured out of the TEST_CASE not the SST executable i.e. any
+      // std::cout calls inside the TEST_CASE body.
+      output_.printCapturedStdCout();
+    }
+  };
+
+  /**
+   * Validates that the TEST_CASE name is unique within gname and then stores
+   * the TestContext (ownership moves out of ctx) together with its arguments
+   * at the same index. Always returns true.
+   */
+  static bool registerTest(std::unique_ptr<TestContext>& ctx, std::string gname,
+                           std::vector<std::string> arg) {
+    const std::string caseName = ctx->getTestCaseName();
+    const uint64_t srcLine = ctx->getTestCaseLineNum();
+    const std::string srcFile = ctx->getTestCaseSrcFile();
+    UidRegistry::validateTestName(gname, caseName, srcFile, srcLine);
+
+    getTestCtxs()->emplace_back(std::move(ctx));
+    getArgs()->emplace_back(arg);
+    return true;
+  }
+
+  /** Returns ctx_ i.e. the TestContext which run() installed last. */
+  std::unique_ptr<TestContext>& getCurrContext() { return ctx_; };
+
+  /**
+   * This method returns a reference to a GroupConfig; this base version
+   * returns emptyConfig_. It gets overridden by the TEST_GROUP macro.
+   */
+  virtual const GroupConfig& getGroupConfig() { return emptyConfig_; }
+};
\ No newline at end of file
diff --git a/sst/test/include/framework/stats.hh b/sst/test/include/framework/stats.hh
new file mode 100644
index 0000000000..c54db58094
--- /dev/null
+++ b/sst/test/include/framework/stats.hh
@@ -0,0 +1,53 @@
+#pragma once
+
+#include <memory>
+
+#include "framework/output.hh"
+
+/**
+ * A Singleton class which records total number of tests, total failures and
+ * total successes.
+ */
+class Stats {
+ public:
+  /** Returns the singleton instance, which is created on the first call. */
+  static std::unique_ptr<Stats>& getInstance() {
+    static std::unique_ptr<Stats> ptr;
+    if (!ptr) {
+      ptr.reset(new Stats());
+    }
+    return ptr;
+  }
+  /** Counts one more executed test case. */
+  void recordTest() { testCount_ += 1; }
+  /** Counts one more failed test case. */
+  void recordFailure() { failures_ += 1; }
+  /** Counts one more successful test case. */
+  void recordSuccess() { success_ += 1; }
+  /** Returns the number of test cases recorded so far. */
+  uint64_t getTestCount() { return testCount_; }
+  /** Returns the number of failures recorded so far. */
+  uint64_t getFailureCount() { return failures_; }
+  /** Returns the number of successes recorded so far. */
+  uint64_t getSuccessCount() { return success_; }
+  /** Prints a "Stats:" heading followed by the three counters. */
+  void printStats() {
+    Output output;
+    output.output("", 0, Formatter::bold("\nStats:"));
+    const auto total = Formatter::blue(std::to_string(testCount_));
+    output.output(" ", 4, "Total Tests:", total);
+    const auto passed = Formatter::bright_green(std::to_string(success_));
+    output.output(" ", 4, "Tests Passed:", passed);
+    const auto failed = Formatter::bright_red(std::to_string(failures_));
+    output.output(" ", 4, "Tests Failed:", failed);
+  }
+ private:
+  /** Private: the only instance is created by getInstance(). */
+  Stats() {}
+  /** Total number of test cases. */
+  uint64_t testCount_ = 0;
+  /** Total number of test case failures. */
+  uint64_t failures_ = 0;
+  /** Total number of test case successes. */
+  uint64_t success_ = 0;
+};
\ No newline at end of file
diff --git a/sst/test/include/framework/uid.hh b/sst/test/include/framework/uid.hh
new file mode 100644
index 0000000000..ef4f491bb4
--- /dev/null
+++ b/sst/test/include/framework/uid.hh
@@ -0,0 +1,65 @@
+#pragma once
+
+#include <memory>
+#include <set>
+#include <string>
+
+#include "framework/output.hh"
+/**
+ * This Singleton class ensures that all TEST_GROUP(s) have unique names, and
+ * all TEST_CASE(s) in a TEST_GROUP have unique names.
+ */
+class UidRegistry {
+ public:
+  /**
+   * This method validates the uniqueness of a TEST_GROUP name. A duplicate
+   * is reported together with fname and line and terminates the program.
+   */
+  static void validateGroupName(std::string gname, std::string fname,
+                                uint64_t line) {
+    Output output;
+    if (isUnique(gname)) {
+      return;
+    }
+    output.output(" ", 0,
+                  Formatter::bold_bright_red("Duplicate TestGroup name:"),
+                  Formatter::bold("\"" + gname + "\""));
+    output.output("", 4, Formatter::bold("Source: "), fname, ':', line);
+    exit(EXIT_FAILURE);
+  }
+  /**
+   * This method validates the uniqueness of the TEST_CASE name within a
+   * TEST_GROUP. The id checked is "<gname>.<tname>"; a duplicate is reported
+   * together with fname and line and terminates the program.
+   */
+  static void validateTestName(std::string gname, std::string tname,
+                               std::string fname, uint64_t line) {
+    Output output;
+    const std::string uid = gname + '.' + tname;
+    if (isUnique(uid)) {
+      return;
+    }
+    output.output(
+        " ", 0,
+        Formatter::bold_bright_red("Duplicate TestCase name in TestGroup:"),
+        Formatter::bold("\"" + gname + "\""), "-",
+        Formatter::bold("\"" + tname + "\""));
+    output.output("", 4, Formatter::bold("Source: "), fname, ':', line);
+    exit(EXIT_FAILURE);
+  }
+ private:
+  UidRegistry() {}
+  /** Returns true the first time an id is seen and false afterwards. */
+  static bool isUnique(std::string id) {
+    // insert() reports through .second whether the id was newly added.
+    return getUidReg()->insert(id).second;
+  }
+  /** This method returns the singleton set holding all ids seen so far. */
+  static std::unique_ptr<std::set<std::string>>& getUidReg() {
+    static std::unique_ptr<std::set<std::string>> set;
+    if (!set) {
+      set.reset(new std::set<std::string>());
+    }
+    return set;
+  }
+};
\ No newline at end of file
diff --git a/sst/test/include/sstsimengtest.hh b/sst/test/include/sstsimengtest.hh
new file mode 100644
index 0000000000..7cdda8343b
--- /dev/null
+++ b/sst/test/include/sstsimengtest.hh
@@ -0,0 +1,10 @@
+#include "framework/macros/eval.hh"
+#include "framework/macros/group.hh"
+#include "framework/parser.hh"
+#include "framework/registry.hh"
+#include "framework/runner.hh"
+#include "framework/stats.hh"
+
+inline std::string appendBinDirPath(std::string binName) {
+  return std::string("execBin=") + SST_TEST_DIR + "/sstbinaries/" + binName;
+}  // yields "execBin=<SST_TEST_DIR>/sstbinaries/<binName>"
\ No newline at end of file
diff --git a/sst/test/main.cc b/sst/test/main.cc
new file mode 100644
index 0000000000..6f906b6468
--- /dev/null
+++ b/sst/test/main.cc
@@ -0,0 +1,19 @@
+#include "sstsimengtest.hh"
+
+int main(void) {
+  // Walk every registry entry, build each registered Runner and execute it.
+  for (const auto& entry : *Registry::getInstance()->getMap()) {
+    const std::vector<Registry::Factory> factories = entry.second;
+    for (const Registry::Factory& makeRunner : factories) {
+      std::unique_ptr<Runner> runner = makeRunner();
+      runner->run();
+    }
+  }
+  // Print the summary; a failing exit status signals failed test cases.
+  std::unique_ptr<Stats>& summary = Stats::getInstance();
+  summary->printStats();
+  if (summary->getFailureCount() > 0) {
+    exit(EXIT_FAILURE);
+  }
+  return 0;
+}
\ No newline at end of file
diff --git a/sst/test/src/tg0_llvm_assemble.cc b/sst/test/src/tg0_llvm_assemble.cc
new file mode 100644
index 0000000000..c3919dac3c
--- /dev/null
+++ b/sst/test/src/tg0_llvm_assemble.cc
@@ -0,0 +1,71 @@
+#include "sstsimengtest.hh"
+
+// Only loads and stores of various types and basic arithmetic instructions
+// are assembled here, as the SST testing framework only uses LLVM assembly to
+// test loads and stores. The source holds 38 instructions (see TEST_CASE).
+
+TEST_GROUP(TG0, "SSTSimEng_correctly_assembles_instructions_using_LLVM",
+           "fastL1WithParams_config.py", "withSrc=True",
+           R"(source=
+    mov x1, #1
+    mov x0, #0
+    add x1, x1, x1
+    sub x2, x1, x1
+    mul x2, x1, x1
+    sdiv x2, x1, x1
+    cmp x1, x1
+    fmov s0, 0.5
+    fmov s1, 1.5
+    fadd s2, s0, s1
+    fsub s2, s1, s0
+    fmul s2, s0, s1
+    fdiv s2, s1, s0
+    str s0, [x1]
+    str x0, [x1]
+    str w0, [x1]
+    strh w0, [x1]
+    strb w0, [x1]
+    ldr s0, [x1]
+    ldr x0, [x1]
+    ldr w2, [x1]
+    ldrh w2, [x1]
+    ldrb w2, [x1]
+    ldp s0, s1, [x0]
+    ldp x3, x4, [x0]
+    ldp w3, w4, [x0]
+    st1 {v0.b}[8], [x0], #1
+    ptrue p0.d
+    st1b {z0.b}, p0, [x0, x1]
+    st1d {z2.d}, p0, [z1.d]
+    st1w {z2.s}, p0, [x4]
+    addvl x1, x1, #1
+    str z1, [x1, #4, mul vl]
+    ld1r {v0.16b}, [x0]
+    ld1r {v1.8b}, [x0], 1
+    ld1 {v0.16b}, [x0]
+    ld1 {v2.16b, v3.16b}, [x0]
+    hlt #0
+  )")
+TEST_CASE(TG0, "Test_asssembly_of_simple_instructions") {
+  // Length of the prefix "[SimEng] retired:" plus the blank following it.
+  const size_t prefixLen = 18;
+  // Copy the "[SimEng] retired: <count>" line (without its newline) out of
+  // capturedStdout. Nothing is copied if the prefix cannot be found, in
+  // which case the substr call below throws std::out_of_range.
+  std::string retired = "";
+  size_t y = capturedStdout.find("[SimEng] retired:");
+  while (y < capturedStdout.length() && capturedStdout[y] != '\n') {
+    retired += capturedStdout[y];
+    y++;
+  }
+  // Everything behind the prefix is the numeric value representing the total
+  // number of retired instructions; convert it to uint64_t.
+  uint64_t retiredCount =
+      std::stoull(retired.substr(prefixLen, retired.length() - prefixLen));
+  std::cout << "Total instructions retired: " << retiredCount << std::endl;
+  // This should be equal to the total number of instructions in the test
+  // case i.e. the 38 instructions of the TEST_GROUP source.
+  EXPECT_EQ(retiredCount, (uint64_t)38);
+  // Dump the complete SST output as well.
+  std::cout << capturedStdout << std::endl;
+}
diff --git a/sst/test/src/tg1_load_store.cc b/sst/test/src/tg1_load_store.cc
new file mode 100644
index 0000000000..a505931a96
--- /dev/null
+++ b/sst/test/src/tg1_load_store.cc
@@ -0,0 +1,191 @@
+#include "sstsimengtest.hh"
+
+TEST_GROUP(TG1, "SSTSimEng_correctly_handles_load_and_store_instructions",
+           "fastL1WithParams_config.py", "withSrc=True",
+           R"(source= mov x1, #1 )");
+
+TEST_CASE(TG1, "load_of_different_size_from_memory_64bits", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr x1, [x0]
+    )",
+          "heap=348709988") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)348709988);
+}
+
+TEST_CASE(TG1, "load_of_different_size_from_memory_32bits", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr w1, [x0]
+    )",
+          "heap=23323") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)23323);
+}
+
+TEST_CASE(TG1, "load_of_different_size_from_memory_16bits", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrh w1, [x0]
+    )",
+          "heap=23214") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)23214);
+}
+
+TEST_CASE(TG1, "load_of_different_size_from_memory_8bits", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrb w1, [x0]
+    )", "heap=120") {
+  Parser p = Parser(capturedStdout);
+  std::vector<ParsedMemRead*> reads = p.getParsedMemReads();
+  // Byte-sized (ldrb) variant of the load_of_different_size_from_memory_*
+  // cases. The first parsed read stems from the heap address retrieval into
+  // x0, so only the last parsed read is compared with the heap value.
+  EXPECT_EQ(reads[reads.size() - 1]->data_, (uint64_t)120);
+}
+
+TEST_CASE(TG1, "multiple_loads_from_memory_64bit", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr x1, [x0]
+    ldr x1, [x0, #8]
+    ldr x1, [x0, #16]
+    )",
+          "heap=20,40,50") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Index from the end: leading reads come from the heap address retrieval.
+  const size_t readCount = memReads.size();
+  EXPECT_EQ(memReads[readCount - 3]->data_, (uint64_t)20);
+  EXPECT_EQ(memReads[readCount - 2]->data_, (uint64_t)40);
+  EXPECT_EQ(memReads[readCount - 1]->data_, (uint64_t)50);
+}
+
+TEST_CASE(TG1, "multiple_loads_from_memory_32bit", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr w1, [x0]
+    ldr w1, [x0, #8]
+    ldr w1, [x0, #16]
+    )",
+          "heap=20,40,50") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Index from the end: leading reads come from the heap address retrieval.
+  const size_t readCount = memReads.size();
+  EXPECT_EQ(memReads[readCount - 3]->data_, (uint64_t)20);
+  EXPECT_EQ(memReads[readCount - 2]->data_, (uint64_t)40);
+  EXPECT_EQ(memReads[readCount - 1]->data_, (uint64_t)50);
+}
+
+TEST_CASE(TG1, "multiple_loads_from_memory_16bit", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrh w1, [x0]
+    ldrh w1, [x0, #8]
+    ldrh w1, [x0, #16]
+    )",
+          "heap=20,40,50") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Index from the end: leading reads come from the heap address retrieval.
+  const size_t readCount = memReads.size();
+  EXPECT_EQ(memReads[readCount - 3]->data_, (uint64_t)20);
+  EXPECT_EQ(memReads[readCount - 2]->data_, (uint64_t)40);
+  EXPECT_EQ(memReads[readCount - 1]->data_, (uint64_t)50);
+}
+
+TEST_CASE(TG1, "multiple_loads_from_memory_8bit", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrb w1, [x0]
+    ldrb w1, [x0, #8]
+    ldrb w1, [x0, #16]
+    )",
+          "heap=20,40,50") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Index from the end: leading reads come from the heap address retrieval.
+  const size_t readCount = memReads.size();
+  EXPECT_EQ(memReads[readCount - 3]->data_, (uint64_t)20);
+  EXPECT_EQ(memReads[readCount - 2]->data_, (uint64_t)40);
+  EXPECT_EQ(memReads[readCount - 1]->data_, (uint64_t)50);
+}
+
+TEST_CASE(TG1, "store_then_load_64bit", "withSrc=True", R"(source=
+    mov x0, #1
+    mov x1, #2048
+    str x1, [x0]
+    ldr x2, [x0]
+    )") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  EXPECT_EQ(memReads.back()->data_, (uint64_t)2048);  // value written by str
+}
+
+TEST_CASE(TG1, "store_then_load_32bit", "withSrc=True", R"(source=
+    mov x0, #1
+    mov w1, #256
+    str w1, [x0]
+    ldr w2, [x0]
+    )") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  EXPECT_EQ(memReads.back()->data_, (uint64_t)256);  // value written by str
+}
+
+TEST_CASE(TG1, "store_then_load_16bit", "withSrc=True", R"(source=
+
+    mov x0, #1
+    mov w1, #64
+    strh w1, [x0]
+    ldrh w2, [x0]
+    )") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  EXPECT_EQ(memReads.back()->data_, (uint64_t)64);  // value written by strh
+}
+
+TEST_CASE(TG1, "store_then_load_8bit", "withSrc=True", R"(source=
+
+    mov x0, #1
+    mov w1, #8
+    strb w1, [x0]
+    ldrb w2, [x0]
+    )") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  EXPECT_EQ(memReads.back()->data_, (uint64_t)8);  // value written by strb
+}
\ No newline at end of file
diff --git a/sst/test/src/tg2_cache_access.cc b/sst/test/src/tg2_cache_access.cc
new file mode 100644
index 0000000000..2d1eff1269
--- /dev/null
+++ b/sst/test/src/tg2_cache_access.cc
@@ -0,0 +1,196 @@
+#include "sstsimengtest.hh"
+
+TEST_GROUP(TG2, "SSTSimEng_uses_cache_for_memory_access",
+           "fastL1WithParams_config.py", "withSrc=True",
+           R"(source= mov x1, #1 )");
+
+TEST_CASE(TG2, "cache_access_of_load_to_same_address", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+
+    # Setup
+    mov x1, #0
+    mov x2, #0
+    mov x5, #0
+
+    # First load
+    ldr x1, [x0]
+
+    # Buffer loop
+    loop:
+    cmp x5, #256
+    b.eq end
+    add x5, x5, #1
+    b loop
+
+    # Last Load
+    end:
+    # Adding dependency to prevent any load being speculatively executed due to branch misprediction.
+    mov x6, #0
+    add x6, x6, x0
+    ldr x1, [x6]
+    )",
+          "heap=2048") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Index 0 is skipped: that read is caused by the heap address retrieval
+  // into x0, so index 1 is taken as the program's first load.
+  ParsedMemRead* firstLoad = memReads[1];
+  uint64_t firstLatency = firstLoad->endCycle_ - firstLoad->startCycle_;
+  uint64_t firstData = firstLoad->data_;
+  // The final entry is used for the repeated load so that a load executed
+  // because of branch prediction is not picked up instead.
+  ParsedMemRead* lastLoad = memReads.back();
+  uint64_t lastLatency = lastLoad->endCycle_ - lastLoad->startCycle_;
+  uint64_t lastData = lastLoad->data_;
+  EXPECT_LT(lastLatency, firstLatency);
+  EXPECT_EQ(firstData, lastData);
+}
+
+TEST_CASE(TG2, "load_after_store_on_same_address_should_return_from_cache",
+          "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+
+    # Setup
+    mov x2, #2048
+
+    # Load from heap - done to determine max clock cycle without any caching.
+    ldr x1, [x0]
+
+    # Store value in x3 at address in x2
+    str x1, [x2]
+
+    mov x4, #0
+    mov x5, #0
+
+    # Buffer loop
+    loop:
+    cmp x5, #256
+    b.eq end
+    add x5, x5, #1
+    b loop
+
+    # last load
+    end:
+    # Adding dependency to prevent any load being speculatively executed due to branch misprediction.
+    mov x6, #0
+    add x6, x6, #1024
+    add x6, x6, x6
+    ldr x4, [x6]
+
+    )",
+          "heap=1024") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Index 0 (heap address retrieval into x0) is skipped. The program stores
+  // x1 to address 2048 and reloads it through x6 (1024 + 1024) at the end.
+  ParsedMemRead* firstLoad = memReads[1];
+  uint64_t firstLatency = firstLoad->endCycle_ - firstLoad->startCycle_;
+  uint64_t firstData = firstLoad->data_;
+  ParsedMemRead* lastLoad = memReads.back();
+  uint64_t lastLatency = lastLoad->endCycle_ - lastLoad->startCycle_;
+  uint64_t lastData = lastLoad->data_;
+  EXPECT_LT(lastLatency, firstLatency);
+  EXPECT_EQ((uint64_t)1024, firstData);
+  EXPECT_EQ((uint64_t)1024, lastData);
+}
+
+TEST_CASE(TG2,
+          "multiple_loads_after_stores_on_same_address_should_return_"
+          "from_cache",
+          "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+
+    # Setup
+    mov x2, #2048
+    mov x5, #0
+
+    # Load from heap - done to determine max clock cycle without any caching.
+    ldr x1, [x0]
+
+    # Store value in x3 at address in x2
+    str x1, [x2]
+    str x1, [x2, #8]
+    str x1, [x2, #16]
+    str x1, [x2, #24]
+    str x1, [x2, #32]
+    
+    # Buffer loop
+    loop:
+    cmp x5, #512
+    b.eq end
+    add x5, x5, #1
+    b loop
+
+    # last load
+    end:
+    # Adding dependency to prevent any load being speculatively executed due to branch misprediction.
+    mov x6, #0
+    add x6, x6, #1024
+    add x6, x6, x6
+    ldr x4, [x6]
+    ldr x4, [x6, #8]
+    ldr x4, [x6, #16]
+    ldr x4, [x6, #24]
+    ldr x4, [x6, #32]
+
+    )",
+          "heap=1024") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Index 0 is skipped: that read is caused by the heap address retrieval
+  // into x0.
+
+  // Index 1 is the load of the heap value; it must equal the data the heap
+  // was initialised with.
+  uint64_t heapLoadData = memReads[1]->data_;
+  EXPECT_EQ((uint64_t)1024, heapLoadData);
+
+  // fastL1WithParams_config.py specifies an access latency of 2 clock cycles.
+  // Aggregating the data and delivering it to SimEng costs one further cycle,
+  // and this latency is deterministic for every load served from the cache.
+  // A load is therefore treated as a cache hit when its latency is <= 3
+  // cycles. The five loads under test are the last five parsed reads.
+
+  // load at addr: [x6]
+  ParsedMemRead* cachedLoad0 = memReads[memReads.size() - 5];
+  uint64_t latency0 = cachedLoad0->endCycle_ - cachedLoad0->startCycle_;
+  uint64_t data0 = cachedLoad0->data_;
+  EXPECT_LTE(latency0, (uint64_t)3);
+  EXPECT_EQ((uint64_t)1024, data0);
+  // load at addr: [x6, #8]
+  ParsedMemRead* cachedLoad1 = memReads[memReads.size() - 4];
+  uint64_t latency1 = cachedLoad1->endCycle_ - cachedLoad1->startCycle_;
+  uint64_t data1 = cachedLoad1->data_;
+  EXPECT_LTE(latency1, (uint64_t)3);
+  EXPECT_EQ((uint64_t)1024, data1);
+  // load at addr: [x6, #16]
+  ParsedMemRead* cachedLoad2 = memReads[memReads.size() - 3];
+  uint64_t latency2 = cachedLoad2->endCycle_ - cachedLoad2->startCycle_;
+  uint64_t data2 = cachedLoad2->data_;
+  EXPECT_LTE(latency2, (uint64_t)3);
+  EXPECT_EQ((uint64_t)1024, data2);
+  // load at addr: [x6, #24]
+  ParsedMemRead* cachedLoad3 = memReads[memReads.size() - 2];
+  uint64_t latency3 = cachedLoad3->endCycle_ - cachedLoad3->startCycle_;
+  uint64_t data3 = cachedLoad3->data_;
+  EXPECT_LTE(latency3, (uint64_t)3);
+  EXPECT_EQ((uint64_t)1024, data3);
+  // load at addr: [x6, #32]
+  ParsedMemRead* cachedLoad4 = memReads[memReads.size() - 1];
+  uint64_t latency4 = cachedLoad4->endCycle_ - cachedLoad4->startCycle_;
+  uint64_t data4 = cachedLoad4->data_;
+  EXPECT_LTE(latency4, (uint64_t)3);
+  EXPECT_EQ((uint64_t)1024, data4);
+}
\ No newline at end of file
diff --git a/sst/test/src/tg3_request_split.cc b/sst/test/src/tg3_request_split.cc
new file mode 100644
index 0000000000..ccd02eaa17
--- /dev/null
+++ b/sst/test/src/tg3_request_split.cc
@@ -0,0 +1,261 @@
+#include "sstsimengtest.hh"
+
+TEST_GROUP(TG3, "SSTSimEng_splits_requests_larger_than_cache_line_width",
+           "fastL1WithParams_config.py", "withSrc=True",
+           R"(source= mov x1, #1 )");
+
+TEST_CASE(TG3, "Clw_8_bits_req_size_64bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr x1, [x0]
+    )",
+          "heap=10", "clw=1") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)8);
+}
+
+TEST_CASE(TG3, "Clw_8_bits_req_size_32bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr w1, [x0]
+    )",
+          "heap=10", "clw=1") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)4);
+}
+
+TEST_CASE(TG3, "Clw_8_bits_req_size_16bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrh w1, [x0]
+    )",
+          "heap=10", "clw=1") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)2);
+}
+
+TEST_CASE(TG3, "Clw_8_bits_req_size_8bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrb w1, [x0]
+    )",
+          "heap=10", "clw=1") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_16_bits_req_size_64bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr x1, [x0]
+    )",
+          "heap=10", "clw=2") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)4);
+}
+
+TEST_CASE(TG3, "Clw_16_bits_req_size_32bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr w1, [x0]
+    )",
+          "heap=10", "clw=2") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)2);
+}
+
+TEST_CASE(TG3, "Clw_16_bits_req_size_16bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrh w1, [x0]
+    )",
+          "heap=10", "clw=2") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_16_bits_req_size_8bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrb w1, [x0]
+    )",
+          "heap=10", "clw=2") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_32_bits_req_size_64bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr x1, [x0]
+    )",
+          "heap=10", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)2);
+}
+
+TEST_CASE(TG3, "Clw_32_bits_req_size_32bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr w1, [x0]
+    )",
+          "heap=10", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_32_bits_req_size_16bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrh w1, [x0]
+    )",
+          "heap=10", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_32_bits_req_size_8bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrb w1, [x0]
+    )",
+          "heap=10", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_64_bits_req_size_64bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr x1, [x0]
+    )",
+          "heap=10", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_64_bits_req_size_32bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldr w1, [x0]
+    )",
+          "heap=10", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_64_bits_req_size_16bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrh w1, [x0]
+    )",
+          "heap=10", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
+
+TEST_CASE(TG3, "Clw_64_bits_req_size_8bits", "withSrc=True", R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    ldrb w1, [x0]
+    )",
+          "heap=10", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  EXPECT_EQ(lastRead->data_, (uint64_t)10);
+  EXPECT_EQ(lastRead->numReqs_, (uint64_t)1);
+}
diff --git a/sst/test/src/tg4_request_misaligned.cc b/sst/test/src/tg4_request_misaligned.cc
new file mode 100644
index 0000000000..082ae16620
--- /dev/null
+++ b/sst/test/src/tg4_request_misaligned.cc
@@ -0,0 +1,445 @@
+#include "sstsimengtest.hh"
+
+TEST_GROUP(TG4, "SSTSimEng_handles_misaligned_memory_requests",
+           "fastL1WithParams_config.py", "withSrc=True",
+           R"(source= mov x1, #1 )");
+
+TEST_CASE(TG4, "16_bit_cache_line_to_retrieve_16_bit_data", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #127
+    ldrh w1, [x0]
+    strh w1, [x4]
+    ldrh w1, [x4]
+    )",
+          "heap=128", "clw=2") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)128);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "16_bit_cache_line_to_retrieve_32_bit_data", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #127
+    ldr w1, [x0]
+    str w1, [x4]
+    ldr w1, [x4]
+    )",
+          "heap=128", "clw=2") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)128);
+  EXPECT_EQ(requestCount, (uint64_t)3);
+}
+
+TEST_CASE(TG4, "16_bit_cache_line_to_retrieve_64_bit_data", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #127
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=12", "clw=2") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)12);
+  EXPECT_EQ(requestCount, (uint64_t)5);
+}
+
+TEST_CASE(TG4, "32_bit_cache_line_to_retrieve_16_bit_data", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #127
+    ldrh w1, [x0]
+    strh w1, [x4]
+    ldrh w1, [x4]
+    )",
+          "heap=128", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)128);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "32_bit_cache_line_to_retrieve_32_bit_data_#1", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #126
+    ldr w1, [x0]
+    str w1, [x4]
+    ldr w1, [x4]
+    )",
+          "heap=256", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "32_bit_cache_line_to_retrieve_32_bit_data_#2", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #125
+    ldr w1, [x0]
+    str w1, [x4]
+    ldr w1, [x4]
+    )",
+          "heap=256", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "32_bit_cache_line_to_retrieve_64_bit_data_#1", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #127
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)3);
+}
+
+TEST_CASE(TG4, "32_bit_cache_line_to_retrieve_64_bit_data_#2", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #126
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)3);
+}
+
+TEST_CASE(TG4, "32_bit_cache_line_to_retrieve_64_bit_data_#3", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #125
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=4") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)3);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_16_bit_data", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #127
+    ldrh w1, [x0]
+    strh w1, [x4]
+    ldrh w1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser p = Parser(capturedStdout);
+  std::vector<ParsedMemRead*> reads = p.getParsedMemReads();
+  // The last parsed read is the 16-bit (ldrh) load at address 127: its two
+  // bytes straddle the 8-byte line boundary at 128, hence two requests.
+  uint64_t data = reads[reads.size() - 1]->data_;
+  uint64_t numReqs = reads[reads.size() - 1]->numReqs_;
+  EXPECT_EQ(data, (uint64_t)256);
+  EXPECT_EQ(numReqs, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_32_bit_data_#1", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #127
+    ldr w1, [x0]
+    str w1, [x4]
+    ldr w1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_32_bit_data_#2", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #126
+    ldr w1, [x0]
+    str w1, [x4]
+    ldr w1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_32_bit_data_#3", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #125
+    ldr w1, [x0]
+    str w1, [x4]
+    ldr w1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_64_bit_data_#1", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #127
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_64_bit_data_#2", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #126
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_64_bit_data_#3", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #125
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_64_bit_data_#4", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #124
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_64_bit_data_#5", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #123
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_64_bit_data_#6", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #122
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
+
+TEST_CASE(TG4, "64_bit_cache_line_to_retrieve_64_bit_data_#7", "withSrc=True",
+          R"(source=
+    # Get heap address
+    mov x0, 0
+    mov x8, 214
+    svc #0
+    mov x4, #121
+    ldr x1, [x0]
+    str x1, [x4]
+    ldr x1, [x4]
+    )",
+          "heap=256", "clw=8") {
+  Parser parser(capturedStdout);
+  std::vector<ParsedMemRead*> memReads = parser.getParsedMemReads();
+  // Inspect the last parsed read; the first is the heap address retrieval.
+  ParsedMemRead* lastRead = memReads.back();
+  uint64_t loadedValue = lastRead->data_;
+  uint64_t requestCount = lastRead->numReqs_;
+  EXPECT_EQ(loadedValue, (uint64_t)256);
+  EXPECT_EQ(requestCount, (uint64_t)2);
+}
\ No newline at end of file
diff --git a/sst/test/src/tg5_benchmarks.cc b/sst/test/src/tg5_benchmarks.cc
new file mode 100644
index 0000000000..8c44c08c0f
--- /dev/null
+++ b/sst/test/src/tg5_benchmarks.cc
@@ -0,0 +1,39 @@
+#include "sstsimengtest.hh"
+// TG5 runs benchmark binaries; presumably configured by fastL1ForBinaries.py.
+TEST_GROUP(TG5, "Running_benchmarks_on_SSTSimEng", "fastL1ForBinaries.py");
+
+TEST_CASE(TG5, "Running_stream_triad", appendBinDirPath("stream_t")) {
+  Parser p = Parser(capturedStdout);
+  std::vector<std::string> outputLines = p.getOutputLines();
+  STR_CONTAINS(outputLines.at(0), "Solution Validates");  // at(): no UB if empty
+}
+
+TEST_CASE(TG5, "Running_stream_triad_sve", appendBinDirPath("stream_t_sve")) {
+  Parser p = Parser(capturedStdout);
+  std::vector<std::string> outputLines = p.getOutputLines();
+  STR_CONTAINS(outputLines.at(0), "Solution Validates");  // at(): no UB if empty
+}
+
+// Very basic smoke tests for cachebw: they only check that cachebw runs on
+// SSTSimEng. cachebw needs its size argument tuned to hit specific cache
+// levels, so more benchmark tests should be added as more cache models are
+// added. at(0) throws (instead of UB) if no output lines were captured.
+TEST_CASE(TG5, "Running_cachebw_static", appendBinDirPath("cachebw_static"),
+          "args=32 100") {
+  Parser p = Parser(capturedStdout);
+  std::vector<std::string> outputLines = p.getOutputLines();
+  STR_CONTAINS(outputLines.at(0), "n");
+  STR_CONTAINS(outputLines.at(0), "reps");
+  STR_CONTAINS(outputLines.at(0), "bytes");
+  STR_CONTAINS(outputLines.at(0), "bandwidth");
+}
+
+TEST_CASE(TG5, "Running_cachebw_static_sve",
+          appendBinDirPath("cachebw_static_sve"), "args=32 100") {
+  Parser p = Parser(capturedStdout);
+  std::vector<std::string> outputLines = p.getOutputLines();
+  STR_CONTAINS(outputLines.at(0), "n");  // at(): throws, not UB, if empty
+  STR_CONTAINS(outputLines.at(0), "reps");
+  STR_CONTAINS(outputLines.at(0), "bytes");
+  STR_CONTAINS(outputLines.at(0), "bandwidth");
+}
diff --git a/sst/test/sstbinaries/cachebw_static b/sst/test/sstbinaries/cachebw_static
new file mode 100755
index 0000000000..13c6e6e54d
Binary files /dev/null and b/sst/test/sstbinaries/cachebw_static differ
diff --git a/sst/test/sstbinaries/cachebw_static_sve b/sst/test/sstbinaries/cachebw_static_sve
new file mode 100755
index 0000000000..122abedc9d
Binary files /dev/null and b/sst/test/sstbinaries/cachebw_static_sve differ
diff --git a/sst/test/sstbinaries/stream_t b/sst/test/sstbinaries/stream_t
new file mode 100755
index 0000000000..667922930b
Binary files /dev/null and b/sst/test/sstbinaries/stream_t differ
diff --git a/sst/test/sstbinaries/stream_t_sve b/sst/test/sstbinaries/stream_t_sve
new file mode 100755
index 0000000000..b11cebf56d
Binary files /dev/null and b/sst/test/sstbinaries/stream_t_sve differ
diff --git a/sst/test/sstconfigs/fastL1ForBinaries.py b/sst/test/sstconfigs/fastL1ForBinaries.py
new file mode 100644
index 0000000000..7774b43132
--- /dev/null
+++ b/sst/test/sstconfigs/fastL1ForBinaries.py
@@ -0,0 +1,93 @@
+import sst
+import sys
+import os
+
+def split(param: str) -> list[str]:
+    return param.split("=")
+
+def parseParams(params: list[str]):
+    out = {
+        "withSrc": False,
+        "source": "",
+        "clw": 8,
+        "heap": "",
+        "model": "",
+        "args": "",
+        "execBin": ""
+    }
+    for param in params:
+        key, value = split(param)
+        if (key == "withSrc"):
+            out[key] = value == "True"
+        else:
+            out[key] = value
+    return out
+
+DEBUG_L1 = 0  # "debug" setting passed to the L1 cache
+DEBUG_MEM = 0  # "debug" setting passed to the memory controller
+DEBUG_LEVEL = 10  # "debug_level" passed to both of the above
+
+params = parseParams(sys.argv[1:])  # settings from "key=value" script arguments
+
+# SimEng core that runs the executable named by execBin: only model, execBin,
+# args and clw come from the parsed settings; source/heap stay empty.
+cpu = sst.Component("core", "sstsimeng.simengcore")
+cpu.addParams({
+    "simeng_config_path": params["model"],
+    "executable_path": params["execBin"],
+    "executable_args": params["args"],
+    "clock" : "1.8GHz",
+    "max_addr_memory": 2*1024*1024*1024-1,  # 2 GiB - 1
+    "cache_line_width": params["clw"],
+    "source": "",
+    "assemble_with_source": False,
+    "heap": "",
+    "debug": False
+})
+
+iface = cpu.setSubComponent("memory", "memHierarchy.standardInterface")  # fills the core's "memory" slot
+
+l1cache = sst.Component("l1cache.mesi", "memHierarchy.Cache")
+l1cache.addParams({
+      "access_latency_cycles" : "2",
+      "cache_frequency" : "1.8Ghz",
+      "replacement_policy" : "nmru",
+      "coherence_protocol" : "MESI",
+      "associativity" : "4",
+      "cache_line_size" : params["clw"],  # same value the core gets as "cache_line_width"
+      "debug" : DEBUG_L1,
+      "debug_level" : DEBUG_LEVEL,
+      "verbose": "2",
+      "L1" : "1",
+      "cache_size" : "64KiB"
+})
+
+# Explicitly set the link subcomponents instead of having cache figure them out based on connected port names
+l1toC = l1cache.setSubComponent("cpulink", "memHierarchy.MemLink")  # connected to iface below
+l1toM = l1cache.setSubComponent("memlink", "memHierarchy.MemLink")  # connected to Mtol1 below
+
+# Memory controller
+memctrl = sst.Component("memory", "memHierarchy.MemController")
+memctrl.addParams({
+    "clock" : "1.8GHz",
+    "request_width" : "64",
+    "debug" : DEBUG_MEM,
+    "debug_level" : DEBUG_LEVEL,
+    "addr_range_end" : 2*1024*1024*1024-1,  # 2 GiB - 1, same value as the core's "max_addr_memory"
+})
+Mtol1 = memctrl.setSubComponent("cpulink", "memHierarchy.MemLink")  # connected to l1toM below
+
+# Memory model: simpleMem set as the "backend" of the controller above
+memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
+memory.addParams({
+      "access_time" : "0ps",
+      "mem_size" : "2GiB",
+      "request_width": "64"
+})
+
+# Define the simulation links ("0ps" is given for every port)
+link_cpu_cache_link = sst.Link("link_cpu_cache_link")  # core interface <-> L1 "cpulink"
+link_cpu_cache_link.connect( (iface, "port", "0ps"), (l1toC, "port", "0ps") )
+link_mem_bus_link = sst.Link("link_mem_bus_link")  # L1 "memlink" <-> memory controller "cpulink"
+link_mem_bus_link.connect( (l1toM, "port", "0ps"), (Mtol1, "port", "0ps") )
+
diff --git a/sst/test/sstconfigs/fastL1WithParams_config.py b/sst/test/sstconfigs/fastL1WithParams_config.py
new file mode 100644
index 0000000000..4237aef02b
--- /dev/null
+++ b/sst/test/sstconfigs/fastL1WithParams_config.py
@@ -0,0 +1,95 @@
+import sst
+import sys
+import os
+
+DEBUG_L1 = 0  # "debug" setting passed to the L1 cache
+DEBUG_MEM = 0  # "debug" setting passed to the memory controller
+DEBUG_LEVEL = 10  # "debug_level" passed to both of the above
+
+def split(param: str) -> list[str]:
+    return param.split("=")
+
+def parseParams(params: list[str]):
+    out = {
+        "withSrc": False,
+        "source": "",
+        "clw": 8,
+        "heap": "",
+        "model": "",
+        "args": "",
+        "execBin": ""
+    }
+    for param in params:
+        key, value = split(param)
+        if (key == "withSrc"):
+            out[key] = value == "True"
+        else:
+            out[key] = value
+    return out
+
+DEBUG_L1 = 0  # repeats the assignment made near the top of the file (same value)
+DEBUG_MEM = 0  # repeats the assignment made near the top of the file (same value)
+DEBUG_LEVEL = 10  # repeats the assignment made near the top of the file (same value)
+
+params = parseParams(sys.argv[1:])  # settings from "key=value" script arguments
+
+cpu = sst.Component("core", "sstsimeng.simengcore")  # SimEng core; no executable path, uses source/withSrc/heap
+cpu.addParams({
+    "simeng_config_path": params["model"],
+    "executable_path": "",
+    "executable_args": "",
+    "clock" : "1.8GHz",
+    "max_addr_memory": 2*1024*1024*1024-1,  # 2 GiB - 1
+    "cache_line_width": params["clw"],
+    "source": params["source"],
+    "assemble_with_source": params["withSrc"],  # bool produced by parseParams
+    "heap": params["heap"],
+    "debug": True
+})
+
+iface = cpu.setSubComponent("memory", "memHierarchy.standardInterface")  # fills the core's "memory" slot
+
+l1cache = sst.Component("l1cache.mesi", "memHierarchy.Cache")
+l1cache.addParams({
+      "access_latency_cycles" : "2",
+      "cache_frequency" : "1.8Ghz",
+      "replacement_policy" : "nmru",
+      "coherence_protocol" : "MESI",
+      "associativity" : "4",
+      "cache_line_size" : params["clw"],  # same value the core gets as "cache_line_width"
+      "debug" : DEBUG_L1,
+    "debug_level" : DEBUG_LEVEL,
+      "verbose": "2",
+      "L1" : "1",
+      "cache_size" : "64KiB"
+})
+
+# Explicitly set the link subcomponents instead of having cache figure them out based on connected port names
+l1toC = l1cache.setSubComponent("cpulink", "memHierarchy.MemLink")  # connected to iface below
+l1toM = l1cache.setSubComponent("memlink", "memHierarchy.MemLink")  # connected to Mtol1 below
+
+# Memory controller
+memctrl = sst.Component("memory", "memHierarchy.MemController")
+memctrl.addParams({
+    "clock" : "1.8GHz",
+    "request_width" : "64",
+    "debug" : DEBUG_MEM,
+    "debug_level" : DEBUG_LEVEL,
+    "addr_range_end" : 2*1024*1024*1024-1,  # 2 GiB - 1, same value as the core's "max_addr_memory"
+})
+Mtol1 = memctrl.setSubComponent("cpulink", "memHierarchy.MemLink")  # connected to l1toM below
+
+# Memory model: simpleMem set as the "backend" of the controller above
+memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
+memory.addParams({
+      "access_time" : "0ps",
+      "mem_size" : "2GiB",
+      "request_width": "64"
+})
+
+# Define the simulation links ("0ps" is given for every port)
+link_cpu_cache_link = sst.Link("link_cpu_cache_link")  # core interface <-> L1 "cpulink"
+link_cpu_cache_link.connect( (iface, "port", "0ps"), (l1toC, "port", "0ps") )
+link_mem_bus_link = sst.Link("link_mem_bus_link")  # L1 "memlink" <-> memory controller "cpulink"
+link_mem_bus_link.connect( (l1toM, "port", "0ps"), (Mtol1, "port", "0ps") )
+