Skip to content

[TEST] Bring back vpux-translate #59

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/job_tests_unit_mlir_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,15 @@ jobs:
echo "PROF_PARSER_PATH=$(realpath "${OPENVINO_INSTALL_DIR}/tools/prof_parser")" >> $GITHUB_ENV
echo "LSP_SERVER_PATH=$(realpath "${OPENVINO_INSTALL_DIR}/tools/npu-lsp-server")" >> $GITHUB_ENV
echo "VPUX_OPT_PATH=$(realpath "${OPENVINO_INSTALL_DIR}/tools/vpux-opt")" >> $GITHUB_ENV
echo "VPUX_TRANSLATE_PATH=$(realpath "${OPENVINO_INSTALL_DIR}/tools/vpux-translate")" >> $GITHUB_ENV
chmod +x ${OPENVINO_INSTALL_DIR}/setupvars.sh
chmod +x ${OPENVINO_INSTALL_DIR}/tests/FileCheck
chmod +x ${OPENVINO_INSTALL_DIR}/tests/flatc
chmod +x ${OPENVINO_INSTALL_DIR}/tests/not
chmod +x ${OPENVINO_INSTALL_DIR}/tools/prof_parser/prof_parser
chmod +x ${OPENVINO_INSTALL_DIR}/tools/npu-lsp-server/npu-lsp-server
chmod +x ${OPENVINO_INSTALL_DIR}/tools/vpux-opt/vpux-opt
chmod +x ${OPENVINO_INSTALL_DIR}/tools/vpux-translate/vpux-translate

- name: Run OV NPU Unit tests
if: ${{ inputs.run-unit-tests }}
Expand All @@ -78,14 +80,14 @@ jobs:
if: ${{ inputs.run-lit-tests }}
run: |
source ${OPENVINO_INSTALL_DIR}/setupvars.sh
export PATH=$PATH:${PROF_PARSER_PATH}:${LSP_SERVER_PATH}:${VPUX_OPT_PATH}
export PATH=$PATH:${PROF_PARSER_PATH}:${LSP_SERVER_PATH}:${VPUX_OPT_PATH}:${VPUX_TRANSLATE_PATH}
cd ${OPENVINO_INSTALL_DIR}/tests/lit-tests
python3 ./lit-tool/lit.py -v --param arch=NPU37XX NPU/NPU

# Workflow step: runs the LIT suite under NPU/NPU with the arch parameter set to NPU40XX.
# The step only executes when the run-lit-tests workflow input evaluates to true.
# NOTE(review): the two consecutive PATH exports look like the before/after lines of the
# rendered diff; the second is a superset of the first (it appends VPUX_TRANSLATE_PATH).
- name: Run NPU40XX LIT tests
if: ${{ inputs.run-lit-tests }}
run: |
source ${OPENVINO_INSTALL_DIR}/setupvars.sh
export PATH=$PATH:${PROF_PARSER_PATH}:${LSP_SERVER_PATH}:${VPUX_OPT_PATH}
export PATH=$PATH:${PROF_PARSER_PATH}:${LSP_SERVER_PATH}:${VPUX_OPT_PATH}:${VPUX_TRANSLATE_PATH}
cd ${OPENVINO_INSTALL_DIR}/tests/lit-tests
python3 ./lit-tool/lit.py -v --param arch=NPU40XX NPU/NPU
187 changes: 187 additions & 0 deletions tests/lit/NPU/backend/VPUIP_37XX.mlir

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//
// Copyright (C) 2024 Intel Corporation.
// SPDX-License-Identifier: Apache-2.0
//

// RUN: vpux-translate --vpu-arch=NPU37XX --import-IE ./dynamic_broadcast_with_shapeOf.xml | FileCheck %s

// CHECK: #C = affine_map<(d0) -> (d0)>
// CHECK: #NCHW = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
// CHECK: net.NetworkInfo entryPoint : @main inputsInfo : {
// CHECK: DataInfo "input_0" : tensor<1x3x?x?xf32
// CHECK: DataInfo "input_1" : tensor<?xf32
// CHECK: } outputsInfo : {
// CHECK: DataInfo "Broadcast_60" friendlyName = "Result_64" : tensor<1x3x?x?xf32
// CHECK: }
// CHECK: func.func @main(%arg0: tensor<1x3x?x?xf32, {bounds = #const.OpaqueI64Elements<[1, 3, 10, 16]> : tensor<4xsi64>, order = #NCHW}>, %arg1: tensor<?xf32, {bounds = #const.OpaqueI64Elements<[10]> : tensor<1xsi64>, order = #C}>) -> tensor<1x3x?x?xf32, {bounds = #const.OpaqueI64Elements<[1, 3, 10, 16]> : tensor<4xsi64>, order = #NCHW}> {
// CHECK: [[SHAPEOF:%.+]] = IE.ShapeOf(%arg0) {dstElemType = si64} : tensor<1x3x?x?xf32, {bounds = #const.OpaqueI64Elements<[1, 3, 10, 16]> : tensor<4xsi64>, order = #NCHW}> -> tensor<4xsi64>
// CHECK: [[BROADCAST:%.+]] = IE.DynamicBroadcast(%arg1, [[SHAPEOF]]) {mode = #IE.broadcast_type<NUMPY>, output_bounds = [1, 3, 10, 16], output_shape = [1, 3, -9223372036854775808, -9223372036854775808]} : tensor<?xf32, {bounds = #const.OpaqueI64Elements<[10]> : tensor<1xsi64>, order = #C}>, tensor<4xsi64> -> tensor<1x3x?x?xf32, {bounds = #const.OpaqueI64Elements<[1, 3, 10, 16]> : tensor<4xsi64>, order = #NCHW}>
// CHECK: return [[BROADCAST]] : tensor<1x3x?x?xf32, {bounds = #const.OpaqueI64Elements<[1, 3, 10, 16]> : tensor<4xsi64>, order = #NCHW}>
// CHECK: }
24 changes: 24 additions & 0 deletions tests/lit/NPU/conversion/pipelines/dynamic_reshape.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//
// Copyright (C) 2024 Intel Corporation.
// SPDX-License-Identifier: Apache-2.0
//

// RUN: vpux-translate --vpu-arch=NPU37XX --import-IE ./dynamic_reshape.xml | FileCheck %s

// CHECK: module @dynamic_reshape {
// CHECK: net.NetworkInfo entryPoint : @main inputsInfo : {
// CHECK: DataInfo "Parameter_1" : tensor<1x8x?x?xf32
// CHECK: } outputsInfo : {
// CHECK: DataInfo "Reshape_5" friendlyName = "Result_12" : tensor<1x8x?x?x1xf32
// CHECK: }
// CHECK: func.func @main([[ARG:%.*]]: tensor<1x8x?x?xf32, {bounds = #const.OpaqueI64Elements<[1, 8, 48, 48]> : tensor<4xsi64>, order = #NCHW}>)
// CHECK-SAME: -> tensor<1x8x?x?x1xf32, {bounds = #const.OpaqueI64Elements<[1, 8, 48, 48, 1]> : tensor<5xsi64>, order = #NCDHW}> {
// CHECK: [[CST:%.+]] = const.Declare tensor<5xsi64> = dense<[1, 8, 0, 0, 1]> : tensor<5xsi64>
// CHECK: [[RESHAPE:%.+]] = IE.DynamicReshape([[ARG]], [[CST]]) {
// CHECK-SAME: output_bounds = [1, 8, 48, 48, 1]
// CHECK-SAME: output_shape = [1, 8, -9223372036854775808, -9223372036854775808, 1]
// CHECK-SAME: } : tensor<1x8x?x?xf32, {bounds = #const.OpaqueI64Elements<[1, 8, 48, 48]> : tensor<4xsi64>, order = #NCHW}>, tensor<5xsi64>
// CHECK-SAME: -> tensor<1x8x?x?x1xf32, {bounds = #const.OpaqueI64Elements<[1, 8, 48, 48, 1]> : tensor<5xsi64>, order = #NCDHW}>
// CHECK: return [[RESHAPE]] : tensor<1x8x?x?x1xf32, {bounds = #const.OpaqueI64Elements<[1, 8, 48, 48, 1]> : tensor<5xsi64>, order = #NCDHW}>
// CHECK: }
// CHECK: }
28 changes: 28 additions & 0 deletions tests/lit/NPU/conversion/pipelines/dynamic_strided_slice.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
//
// Copyright (C) 2024 Intel Corporation.
// SPDX-License-Identifier: Apache-2.0
//

// RUN: vpux-translate --vpu-arch=NPU37XX --import-IE ./slice.xml | FileCheck %s

// CHECK: net.NetworkInfo entryPoint : @main inputsInfo : {
// CHECK: DataInfo "param_node_0" tensorNames = ["param_node_0"] : tensor<?xf32
// CHECK: } outputsInfo : {
// CHECK: DataInfo "StridedSlice_5" friendlyName = "Result_6" : tensor<?xf32
// CHECK: }
// CHECK: func.func @main([[ARG:%.+]]: tensor<?xf32, {bounds = #const.OpaqueI64Elements<[32]> : tensor<1xsi64>, order = #C}>)
// CHECK-SAME: -> tensor<?xf32, {bounds = #const.OpaqueI64Elements<[19]> : tensor<1xsi64>, order = #C}> {
// CHECK: [[BEGINS:%.+]] = const.Declare tensor<1xsi64> = dense<1> : tensor<1xsi64>
// CHECK: [[ENDS:%.+]] = const.Declare tensor<1xsi64> = dense<20> : tensor<1xsi64>
// CHECK: [[STRIDES:%.+]] = const.Declare tensor<1xsi64> = dense<1> : tensor<1xsi64>
// CHECK: [[SLICE:%.+]] = IE.StridedSlice([[ARG]], [[BEGINS]], [[ENDS]], [[STRIDES]]) {
// CHECK-SAME: begin_mask = [0],
// CHECK-SAME: ellipsis_mask = [],
// CHECK-SAME: end_mask = [0],
// CHECK-SAME: new_axis_mask = [],
// CHECK-SAME: operandSegmentSizes = array<i32: 1, 1, 1, 1>,
// CHECK-SAME: shrink_axis_mask = [0]
// CHECK-SAME: } : tensor<?xf32, {bounds = #const.OpaqueI64Elements<[32]> : tensor<1xsi64>, order = #C}>, tensor<1xsi64>, tensor<1xsi64>,
// CHECK-SAME: tensor<1xsi64> -> tensor<?xf32, {bounds = #const.OpaqueI64Elements<[19]> : tensor<1xsi64>, order = #C}>
// CHECK: return [[SLICE]] : tensor<?xf32, {bounds = #const.OpaqueI64Elements<[19]> : tensor<1xsi64>, order = #C}>
// CHECK: }
44 changes: 44 additions & 0 deletions tests/lit/NPU/conversion/pipelines/loop.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//
// Copyright (C) 2024 Intel Corporation.
// SPDX-License-Identifier: Apache-2.0
//

// RUN: vpux-translate --vpu-arch=%arch% --import-IE ./loop.xml | FileCheck %s
// REQUIRES: arch-NPU37XX

// CHECK: func.func @main(
// CHECK-SAME: [[ARG0:%arg[0-9]+]]: tensor<3x4x6x10xf32>,
// CHECK-SAME: [[ARG1:%arg[0-9]+]]: tensor<2x3x4x5xf32>)
// CHECK-SAME: -> (tensor<1x4x6x10xf32>, tensor<2x3x4x5xf32>) {

// CHECK-DAG: [[CST:%.+]] = const.Declare tensor<1xsi32> = dense<2> : tensor<1xsi32>
// CHECK-DAG: [[CST0:%.+]] = const.Declare tensor<1xi8> = dense<1> : tensor<1xi8>

// CHECK: [[LOOP:%.*]]:2 = IE.Loop([[CST]], [[CST0]], [[ARG0]], [[ARG1]])
// CHECK: : tensor<1xsi32>, tensor<1xi8>, tensor<3x4x6x10xf32>, tensor<2x3x4x5xf32> -> tensor<1x4x6x10xf32>, tensor<2x3x4x5xf32>
// CHECK: (num_iterations : 1 current_iter_index : -1 exec_cond_index : 2)
// CHECK: slice_input_descs : [#IE.SliceInputPortMap<external_port_id = 2 : i64, internal_layer_id = 0 : i64, axis = 0 : i64, start = 0 : i64, stride = 1 : i64, part_size = 3 : i64, end = 2 : i64>]
// CHECK: invariant_input_descs : []
// CHECK: feedback_input_descs : [#IE.MergedInputPortMap<external_port_id = 3 : i64, internal_layer_id = 1 : i64, body_input_index = 1 : i64>]
// CHECK: concat_output_descs : [#IE.ConcatOutputPortMap<external_port_id = 0 : i64, internal_layer_id = 0 : i64, axis = 0 : i64, start = 0 : i64, stride = 1 : i64, part_size = 1 : i64, end = -1 : i64>]
// CHECK: invariant_output_descs : [#IE.InvariantOutputPortMap<external_port_id = 1 : i64, internal_layer_id = 1 : i64, iterations = -1 : i64>]

// CHECK: body_module : {
// CHECK: ^bb0([[ARG2:%arg[0-9]+]]: tensor<1x4x6x10xf32>, [[ARG3:%arg[0-9]+]]: tensor<2x3x4x5xf32>):

// CHECK-DAG: [[CST1:%.*]] = const.Declare tensor<1xi8> = dense<0> : tensor<1xi8>
// CHECK-DAG: [[CST2:%.*]] = const.Declare tensor<1x1x1x1xf32> = dense<1.000000e+00> : tensor<1x1x1x1xf32>
// CHECK: [[ADD1:%.*]] = IE.Add([[ARG3]], [[CST2]])
// CHECK-SAME: {auto_broadcast = #IE.auto_broadcast_type<NUMPY>} :
// CHECK-SAME: tensor<2x3x4x5xf32>, tensor<1x1x1x1xf32> -> tensor<2x3x4x5xf32>

// CHECK-DAG: [[CST3:%.*]] = const.Declare tensor<1x4x6x10xf32> = dense<1.000000e+00> : tensor<1x4x6x10xf32>
// CHECK: [[ADD2:%.*]] = IE.Add([[ARG2]], [[CST3]])
// CHECK-SAME: {auto_broadcast = #IE.auto_broadcast_type<NUMPY>} :
// CHECK-SAME: tensor<1x4x6x10xf32>, tensor<1x4x6x10xf32> -> tensor<1x4x6x10xf32>
// CHECK: "IE.LoopTerminator"([[ADD2]], [[ADD1]], [[CST1]]) : (tensor<1x4x6x10xf32>, tensor<2x3x4x5xf32>, tensor<1xi8>) -> ()
// CHECK: }

// CHECK: return [[LOOP]]#0, [[LOOP]]#1 : tensor<1x4x6x10xf32>, tensor<2x3x4x5xf32>

// CHECK: }
16 changes: 16 additions & 0 deletions tests/lit/NPU/conversion/pipelines/shape_of_37XX.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//
// Copyright (C) 2024 Intel Corporation.
// SPDX-License-Identifier: Apache-2.0
//

// RUN: vpux-translate --vpu-arch=%arch% --import-IE ./shape_of.xml | FileCheck %s
// REQUIRES: arch-NPU37XX

// CHECK: module @shape_of {
// CHECK: func.func @main(%arg0: tensor<1x8x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 8, 384, 384]> : tensor<4xsi64>, order = #NCHW}>)
// CHECK-SAME: -> tensor<4xsi64> {
// CHECK: [[SHAPE_OF:%.*]] = IE.ShapeOf(%arg0) {dstElemType = si64} :
// CHECK-SAME: tensor<1x8x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 8, 384, 384]> : tensor<4xsi64>, order = #NCHW}> -> tensor<4xsi64>
// CHECK: return [[SHAPE_OF]] : tensor<4xsi64>
// CHECK: }
// CHECK: }
16 changes: 16 additions & 0 deletions tests/lit/NPU/conversion/pipelines/shape_of_3_37XX.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
//
// Copyright (C) 2024 Intel Corporation.
// SPDX-License-Identifier: Apache 2.0
//

// RUN: vpux-translate --vpu-arch=%arch% --import-IE ./shape_of_3.xml | FileCheck %s
// REQUIRES: arch-NPU37XX

// CHECK: module @shape_of {
// CHECK: func.func @main(%arg0: tensor<1x8x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 8, 384, 384]> : tensor<4xsi64>, order = #NCHW}>)
// CHECK-SAME: -> tensor<4xsi64> {
// CHECK: [[SHAPE_OF:%.*]] = IE.ShapeOf(%arg0) {dstElemType = si64} :
// CHECK-SAME: tensor<1x8x?x?xf16, {bounds = #const.OpaqueI64Elements<[1, 8, 384, 384]> : tensor<4xsi64>, order = #NCHW}> -> tensor<4xsi64>
// CHECK: return [[SHAPE_OF]] : tensor<4xsi64>
// CHECK: }
// CHECK: }
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//
// Copyright (C) 2024 Intel Corporation.
// SPDX-License-Identifier: Apache-2.0
//

// RUN: vpux-translate --vpu-arch=%arch% --export-ELF %s | FileCheck %s
// REQUIRES: arch-NPU40XX

!quantileFloatType = !QuantileFloat.quantileFloat<4, {-1.000000e+00,-0.69619280099868774,-0.52507305145263672,-0.39491748809814453,-0.28444138169288635,-0.18477343022823334,-0.091050036251544952,0.000000e+00,0.07958029955625534,0.16093020141124725,0.24611230194568634,0.33791524171829224,0.44070982933044434,0.56261700391769409,0.72295683622360229,1.000000e+00}>

// Module under test: tagged for NPU40XX (see VPU.arch) and describing a network whose
// single input and single output are 1x1024 tensors of !quantileFloatType.
// @main holds one ELF.Main with three SHT_NOBITS logical sections (DMA, LEON, SHAVE),
// each wrapping one DDR buffer declaration, plus a metadata section.
// The FileCheck directives placed before the closing brace look for "ELF", the
// .strtab/.symstrtab names and the four section names, in that order, in the tool output.
module @Test attributes {VPU.arch = #VPU.arch_kind<NPU40XX>, VPU.compilationMode = #VPU.compilation_mode<DefaultHW>} {
// Resource description: executors and memories declared for this module.
IE.TileResource 6 of @NCE at 1.700000e+03 MHz {
IE.MemoryResource 1327104 bytes of @CMX_NN_FragmentationAware
IE.MemoryResource 1474560 bytes of @CMX_NN {VPU.bandwidth = 64 : i64, VPU.derateFactor = 1.000000e+00 : f64}
IE.ExecutorResource 2 of @SHAVE_ACT
IE.ExecutorResource 1 of @DPU
}
IE.ExecutorResource 1 of @M2I
IE.ExecutorResource 2 of @DMA_NN
IE.MemoryResource 4194304000 bytes of @DDR {VPU.bandwidth = 64 : i64, VPU.derateFactor = 6.000000e-01 : f64}
// Network interface: one input and one output, both using the quantile float element type.
net.NetworkInfo entryPoint : @main inputsInfo : {
DataInfo "Input" : tensor<1x1024x!quantileFloatType>
} outputsInfo : {
DataInfo "Output" : tensor<1x1024x!quantileFloatType>
}
// I/O bindings: DDR memrefs with the same 1x1024 shape as the NetworkInfo entries above;
// the profiling declarations region is left empty.
VPUASM.IOBindings inputDeclarations : {
VPUASM.DeclareBuffer @Input !VPUASM.Buffer< "NetworkInput"[0] <0> : memref<1x1024x!quantileFloatType, @DDR> : swizzling(0)>
} outputDeclarations : {
VPUASM.DeclareBuffer @Output !VPUASM.Buffer< "NetworkOutput"[0] <0> : memref<1x1024x!quantileFloatType, @DDR> : swizzling(0)>
} profilingBuffDeclarations : {
}
func.func @main() {
ELF.Main @ELFMain {
// Section flagged for the DMA processor; holds a 1x128x1024 buffer.
ELF.CreateLogicalSection @data.BuffersIO.DMA aligned(1) secType(SHT_NOBITS) secFlags("SHF_ALLOC|VPU_SHF_PROC_DMA") secLocation(<DDR>) {
VPUASM.DeclareBuffer @DeclareBufferDMA !VPUASM.Buffer< "DDR"[0] <0> : memref<1x128x1024x!quantileFloatType, @DDR> : swizzling(0)>
}
// Section flagged SHF_EXECINSTR (named LEON); holds a 1x128x1024 buffer.
ELF.CreateLogicalSection @data.BuffersIO.LEON aligned(1) secType(SHT_NOBITS) secFlags("SHF_ALLOC|SHF_EXECINSTR") secLocation(<DDR>) {
VPUASM.DeclareBuffer @DeclareBufferLEON !VPUASM.Buffer< "DDR"[0] <0> : memref<1x128x1024x!quantileFloatType, @DDR> : swizzling(0)>
}
// Section flagged for the SHAVE processor; holds the largest buffer, 2560x1024x1024 elements.
// NOTE(review): presumably sized to exercise sub-byte element size handling for large buffers — confirm intent.
ELF.CreateLogicalSection @data.BuffersIO.SHAVE aligned(1) secType(SHT_NOBITS) secFlags("SHF_ALLOC|VPU_SHF_PROC_SHAVE") secLocation(<DDR>) {
VPUASM.DeclareBuffer @DeclareBufferSHAVE !VPUASM.Buffer< "DDR"[0] <0> : memref<2560x1024x1024x!quantileFloatType, @DDR> : swizzling(0)>
}
ELF.CreateMetadataSection @MetadataSection aligned(8) secFlags("SHF_NONE") {
VPUASM.NetworkMetadata @NetworkMetadata
}
}
return
}

// CHECK: ELF
// CHECK: .strtab
// CHECK: .symstrtab
// CHECK: MetadataSection
// CHECK: data.BuffersIO.DMA
// CHECK: data.BuffersIO.LEON
// CHECK: data.BuffersIO.SHAVE
}
45 changes: 45 additions & 0 deletions tests/lit/NPU/large_memory/baseline_37XX.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// RUN: vpux-translate --vpu-arch=%arch% --export-ELF %s | FileCheck %s
// REQUIRES: arch-NPU37XX

// Module under test: tagged for NPU37XX (see VPU.arch) with a 1x1024 ui8 input and output.
// @main declares three large DDR buffers (all at offset 0), then wraps each one in its own
// SHT_NOBITS logical section (DMA, LEON, SHAVE) next to a ".metadata" section.
// The FileCheck directives placed before the closing brace look for "ELF", the
// .strtab/.symstrtab names and the four section names, in that order, in the tool output.
// NOTE(review): the three buffers total 3072 + 224 + 1536 MiB, which is more than the
// 4194304000-byte DDR resource declared below — presumably intentional for a large-memory
// test; confirm.
module @Test attributes {VPU.arch = #VPU.arch_kind<NPU37XX>, VPU.compilationMode = #VPU.compilation_mode<DefaultHW>} {
// Resource description: executors and memories declared for this module.
IE.TileResource 2 of @NCE at 1.300000e+03 MHz {
IE.MemoryResource 1784217 bytes of @CMX_NN_FragmentationAware
IE.MemoryResource 1982464 bytes of @CMX_NN {VPU.bandwidth = 32 : i64, VPU.derateFactor = 1.000000e+00 : f64}
IE.ExecutorResource 2 of @SHAVE_ACT
IE.ExecutorResource 1 of @SHAVE_NN
IE.ExecutorResource 1 of @DPU
}
IE.ExecutorResource 2 of @DMA_NN
IE.MemoryResource 4194304000 bytes of @DDR {VPU.bandwidth = 8 : i64, VPU.derateFactor = 6.000000e-01 : f64}
net.NetworkInfo entryPoint : @main inputsInfo : {
DataInfo "Input" : tensor<1x1024xui8>
} outputsInfo : {
DataInfo "Output" : tensor<1x1024xui8>
}
// Entry point: takes two 1x1024 ui8 DDR memrefs and returns %arg1 unchanged.
func.func @main(%arg0: memref<1x1024xui8, @DDR>, %arg1: memref<1x1024xui8, @DDR>) -> memref<1x1024xui8, @DDR> {
%buffer_dma = VPURT.DeclareBuffer <DDR> <0> -> memref<3072x1024x1024xui8, @DDR> // 3 GB
%buffer_leon = VPURT.DeclareBuffer <DDR> <0> -> memref<224x1024x1024xui8, @DDR> // 224 MB
%buffer_shave = VPURT.DeclareBuffer <DDR> <0> -> memref<1536x1024x1024xui8, @DDR> // 1.5 GB
%metadata_sec = ELFNPU37XX.CreateMetadataSection secFlags("SHF_NONE") {secAddrAlign = 64 : i64, secInfo = 0 : i64, secName = ".metadata"} -> !ELFNPU37XX.Section {
%metadata = VPUMI37XX.NetworkMetadata -> !VPURegMapped.Index<0:0:0>
}
// One logical section per buffer; the secFlags differ per section (DMA / EXECINSTR / SHAVE).
%buffer_dma_sec = ELFNPU37XX.CreateLogicalSection secType(SHT_NOBITS) secFlags("SHF_ALLOC|VPU_SHF_PROC_DMA") {secAddrAlign = 64 : i64, secInfo = 0 : i64, secName = ".data.BuffersIO.DMA"} -> !ELFNPU37XX.Section {
ELFNPU37XX.PutOpInSection %buffer_dma : memref<3072x1024x1024xui8, @DDR>
}
%buffer_leon_sec = ELFNPU37XX.CreateLogicalSection secType(SHT_NOBITS) secFlags("SHF_ALLOC|SHF_EXECINSTR") {secAddrAlign = 64 : i64, secInfo = 0 : i64, secName = ".data.BuffersIO.LEON"} -> !ELFNPU37XX.Section {
ELFNPU37XX.PutOpInSection %buffer_leon : memref<224x1024x1024xui8, @DDR>
}
%buffer_shave_sec = ELFNPU37XX.CreateLogicalSection secType(SHT_NOBITS) secFlags("SHF_ALLOC|VPU_SHF_PROC_SHAVE") {secAddrAlign = 64 : i64, secInfo = 0 : i64, secName = ".data.BuffersIO.SHAVE"} -> !ELFNPU37XX.Section {
ELFNPU37XX.PutOpInSection %buffer_shave : memref<1536x1024x1024xui8, @DDR>
}
return %arg1 : memref<1x1024xui8, @DDR>
}

// CHECK: ELF
// CHECK: .strtab
// CHECK: .symstrtab
// CHECK: .metadata
// CHECK: .data.BuffersIO.DMA
// CHECK: .data.BuffersIO.LEON
// CHECK: .data.BuffersIO.SHAVE
}
50 changes: 50 additions & 0 deletions tests/lit/NPU/large_memory/baseline_40XX.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// RUN: vpux-translate --vpu-arch=%arch% --export-ELF %s | FileCheck %s
// REQUIRES: arch-NPU40XX

// Module under test: tagged for NPU40XX (see VPU.arch) with a 1x1024 ui8 input and output.
// @main holds one ELF.Main with three SHT_NOBITS logical sections (DMA, LEON, SHAVE),
// each wrapping one large DDR buffer declaration, plus a metadata section.
// The FileCheck directives placed before the closing brace look for "ELF", the
// .strtab/.symstrtab names and the four section names, in that order, in the tool output.
// NOTE(review): the three buffers total 3072 + 3072 + 1536 MiB, which is more than the
// 4194304000-byte DDR resource declared below — presumably intentional for a large-memory
// test; confirm.
module @Test attributes {VPU.arch = #VPU.arch_kind<NPU40XX>, VPU.compilationMode = #VPU.compilation_mode<DefaultHW>} {
// Resource description: executors and memories declared for this module.
IE.TileResource 6 of @NCE at 1.700000e+03 MHz {
IE.MemoryResource 1327104 bytes of @CMX_NN_FragmentationAware
IE.MemoryResource 1474560 bytes of @CMX_NN {VPU.bandwidth = 64 : i64, VPU.derateFactor = 1.000000e+00 : f64}
IE.ExecutorResource 2 of @SHAVE_ACT
IE.ExecutorResource 1 of @DPU
}
IE.ExecutorResource 1 of @M2I
IE.ExecutorResource 2 of @DMA_NN
IE.MemoryResource 4194304000 bytes of @DDR {VPU.bandwidth = 64 : i64, VPU.derateFactor = 6.000000e-01 : f64}
net.NetworkInfo entryPoint : @main inputsInfo : {
DataInfo "Input" : tensor<1x1024xui8>
} outputsInfo : {
DataInfo "Output" : tensor<1x1024xui8>
}
// I/O bindings: DDR memrefs with the same 1x1024 ui8 shape as the NetworkInfo entries above;
// the profiling declarations region is left empty.
VPUASM.IOBindings inputDeclarations : {
VPUASM.DeclareBuffer @Input !VPUASM.Buffer< "NetworkInput"[0] <0> : memref<1x1024xui8, @DDR> : swizzling(0)>
} outputDeclarations : {
VPUASM.DeclareBuffer @Output !VPUASM.Buffer< "NetworkOutput"[0] <0> : memref<1x1024xui8, @DDR> : swizzling(0)>
} profilingBuffDeclarations : {
}
func.func @main() {
ELF.Main @ELFMain {
// One logical section per buffer; the secFlags differ per section (DMA / EXECINSTR / SHAVE).
ELF.CreateLogicalSection @data.BuffersIO.DMA aligned(1) secType(SHT_NOBITS) secFlags("SHF_ALLOC|VPU_SHF_PROC_DMA") secLocation(<DDR>) {
VPUASM.DeclareBuffer @DeclareBufferDMA !VPUASM.Buffer< "DDR"[0] <0> : memref<3072x1024x1024xui8, @DDR> : swizzling(0)> // 3 GB
}
ELF.CreateLogicalSection @data.BuffersIO.LEON aligned(1) secType(SHT_NOBITS) secFlags("SHF_ALLOC|SHF_EXECINSTR") secLocation(<DDR>) {
VPUASM.DeclareBuffer @DeclareBufferLEON !VPUASM.Buffer< "DDR"[0] <0> : memref<3072x1024x1024xui8, @DDR> : swizzling(0)> // 3 GB
}
ELF.CreateLogicalSection @data.BuffersIO.SHAVE aligned(1) secType(SHT_NOBITS) secFlags("SHF_ALLOC|VPU_SHF_PROC_SHAVE") secLocation(<DDR>) {
VPUASM.DeclareBuffer @DeclareBufferSHAVE !VPUASM.Buffer< "DDR"[0] <0> : memref<1536x1024x1024xui8, @DDR> : swizzling(0)> // 1.5 GB
}
ELF.CreateMetadataSection @MetadataSection aligned(8) secFlags("SHF_NONE") {
VPUASM.NetworkMetadata @NetworkMetadata
}
}
return
}

// CHECK: ELF
// CHECK: .strtab
// CHECK: .symstrtab
// CHECK: MetadataSection
// CHECK: data.BuffersIO.DMA
// CHECK: data.BuffersIO.LEON
// CHECK: data.BuffersIO.SHAVE
}
Loading
Loading