tnn - YingkunZhou/EdgeTransformerBench GitHub Wiki

TODO:

  • tnn::DEVICE_NAIVE ?
git clone https://github.com/Tencent/TNN.git # --depth=1

Linux

If using gcc, modify the CMakeLists.txt first, and then build the TNN lib (deprecated) and the converter tool

environment:

  • miniforge conda
  • conda create -n py3.8 python=3.8 pip
  • conda activate py3.8
  • conda install gxx==12.3.0
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e50f270..5738f5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -220,7 +220,7 @@ if(UNIX)
     endif()
 endif()
 
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 if(TNN_METAL_ENABLE OR TNN_APPLE_NPU_ENABLE)
diff --git a/tools/converter/CMakeLists.txt b/tools/converter/CMakeLists.txt
index 928525f..39c0d8a 100644
--- a/tools/converter/CMakeLists.txt
+++ b/tools/converter/CMakeLists.txt
@@ -20,6 +20,8 @@ if(TNN_BUILD_SHARED)
             TNN
             gflags
             flatbuffers
+            absl_log_internal_check_op
+            absl_log_internal_message
             ${Protobuf_LIBRARIES}
             )
 elseif(SYSTEM.iOS OR SYSTEM.Darwin)
diff --git a/tools/converter/source/onnx/onnx_proxy_graph.h b/tools/converter/source/onnx/onnx_proxy_graph.h
index 36a92bd..9f647db 100644
--- a/tools/converter/source/onnx/onnx_proxy_graph.h
+++ b/tools/converter/source/onnx/onnx_proxy_graph.h
@@ -16,6 +16,7 @@
 #define TNN_TOOLS_CONVERTER_SOURCE_ONNX_ONNX_PROXY_GRAPH_H_
 #include <memory>
 #include <set>
+#include <map>
 
 #include "onnx.pb.h"
 
diff --git a/tools/onnx2tnn/onnx-converter/CMakeLists.txt b/tools/onnx2tnn/onnx-converter/CMakeLists.txt
index d905120..b8455dc 100755
--- a/tools/onnx2tnn/onnx-converter/CMakeLists.txt
+++ b/tools/onnx2tnn/onnx-converter/CMakeLists.txt
@@ -41,8 +41,8 @@ add_definitions(-DDEBUG)
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-deprecated-declarations -Wno-ignored-attributes")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-ignored-attributes")
 
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -std=c++11")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -std=c++11")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
 SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "${CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS} -rdynamic")
 
 add_library(onnx2tnn MODULE
@@ -50,7 +50,7 @@ add_library(onnx2tnn MODULE
                              ${ONNX2TNN_SRC}
                              ${ONNX_PROTO_SRC}
                              ${ONNX_PROTO_HEAD})
-set_target_properties(onnx2tnn PROPERTIES CXX_STANDARD 11)
+set_target_properties(onnx2tnn PROPERTIES CXX_STANDARD 17)
 if (MAC_ACCE)
     target_link_libraries(onnx2tnn PRIVATE ${MAC_ACCE})
 endif()
mkdir -p build && cd build
cmake -D CMAKE_BUILD_TYPE=Release \
-D CMAKE_SYSTEM_NAME=Linux \
-D CMAKE_C_COMPILER=gcc \
-D CMAKE_CXX_COMPILER=g++ \
-D TNN_ARM_ENABLE=ON \
-D TNN_TEST_ENABLE=ON \
-D TNN_CPU_ENABLE=ON \
-D TNN_RK_NPU_ENABLE=OFF \
-D TNN_OPENMP_ENABLE=ON \
-D TNN_OPENCL_ENABLE=ON \
-D CMAKE_SYSTEM_PROCESSOR=aarch64 \
-D TNN_BUILD_SHARED=ON .. \

# in order to build the converter, additionally pass:
-D TNN_CONVERTER_ENABLE=ON \
-D TNN_ONNX2TNN_ENABLE=ON \
-D TNN_MODEL_CHECK_ENABLE=ON \
-D TNN_DYNAMIC_RANGE_QUANTIZATION_ENABLE=ON \

export LIBRARY_PATH=$HOME/miniforge3/envs/py3.8/lib
make -j32
mkdir -p ../install/include && mkdir -p ../install/lib
cp -a libTNN.so* ../install/lib
cp -r ../include/tnn ../install/include

# reference to tools/onnx2tnn/onnx-converter/build.sh
cd ../tools/onnx2tnn/onnx-converter
ln -s ../../../build/tools/onnx2tnn/onnx-converter/onnx2tnn.cpython-38-aarch64-linux-gnu.so .
If using the system clang (recommended), modify the CMakeLists.txt first, and then we will try to build with CUDA (optional) on Jetson Orin
diff --git a/source/tnn/device/cuda/CMakeLists.txt b/source/tnn/device/cuda/CMakeLists.txt
index 03dc534..9be59fb 100644
--- a/source/tnn/device/cuda/CMakeLists.txt
+++ b/source/tnn/device/cuda/CMakeLists.txt
@@ -16,6 +16,7 @@ include_directories($ENV{CUDNN_ROOT_DIR}/include)
 
 set(TARGET_ARCH "-gencode arch=compute_75,code=sm_75 \
                  -gencode arch=compute_70,code=sm_70 \
+                 -gencode arch=compute_87,code=sm_87 \
                  -gencode arch=compute_61,code=sm_61 \
                  -gencode arch=compute_60,code=sm_60 \
                  -gencode arch=compute_53,code=sm_53")
diff --git a/source/tnn/utils/data_type_utils.cc b/source/tnn/utils/data_type_utils.cc
index 1b11af6..2518b9b 100644
--- a/source/tnn/utils/data_type_utils.cc
+++ b/source/tnn/utils/data_type_utils.cc
@@ -15,6 +15,7 @@
 #include "tnn/utils/data_type_utils.h"
 #include <limits.h>
 #include "tnn/core/macro.h"
+#include <stdint.h>
 
 namespace TNN_NS {
 
sudo apt install libprotoc-dev
sudo apt install libomp-16-dev

export CUDNN_ROOT_DIR=/usr/local/cuda
export TENSORRT_ROOT_DIR=/usr/local/cuda
cd /lib
sudo ln -s aarch64-linux-gnu/libnvinfer.so .
sudo ln -s aarch64-linux-gnu/libnvinfer_plugin.so .
sudo ln -s /usr/lib/aarch64-linux-gnu/libcudnn.so /usr/local/cuda/lib64/
cd /usr/local/cuda
sudo ln -s targets/aarch64-linux/lib lib
cd /usr/local/cuda/lib
sudo ln -s /usr/lib/aarch64-linux-gnu/libnvinfer.so .
sudo ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so .

mkdir -p build && cd build
cmake -D CMAKE_BUILD_TYPE=Release \
-D CMAKE_SYSTEM_NAME=Linux \
-D CMAKE_C_COMPILER=clang-16 \
-D CMAKE_CXX_COMPILER=clang++-16 \
-D TNN_ARM_ENABLE=ON \
-D TNN_TEST_ENABLE=ON \
-D TNN_CPU_ENABLE=ON \
-D TNN_RK_NPU_ENABLE=OFF \
-D TNN_OPENMP_ENABLE=ON \
-D TNN_OPENCL_ENABLE=ON \
-D CMAKE_SYSTEM_PROCESSOR=aarch64 \
-D TNN_BUILD_SHARED=ON .. \
-D TNN_CUDA_ENABLE=ON -D TNN_TENSORRT_ENABLE=ON

make -j32

mkdir -p ../install/include && mkdir -p ../install/lib
cp -a libTNN.so* ../install/lib
cp -r ../include/tnn ../install/include

Android

/data/data/com.termux/files/usr/bin/python /data/data/com.termux/files/home/work/TNN/source/tnn/device/opencl/cl/opencl_codegen.py /data/data/com.termux/files/home/work/TNN/source/tnn/device/opencl/cl
# Generate OpenCL Source done !!!
mkdir -p build && cd build
cmake -D CMAKE_BUILD_TYPE=Release \
-D CMAKE_C_COMPILER=gcc \
-D CMAKE_CXX_COMPILER=g++ \
-D TNN_ARM_ENABLE=ON \
-D TNN_TEST_ENABLE=ON \
-D TNN_CPU_ENABLE=ON \
-D TNN_RK_NPU_ENABLE=OFF \
-D TNN_OPENMP_ENABLE=ON \
-D TNN_OPENCL_ENABLE=ON \
-D CMAKE_SYSTEM_PROCESSOR=aarch64 \
-D TNN_BUILD_SHARED=ON .. -D CMAKE_INSTALL_PREFIX=../install
make install -j8
cd ../install
cp -r ../include .
cp ../build/libTNN.so.0.3.0.0 lib
cd lib; ln -s libTNN.so.0.3.0.0 libTNN.so.0; ln -s libTNN.so.0 libTNN.so; cd -

build & run testsuite tool

export TNN_INC=$HOME/work/TNN/install/include
export TNN_LIB=$HOME/work/TNN/install/lib
g++ -O3 -o tnn_perf tnn_perf.cpp utils.cpp  -std=c++17 -I$TNN_INC -L$TNN_LIB -lTNN `pkg-config --cflags --libs opencv4`
g++ -O3 -DTEST -o tnn_perf-test tnn_perf.cpp utils.cpp  -std=c++17 -I$TNN_INC -L$TNN_LIB -lTNN `pkg-config --cflags --libs opencv4`
LD_LIBRARY_PATH=$TNN_LIB ./tnn_perf-test
  • efficientformerv2
    • cpu ✅
    • opencl ❌ runtime error
  • SwiftFormer
โŒ runtime error
E/tnn: RawBuffer2ArmBlob [File source/tnn/device/arm/acc/arm_layer_acc.cc][Line 135] RawBuffer2ArmBlob:: unsupported buffer and blob data type: 3, 0
E/tnn: InitLayers [File source/tnn/core/default_network.cc][Line 321] Error Init layer /network.0/network.0.2/attn/Expand (err: 4096 or 0x1000)
Segmentation fault (core dumped)
  • EMO
โŒ conversion error
-debug
D/tnn: get_node_attr_ai [File tools/onnx2tnn/src/core/onnx_utiliE/tnn: TNNWriteProto [File tools/onnx2tnn/src/core/onnx2tnn.cc][Line 326] error::op convert failed onnx:Mod

Segmentation fault (core dumped)
  • edgenext
โŒ runtime error
E/tnn: RawBuffer2ArmBlob [File source/tnn/device/arm/acc/arm_layer_acc.cc][Line 135] RawBuffer2ArmBlob:: unsupported buffer and blob data type: 3, 0
E/tnn: InitLayers [File source/tnn/core/default_network.cc][Line 321] Error Init layer /stages.1/stages.1.1/xca/Expand (err: 4096 or 0x1000)
Segmentation fault (core dumped)
  • mobilevitv2
โŒ runtime error
E/tnn: RawBuffer2ArmBlob [File source/tnn/device/arm/acc/arm_layer_acc.cc][Line 135] RawBuffer2ArmBlob:: unsupported buffer and blob data type: 3, 0
E/tnn: InitLayers [File source/tnn/core/default_network.cc][Line 321] Error Init layer /layer_3/layer_3.1/global_rep/global_rep.0/pre_norm_attn/pre_norm_attn.1/Expand (err: 4096 or 0x1000)
Segmentation fault (core dumped)
  • mobilevit
โŒ runtime error
 E source/tnn/optimizer/graph_matcher/ir.cc:584 the graph is not connected.
E/tnn: Optimize [File source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc][Line 77] code: 0x6000 msg: source/tnn/optimizer/graph_matcher/ir.cc:584 the graph is not connected.E/tnn: StrideSlice [File source/tnn/utils/dims_function_utils.cc][Line 164] StrideSliceV2Layer param of axes, ends, strides size is invalid
E/tnn: StrideSlice [File source/tnn/utils/dims_function_utils.cc][Line 164] StrideSliceV2Layer param of axes, ends, strides size is invalid
E/tnn: StrideSlice [File source/tnn/utils/dims_function_utils.cc][Line 164] StrideSliceV2Layer param of axes, ends, strides size is invalid
E/tnn: Forward [File source/tnn/core/default_network.cc][Line 603] Forward error code: 0x1000 msg: StrideSliceV2Layer param of axes, ends, strides size is invalid, exit
Segmentation fault (core dumped)
  • LeViT
    • cpu ✅
    • opencl ❌ runtime error

precision

reference

result

CPU FP32
Model Top-1 Top-1
//20 est.
Top-1
//50 est.
#params GMACs
efficientformerv2_s0 - 76.2 76.1 3.5M 0.40G
efficientformerv2_s1 - 78.8 79.8 6.1M 0.65G
efficientformerv2_s2 - 82.1 82.0 12.6M 1.25G
LeViT_128S - 75.9 76.1 7.8M 0.30G
LeViT_128 - 79.4 78.1 9.2M 0.41G
LeViT_192 - 79.6 79.6 11 M 0.66G
LeViT_256 - 81.1 81.4 19 M 1.12G
resnet50 - 79.6 81.3 25.6M 4.1G
mobilenetv3_large_100 - 75.6 75.3 5.5M 0.29G
โš ๏ธ **GitHub.com Fallback** โš ๏ธ