tnn - YingkunZhou/EdgeTransformerBench GitHub Wiki
TODO:
- tnn::DEVICE_NAIVE ?
git clone https://github.com/Tencent/TNN.git # --depth=1
If you use GCC, modify the CMakeLists.txt first (as shown below), and then build the TNN lib (deprecated) and the convert tool
environment:
- miniforge conda
- conda create -n py3.8 python=3.8 pip
- conda activate py3.8
- conda install gxx==12.3.0
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e50f270..5738f5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -220,7 +220,7 @@ if(UNIX)
endif()
endif()
-set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
if(TNN_METAL_ENABLE OR TNN_APPLE_NPU_ENABLE)
diff --git a/tools/converter/CMakeLists.txt b/tools/converter/CMakeLists.txt
index 928525f..39c0d8a 100644
--- a/tools/converter/CMakeLists.txt
+++ b/tools/converter/CMakeLists.txt
@@ -20,6 +20,8 @@ if(TNN_BUILD_SHARED)
TNN
gflags
flatbuffers
+ absl_log_internal_check_op
+ absl_log_internal_message
${Protobuf_LIBRARIES}
)
elseif(SYSTEM.iOS OR SYSTEM.Darwin)
diff --git a/tools/converter/source/onnx/onnx_proxy_graph.h b/tools/converter/source/onnx/onnx_proxy_graph.h
index 36a92bd..9f647db 100644
--- a/tools/converter/source/onnx/onnx_proxy_graph.h
+++ b/tools/converter/source/onnx/onnx_proxy_graph.h
@@ -16,6 +16,7 @@
#define TNN_TOOLS_CONVERTER_SOURCE_ONNX_ONNX_PROXY_GRAPH_H_
#include <memory>
#include <set>
+#include <map>
#include "onnx.pb.h"
diff --git a/tools/onnx2tnn/onnx-converter/CMakeLists.txt b/tools/onnx2tnn/onnx-converter/CMakeLists.txt
index d905120..b8455dc 100755
--- a/tools/onnx2tnn/onnx-converter/CMakeLists.txt
+++ b/tools/onnx2tnn/onnx-converter/CMakeLists.txt
@@ -41,8 +41,8 @@ add_definitions(-DDEBUG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-deprecated-declarations -Wno-ignored-attributes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-ignored-attributes")
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -std=c++11")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -std=c++11")
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "${CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS} -rdynamic")
add_library(onnx2tnn MODULE
@@ -50,7 +50,7 @@ add_library(onnx2tnn MODULE
${ONNX2TNN_SRC}
${ONNX_PROTO_SRC}
${ONNX_PROTO_HEAD})
-set_target_properties(onnx2tnn PROPERTIES CXX_STANDARD 11)
+set_target_properties(onnx2tnn PROPERTIES CXX_STANDARD 17)
if (MAC_ACCE)
target_link_libraries(onnx2tnn PRIVATE ${MAC_ACCE})
endif()
mkdir -p build && cd build
cmake -D CMAKE_BUILD_TYPE=Release \
-D CMAKE_SYSTEM_NAME=Linux \
-D CMAKE_C_COMPILER=gcc \
-D CMAKE_CXX_COMPILER=g++ \
-D TNN_ARM_ENABLE=ON \
-D TNN_TEST_ENABLE=ON \
-D TNN_CPU_ENABLE=ON \
-D TNN_RK_NPU_ENABLE=OFF \
-D TNN_OPENMP_ENABLE=ON \
-D TNN_OPENCL_ENABLE=ON \
-D CMAKE_SYSTEM_PROCESSOR=aarch64 \
-D TNN_BUILD_SHARED=ON \
-D TNN_CONVERTER_ENABLE=ON \
-D TNN_ONNX2TNN_ENABLE=ON \
-D TNN_MODEL_CHECK_ENABLE=ON \
-D TNN_DYNAMIC_RANGE_QUANTIZATION_ENABLE=ON \
..
# the four TNN_*_ENABLE options above are needed in order to build the converter
export LIBRARY_PATH=$HOME/miniforge3/envs/py3.8/lib
make -j32
mkdir -p ../install/include && mkdir -p ../install/lib
cp -a libTNN.so* ../install/lib
cp -r ../include/tnn ../install/include
# reference to tools/onnx2tnn/onnx-converter/build.sh
cd ../tools/onnx2tnn/onnx-converter
ln -s ../../../build/tools/onnx2tnn/onnx-converter/onnx2tnn.cpython-38-aarch64-linux-gnu.so .
If you use the system clang (recommended), modify the CMakeLists.txt first, and then we will try to build with CUDA (optional) on Jetson Orin
diff --git a/source/tnn/device/cuda/CMakeLists.txt b/source/tnn/device/cuda/CMakeLists.txt
index 03dc534..9be59fb 100644
--- a/source/tnn/device/cuda/CMakeLists.txt
+++ b/source/tnn/device/cuda/CMakeLists.txt
@@ -16,6 +16,7 @@ include_directories($ENV{CUDNN_ROOT_DIR}/include)
set(TARGET_ARCH "-gencode arch=compute_75,code=sm_75 \
-gencode arch=compute_70,code=sm_70 \
+ -gencode arch=compute_87,code=sm_87 \
-gencode arch=compute_61,code=sm_61 \
-gencode arch=compute_60,code=sm_60 \
-gencode arch=compute_53,code=sm_53")
diff --git a/source/tnn/utils/data_type_utils.cc b/source/tnn/utils/data_type_utils.cc
index 1b11af6..2518b9b 100644
--- a/source/tnn/utils/data_type_utils.cc
+++ b/source/tnn/utils/data_type_utils.cc
@@ -15,6 +15,7 @@
#include "tnn/utils/data_type_utils.h"
#include <limits.h>
#include "tnn/core/macro.h"
+#include <stdint.h>
namespace TNN_NS {
sudo apt install libprotoc-dev
sudo apt install libomp-16-dev
export CUDNN_ROOT_DIR=/usr/local/cuda
export TENSORRT_ROOT_DIR=/usr/local/cuda
cd /lib
sudo ln -s aarch64-linux-gnu/libnvinfer.so .
sudo ln -s aarch64-linux-gnu/libnvinfer_plugin.so .
sudo ln -s /usr/lib/aarch64-linux-gnu/libcudnn.so /usr/local/cuda/lib64/
cd /usr/local/cuda
sudo ln -s targets/aarch64-linux/lib lib
cd /usr/local/cuda/lib
sudo ln -s /usr/lib/aarch64-linux-gnu/libnvinfer.so .
sudo ln -s /usr/lib/aarch64-linux-gnu/libnvinfer_plugin.so .
mkdir -p build && cd build
cmake -D CMAKE_BUILD_TYPE=Release \
-D CMAKE_SYSTEM_NAME=Linux \
-D CMAKE_C_COMPILER=clang-16 \
-D CMAKE_CXX_COMPILER=clang++-16 \
-D TNN_ARM_ENABLE=ON \
-D TNN_TEST_ENABLE=ON \
-D TNN_CPU_ENABLE=ON \
-D TNN_RK_NPU_ENABLE=OFF \
-D TNN_OPENMP_ENABLE=ON \
-D TNN_OPENCL_ENABLE=ON \
-D CMAKE_SYSTEM_PROCESSOR=aarch64 \
-D TNN_BUILD_SHARED=ON .. \
-D TNN_CUDA_ENABLE=ON -D TNN_TENSORRT_ENABLE=ON
make -j32
mkdir -p ../install/include && mkdir -p ../install/lib
cp -a libTNN.so* ../install/lib
cp -r ../include/tnn ../install/include
/data/data/com.termux/files/usr/bin/python /data/data/com.termux/files/home/work/TNN/source/tnn/device/opencl/cl/opencl_codegen.py /data/data/com.termux/files/home/work/TNN/source/tnn/device/opencl/cl
# Generate OpenCL Source done !!!
mkdir build && cd build
cmake -D CMAKE_BUILD_TYPE=Release \
-D CMAKE_C_COMPILER=gcc \
-D CMAKE_CXX_COMPILER=g++ \
-D TNN_ARM_ENABLE=ON \
-D TNN_TEST_ENABLE=ON \
-D TNN_CPU_ENABLE=ON \
-D TNN_RK_NPU_ENABLE=OFF \
-D TNN_OPENMP_ENABLE=ON \
-D TNN_OPENCL_ENABLE=ON \
-D CMAKE_SYSTEM_PROCESSOR=aarch64 \
-D TNN_BUILD_SHARED=ON .. -D CMAKE_INSTALL_PREFIX=../install
make install -j8
cd ../install
cp -r ../include .
cp ../build/libTNN.so.0.3.0.0 lib
cd lib; ln -s libTNN.so.0.3.0.0 libTNN.so.0; ln -s libTNN.so.0 libTNN.so; cd -
export TNN_INC=$HOME/work/TNN/install/include
export TNN_LIB=$HOME/work/TNN/install/lib
g++ -O3 -o tnn_perf tnn_perf.cpp utils.cpp -std=c++17 -I$TNN_INC -L$TNN_LIB -lTNN `pkg-config --cflags --libs opencv4`
g++ -O3 -DTEST -o tnn_perf-test tnn_perf.cpp utils.cpp -std=c++17 -I$TNN_INC -L$TNN_LIB -lTNN `pkg-config --cflags --libs opencv4`
LD_LIBRARY_PATH=$TNN_LIB ./tnn_perf-test
- efficientformerv2
- cpu ✅
- opencl ❌ runtime error
- SwiftFormer
❌ runtime error
E/tnn: RawBuffer2ArmBlob [File source/tnn/device/arm/acc/arm_layer_acc.cc][Line 135] RawBuffer2ArmBlob:: unsupported buffer and blob data type: 3, 0
E/tnn: InitLayers [File source/tnn/core/default_network.cc][Line 321] Error Init layer /network.0/network.0.2/attn/Expand (err: 4096 or 0x1000)
Segmentation fault (core dumped)
- EMO
❌ conversion error
-debug
D/tnn: get_node_attr_ai [File tools/onnx2tnn/src/core/onnx_utiliE/tnn: TNNWriteProto [File tools/onnx2tnn/src/core/onnx2tnn.cc][Line 326] error::op convert failed onnx:Mod
Segmentation fault (core dumped)
- edgenext
❌ runtime error
E/tnn: RawBuffer2ArmBlob [File source/tnn/device/arm/acc/arm_layer_acc.cc][Line 135] RawBuffer2ArmBlob:: unsupported buffer and blob data type: 3, 0
E/tnn: InitLayers [File source/tnn/core/default_network.cc][Line 321] Error Init layer /stages.1/stages.1.1/xca/Expand (err: 4096 or 0x1000)
Segmentation fault (core dumped)
- mobilevitv2
❌ runtime error
E/tnn: RawBuffer2ArmBlob [File source/tnn/device/arm/acc/arm_layer_acc.cc][Line 135] RawBuffer2ArmBlob:: unsupported buffer and blob data type: 3, 0
E/tnn: InitLayers [File source/tnn/core/default_network.cc][Line 321] Error Init layer /layer_3/layer_3.1/global_rep/global_rep.0/pre_norm_attn/pre_norm_attn.1/Expand (err: 4096 or 0x1000)
Segmentation fault (core dumped)
- mobilevit
❌ runtime error
E source/tnn/optimizer/graph_matcher/ir.cc:584 the graph is not connected.
E/tnn: Optimize [File source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc][Line 77] code: 0x6000 msg: source/tnn/optimizer/graph_matcher/ir.cc:584 the graph is not connected.E/tnn: StrideSlice [File source/tnn/utils/dims_function_utils.cc][Line 164] StrideSliceV2Layer param of axes, ends, strides size is invalid
E/tnn: StrideSlice [File source/tnn/utils/dims_function_utils.cc][Line 164] StrideSliceV2Layer param of axes, ends, strides size is invalid
E/tnn: StrideSlice [File source/tnn/utils/dims_function_utils.cc][Line 164] StrideSliceV2Layer param of axes, ends, strides size is invalid
E/tnn: Forward [File source/tnn/core/default_network.cc][Line 603] Forward error code: 0x1000 msg: StrideSliceV2Layer param of axes, ends, strides size is invalid, exit
Segmentation fault (core dumped)
- LeViT
- cpu ✅
- opencl ❌ runtime error
CPU FP32
Model | Top-1 | Top-1 //20 est. | Top-1 //50 est. | #params | GMACs |
---|---|---|---|---|---|
efficientformerv2_s0 | - | 76.2 | 76.1 | 3.5M | 0.40G |
efficientformerv2_s1 | - | 78.8 | 79.8 | 6.1M | 0.65G |
efficientformerv2_s2 | - | 82.1 | 82.0 | 12.6M | 1.25G |
LeViT_128S | - | 75.9 | 76.1 | 7.8M | 0.30G |
LeViT_128 | - | 79.4 | 78.1 | 9.2M | 0.41G |
LeViT_192 | - | 79.6 | 79.6 | 11 M | 0.66G |
LeViT_256 | - | 81.1 | 81.4 | 19 M | 1.12G |
resnet50 | - | 79.6 | 81.3 | 25.6M | 4.1G |
mobilenetv3_large_100 | - | 75.6 | 75.3 | 5.5M | 0.29G |