ncnn debug - YingkunZhou/transfer-learning GitHub Wiki

# tb /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/layer/vulkan/reshape_vulkan.cpp:784
# tb /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/gpu.cpp:3285
# r --only-test debug --backend=v

dashboard variables
dashboard breakpoints

# NCNN_LOGE("VkBlobAllocator M %p +%lu %lu", ptr->buffer, ptr->offset, ptr->capacity);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:715
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:784

# NCNN_LOGE("VkBlobAllocator M %p +%lu %lu", ptr->memory, ptr->bind_offset, ptr->bind_capacity);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:980
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1041

# NCNN_LOGE("submit_and_wait");
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/command.cpp:2293
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/command.cpp:3372

# NCNN_LOGE("record_upload src = %d | %d %d %d @ %d", src.dims, src.w, src.h, src.c, src.elempack);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/command.cpp:2996

# NCNN_LOGE("VkWeightAllocator fastMalloc %lu", size);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1234

# NCNN_LOGE("VkWeightAllocator M %p", block->buffer);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1367

# NCNN_LOGE("forward_layer %d %d %s", layer->support_vulkan, layer_index, layer->name.c_str());
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/net.cpp:238

# NCNN_LOGE("forward_layer %d %d %s done", layer->support_vulkan, layer_index, layer->name.c_str());
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/net.cpp:366

# NCNN_LOGE("record_clone buffer to buffer");
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/command.cpp:980

# NCNN_LOGE("record_pipeline %p", pipeline);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/command.cpp:1522

# NCNN_LOGE("record_upload buffer");
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/command.cpp:360

# NCNN_LOGE("VkStagingAllocator M %p %lu", ptr->buffer, size);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1742

# NCNN_LOGE("VkStagingAllocator M %p %lu reused %lu", ptr->buffer, size, capacity);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1710

# NCNN_LOGE("upload_staging_buffer %p  ->   %p +%d ~%d", src_fp16.data, dst_staging.buffer(), dst_staging.buffer_offset(), dst_staging.buffer_capacity());
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/command.cpp:389

# NCNN_LOGE("VkStagingAllocator F %p", ptr->buffer);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1684
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1749

# NCNN_LOGE("record_download buffer");
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/command.cpp:494

# NCNN_LOGE("VkBlobAllocator F %p +%lu %lu", ptr->buffer, ptr->offset, ptr->capacity);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:791
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1048

# (repeat of the allocator.cpp:1710 breakpoint already listed above) NCNN_LOGE("VkStagingAllocator M %p %lu reused %lu", ptr->buffer, size, capacity);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/allocator.cpp:1710

# NCNN_LOGE("convert_packing b2b %d %d %d", cast_type_from_index, cast_type_to_index, packing_type_to_index);
b /media/loongson/phd19/home/zhou/graduate9/work/ncnn/src/gpu.cpp:3094
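
Example run; the log lines below are printed by the NCNN_LOGE statements quoted in the breakpoint list above:

$ ./ncnn_perf --only-test debug --backend=v 2>&1 | tee /tmp/run.log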
[0 NVIDIA Tegra Orin (nvgpu)]  queueC=2[8]  queueG=0[16]  queueT=1[2]
[0 NVIDIA Tegra Orin (nvgpu)]  bugsbn1=0  bugbilz=0  bugcopc=0  bugihfa=0
[0 NVIDIA Tegra Orin (nvgpu)]  fp16-p/s/a=1/1/1  int8-p/s/a=1/1/1
[0 NVIDIA Tegra Orin (nvgpu)]  subgroup=32  basic=1  vote=1  ballot=1  shuffle=1
VkBlobAllocator M 0xaaaaecb52630 +0 64
VkBlobAllocator M 0xaaaaecb540d0 +0 1024
VkBlobAllocator M 0xaaaaecb540d0 +1024 1024
submit_and_wait
Creating ncnn net: debug
record_upload src = 3 | 1 1 8 @ 4
record_upload src = 3 | 1 1 8 @ 4
VkWeightAllocator fastMalloc 128
VkWeightAllocator M 0xaaaaef51cd80
record_upload src = 3 | 1 1 8 @ 4
record_upload src = 3 | 1 1 8 @ 4
VkWeightAllocator fastMalloc 128
submit_and_wait
forward_layer 1 6 mul_1
forward_layer 1 5 reshape_1
forward_layer 1 2 network.0.1
record_clone buffer to buffer
VkBlobAllocator M 0xaaaaecb59f60 +0 128
forward_layer 1 2 network.0.1 done
VkBlobAllocator M 0xaaaaecb59f60 +128 64
record_pipeline 0xaaaaef4d24d0
VkBlobAllocator F 0xaaaaecb59f60 +0 128
forward_layer 1 5 reshape_1 done
forward_layer 1 4 mul_0
forward_layer 1 3 reshape_0
forward_layer 1 1 network.0.0
record_clone buffer to buffer
VkBlobAllocator M 0xaaaaecb59f60 +0 128
forward_layer 1 1 network.0.0 done
VkBlobAllocator M 0xaaaaecb59f60 +192 64
record_pipeline 0xaaaaef513510
VkBlobAllocator F 0xaaaaecb59f60 +0 128
forward_layer 1 3 reshape_0 done
record_upload buffer
VkStagingAllocator M 0xaaaaef5047e0 512
upload_staging_buffer 0xaaaaef495130  ->   0xaaaaef5047e0 +0 ~512
convert_packing b2b 0 2 1
VkBlobAllocator M 0xaaaaecb59f60 +0 128
record_pipeline 0xaaaaecc300c0
# ncnn::BinaryOp_vulkan::forward
VkBlobAllocator M 0xaaaaecb59f60 +256 64
record_pipeline 0xaaaaef4f5790
VkBlobAllocator F 0xaaaaecb59f60 +192 64
VkBlobAllocator F 0xaaaaecb59f60 +0 128
forward_layer 1 4 mul_0 done
VkBlobAllocator M 0xaaaaecb59f60 +0 64
record_pipeline 0xaaaaef4d2510
VkBlobAllocator F 0xaaaaecb59f60 +128 64
VkBlobAllocator F 0xaaaaecb59f60 +256 64
forward_layer 1 6 mul_1 done
record_download buffer
convert_packing b2b 2 0 0
VkBlobAllocator M 0xaaaaecb59f60 +64 128
record_pipeline 0xaaaaef4b0da0
submit_and_wait
VkBlobAllocator F 0xaaaaecb59f60 +64 128
VkStagingAllocator F 0xaaaaef5047e0
-0.0279236 -0.0215607
VkBlobAllocator F 0xaaaaecb59f60 +0 64
VkWeightAllocator F 0xaaaaef51cd80
VkWeightAllocator F 0xaaaaef51cd80
VkWeightAllocator 1 0
VkBlobAllocator F 0xaaaaecb52630 +0 64
VkBlobAllocator F 0xaaaaecb540d0 +0 1024
VkBlobAllocator F 0xaaaaecb540d0 +1024 1024
VkStagingAllocator F 0xaaaaef5047e0