nvidia docker - yszheda/wiki GitHub Wiki

References

$ docker inspect -f '{{index .Config.Labels "com.nvidia.volumes.needed"}}' nvidia/cuda
nvidia_driver
$ docker inspect -f '{{index .Config.Labels "com.nvidia.cuda.version"}}' nvidia/cuda
7.5

Install

Using nvidia-docker2 with Docker images that are not based on NVIDIA's official CUDA images

nvidia-cuda-toolkit is installed in the Docker image.

  • docker run --runtime=nvidia (device is not whitelisted in the container):
$ nvidia-smi                                                                              
Failed to initialize NVML: Unknown Error

$ strace nvidia-smi                                                                                          
execve("/usr/bin/nvidia-smi", ["nvidia-smi"], [/* 22 vars */]) = 0                                                          
brk(NULL)                               = 0x1832000                                                                  
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)                                                              
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8f4ea5d000                          
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)                                                            
open("/home/bot/dev/dr_ros/install_isolated/lib/tls/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/home/bot/dev/dr_ros/install_isolated/lib/tls/x86_64", 0x7ffd678355d0) = -1 ENOENT (No such file or directory)             
open("/home/bot/dev/dr_ros/install_isolated/lib/tls/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
stat("/home/bot/dev/dr_ros/install_isolated/lib/tls", 0x7ffd678355d0) = -1 ENOENT (No such file or directory)               
open("/home/bot/dev/dr_ros/install_isolated/lib/x86_64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)                    
stat("/home/bot/dev/dr_ros/install_isolated/lib/x86_64", 0x7ffd678355d0) = -1 ENOENT (No such file or directory)                      
open("/home/bot/dev/dr_ros/install_isolated/lib/libpthread.so.0", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)       
stat("/home/bot/dev/dr_ros/install_isolated/lib", {st_mode=S_IFDIR|0755, st_size=20480, ...}) = 0                                     
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3                                                                                            
fstat(3, {st_mode=S_IFREG|0644, st_size=248120, ...}) = 0                                                                                                 
mmap(NULL, 248120, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f8f4ea20000                                                               
close(3)                                = 0                                                                                 
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)                                                           
open("/lib/x86_64-linux-gnu/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3                                                                                         
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0Pa\0\0\0\0\0\0"..., 832) = 832                                           
fstat(3, {st_mode=S_IFREG|0755, st_size=135440, ...}) = 0                                                                      
mmap(NULL, 2212936, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8f4e620000                                               
mprotect(0x7f8f4e638000, 2093056, PROT_NONE) = 0                                                                                                   
mmap(0x7f8f4e837000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x17000) = 0x7f8f4e837000
mmap(0x7f8f4e839000, 13384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f8f4e839000                                                               
close(3)                                = 0                                               
open("/home/bot/dev/dr_ros/install_isolated/lib/libdl.so.2", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)                                  
open("/lib/x86_64-linux-gnu/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3                                                               
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\200\r\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0644, st_size=14640, ...}) = 0                    
mmap(NULL, 2109680, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8f4e41c000
mprotect(0x7f8f4e41f000, 2093056, PROT_NONE) = 0                                          
mmap(0x7f8f4e61e000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f8f4e61e000
close(3)                                = 0                                
open("/home/bot/dev/dr_ros/install_isolated/lib/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3                           
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\4\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1689360, ...}) = 0                                                                               
mmap(NULL, 3795296, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8f4e07d000                                                                                  
mprotect(0x7f8f4e212000, 2097152, PROT_NONE) = 0                                                                                           
mmap(0x7f8f4e412000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x195000) = 0x7f8f4e412000                       
mmap(0x7f8f4e418000, 14688, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f8f4e418000              
close(3)                                = 0                                                                          
open("/home/bot/dev/dr_ros/install_isolated/lib/librt.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)                     
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)                                     
open("/lib/x86_64-linux-gnu/librt.so.1", O_RDONLY|O_CLOEXEC) = 3                                                                           
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\340 \0\0\0\0\0\0"..., 832) = 832                                                
fstat(3, {st_mode=S_IFREG|0644, st_size=31744, ...}) = 0                                                                         
mmap(NULL, 2128832, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8f4de75000                                       
mprotect(0x7f8f4de7c000, 2093056, PROT_NONE) = 0                                                                            
mmap(0x7f8f4e07b000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f8f4e07b000                                       
close(3)                                = 0                                                                                           
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8f4ea1e000                                           
arch_prctl(ARCH_SET_FS, 0x7f8f4ea1efc0) = 0                                                                                           
mprotect(0x7f8f4e412000, 16384, PROT_READ) = 0                                                                                              
mprotect(0x7f8f4e837000, 4096, PROT_READ) = 0                                                                                                             
mprotect(0x7f8f4e07b000, 4096, PROT_READ) = 0                                                                                   
mprotect(0x7f8f4e61e000, 4096, PROT_READ) = 0                                                                               
mprotect(0x7f8f4ea60000, 4096, PROT_READ) = 0                                                                                             
munmap(0x7f8f4ea20000, 248120)          = 0                                                                                                                   
set_tid_address(0x7f8f4ea1f290)         = 38                                                                                    
set_robust_list(0x7f8f4ea1f2a0, 24)     = 0                                                                                    
rt_sigaction(SIGRTMIN, {sa_handler=0x7f8f4e625bd0, sa_mask=[], sa_flags=SA_RESTORER|SA_SIGINFO, sa_restorer=0x7f8f4e6310c0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7f8f4e625c60, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f8f4e6310c0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0                                                            
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0                                                                                                  
futex(0x7f8f4e61f0a8, FUTEX_WAKE_PRIVATE, 2147483647) = 0                                 
brk(NULL)                               = 0x1832000                                                                     
brk(0x1853000)                          = 0x1853000                                                              
open("/home/bot/dev/dr_ros/install_isolated/lib/libnvidia-ml.so.1", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3                                         
fstat(3, {st_mode=S_IFREG|0644, st_size=248120, ...}) = 0                   
mmap(NULL, 248120, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f8f4ea20000                         
close(3)                                = 0                                               
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)                                  
open("/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\335\0\0\0\0\0\0"..., 832) = 832                               
fstat(3, {st_mode=S_IFREG|0644, st_size=1312544, ...}) = 0                     
mmap(NULL, 6124712, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8f4d89d000
mprotect(0x7f8f4d9c9000, 2097152, PROT_NONE) = 0
mmap(0x7f7c8fab8000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x6000) = 0x7f7c8fab8000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f7c9045b000
arch_prctl(ARCH_SET_FS, 0x7f7c9045bd00) = 0                                 
mprotect(0x7f7c8fe4f000, 16384, PROT_READ) = 0                              
mprotect(0x7f7c90274000, 4096, PROT_READ) = 0                     
mprotect(0x7f7c8fab8000, 4096, PROT_READ) = 0                                             
mprotect(0x7f7c9005b000, 4096, PROT_READ) = 0
mprotect(0x7f7c9049d000, 4096, PROT_READ) = 0
munmap(0x7f7c9045d000, 248120)          = 0
set_tid_address(0x7f7c9045bfd0)         = 45                               
set_robust_list(0x7f7c9045bfe0, 24)     = 0
rt_sigaction(SIGRTMIN, {sa_handler=0x7f7c90062bd0, sa_mask=[], sa_flags=SA_RESTORER|SA_SIGINFO, sa_restorer=0x7f7c9006e0c0}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7f7c90062c60, sa_mask=[], sa_flags=SA_RESTORER|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f7c9006e0c0}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0   
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0               
futex(0x7f7c9005c0a8, FUTEX_WAKE_PRIVATE, 2147483647) = 0
brk(NULL)                               = 0xee6000                                                                
brk(0xf07000)                           = 0xf07000                                                            
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=248120, ...}) = 0                      
mmap(NULL, 248120, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f7c9045d000
close(3)                                = 0                                              
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1", O_RDONLY|O_CLOEXEC) = 3               
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\335\0\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0644, st_size=1312544, ...}) = 0                                                       
mmap(NULL, 6124712, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f7c8f2da000
mprotect(0x7f7c8f406000, 2097152, PROT_NONE) = 0                               
mmap(0x7f7c8f606000, 86016, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12c000) = 0x7f7c8f606000
mmap(0x7f7c8f61b000, 2712744, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f7c8f61b000
close(3)                                = 0               
munmap(0x7f7c9045d000, 248120)          = 0                                               
getpid()                                = 45    
open("/proc/modules", O_RDONLY)         = 3                                                                         
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0                                                          
read(3, "nvidia_uvm 663552 0 - Live 0xfff"..., 1024) = 1024
read(3, "f_conntrack 114688 6 nf_nat_masq"..., 1024) = 1024                    
read(3, " 0 - Live 0xffffffffc0427000\nnvi"..., 1024) = 1024    
read(3, "d_hda_codec_realtek,snd_hda_code"..., 1024) = 1024                             
close(3)                                = 0             
open("/proc/driver/nvidia/params", O_RDONLY) = 3                                          
fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "Mobile: 4294967295\nResmanDebugLe"..., 1024) = 491                                                      
close(3)                                = 0
stat("/dev/nvidiactl", {st_mode=S_IFCHR|0666, st_rdev=makedev(195, 255), ...}) = 0       
open("/dev/nvidiactl", O_RDWR)          = -1 EPERM (Operation not permitted)
open("/dev/nvidiactl", O_RDONLY)        = -1 EPERM (Operation not permitted)
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
write(1, "Failed to initialize NVML: Unkno"..., 41Failed to initialize NVML: Unknown Error
) = 41                                       
exit_group(255)                         = ?  
+++ exited with 255 +++
  • docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all (the mount fails because nvidia-smi already exists in the image):
docker: Error response from daemon: OCI runtime create failed: container_linux.go:344: starting container process caused "process_linux.go:424: container init caused \"process_linux.go:407: running prestart hook 1 caused \\\"error running hook: exit status 1, stdout: , stderr: exec command: [/usr/bin/nvidia-container-cli --load-kmods --debug=/var/log/nvidia-container-runtime-hook.log configure --ldconfig=@/sbin/ldconfig --device=all --utility --pid=5383 /home/bot/docker/overlay2/c8eddfaa444c2ec572944e7ba3c3eea2a5db6dab69c9bdcdb5d5123124d57412/merged]\\\\nnvidia-container-cli: mount error: file creation failed: /home/bot/docker/overlay2/c8eddfaa444c2ec572944e7ba3c3eea2a5db6dab69c9bdcdb5d5123124d57412/merged/usr/bin/nvidia-smi: file exists\\\\n\\\"\"": unknown.

-- WARNING, the following logs are for debugging purposes only --

I0222 04:19:31.013186 6804 nvc.c:281] initializing library context (version=1.0.1, build=038fb92d00c94f97d61492d4ed1f82e981129b74)
I0222 04:19:31.013281 6804 nvc.c:255] using root /
I0222 04:19:31.013297 6804 nvc.c:256] using ldcache /etc/ld.so.cache
I0222 04:19:31.013310 6804 nvc.c:257] using unprivileged user 65534:65534
I0222 04:19:31.014520 6808 nvc.c:191] loading kernel module nvidia
I0222 04:19:31.014910 6808 nvc.c:203] loading kernel module nvidia_uvm
I0222 04:19:31.015063 6808 nvc.c:211] loading kernel module nvidia_modeset
I0222 04:19:31.015584 6809 driver.c:133] starting driver service
I0222 04:19:31.053406 6804 nvc_container.c:364] configuring container with 'utility supervised'
I0222 04:19:31.053610 6804 nvc_container.c:384] setting pid to 6774
I0222 04:19:31.053621 6804 nvc_container.c:385] setting rootfs to /home/bot/docker/overlay2/e99e2f3211230bd58966a98c6e83ccbdb5672cdb8d292502e240fe5e4182a980/merged
I0222 04:19:31.053629 6804 nvc_container.c:386] setting owner to 0:0
I0222 04:19:31.053636 6804 nvc_container.c:387] setting bins directory to /usr/bin
I0222 04:19:31.053642 6804 nvc_container.c:388] setting libs directory to /usr/lib/x86_64-linux-gnu
I0222 04:19:31.053649 6804 nvc_container.c:389] setting libs32 directory to /usr/lib/i386-linux-gnu
I0222 04:19:31.053656 6804 nvc_container.c:390] setting cudart directory to /usr/local/cuda
I0222 04:19:31.053662 6804 nvc_container.c:391] setting ldconfig to @/sbin/ldconfig (host relative)
I0222 04:19:31.053669 6804 nvc_container.c:392] setting mount namespace to /proc/6774/ns/mnt
I0222 04:19:31.053676 6804 nvc_container.c:394] setting devices cgroup to /sys/fs/cgroup/devices/docker/a31e5b4fb3a5949ecb11c5c013c2cbdb5d91ec9cccc6054717372f552a7d4ded
I0222 04:19:31.053685 6804 nvc_info.c:433] requesting driver information with ''
I0222 04:19:31.054366 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/libnvidia-tls.so.384.130
I0222 04:19:31.054440 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ptxjitcompiler.so.384.130
I0222 04:19:31.054504 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-opencl.so.384.130
I0222 04:19:31.054563 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so.384.130
I0222 04:19:31.054600 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/libnvidia-glsi.so.384.130
I0222 04:19:31.054635 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.384.130
I0222 04:19:31.054667 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/libnvidia-fatbinaryloader.so.384.130
I0222 04:19:31.054703 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/libnvidia-eglcore.so.384.130
I0222 04:19:31.054736 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/libnvidia-compiler.so.384.130
I0222 04:19:31.054809 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-cfg.so.384.130
I0222 04:19:31.054866 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libnvcuvid.so.384.130
I0222 04:19:31.055244 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libcuda.so.384.130
I0222 04:19:31.055491 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libGLX_nvidia.so.384.130
I0222 04:19:31.055553 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libGLESv2_nvidia.so.384.130
I0222 04:19:31.055611 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libGLESv1_CM_nvidia.so.384.130
I0222 04:19:31.055669 6804 nvc_info.c:147] selecting /usr/lib/x86_64-linux-gnu/nvidia/current/libEGL_nvidia.so.384.130
W0222 04:19:31.055722 6804 nvc_info.c:298] missing library libvdpau_nvidia.so
W0222 04:19:31.055735 6804 nvc_info.c:298] missing library libnvidia-encode.so
W0222 04:19:31.055745 6804 nvc_info.c:298] missing library libnvidia-fbc.so
W0222 04:19:31.055752 6804 nvc_info.c:298] missing library libnvidia-ifr.so
W0222 04:19:31.055758 6804 nvc_info.c:302] missing compat32 library libnvidia-ml.so
W0222 04:19:31.055774 6804 nvc_info.c:302] missing compat32 library libnvidia-cfg.so
W0222 04:19:31.055782 6804 nvc_info.c:302] missing compat32 library libcuda.so
W0222 04:19:31.055791 6804 nvc_info.c:302] missing compat32 library libnvidia-opencl.so
W0222 04:19:31.055800 6804 nvc_info.c:302] missing compat32 library libnvidia-ptxjitcompiler.so
W0222 04:19:31.055809 6804 nvc_info.c:302] missing compat32 library libnvidia-fatbinaryloader.so
W0222 04:19:31.055816 6804 nvc_info.c:302] missing compat32 library libnvidia-compiler.so
W0222 04:19:31.055822 6804 nvc_info.c:302] missing compat32 library libvdpau_nvidia.so
W0222 04:19:31.055829 6804 nvc_info.c:302] missing compat32 library libnvidia-encode.so
W0222 04:19:31.055835 6804 nvc_info.c:302] missing compat32 library libnvcuvid.so
W0222 04:19:31.055842 6804 nvc_info.c:302] missing compat32 library libnvidia-eglcore.so
W0222 04:19:31.055849 6804 nvc_info.c:302] missing compat32 library libnvidia-glcore.so
W0222 04:19:31.055855 6804 nvc_info.c:302] missing compat32 library libnvidia-tls.so
W0222 04:19:31.055862 6804 nvc_info.c:302] missing compat32 library libnvidia-glsi.so
W0222 04:19:31.055868 6804 nvc_info.c:302] missing compat32 library libnvidia-fbc.so
W0222 04:19:31.055875 6804 nvc_info.c:302] missing compat32 library libnvidia-ifr.so
W0222 04:19:31.055882 6804 nvc_info.c:302] missing compat32 library libGLX_nvidia.so
W0222 04:19:31.055888 6804 nvc_info.c:302] missing compat32 library libEGL_nvidia.so
W0222 04:19:31.055895 6804 nvc_info.c:302] missing compat32 library libGLESv2_nvidia.so
W0222 04:19:31.055902 6804 nvc_info.c:302] missing compat32 library libGLESv1_CM_nvidia.so
I0222 04:19:31.056160 6804 nvc_info.c:228] selecting /usr/lib/nvidia/current/nvidia-smi
I0222 04:19:31.056197 6804 nvc_info.c:228] selecting /usr/lib/nvidia/current/nvidia-debugdump
I0222 04:19:31.056215 6804 nvc_info.c:228] selecting /usr/bin/nvidia-persistenced
W0222 04:19:31.056372 6804 nvc_info.c:324] missing binary nvidia-cuda-mps-control
W0222 04:19:31.056379 6804 nvc_info.c:324] missing binary nvidia-cuda-mps-server
I0222 04:19:31.056402 6804 nvc_info.c:365] listing device /dev/nvidiactl
I0222 04:19:31.056409 6804 nvc_info.c:365] listing device /dev/nvidia-uvm
I0222 04:19:31.056416 6804 nvc_info.c:365] listing device /dev/nvidia-uvm-tools
I0222 04:19:31.056422 6804 nvc_info.c:365] listing device /dev/nvidia-modeset
W0222 04:19:31.056445 6804 nvc_info.c:273] missing ipc /var/run/nvidia-persistenced/socket
W0222 04:19:31.056460 6804 nvc_info.c:273] missing ipc /tmp/nvidia-mps
I0222 04:19:31.056467 6804 nvc_info.c:489] requesting device information with ''
I0222 04:19:31.062324 6804 nvc_info.c:519] listing device /dev/nvidia0 (GPU-a11d518d-029c-38a6-dcb9-fbbbe818c226 at 00000000:01:00.0)
I0222 04:19:31.062379 6804 nvc_mount.c:252] mounting tmpfs at /home/bot/docker/overlay2/e99e2f3211230bd58966a98c6e83ccbdb5672cdb8d292502e240fe5e4182a980/merged/proc/driver/nvidia
I0222 04:19:31.078615 6804 nvc.c:318] shutting down library context
I0222 04:19:31.079014 6809 driver.c:192] terminating driver service
I0222 04:19:31.090407 6804 driver.c:233] driver service terminated successfully

docker build

default runtime

/etc/docker/daemon.json:

"default-runtime": "nvidia",

Trouble-shooting

dial unix /var/lib/nvidia-docker/nvidia-docker.sock: connect: no such file or directory

Check /var/log/upstart/nvidia-docker.log; it shows that localhost was not resolved correctly (make sure /etc/hosts maps localhost to 127.0.0.1):

/usr/bin/nvidia-docker-plugin | 2017/10/10 17:29:40 Loading NVIDIA unified memory
/usr/bin/nvidia-docker-plugin | 2017/10/10 17:29:40 Loading NVIDIA management library
/usr/bin/nvidia-docker-plugin | 2017/10/10 17:29:40 Discovering GPU devices
/usr/bin/nvidia-docker-plugin | 2017/10/10 17:29:44 Provisioning volumes at /var/lib/nvidia-docker/volumes
/usr/bin/nvidia-docker-plugin | 2017/10/10 17:29:44 Serving plugin API at /var/lib/nvidia-docker
/usr/bin/nvidia-docker-plugin | 2017/10/10 17:29:44 Serving remote API at localhost:3476
/usr/bin/nvidia-docker-plugin | 2017/10/10 17:29:45 Error: listen tcp: lookup localhost on 8.8.8.8:53: no such host

nvcc not found

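Use the devel variant of the CUDA image (e.g. nvidia/cuda:<version>-devel); the base and runtime variants do not ship nvcc.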

Failed to initialize NVML: Unknown Error

Set NVIDIA_VISIBLE_DEVICES when starting the container, e.g. docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all

mount error: file creation failed: /var/lib/docker/overlay2/xxx/merged/usr/bin/nvidia-smi: file exists

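Do not install the NVIDIA driver inside the Docker image: nvidia-container-cli mounts the host's driver files (such as /usr/bin/nvidia-smi) into the container and fails if they already exist there.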

No CUDA inside container

-e NVIDIA_DRIVER_CAPABILITIES=compute,utility

unknown flag: --gpus

sudo apt install -y --reinstall docker-ce docker-ce-cli nvidia-container-toolkit

"--gpus" requires API version 1.40, but the Docker daemon API version is 1.39