How to Create Imagenet ILSVRC2012 LMDB - rioyokotalab/caffe GitHub Wiki
First, download the ImageNet (ILSVRC2012) dataset:
#!/bin/bash
# Download each ILSVRC2012 archive into the current directory and print its
# MD5 checksum right after the download. Checksums are only printed, never
# compared, and a failed download does not stop the remaining ones.

base_url=http://www.image-net.org/challenges/LSVRC/2012/nnoupb

# fetch_and_hash ARCHIVE
#   Download ARCHIVE from base_url, then print its md5sum.
fetch_and_hash() {
  wget "${base_url}/$1"
  md5sum "$1"
}

# Development kit (Task 1 & 2), 2.5MB
fetch_and_hash ILSVRC2012_devkit_t12.tar.gz
# Development kit (Task 3), 22MB
fetch_and_hash ILSVRC2012_devkit_t3.tar.gz
# Training images (Task 1 & 2), 138GB
fetch_and_hash ILSVRC2012_img_train.tar
# Training images (Task 3), 728MB
fetch_and_hash ILSVRC2012_img_train_t3.tar
# Validation images (all tasks), 6.3GB
fetch_and_hash ILSVRC2012_img_val.tar
# Test images (all tasks), 13GB
fetch_and_hash ILSVRC2012_img_test.tar
md5sum prints the checksum of each downloaded archive; compare it against the officially published value to confirm that the download is intact.
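Take particular care with the training images: ILSVRC2012_img_train.tar contains further tar archives (n*.tar). After extracting ILSVRC2012_img_train.tar, run the following script in the directory holding those n*.tar files to unpack each one into a directory of the same name: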
#!/bin/sh
# Unpack every n*.tar archive in the current directory into a sibling
# directory named after the archive (n01440764.tar -> n01440764/).
# Safe to re-run: existing directories are reused.

# unpack_inner_tars
#   Extracts each ./n*.tar found in the current directory. Returns non-zero
#   as soon as a directory cannot be created or an archive fails to extract.
unpack_inner_tars() {
  for filepath in ./n*.tar
  do
    # With no matching archive the glob stays literal; skip it instead of
    # creating a directory called "n*" and running tar on a missing file.
    [ -f "${filepath}" ] || continue
    filename=$(basename "${filepath}" .tar)
    mkdir -p "${filename}" || return 1
    tar -xf "${filepath}" -C "${filename}" || return 1
  done
}

unpack_inner_tars
After that, create the LMDB using the scripts shipped with Caffe.
- get label data
$ cd $CAFFE_HOME/data/ilsvrc12/
$ ./get_ilsvrc_aux.sh
$ ls
det_synset_words.txt imagenet.bet.pickle synsets.txt test.txt val.txt
get_ilsvrc_aux.sh imagenet_mean.binaryproto synset_words.txt train.txt
- edit $CAFFE_HOME/examples/imagenet/create_imagenet.sh
#!/usr/bin/env sh
# Create the imagenet lmdb inputs
# N.B. set the path to the imagenet train + val data dirs
#
# EXAMPLE, DATA and TOOLS are relative paths, so they resolve against the
# directory the script is launched from.
set -e

EXAMPLE=examples/imagenet
DATA=data/ilsvrc12
TOOLS=build/tools

TRAIN_DATA_ROOT=/path/to/imagenet/train/
VAL_DATA_ROOT=/path/to/imagenet/val/

# Set RESIZE=true to resize the images to 256x256. Leave as false if images have
# already been resized using another tool.
RESIZE=true
if $RESIZE; then
  RESIZE_HEIGHT=256
  RESIZE_WIDTH=256
else
  RESIZE_HEIGHT=0
  RESIZE_WIDTH=0
fi

# Fail early, with diagnostics on stderr, when a data root is missing.
if [ ! -d "$TRAIN_DATA_ROOT" ]; then
  echo "Error: TRAIN_DATA_ROOT is not a path to a directory: $TRAIN_DATA_ROOT" >&2
  echo "Set the TRAIN_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the ImageNet training data is stored." >&2
  exit 1
fi

if [ ! -d "$VAL_DATA_ROOT" ]; then
  echo "Error: VAL_DATA_ROOT is not a path to a directory: $VAL_DATA_ROOT" >&2
  echo "Set the VAL_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the ImageNet validation data is stored." >&2
  exit 1
fi

# Every path expansion below is quoted so that directories containing
# spaces are passed to convert_imageset as a single argument.
echo "Creating train lmdb..."

GLOG_logtostderr=1 "$TOOLS/convert_imageset" \
    --resize_height="$RESIZE_HEIGHT" \
    --resize_width="$RESIZE_WIDTH" \
    --shuffle \
    "$TRAIN_DATA_ROOT" \
    "$DATA/train.txt" \
    "$EXAMPLE/ilsvrc12_train_lmdb"

echo "Creating val lmdb..."

GLOG_logtostderr=1 "$TOOLS/convert_imageset" \
    --resize_height="$RESIZE_HEIGHT" \
    --resize_width="$RESIZE_WIDTH" \
    --shuffle \
    "$VAL_DATA_ROOT" \
    "$DATA/val.txt" \
    "$EXAMPLE/ilsvrc12_val_lmdb"

echo "Done."
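Then run the script from $CAFFE_HOME, because its paths (examples/imagenet, data/ilsvrc12, build/tools) are relative to that directory: ./examples/imagenet/create_imagenet.sh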