Kraken Training - bertsky/mkn-kurrent-gt GitHub Wiki
- Kraken performs an implicit train/validation split if no explicit one is passed
- But this is not accessible afterwards, making evaluation impossible.
- Also, Kraken cannot cope with process substitution, because it seeks in the input files.
- Moreover, for handwriting, we need to specify a non-default (wider+deeper) topology.
#!/bin/bash
# Train a Kraken recognition model from explicit train/validation file lists.
#
# Usage: bash <this-script> [MODELNAME]
#   MODELNAME  base name of the run (default: herrnhut-kurrent); output and
#              logs are written below MODELNAME.kraken/
#
# Expects the directories train/ and val/ in the current working directory,
# containing symlinks. Each list entry is the find start directory joined
# with the link target (find -printf "%H/%l").
#
# Requirements: ketos (Kraken), GNU find; for --augment additionally:
# pip install albumentations
set -euo pipefail

modelname=${1:-herrnhut-kurrent}
mkdir -p -- "$modelname.kraken/"

# Keep the file lists in a private temporary directory that is actually
# created by mktemp (no predictable/racy names) and removed on any exit path.
tmpdir=$(mktemp -d)
trap 'rm -rf -- "${tmpdir:?}"' EXIT
vallist=$tmpdir/val.lst
trainlist=$tmpdir/train.lst

# Under `set -e`, a missing val/ or train/ directory aborts here instead of
# starting a training run with an empty list.
find val -type l -printf "%H/%l\n" > "$vallist"
find train -type l -printf "%H/%l\n" > "$trainlist"

# Every expansion below is quoted, so no IFS override is needed to keep
# paths containing spaces intact.
options=(
  -d cuda:0
  # extra slow learning rate (huge dataset)
  -r 0.0001
  -o "$modelname.kraken/$modelname"
  --debug
  --log-dir "$modelname.kraken"
  -f page
  # handwriting needs deeper and wider nets
  -s '[1,120,0,1 Cr3,13,32 Do0.1,2 Mp2,2 Cr3,13,32 Do0.1,2 Mp2,2 Cr3,9,64 Do0.1,2 Mp2,2 Cr3,9,64 Do0.1,2 S1(1x0)1,3 Lbx200 Do0.1,2 Lbx200 Do.1,2 Lbx200 Do]'
  # augmentation or not?
  --augment # --no-augment
  --workers 2
  # does not work (Kraken tries to seek these files)
  #-e <(find val -type l -printf "%H/%l\n")
  #-t <(find train -type l -printf "%H/%l\n")
  -e "$vallist" -t "$trainlist"
)

# Not exec'ed on purpose: the EXIT trap must still run to clean up the lists.
ketos train "${options[@]}"