Kraken Training - bertsky/mkn-kurrent-gt GitHub Wiki

  • Kraken does an implicit split if no explicit one is passed.
  • But this is not accessible afterwards, making evaluation impossible.
  • Also, Kraken cannot cope with process substitution, because it seeks in the input files.
  • Moreover, for handwriting, we need to specify a non-default (wider+deeper) topology.
#!/bin/bash
# Train a Kraken recognition model with an explicit train/validation split.
#
# Usage: <this script> [MODELNAME]
#   MODELNAME  base name of the model (default: herrnhut-kurrent).
#              Checkpoints and logs are written to MODELNAME.kraken/.
#
# Must be run from a directory containing train/ and val/, each populated
# with symlinks. For every symlink, "<train|val>/<link target>" is written
# to a manifest file which is then handed to ketos via -t / -e.
#
# Prerequisite for --augment:
# pip install albumentations

# Abort on the first failing command (e.g. a missing val/ directory) instead
# of starting a training run on an empty or partial manifest.
set -euo pipefail

modelname=${1:-herrnhut-kurrent}
mkdir -p -- "$modelname.kraken/"

# Kraken seeks in the manifest files, so they have to be regular files
# (process substitution does not work). Create them inside a private
# temporary directory (no predictable names as with `mktemp -u`) and remove
# that directory again when the script exits, whatever the exit path.
tmpdir=$(mktemp -d)
trap 'rm -rf -- "$tmpdir"' EXIT
vallist=$tmpdir/val.lst
trainlist=$tmpdir/train.lst

# %H = starting point (val / train), %l = target of the symlink
find val -type l -printf "%H/%l\n" > "$vallist"
find train -type l -printf "%H/%l\n" > "$trainlist"

# An empty manifest would defeat the purpose of passing an explicit split.
if [[ ! -s "$trainlist" ]]; then
    echo "error: no symlinks found under train/" >&2
    exit 1
fi
if [[ ! -s "$vallist" ]]; then
    echo "error: no symlinks found under val/" >&2
    exit 1
fi

# All expansions are quoted, so no IFS manipulation is needed to keep
# values with spaces together as single arguments.
options=(
    -d cuda:0
    # extra slow learning rate (huge dataset)
    -r 0.0001
    -o "$modelname.kraken/$modelname"
    --debug
    --log-dir "$modelname.kraken"
    -f page
    # handwriting needs deeper and wider nets
    -s '[1,120,0,1 Cr3,13,32 Do0.1,2 Mp2,2 Cr3,13,32 Do0.1,2 Mp2,2 Cr3,9,64 Do0.1,2 Mp2,2 Cr3,9,64 Do0.1,2 S1(1x0)1,3 Lbx200 Do0.1,2 Lbx200 Do.1,2 Lbx200 Do]'
    # augmentation or not?
    --augment # --no-augment
    --workers 2
    # does not work (Kraken tries to seek these files)
    #-e <(find val -type l -printf "%H/%l\n")
    #-t <(find train -type l -printf "%H/%l\n")
    -e "$vallist" -t "$trainlist"
)


# Not exec'd: the EXIT trap must still run afterwards to clean up tmpdir.
# The script's exit status is that of ketos.
ketos train "${options[@]}"