Kraken Training - bertsky/mkn-kurrent-gt GitHub Wiki

Kraken performs an implicit train/validation split if no explicit one is passed,
but that split is not accessible afterwards, making evaluation impossible.
Also, Kraken cannot cope with process substitution, because it seeks in the input files.
Moreover, for handwriting, we need to specify a non-default (wider+deeper) topology.

#!/bin/bash
#
# Train a Kraken recognition model with an explicit train/validation split.
#
# Usage: <this script> [modelname]
#   modelname  basename for the output directory "<modelname>.kraken/" and for
#              the model files written into it (default: herrnhut-kurrent)
#
# Expects the directories "val" and "train" in the current working directory,
# populated with symlinks to the ground-truth files.
#
# Prerequisite (presumably needed for --augment; verify): pip install albumentations

# Abort on the first failing command, on unset variables and on pipeline
# errors, so that ketos is never started with missing or empty file lists.
set -euo pipefail

modelname=${1:-herrnhut-kurrent}
mkdir -p -- "$modelname.kraken/"

# Kraken seeks in the list files, so they must be regular files instead of
# process substitutions. Create them inside a private temporary directory
# (no predictable names) and remove that directory when the script exits.
listdir=$(mktemp -d)
trap 'rm -rf -- "${listdir:?}"' EXIT
vallist=$listdir/val.list
trainlist=$listdir/train.list

# One line per symlink: the starting directory (%H) joined with the link
# target (%l).
find val -type l -printf "%H/%l\n" > "$vallist"
find train -type l -printf "%H/%l\n" > "$trainlist"

options=(
    -d cuda:0
    # extra slow learning rate (huge dataset)
    -r 0.0001
    -o "$modelname.kraken/$modelname"
    --debug
    --log-dir "$modelname.kraken"
    -f page
    # handwriting needs deeper and wider nets
    -s '[1,120,0,1 Cr3,13,32 Do0.1,2 Mp2,2 Cr3,13,32 Do0.1,2 Mp2,2 Cr3,9,64 Do0.1,2 Mp2,2 Cr3,9,64 Do0.1,2 S1(1x0)1,3 Lbx200 Do0.1,2 Lbx200 Do.1,2 Lbx200 Do]'
    # augmentation or not?
    --augment # --no-augment
    --workers 2
    # does not work (Kraken tries to seek these files)
    #-e <(find val -type l -printf "%H/%l\n")
    #-t <(find train -type l -printf "%H/%l\n")
    -e "$vallist" -t "$trainlist"
)

ketos train "${options[@]}"