diff options
-rwxr-xr-x | traintessv5.sh (renamed from traintessv4.sh) | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/traintessv4.sh b/traintessv5.sh index 92da4be..8b95443 100755 --- a/traintessv4.sh +++ b/traintessv5.sh @@ -8,8 +8,10 @@ test $# -ne 4 && echo "$usage" && exit 1 #extra="--append_index 5" #netspec="[Lfx512 O1c1]" # This fine-tunes the existing layers (copying the existing best/eng netspec) -extra="" netspec="[1,36,0,1 Ct3,3,16 Mp3,3 Lfys64 Lfx96 Lrx96 Lfx512 O1c1]" +extra="" +# (copied from the fast/fra netspec [the best/fra one is absent from the version string]) +#netspec="[1,36,0,1 Ct3,3,16 Mp3,3 Lfys48 Lfx96 Lrx96 Lfx128 O1c1]" iterations=10000 oldtraining="$3" @@ -17,6 +19,11 @@ name="$4" mkdir -p "$name" +if test ! -f "/usr/local/share/tesseract-ocr/tessdata/eng.traineddata"; then + echo "/usr/local/share/tesseract-ocr/tessdata/eng.traineddata not found, needed for lstmf generation, bailing" + exit 1 +fi + printf 'gtdir: %s\ngtevaldir: %s\noldtraineddata: %s\ntrainingname: %s\nnetspec: %s\niterations: %s\nextra_args: %s\n' \ "$1" "$2" "$3" "$4" "$netspec" $iterations "$extra" > "$name/settings" @@ -24,14 +31,14 @@ printf 'gtdir: %s\ngtevaldir: %s\noldtraineddata: %s\ntrainingname: %s\nnetspec: echo "Copying training ground truth" mkdir -p "$name/gt" find "$1" -type f -name '*tif' -o -name '*png' -o -name '*txt' -o -name '*box' | while read i; do - n=`basename "$i" | sed 's/\.bin\.png$/.png/g; s/\.gt\.txt$/.txt/g'` + n=`basename "$i" | sed 's/\.bin\.png$/.png/g; s/\.bin\.txt$/.txt/g; s/\.gt\.txt$/.txt/g'` cp "$i" "$name/gt/$n" done echo "Copying eval ground truth" mkdir -p "$name/eval" find "$2" -type f -name '*tif' -o -name '*png' -o -name '*txt' -o -name '*box' | while read i; do - n=`basename "$i" | sed 's/\.bin\.png$/.png/g; s/\.gt\.txt$/.txt/g'` + n=`basename "$i" | sed 's/\.bin\.png$/.png/g; s/\.bin\.txt$/.txt/g; s/\.gt\.txt$/.txt/g'` cp "$i" "$name/eval/$n" done @@ -44,7 +51,7 @@ find "$name/gt" "$name/eval" -type f -name '*txt' | while read i; do test -f "$d/$b.png" && n="$b.png" test -z "$n" && echo "Skipping $i as no corresponding image found" && continue test -f "$d/$b.box" && echo "Skipping $i as box file already present" && continue - python ~/training/generate_line_box.py -i "$d/$n" -t "$i" > "$d/$b.box" || exit 1 + python ~/bigboy/othertools/generate_line_box.py -i "$d/$n" -t "$i" > "$d/$b.box" || exit 1 done echo "Making unicharset" |