diff options
| -rwxr-xr-x | traintessv5.sh (renamed from traintessv4.sh) | 15 | 
1 file changed, 11 insertions, 4 deletions
diff --git a/traintessv4.sh b/traintessv5.sh
index 92da4be..8b95443 100755
--- a/traintessv4.sh
+++ b/traintessv5.sh
@@ -8,8 +8,10 @@ test $# -ne 4 && echo "$usage" && exit 1
 #extra="--append_index 5"
 #netspec="[Lfx512 O1c1]"
 # This fine-tunes the existing layers (copying the existing best/eng netspec)
-extra=""
 netspec="[1,36,0,1 Ct3,3,16 Mp3,3 Lfys64 Lfx96 Lrx96 Lfx512 O1c1]"
+extra=""
+# (copied from the fast/fra netspec [the best/fra one is absent from the version string])
+#netspec="[1,36,0,1 Ct3,3,16 Mp3,3 Lfys48 Lfx96 Lrx96 Lfx128 O1c1]"
 iterations=10000
 
 oldtraining="$3"
@@ -17,6 +19,11 @@ name="$4"
 
 mkdir -p "$name"
 
+if test ! -f "/usr/local/share/tesseract-ocr/tessdata/eng.traineddata"; then
+	echo "/usr/local/share/tesseract-ocr/tessdata/eng.traineddata not found, needed for lstmf generation, bailing"
+	exit 1
+fi
+
 printf 'gtdir: %s\ngtevaldir: %s\noldtraineddata: %s\ntrainingname: %s\nnetspec: %s\niterations: %s\nextra_args: %s\n' \
 	"$1" "$2" "$3" "$4" "$netspec" $iterations "$extra" > "$name/settings"
 
@@ -24,14 +31,14 @@ printf 'gtdir: %s\ngtevaldir: %s\noldtraineddata: %s\ntrainingname: %s\nnetspec:
 echo "Copying training ground truth"
 mkdir -p "$name/gt"
 find "$1" -type f -name '*tif' -o -name '*png' -o -name '*txt' -o -name '*box' | while read i; do
-	n=`basename "$i" | sed 's/\.bin\.png$/.png/g; s/\.gt\.txt$/.txt/g'`
+	n=`basename "$i" | sed 's/\.bin\.png$/.png/g; s/\.bin\.txt$/.txt/g; s/\.gt\.txt$/.txt/g'`
 	cp "$i" "$name/gt/$n"
 done
 
 echo "Copying eval ground truth"
 mkdir -p "$name/eval"
 find "$2" -type f -name '*tif' -o -name '*png' -o -name '*txt' -o -name '*box' | while read i; do
-	n=`basename "$i" | sed 's/\.bin\.png$/.png/g; s/\.gt\.txt$/.txt/g'`
+	n=`basename "$i" | sed 's/\.bin\.png$/.png/g; s/\.bin\.txt$/.txt/g; s/\.gt\.txt$/.txt/g'`
 	cp "$i" "$name/eval/$n"
 done
 
@@ -44,7 +51,7 @@ find "$name/gt" "$name/eval" -type f -name '*txt' | while read i; do
 	test -f "$d/$b.png" && n="$b.png"
 	test -z "$n" && echo "Skipping $i as no corresponding image found" && continue
 	test -f "$d/$b.box" && echo "Skipping $i as box file already present" && continue
-	python ~/training/generate_line_box.py -i "$d/$n" -t "$i" > "$d/$b.box" || exit 1
+	python ~/bigboy/othertools/generate_line_box.py -i "$d/$n" -t "$i" > "$d/$b.box" || exit 1
 done
 
 echo "Making unicharset"
