From e3bb89a2289e41321f4e3a3905982464279d0fec Mon Sep 17 00:00:00 2001 From: Nick White Date: Mon, 31 Jan 2022 12:16:33 +0000 Subject: update traintess and generate_line_box --- generate_line_box.py | 2 -- traintess.sh | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/generate_line_box.py b/generate_line_box.py index 8b7ec74..fe3e26a 100755 --- a/generate_line_box.py +++ b/generate_line_box.py @@ -7,8 +7,6 @@ from PIL import Image import sys import codecs -sys.stdout = codecs.getwriter('utf8')(sys.stdout) -sys.stderr = codecs.getwriter('utf8')(sys.stderr) # # command line arguments diff --git a/traintess.sh b/traintess.sh index 0841899..9f4174d 100755 --- a/traintess.sh +++ b/traintess.sh @@ -1,5 +1,7 @@ #!/bin/sh usage="Usage: $0 gtdir gtevaldir oldtraineddata trainingname" +# add timestamps for log +# ensure exit 1 is gone where it caused failures test $# -ne 4 && echo "$usage" && exit 1 @@ -83,7 +85,7 @@ merge_unicharsets "$name/gt/unicharset" "$name/orig/orig.lstm-unicharset" "$name echo "Making starter training" mkdir -p "$name/starter" -curl -L -f 'https://github.com/tesseract-ocr/langdata_lstm/raw/master/radical-stroke.txt' > "$name/starter/radical-stroke.txt" || exit 1 +curl -L -f 'https://github.com/tesseract-ocr/langdata_lstm/raw/main/radical-stroke.txt' > "$name/starter/radical-stroke.txt" || exit 1 combine_lang_model --input_unicharset "$name/unicharset" --script_dir "$name/starter" --output_dir "$name/starter" --lang "$name" || exit 1 mkdir -p "$name/checkpoint" -- cgit v1.2.1-24-ge1ad