diff options
author | Nick White <git@njw.name> | 2021-02-05 17:15:51 +0000 |
---|---|---|
committer | Nick White <git@njw.name> | 2021-02-05 17:15:51 +0000 |
commit | 11470933e4fd379b4aefa4e2bab33662a72791c2 (patch) | |
tree | 8607e7739989ff63032b9ce10a8bf8553ecc6eb6 /cmd/postprocess-bythresh/main.go | |
parent | 3e7da751b3ca917adb79674eac4ef2a3267e3984 (diff) | |
parent | a8c7481f0dc02bbda3b3a07091e9d61f6eb728b2 (diff) |
Merge branch 'master' of ssh://ssh.phx.nearlyfreespeech.net/home/public/bookpipeline
Diffstat (limited to 'cmd/postprocess-bythresh/main.go')
-rw-r--r-- | cmd/postprocess-bythresh/main.go | 71 |
1 files changed, 32 insertions, 39 deletions
diff --git a/cmd/postprocess-bythresh/main.go b/cmd/postprocess-bythresh/main.go index 37b77e7..5bdb839 100644 --- a/cmd/postprocess-bythresh/main.go +++ b/cmd/postprocess-bythresh/main.go @@ -19,7 +19,6 @@ import ( //TO DO: make writetofile return an error and handle that accordingly // potential TO DO: add text versions where footer is cropped on odd/even pages only - // the trimblanks function trims the blank lines from a text input func trimblanks(hocrfile string) string { @@ -50,7 +49,7 @@ func dehyphenateString(in string) string { words := strings.Split(line, " ") last := words[len(words)-1] // the - 2 here is to account for a trailing newline and counting from zero - if len(last) > 0 && last[len(last) - 1] == '-' && i < len(lines) - 2 { + if len(last) > 0 && last[len(last)-1] == '-' && i < len(lines)-2 { nextwords := strings.Split(lines[i+1], " ") if len(nextwords) > 0 { line = line[0:len(line)-1] + nextwords[0] @@ -66,17 +65,15 @@ func dehyphenateString(in string) string { return strings.Join(newlines, " ") } - // the fullcrop function takes a text input and crops the first and the last line (if text is at least 2 lines long) func fullcrop(noblanks string) string { - alllines := strings.Split(noblanks, "\n") - + if len(alllines) <= 2 { - return "" - } else { - return strings.Join(alllines[1:len(alllines)-2], "\n") + return "" + } else { + return strings.Join(alllines[1:len(alllines)-2], "\n") } } @@ -132,7 +129,6 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string, var killheadtxt string var footkilltxt string - hocrfilepath := filepath.Join(bookdirectory, hocrfilename) confpath := filepath.Join(bookdirectory, "conf") @@ -165,18 +161,16 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string, if err != nil { log.Fatal(err) } - - + trimbest := trimblanks(hocrfiletext) - + alltxt = dehyphenateString(trimbest) - + croptxt = dehyphenateString(fullcrop(trimbest)) - + killheadtxt = dehyphenateString(headcrop(trimbest)) - + footkilltxt = dehyphenateString(footcrop(trimbest)) - } return alltxt, croptxt, killheadtxt, footkilltxt @@ -185,7 +179,7 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string, // the writetofile function takes a directory, filename and text input and creates a text file within the bookdirectory from them. func writetofile(bookdirectory, textfilebase, txt string) error { alltxtfile := filepath.Join(bookdirectory, textfilebase) - + file, err := os.Create(alltxtfile) if err != nil { return fmt.Errorf("Error opening file %s: %v", alltxtfile, err) @@ -194,7 +188,7 @@ func writetofile(bookdirectory, textfilebase, txt string) error { if _, err := file.WriteString(txt); err != nil { log.Println(err) } -return err + return err } @@ -215,7 +209,7 @@ func main() { bookdirectory := flag.Arg(0) confthreshstring := strconv.Itoa(*confthresh) - + fmt.Println("Postprocessing", bookdirectory, "with threshold", *confthresh) bestpath := filepath.Join(bookdirectory, "best") @@ -239,32 +233,31 @@ func main() { crop = crop + " " + croptxt killhead = killhead + " " + killheadtxt killfoot = killfoot + " " + footkilltxt - + } } - - - bookname:= filepath.Base(bookdirectory) - b := bookname + "_" + confthreshstring - err1 := writetofile(bookdirectory, b + "_all.txt", all) - if err1 != nil { + bookname := filepath.Base(bookdirectory) + b := bookname + "_" + confthreshstring + + err1 := writetofile(bookdirectory, b+"_all.txt", all) + if err1 != nil { log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err1) - } - - err2 := writetofile(bookdirectory, b + "_crop.txt", crop) - if err2 != nil { + } + + err2 := writetofile(bookdirectory, b+"_crop.txt", crop) + if err2 != nil { log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err2) - } - - err3 := writetofile(bookdirectory, b + "_nohead.txt", killhead) - if err3 != nil { + } + + err3 := writetofile(bookdirectory, b+"_nohead.txt", killhead) + if err3 != nil { log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err3) - } - - err4 := writetofile(bookdirectory, b + "_nofoot.txt", killfoot) - if err4 != nil { + } + + err4 := writetofile(bookdirectory, b+"_nofoot.txt", killfoot) + if err4 != nil { log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err4) - } + } } |