diff options
-rw-r--r-- | cmd/getbests/main.go | 4 | ||||
-rw-r--r-- | cmd/postprocess-bythresh/main.go | 71 | ||||
-rw-r--r-- | internal/pipeline/pipeline.go | 4 | ||||
-rw-r--r-- | internal/pipeline/put.go | 2 | ||||
-rw-r--r-- | local.go | 6 |
5 files changed, 40 insertions, 47 deletions
diff --git a/cmd/getbests/main.go b/cmd/getbests/main.go index 9eca0d8..c1ee50d 100644 --- a/cmd/getbests/main.go +++ b/cmd/getbests/main.go @@ -62,8 +62,8 @@ func main() { log.Println("Downloading all best files found") for _, i := range objs { parts := strings.Split(i, "/") - if parts[len(parts) - 1] == "best" { - err = conn.Download(conn.WIPStorageId(), i, parts[0] + "-best") + if parts[len(parts)-1] == "best" { + err = conn.Download(conn.WIPStorageId(), i, parts[0]+"-best") if err != nil { log.Fatalln("Failed to download file", i, err) } diff --git a/cmd/postprocess-bythresh/main.go b/cmd/postprocess-bythresh/main.go index 37b77e7..5bdb839 100644 --- a/cmd/postprocess-bythresh/main.go +++ b/cmd/postprocess-bythresh/main.go @@ -19,7 +19,6 @@ import ( //TO DO: make writetofile return an error and handle that accordingly // potential TO DO: add text versions where footer is cropped on odd/even pages only - // the trimblanks function trims the blank lines from a text input func trimblanks(hocrfile string) string { @@ -50,7 +49,7 @@ func dehyphenateString(in string) string { words := strings.Split(line, " ") last := words[len(words)-1] // the - 2 here is to account for a trailing newline and counting from zero - if len(last) > 0 && last[len(last) - 1] == '-' && i < len(lines) - 2 { + if len(last) > 0 && last[len(last)-1] == '-' && i < len(lines)-2 { nextwords := strings.Split(lines[i+1], " ") if len(nextwords) > 0 { line = line[0:len(line)-1] + nextwords[0] @@ -66,17 +65,15 @@ func dehyphenateString(in string) string { return strings.Join(newlines, " ") } - // the fullcrop function takes a text input and crops the first and the last line (if text is at least 2 lines long) func fullcrop(noblanks string) string { - alllines := strings.Split(noblanks, "\n") - + if len(alllines) <= 2 { - return "" - } else { - return strings.Join(alllines[1:len(alllines)-2], "\n") + return "" + } else { + return strings.Join(alllines[1:len(alllines)-2], "\n") } } @@ -132,7 +129,6 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string, var killheadtxt string var footkilltxt string - hocrfilepath := filepath.Join(bookdirectory, hocrfilename) confpath := filepath.Join(bookdirectory, "conf") @@ -165,18 +161,16 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string, if err != nil { log.Fatal(err) } - - + trimbest := trimblanks(hocrfiletext) - + alltxt = dehyphenateString(trimbest) - + croptxt = dehyphenateString(fullcrop(trimbest)) - + killheadtxt = dehyphenateString(headcrop(trimbest)) - + footkilltxt = dehyphenateString(footcrop(trimbest)) - } return alltxt, croptxt, killheadtxt, footkilltxt @@ -185,7 +179,7 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string, // the writetofile function takes a directory, filename and text input and creates a text file within the bookdirectory from them. func writetofile(bookdirectory, textfilebase, txt string) error { alltxtfile := filepath.Join(bookdirectory, textfilebase) - + file, err := os.Create(alltxtfile) if err != nil { return fmt.Errorf("Error opening file %s: %v", alltxtfile, err) @@ -194,7 +188,7 @@ func writetofile(bookdirectory, textfilebase, txt string) error { if _, err := file.WriteString(txt); err != nil { log.Println(err) } -return err + return err } @@ -215,7 +209,7 @@ func main() { bookdirectory := flag.Arg(0) confthreshstring := strconv.Itoa(*confthresh) - + fmt.Println("Postprocessing", bookdirectory, "with threshold", *confthresh) bestpath := filepath.Join(bookdirectory, "best") @@ -239,32 +233,31 @@ func main() { crop = crop + " " + croptxt killhead = killhead + " " + killheadtxt killfoot = killfoot + " " + footkilltxt - + } } - - - bookname:= filepath.Base(bookdirectory) - b := bookname + "_" + confthreshstring - err1 := writetofile(bookdirectory, b + "_all.txt", all) - if err1 != nil { + bookname := filepath.Base(bookdirectory) + b := bookname + "_" + confthreshstring + + err1 := writetofile(bookdirectory, b+"_all.txt", all) + if err1 != nil { log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err1) - } - - err2 := writetofile(bookdirectory, b + "_crop.txt", crop) - if err2 != nil { + } + + err2 := writetofile(bookdirectory, b+"_crop.txt", crop) + if err2 != nil { log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err2) - } - - err3 := writetofile(bookdirectory, b + "_nohead.txt", killhead) - if err3 != nil { + } + + err3 := writetofile(bookdirectory, b+"_nohead.txt", killhead) + if err3 != nil { log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err3) - } - - err4 := writetofile(bookdirectory, b + "_nofoot.txt", killfoot) - if err4 != nil { + } + + err4 := writetofile(bookdirectory, b+"_nofoot.txt", killfoot) + if err4 != nil { log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err4) - } + } } diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go index f6598fd..280e4d2 100644 --- a/internal/pipeline/pipeline.go +++ b/internal/pipeline/pipeline.go @@ -640,8 +640,8 @@ func ProcessBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string conn.Log("Failed to get logs ", err2) logs = "" } - msg := fmt.Sprintf("To: %s\r\nFrom: %s\r\n" + - "Subject: [bookpipeline] Error in wipeonly / preprocessing queue with %s\r\n\r\n" + + msg := fmt.Sprintf("To: %s\r\nFrom: %s\r\n"+ + "Subject: [bookpipeline] Error in wipeonly / preprocessing queue with %s\r\n\r\n"+ " Fail message: %s\r\nFull log:\r\n%s\r\n", ms.to, ms.from, bookname, err, logs) host := fmt.Sprintf("%s:%s", ms.server, ms.port) diff --git a/internal/pipeline/put.go b/internal/pipeline/put.go index 8ada41f..4b38ea5 100644 --- a/internal/pipeline/put.go +++ b/internal/pipeline/put.go @@ -7,8 +7,8 @@ package pipeline import ( "fmt" "image" - _ "image/png" _ "image/jpeg" + _ "image/png" "os" "path/filepath" ) @@ -27,7 +27,7 @@ const storageId = "storage" type LocalConn struct { // these should be set before running Init(), or left to defaults TempDir string - Logger *log.Logger + Logger *log.Logger } // MinimalInit does the bare minimum initialisation @@ -184,12 +184,12 @@ func (a *LocalConn) DelFromQueue(url string, handle string) error { // store the joining of part before and part after handle var complete string - if len(s) >= len(handle) + 1 { + if len(s) >= len(handle)+1 { if i > 0 { complete = s[:i] } // the '+1' is for the newline character - complete += s[i + len(handle) + 1:] + complete += s[i+len(handle)+1:] } f, err := os.Create(filepath.Join(a.TempDir, url)) |