summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- cmd/getbests/main.go 4
-rw-r--r-- cmd/postprocess-bythresh/main.go 71
-rw-r--r-- internal/pipeline/pipeline.go 4
-rw-r--r-- internal/pipeline/put.go 2
-rw-r--r-- local.go 6
5 files changed, 40 insertions, 47 deletions
diff --git a/cmd/getbests/main.go b/cmd/getbests/main.go
index 9eca0d8..c1ee50d 100644
--- a/cmd/getbests/main.go
+++ b/cmd/getbests/main.go
@@ -62,8 +62,8 @@ func main() {
log.Println("Downloading all best files found")
for _, i := range objs {
parts := strings.Split(i, "/")
- if parts[len(parts) - 1] == "best" {
- err = conn.Download(conn.WIPStorageId(), i, parts[0] + "-best")
+ if parts[len(parts)-1] == "best" {
+ err = conn.Download(conn.WIPStorageId(), i, parts[0]+"-best")
if err != nil {
log.Fatalln("Failed to download file", i, err)
}
diff --git a/cmd/postprocess-bythresh/main.go b/cmd/postprocess-bythresh/main.go
index 37b77e7..5bdb839 100644
--- a/cmd/postprocess-bythresh/main.go
+++ b/cmd/postprocess-bythresh/main.go
@@ -19,7 +19,6 @@ import (
//TO DO: make writetofile return an error and handle that accordingly
// potential TO DO: add text versions where footer is cropped on odd/even pages only
-
// the trimblanks function trims the blank lines from a text input
func trimblanks(hocrfile string) string {
@@ -50,7 +49,7 @@ func dehyphenateString(in string) string {
words := strings.Split(line, " ")
last := words[len(words)-1]
// the - 2 here is to account for a trailing newline and counting from zero
- if len(last) > 0 && last[len(last) - 1] == '-' && i < len(lines) - 2 {
+ if len(last) > 0 && last[len(last)-1] == '-' && i < len(lines)-2 {
nextwords := strings.Split(lines[i+1], " ")
if len(nextwords) > 0 {
line = line[0:len(line)-1] + nextwords[0]
@@ -66,17 +65,15 @@ func dehyphenateString(in string) string {
return strings.Join(newlines, " ")
}
-
// the fullcrop function takes a text input and crops the first and the last line (if text is at least 2 lines long)
func fullcrop(noblanks string) string {
-
alllines := strings.Split(noblanks, "\n")
-
+
if len(alllines) <= 2 {
- return ""
- } else {
- return strings.Join(alllines[1:len(alllines)-2], "\n")
+ return ""
+ } else {
+ return strings.Join(alllines[1:len(alllines)-2], "\n")
}
}
@@ -132,7 +129,6 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string,
var killheadtxt string
var footkilltxt string
-
hocrfilepath := filepath.Join(bookdirectory, hocrfilename)
confpath := filepath.Join(bookdirectory, "conf")
@@ -165,18 +161,16 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string,
if err != nil {
log.Fatal(err)
}
-
-
+
trimbest := trimblanks(hocrfiletext)
-
+
alltxt = dehyphenateString(trimbest)
-
+
croptxt = dehyphenateString(fullcrop(trimbest))
-
+
killheadtxt = dehyphenateString(headcrop(trimbest))
-
+
footkilltxt = dehyphenateString(footcrop(trimbest))
-
}
return alltxt, croptxt, killheadtxt, footkilltxt
@@ -185,7 +179,7 @@ func convertselect(bookdirectory, hocrfilename string, confthresh int) (string,
// the writetofile function takes a directory, filename and text input and creates a text file within the bookdirectory from them.
func writetofile(bookdirectory, textfilebase, txt string) error {
alltxtfile := filepath.Join(bookdirectory, textfilebase)
-
+
file, err := os.Create(alltxtfile)
if err != nil {
return fmt.Errorf("Error opening file %s: %v", alltxtfile, err)
@@ -194,7 +188,7 @@ func writetofile(bookdirectory, textfilebase, txt string) error {
if _, err := file.WriteString(txt); err != nil {
log.Println(err)
}
-return err
+ return err
}
@@ -215,7 +209,7 @@ func main() {
bookdirectory := flag.Arg(0)
confthreshstring := strconv.Itoa(*confthresh)
-
+
fmt.Println("Postprocessing", bookdirectory, "with threshold", *confthresh)
bestpath := filepath.Join(bookdirectory, "best")
@@ -239,32 +233,31 @@ func main() {
crop = crop + " " + croptxt
killhead = killhead + " " + killheadtxt
killfoot = killfoot + " " + footkilltxt
-
+
}
}
-
-
- bookname:= filepath.Base(bookdirectory)
- b := bookname + "_" + confthreshstring
- err1 := writetofile(bookdirectory, b + "_all.txt", all)
- if err1 != nil {
+ bookname := filepath.Base(bookdirectory)
+ b := bookname + "_" + confthreshstring
+
+ err1 := writetofile(bookdirectory, b+"_all.txt", all)
+ if err1 != nil {
log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err1)
- }
-
- err2 := writetofile(bookdirectory, b + "_crop.txt", crop)
- if err2 != nil {
+ }
+
+ err2 := writetofile(bookdirectory, b+"_crop.txt", crop)
+ if err2 != nil {
log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err2)
- }
-
- err3 := writetofile(bookdirectory, b + "_nohead.txt", killhead)
- if err3 != nil {
+ }
+
+ err3 := writetofile(bookdirectory, b+"_nohead.txt", killhead)
+ if err3 != nil {
log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err3)
- }
-
- err4 := writetofile(bookdirectory, b + "_nofoot.txt", killfoot)
- if err4 != nil {
+ }
+
+ err4 := writetofile(bookdirectory, b+"_nofoot.txt", killfoot)
+ if err4 != nil {
log.Fatalf("Ah shit, we're going down, Nick says ABORT! %v", err4)
- }
+ }
}
diff --git a/internal/pipeline/pipeline.go b/internal/pipeline/pipeline.go
index f6598fd..280e4d2 100644
--- a/internal/pipeline/pipeline.go
+++ b/internal/pipeline/pipeline.go
@@ -640,8 +640,8 @@ func ProcessBook(msg bookpipeline.Qmsg, conn Pipeliner, process func(chan string
conn.Log("Failed to get logs ", err2)
logs = ""
}
- msg := fmt.Sprintf("To: %s\r\nFrom: %s\r\n" +
- "Subject: [bookpipeline] Error in wipeonly / preprocessing queue with %s\r\n\r\n" +
+ msg := fmt.Sprintf("To: %s\r\nFrom: %s\r\n"+
+ "Subject: [bookpipeline] Error in wipeonly / preprocessing queue with %s\r\n\r\n"+
" Fail message: %s\r\nFull log:\r\n%s\r\n",
ms.to, ms.from, bookname, err, logs)
host := fmt.Sprintf("%s:%s", ms.server, ms.port)
diff --git a/internal/pipeline/put.go b/internal/pipeline/put.go
index 8ada41f..4b38ea5 100644
--- a/internal/pipeline/put.go
+++ b/internal/pipeline/put.go
@@ -7,8 +7,8 @@ package pipeline
import (
"fmt"
"image"
- _ "image/png"
_ "image/jpeg"
+ _ "image/png"
"os"
"path/filepath"
)
diff --git a/local.go b/local.go
index 0ccc761..e5d9bef 100644
--- a/local.go
+++ b/local.go
@@ -27,7 +27,7 @@ const storageId = "storage"
type LocalConn struct {
// these should be set before running Init(), or left to defaults
TempDir string
- Logger *log.Logger
+ Logger *log.Logger
}
// MinimalInit does the bare minimum initialisation
@@ -184,12 +184,12 @@ func (a *LocalConn) DelFromQueue(url string, handle string) error {
// store the joining of part before and part after handle
var complete string
- if len(s) >= len(handle) + 1 {
+ if len(s) >= len(handle)+1 {
if i > 0 {
complete = s[:i]
}
// the '+1' is for the newline character
- complete += s[i + len(handle) + 1:]
+ complete += s[i+len(handle)+1:]
}
f, err := os.Create(filepath.Join(a.TempDir, url))