summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--avg-lines/html.go61
-rw-r--r--avg-lines/main.go69
-rw-r--r--aws.go (renamed from bookpipeline/aws.go)0
-rw-r--r--bucket-lines/bucket.go131
-rw-r--r--bucket-lines/main.go87
-rw-r--r--cmd/bookpipeline/main.go (renamed from bookpipeline/cmd/bookpipeline/main.go)4
-rw-r--r--cmd/booktopipeline/main.go (renamed from bookpipeline/cmd/booktopipeline/main.go)0
-rw-r--r--cmd/confgraph/main.go (renamed from bookpipeline/cmd/confgraph/main.go)2
-rw-r--r--cmd/getpipelinebook/main.go (renamed from bookpipeline/cmd/getpipelinebook/main.go)2
-rw-r--r--cmd/lspipeline/main.go (renamed from bookpipeline/cmd/lspipeline/main.go)2
-rw-r--r--cmd/mkpipeline/main.go (renamed from bookpipeline/cmd/mkpipeline/main.go)0
-rw-r--r--dehyphenate/main.go63
-rw-r--r--eeboxmltohocr/main.go135
-rw-r--r--graph.go (renamed from bookpipeline/graph.go)0
-rw-r--r--hocrtotxt/main.go30
-rw-r--r--integralimg/integralimg.go169
-rw-r--r--lib/hocr/hocr.go129
-rw-r--r--lib/hocr/lines.go131
-rw-r--r--lib/line/line.go57
-rw-r--r--lib/prob/prob.go69
-rw-r--r--pgconf/main.go30
-rw-r--r--preproc/cmd/binarize/main.go78
-rw-r--r--preproc/cmd/preproc/main.go90
-rw-r--r--preproc/cmd/preprocmulti/main.go101
-rw-r--r--preproc/cmd/wipe/main.go55
-rw-r--r--preproc/preprocmulti.go94
-rw-r--r--preproc/sauvola.go76
-rw-r--r--preproc/sauvola_test.go70
-rw-r--r--preproc/test_helpers.go53
-rw-r--r--preproc/testdata/pg1.pngbin651071 -> 0 bytes
-rw-r--r--preproc/testdata/pg1_integralsauvola_k0.3_w19.pngbin19456 -> 0 bytes
-rw-r--r--preproc/testdata/pg1_integralsauvola_k0.5_w19.pngbin18241 -> 0 bytes
-rw-r--r--preproc/testdata/pg1_integralsauvola_k0.5_w41.pngbin18260 -> 0 bytes
-rw-r--r--preproc/testdata/pg1_sauvola_k0.3_w19.pngbin19447 -> 0 bytes
-rw-r--r--preproc/testdata/pg1_sauvola_k0.5_w19.pngbin18231 -> 0 bytes
-rw-r--r--preproc/testdata/pg1_sauvola_k0.5_w41.pngbin18275 -> 0 bytes
-rw-r--r--preproc/testdata/pg2.pngbin30803 -> 0 bytes
-rw-r--r--preproc/testdata/pg2_integralwipesides_t0.02_w5.pngbin33595 -> 0 bytes
-rw-r--r--preproc/testdata/pg2_integralwipesides_t0.05_w25.pngbin33432 -> 0 bytes
-rw-r--r--preproc/testdata/pg2_integralwipesides_t0.05_w5.pngbin14546 -> 0 bytes
-rw-r--r--preproc/util.go95
-rw-r--r--preproc/wipesides.go160
-rw-r--r--preproc/wipesides_test.go57
43 files changed, 4 insertions, 2096 deletions
diff --git a/avg-lines/html.go b/avg-lines/html.go
deleted file mode 100644
index 443cc4a..0000000
--- a/avg-lines/html.go
+++ /dev/null
@@ -1,61 +0,0 @@
-package main
-
-import (
- "fmt"
- "os"
- "path/filepath"
-
- "rescribe.xyz/go.git/lib/line"
-)
-
-func copylineimg(fn string, l line.Detail) error {
- f, err := os.Create(fn)
- if err != nil {
- return err
- }
- defer f.Close()
-
- return l.Img.CopyLineTo(f)
-}
-
-func htmlout(dir string, lines line.Details) error {
- err := os.MkdirAll(dir, 0700)
- if err != nil {
- return err
- }
-
- fn := filepath.Join(dir, "index.html")
- f, err := os.Create(fn)
- if err != nil {
- return err
- }
- defer f.Close()
-
- _, err = fmt.Fprintf(f, "<!DOCTYPE html><html><head><meta charset='UTF-8'><title></title>"+
- "<style>td {border: 1px solid #444}</style></head><body>\n<table>\n")
- if err != nil {
- return err
- }
- for _, l := range lines {
- fn = filepath.Base(l.OcrName) + "_" + l.Name + ".png"
- err = copylineimg(filepath.Join(dir, fn), l)
- if err != nil {
- return err
- }
- _, err = fmt.Fprintf(f, "<tr>\n"+
- "<td><h1>%.4f%%</h1></td>\n"+
- "<td>%s %s</td>\n"+
- "<td><img src='%s' width='100%%' /><br />%s</td>\n"+
- "</tr>\n",
- l.Avgconf, l.OcrName, l.Name, fn, l.Text)
- if err != nil {
- return err
- }
- }
- _, err = fmt.Fprintf(f, "</table>\n</body></html>\n")
- if err != nil {
- return err
- }
-
- return nil
-}
diff --git a/avg-lines/main.go b/avg-lines/main.go
deleted file mode 100644
index 14b21bd..0000000
--- a/avg-lines/main.go
+++ /dev/null
@@ -1,69 +0,0 @@
-package main
-
-import (
- "flag"
- "fmt"
- "log"
- "os"
- "path/filepath"
- "sort"
-
- "rescribe.xyz/go.git/lib/hocr"
- "rescribe.xyz/go.git/lib/line"
- "rescribe.xyz/go.git/lib/prob"
-)
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: avg-lines [-html dir] [-nosort] [prob1] [hocr1] [prob2] [...]\n")
- fmt.Fprintf(os.Stderr, "Prints a report of the average confidence for each line, sorted\n")
- fmt.Fprintf(os.Stderr, "from worst to best.\n")
- fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n")
- fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n")
- fmt.Fprintf(os.Stderr, "The .prob files are generated using ocropy-rpred's --probabilities\n")
- fmt.Fprintf(os.Stderr, "option.\n\n")
- flag.PrintDefaults()
- }
- var html = flag.String("html", "", "Output in html format to the specified directory")
- var nosort = flag.Bool("nosort", false, "Don't sort lines by confidence")
- flag.Parse()
- if flag.NArg() < 1 {
- flag.Usage()
- os.Exit(1)
- }
-
- var err error
- lines := make(line.Details, 0)
-
- for _, f := range flag.Args() {
- var newlines line.Details
- switch ext := filepath.Ext(f); ext {
- case ".prob":
- newlines, err = prob.GetLineDetails(f)
- case ".hocr":
- newlines, err = hocr.GetLineDetails(f)
- default:
- log.Printf("Skipping file '%s' as it isn't a .prob or .hocr\n", f)
- continue
- }
- if err != nil {
- log.Fatal(err)
- }
-
- for _, l := range newlines {
- lines = append(lines, l)
- }
- }
-
- if *nosort == false {
- sort.Sort(lines)
- }
-
- if *html == "" {
- for _, l := range lines {
- fmt.Printf("%s %s: %.2f%%\n", l.OcrName, l.Name, l.Avgconf)
- }
- } else {
- htmlout(*html, lines)
- }
-}
diff --git a/bookpipeline/aws.go b/aws.go
index 0127d6e..0127d6e 100644
--- a/bookpipeline/aws.go
+++ b/aws.go
diff --git a/bucket-lines/bucket.go b/bucket-lines/bucket.go
deleted file mode 100644
index 9f98887..0000000
--- a/bucket-lines/bucket.go
+++ /dev/null
@@ -1,131 +0,0 @@
-package main
-
-import (
- "fmt"
- "io"
- "os"
- "path/filepath"
- "sort"
- "strconv"
-
- "rescribe.xyz/go.git/lib/line"
-)
-
-type BucketSpec struct {
- Min float64
- Name string
-}
-type BucketSpecs []BucketSpec
-
-func (b BucketSpecs) Len() int { return len(b) }
-func (b BucketSpecs) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
-func (b BucketSpecs) Less(i, j int) bool { return b[i].Min < b[j].Min }
-
-type BucketStat struct {
- name string
- num int
-}
-type BucketStats []BucketStat
-
-func (b BucketStats) Len() int { return len(b) }
-func (b BucketStats) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
-func (b BucketStats) Less(i, j int) bool { return b[i].num < b[j].num }
-
-// Copies the image and text for a line into a directory based on
-// the line confidence, as defined by the buckets struct
-func bucketLine(l line.Detail, buckets BucketSpecs, dirname string) (string, error) {
- var bucket string
-
- todir := ""
- for _, b := range buckets {
- if l.Avgconf >= b.Min {
- todir = b.Name
- bucket = b.Name
- }
- }
-
- if todir == "" {
- return bucket, nil
- }
-
- avgstr := strconv.FormatFloat(l.Avgconf, 'G', -1, 64)
- if len(avgstr) > 2 {
- avgstr = avgstr[2:]
- }
-
- base := filepath.Join(dirname, todir, l.OcrName+"_"+l.Name+"_"+avgstr)
-
- err := os.MkdirAll(filepath.Join(dirname, todir), 0700)
- if err != nil {
- return bucket, err
- }
-
- f, err := os.Create(base + ".png")
- if err != nil {
- return bucket, err
- }
- defer f.Close()
-
- err = l.Img.CopyLineTo(f)
- if err != nil {
- return bucket, err
- }
-
- f, err = os.Create(base + ".txt")
- if err != nil {
- return bucket, err
- }
- defer f.Close()
-
- _, err = io.WriteString(f, l.Text)
- if err != nil {
- return bucket, err
- }
-
- return bucket, err
-}
-
-// Copies line images and text into directories based on their
-// confidence, as defined by the buckets struct, and returns
-// statistics of whire lines went in the process.
-func BucketUp(lines line.Details, buckets BucketSpecs, dirname string) (BucketStats, error) {
- var all []string
- var stats BucketStats
-
- sort.Sort(lines)
- sort.Sort(buckets)
- for _, l := range lines {
- bname, err := bucketLine(l, buckets, dirname)
- if err != nil {
- return stats, err
- }
- all = append(all, bname)
- }
-
- for _, b := range all {
- i := sort.Search(len(stats), func(i int) bool { return stats[i].name == b })
- if i == len(stats) {
- newstat := BucketStat{b, 0}
- stats = append(stats, newstat)
- i = len(stats) - 1
- }
- stats[i].num++
- }
-
- return stats, nil
-}
-
-// Prints statistics of where lines went when bucketing
-func PrintBucketStats(w io.Writer, stats BucketStats) {
- var total int
- for _, s := range stats {
- total += s.num
- }
-
- fmt.Fprintf(w, "Copied %d lines\n", total)
- fmt.Fprintf(w, "---------------------------------\n")
- sort.Sort(stats)
- for _, s := range stats {
- fmt.Fprintf(w, "Lines in %7s: %2d%%\n", s.name, 100*s.num/total)
- }
-}
diff --git a/bucket-lines/main.go b/bucket-lines/main.go
deleted file mode 100644
index 990e84c..0000000
--- a/bucket-lines/main.go
+++ /dev/null
@@ -1,87 +0,0 @@
-package main
-
-import (
- "encoding/json"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "os"
- "path/filepath"
-
- "rescribe.xyz/go.git/lib/hocr"
- "rescribe.xyz/go.git/lib/line"
- "rescribe.xyz/go.git/lib/prob"
-)
-
-func main() {
- b := BucketSpecs{
- // minimum confidence, name
- {0, "bad"},
- {0.95, "95to98"},
- {0.98, "98plus"},
- }
-
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: bucket-lines [-d dir] [-s specs.json] [hocr1] [prob1] [hocr2] [...]\n")
- fmt.Fprintf(os.Stderr, "Copies image-text line pairs into different directories according\n")
- fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n")
- fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n")
- fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n")
- fmt.Fprintf(os.Stderr, "The .prob files are generated using ocropy-rpred's --probabilities\n")
- fmt.Fprintf(os.Stderr, "option.\n")
- fmt.Fprintf(os.Stderr, "The .prob and .hocr files are assumed to be in the same directory\n")
- fmt.Fprintf(os.Stderr, "as the line's image and text files.\n\n")
- flag.PrintDefaults()
- fmt.Fprintf(os.Stderr, "\nAn example specs.json file would be the following:\n")
- fmt.Fprintf(os.Stderr, "[{\"min\": 0, \"name\": \"terrible\"}, {\"min\": 0.80, \"name\": \"ok\"}, {\"min\": 0.98, \"name\": \"great\"}]\n")
- }
- dir := flag.String("d", "buckets", "Directory to store the buckets")
- specs := flag.String("s", "", "JSON file describing specs to bucket into")
- flag.Parse()
- if flag.NArg() < 1 {
- flag.Usage()
- os.Exit(1)
- }
-
- if *specs != "" {
- js, err := ioutil.ReadFile(*specs)
- if err != nil {
- log.Fatal(err)
- }
- err = json.Unmarshal(js, &b)
- if err != nil {
- log.Fatal(err)
- }
- }
-
- var err error
- lines := make(line.Details, 0)
-
- for _, f := range flag.Args() {
- var newlines line.Details
- switch ext := filepath.Ext(f); ext {
- case ".prob":
- newlines, err = prob.GetLineDetails(f)
- case ".hocr":
- newlines, err = hocr.GetLineDetails(f)
- default:
- log.Printf("Skipping file '%s' as it isn't a .prob or .hocr\n", f)
- continue
- }
- if err != nil {
- log.Fatal(err)
- }
-
- for _, l := range newlines {
- lines = append(lines, l)
- }
- }
-
- stats, err := BucketUp(lines, b, *dir)
- if err != nil {
- log.Fatal(err)
- }
-
- PrintBucketStats(os.Stdout, stats)
-}
diff --git a/bookpipeline/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go
index 59ece72..f445547 100644
--- a/bookpipeline/cmd/bookpipeline/main.go
+++ b/cmd/bookpipeline/main.go
@@ -1,7 +1,5 @@
package main
-// TODO: check if images are prebinarised and if so skip multiple binarisation
-
import (
"errors"
"flag"
@@ -14,7 +12,7 @@ import (
"strings"
"time"
- "rescribe.xyz/go.git/bookpipeline"
+ "rescribe.xyz/bookpipeline"
"rescribe.xyz/go.git/lib/hocr"
"rescribe.xyz/go.git/preproc"
)
diff --git a/bookpipeline/cmd/booktopipeline/main.go b/cmd/booktopipeline/main.go
index 6d9f146..6d9f146 100644
--- a/bookpipeline/cmd/booktopipeline/main.go
+++ b/cmd/booktopipeline/main.go
diff --git a/bookpipeline/cmd/confgraph/main.go b/cmd/confgraph/main.go
index b60821e..474c0a2 100644
--- a/bookpipeline/cmd/confgraph/main.go
+++ b/cmd/confgraph/main.go
@@ -8,7 +8,7 @@ import (
"path/filepath"
"strings"
- "rescribe.xyz/go.git/bookpipeline"
+ "rescribe.xyz/bookpipeline"
"rescribe.xyz/go.git/lib/hocr"
)
diff --git a/bookpipeline/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go
index 66e3f70..9e900bf 100644
--- a/bookpipeline/cmd/getpipelinebook/main.go
+++ b/cmd/getpipelinebook/main.go
@@ -8,7 +8,7 @@ import (
"os"
"path/filepath"
- "rescribe.xyz/go.git/bookpipeline"
+ "rescribe.xyz/bookpipeline"
)
const usage = "Usage: getpipelinebook [-a] [-v] bookname\n\nDownloads the pipeline results for a book.\n"
diff --git a/bookpipeline/cmd/lspipeline/main.go b/cmd/lspipeline/main.go
index 46a1d63..0e1ebb0 100644
--- a/bookpipeline/cmd/lspipeline/main.go
+++ b/cmd/lspipeline/main.go
@@ -7,7 +7,7 @@ import (
"os/exec"
"strings"
- "rescribe.xyz/go.git/bookpipeline"
+ "rescribe.xyz/bookpipeline"
)
const usage = `Usage: lspipeline [-i key] [-n num]
diff --git a/bookpipeline/cmd/mkpipeline/main.go b/cmd/mkpipeline/main.go
index e37a56d..e37a56d 100644
--- a/bookpipeline/cmd/mkpipeline/main.go
+++ b/cmd/mkpipeline/main.go
diff --git a/dehyphenate/main.go b/dehyphenate/main.go
deleted file mode 100644
index 4393c8f..0000000
--- a/dehyphenate/main.go
+++ /dev/null
@@ -1,63 +0,0 @@
-package main
-
-import (
- "encoding/xml"
- "flag"
- "fmt"
- "io/ioutil"
- "log"
- "os"
-
- "rescribe.xyz/go.git/lib/hocr"
-)
-
-// BUGS:
-// - loses all elements not captured in hocr structure such as html headings
-// might be best to copy the header and footer separately and put the hocr in between, but would still need to ensure all elements are captured
-// - loses any formatting; doesn't need to be identical, but e.g. linebreaks after elements would be handy
-// - need to handle OcrChar
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: dehyphenate hocrin hocrout\n")
- fmt.Fprintf(os.Stderr, "Dehyphenates a hocr file.\n")
- flag.PrintDefaults()
- }
- flag.Parse()
- if flag.NArg() != 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- in, err := ioutil.ReadFile(flag.Arg(0))
- if err != nil {
- log.Fatalf("Error reading %s: %v", flag.Arg(1), err)
- }
- h, err := hocr.Parse(in)
- if err != nil {
- log.Fatal(err)
- }
-
- for i, l := range h.Lines {
- w := l.Words[len(l.Words)-1]
- if len(w.Chars) == 0 {
- if len(w.Text) > 0 && w.Text[len(w.Text) - 1] == '-' {
- h.Lines[i].Words[len(l.Words)-1].Text = w.Text[0:len(w.Text)-1] + h.Lines[i+1].Words[0].Text
- h.Lines[i+1].Words[0].Text = ""
- }
- } else {
- log.Printf("TODO: handle OcrChar")
- }
- }
-
- f, err := os.Create(flag.Arg(1))
- if err != nil {
- log.Fatalf("Error creating file %s: %v", flag.Arg(1), err)
- }
- defer f.Close()
- e := xml.NewEncoder(f)
- err = e.Encode(h)
- if err != nil {
- log.Fatalf("Error encoding XML: %v", err)
- }
-}
diff --git a/eeboxmltohocr/main.go b/eeboxmltohocr/main.go
deleted file mode 100644
index 2761cd9..0000000
--- a/eeboxmltohocr/main.go
+++ /dev/null
@@ -1,135 +0,0 @@
-package main
-
-import (
- "bufio"
- "flag"
- "fmt"
- "io"
- "log"
- "os"
- "regexp"
- "strconv"
- "strings"
-)
-
-// splitByPb is a split function for the scanner that splits by the
-// '<pb' token.
-func splitByPb(data []byte, atEOF bool) (advance int, token []byte, err error) {
- if atEOF && len(data) == 0 {
- return 0, nil, nil
- }
- if i := strings.Index(string(data[:]), "<pb"); i >= 0 {
- return i + 1, data[0:i], nil
- }
- // If we're at EOF, we have a final section, so just return the lot.
- if atEOF {
- return len(data), data, nil
- }
- // Request more data.
- return 0, nil, nil
-}
-
-type Page struct {
- number int
- text string
-}
-
-func addPage(pgs *[]Page, number int, text string) {
- added := 0
- for i, pg := range *pgs {
- if pg.number == number {
- (*pgs)[i].text = pg.text + text
- added = 1
- }
- }
- if added == 0 {
- newpg := Page{number, text}
- *pgs = append(*pgs, newpg)
- }
-}
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: eeboxmltohocr in.xml outbase\n")
- flag.PrintDefaults()
- }
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- scanner := bufio.NewScanner(f)
-
- scanner.Split(splitByPb)
-
- var pgs []Page
-
- for scanner.Scan() {
- t := scanner.Text()
- r := regexp.MustCompile(`pb [^>]*facs="tcp:.*?:(.*?)"`).FindStringSubmatch(t)
- if len(r) <= 1 {
- continue
- }
- pgnum, err := strconv.Atoi(r[1])
- if err != nil {
- continue
- }
-
- content := t[strings.Index(t, ">")+1:]
- ungap := regexp.MustCompile(`(?s)<gap[ >].+?</gap>`).ReplaceAllString(content, "")
- unxml := regexp.MustCompile(`<.+?>`).ReplaceAllString(ungap, "")
-
- finaltxt := strings.TrimLeft(unxml, " \n")
- if len(finaltxt) == 0 {
- continue
- }
-
- addPage(&pgs, pgnum, finaltxt)
- }
-
- for _, pg := range pgs {
- fn := fmt.Sprintf("%s-%03d.hocr", flag.Arg(1), pg.number - 1)
- f, err := os.Create(fn)
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", fn, err)
- }
- defer f.Close()
-
- _, err = io.WriteString(f, hocrHeader + pg.text + hocrFooter)
- if err != nil {
- log.Fatalf("Could not write file %s: %v\n", fn, err)
- }
- }
-}
-
-const hocrHeader = `<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
- <head>
- <title></title>
- <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/>
- <meta name='ocr-system' content='tesseract 4.0.0' />
- <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word ocrp_wconf'/>
- </head>
- <body>
- <div class='ocr_page' id='page_1' title='bbox 0 0 600 1200'>
- <div class='ocr_carea' id='block_1_1' title="bbox 0 0 600 1200">
- <p class='ocr_par' id='par_1_1' lang='lat' title="bbox 0 0 600 1200">
- <span class='ocr_line' id='line_1_1' title="bbox 0 0 600 1200"
->
- <span class='ocrx_word' id='word_1_1' title='bbox 0 0 600 1200'>`
-
-const hocrFooter = `</span>
- </span>
- </p>
- </div>
- </div>
- </body>
-</html>`
diff --git a/bookpipeline/graph.go b/graph.go
index 955abbd..955abbd 100644
--- a/bookpipeline/graph.go
+++ b/graph.go
diff --git a/hocrtotxt/main.go b/hocrtotxt/main.go
deleted file mode 100644
index 6821a9e..0000000
--- a/hocrtotxt/main.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package main
-
-import (
- "flag"
- "fmt"
- "log"
- "os"
-
- "rescribe.xyz/go.git/lib/hocr"
-)
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: hocrtotxt hocrfile\n")
- fmt.Fprintf(os.Stderr, "Prints the text from a hocr file.\n")
- flag.PrintDefaults()
- }
- flag.Parse()
- if flag.NArg() != 1 {
- flag.Usage()
- os.Exit(1)
- }
-
- text, err := hocr.GetText(flag.Arg(0))
- if err != nil {
- log.Fatal(err)
- }
-
- fmt.Printf("%s\n", text)
-}
diff --git a/integralimg/integralimg.go b/integralimg/integralimg.go
deleted file mode 100644
index 406ed61..0000000
--- a/integralimg/integralimg.go
+++ /dev/null
@@ -1,169 +0,0 @@
-package integralimg
-
-import (
- "image"
- "math"
-)
-
-// I is the Integral Image
-type I [][]uint64
-
-// Sq contains an Integral Image and its Square
-type WithSq struct {
- Img I
- Sq I
-}
-
-// Window is a part of an Integral Image
-type Window struct {
- topleft uint64
- topright uint64
- bottomleft uint64
- bottomright uint64
- width int
- height int
-}
-
-// ToIntegralImg creates an integral image
-func ToIntegralImg(img *image.Gray) I {
- var integral I
- var oldy, oldx, oldxy uint64
- b := img.Bounds()
- for y := b.Min.Y; y < b.Max.Y; y++ {
- newrow := []uint64{}
- for x := b.Min.X; x < b.Max.X; x++ {
- oldx, oldy, oldxy = 0, 0, 0
- if x > 0 {
- oldx = newrow[x-1]
- }
- if y > 0 {
- oldy = integral[y-1][x]
- }
- if x > 0 && y > 0 {
- oldxy = integral[y-1][x-1]
- }
- pixel := uint64(img.GrayAt(x, y).Y)
- i := pixel + oldx + oldy - oldxy
- newrow = append(newrow, i)
- }
- integral = append(integral, newrow)
- }
- return integral
-}
-
-// ToSqIntegralImg creates an integral image of the square of all
-// pixel values
-func ToSqIntegralImg(img *image.Gray) I {
- var integral I
- var oldy, oldx, oldxy uint64
- b := img.Bounds()
- for y := b.Min.Y; y < b.Max.Y; y++ {
- newrow := []uint64{}
- for x := b.Min.X; x < b.Max.X; x++ {
- oldx, oldy, oldxy = 0, 0, 0
- if x > 0 {
- oldx = newrow[x-1]
- }
- if y > 0 {
- oldy = integral[y-1][x]
- }
- if x > 0 && y > 0 {
- oldxy = integral[y-1][x-1]
- }
- pixel := uint64(img.GrayAt(x, y).Y)
- i := pixel * pixel + oldx + oldy - oldxy
- newrow = append(newrow, i)
- }
- integral = append(integral, newrow)
- }
- return integral
-}
-
-// ToAllIntegralImg creates a WithSq containing a regular and
-// squared Integral Image
-func ToAllIntegralImg(img *image.Gray) WithSq {
- var s WithSq
- s.Img = ToIntegralImg(img)
- s.Sq = ToSqIntegralImg(img)
- return s
-}
-
-
-// GetWindow gets the values of the corners of a square part of an
-// Integral Image, plus the dimensions of the part, which can
-// be used to quickly calculate the mean of the area
-func (i I) GetWindow(x, y, size int) Window {
- step := size / 2
-
- minx, miny := 0, 0
- maxy := len(i)-1
- maxx := len(i[0])-1
-
- if y > (step+1) {
- miny = y - step - 1
- }
- if x > (step+1) {
- minx = x - step - 1
- }
-
- if maxy > (y + step) {
- maxy = y + step
- }
- if maxx > (x + step) {
- maxx = x + step
- }
-
- return Window { i[miny][minx], i[miny][maxx], i[maxy][minx], i[maxy][maxx], maxx-minx, maxy-miny}
-}
-
-// GetVerticalWindow gets the values of the corners of a vertical
-// slice of an Integral Image, starting at x
-func (i I) GetVerticalWindow(x, width int) Window {
- maxy := len(i) - 1
- maxx := x + width
- if maxx > len(i[0])-1 {
- maxx = len(i[0]) - 1
- }
-
- return Window { i[0][x], i[0][maxx], i[maxy][x], i[maxy][maxx], width, maxy }
-}
-
-// Sum returns the sum of all pixels in a Window
-func (w Window) Sum() uint64 {
- return w.bottomright + w.topleft - w.topright - w.bottomleft
-}
-
-// Size returns the total size of a Window
-func (w Window) Size() int {
- return w.width * w.height
-}
-
-// Mean returns the average value of pixels in a Window
-func (w Window) Mean() float64 {
- return float64(w.Sum()) / float64(w.Size())
-}
-
-// Proportion returns the proportion of pixels which are on
-func (w Window) Proportion() float64 {
- area := w.width * w.height
- // divide by 255 as each on pixel has the value of 255
- sum := float64(w.Sum()) / 255
- return float64(area) / sum - 1
-}
-
-// MeanWindow calculates the mean value of a section of an Integral
-// Image
-func (i I) MeanWindow(x, y, size int) float64 {
- return i.GetWindow(x, y, size).Mean()
-}
-
-// MeanStdDevWindow calculates the mean and standard deviation of
-// a section on an Integral Image
-func (i WithSq) MeanStdDevWindow(x, y, size int) (float64, float64) {
- imean := i.Img.GetWindow(x, y, size).Mean()
- smean := i.Sq.GetWindow(x, y, size).Mean()
-
- variance := smean - (imean * imean)
-
- return imean, math.Sqrt(variance)
-}
diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
deleted file mode 100644
index dcd0494..0000000
--- a/lib/hocr/hocr.go
+++ /dev/null
@@ -1,129 +0,0 @@
-package hocr
-
-import (
- "encoding/xml"
- "errors"
- "io/ioutil"
- "regexp"
- "strconv"
- "strings"
-)
-
-type Hocr struct {
- Lines []OcrLine `xml:"body>div>div>p>span"`
-}
-
-type OcrLine struct {
- Class string `xml:"class,attr"`
- Id string `xml:"id,attr"`
- Title string `xml:"title,attr"`
- Words []OcrWord `xml:"span"`
- Text string `xml:",chardata"`
-}
-
-type OcrWord struct {
- Class string `xml:"class,attr"`
- Id string `xml:"id,attr"`
- Title string `xml:"title,attr"`
- Chars []OcrChar `xml:"span"`
- Text string `xml:",chardata"`
-}
-
-type OcrChar struct {
- Class string `xml:"class,attr"`
- Id string `xml:"id,attr"`
- Title string `xml:"title,attr"`
- Chars []OcrChar `xml:"span"`
- Text string `xml:",chardata"`
-}
-
-// Returns the confidence for a word based on its x_wconf value
-func wordConf(s string) (float64, error) {
- re, err := regexp.Compile(`x_wconf ([0-9.]+)`)
- if err != nil {
- return 0.0, err
- }
- conf := re.FindStringSubmatch(s)
- return strconv.ParseFloat(conf[1], 64)
-}
-
-func boxCoords(s string) ([4]int, error) {
- var coords [4]int
- re, err := regexp.Compile(`bbox ([0-9]+) ([0-9]+) ([0-9]+) ([0-9]+)`)
- if err != nil {
- return coords, err
- }
- coordstr := re.FindStringSubmatch(s)
- for i := range coords {
- c, err := strconv.Atoi(coordstr[i+1])
- if err != nil {
- return coords, err
- }
- coords[i] = c
- }
- return coords, nil
-}
-
-func noText(s string) bool {
- t := strings.Trim(s, " \n")
- return len(t) == 0
-}
-
-func Parse(b []byte) (Hocr, error) {
- var hocr Hocr
-
- err := xml.Unmarshal(b, &hocr)
- if err != nil {
- return hocr, err
- }
-
- return hocr, nil
-}
-
-func GetText(hocrfn string) (string, error) {
- var s string
-
- file, err := ioutil.ReadFile(hocrfn)
- if err != nil {
- return s, err
- }
-
- h, err := Parse(file)
- if err != nil {
- return s, err
- }
-
-
- for _, l := range h.Lines {
- s += getLineText(l)
- }
- return s, nil
-}
-
-func GetAvgConf(hocrfn string) (float64, error) {
- file, err := ioutil.ReadFile(hocrfn)
- if err != nil {
- return 0, err
- }
-
- h, err := Parse(file)
- if err != nil {
- return 0, err
- }
-
- var total, num float64
- for _, l := range h.Lines {
- for _, w := range l.Words {
- c, err := wordConf(w.Title)
- if err != nil {
- return 0, err
- }
- total += c
- num++
- }
- }
- if num == 0 {
- return 0, errors.New("No words found")
- }
- return total / num, nil
-}
diff --git a/lib/hocr/lines.go b/lib/hocr/lines.go
deleted file mode 100644
index 74e8f9a..0000000
--- a/lib/hocr/lines.go
+++ /dev/null
@@ -1,131 +0,0 @@
-package hocr
-
-// TODO: Parse line name to zero pad line numbers, so they can
-// be sorted easily
-
-import (
- "image"
- "image/png"
- "io/ioutil"
- "log"
- "os"
- "path/filepath"
- "strings"
-
- "rescribe.xyz/go.git/lib/line"
-)
-
-func getLineText(l OcrLine) (string) {
- linetext := ""
-
- linetext = l.Text
- if noText(linetext) {
- linetext = ""
- for _, w := range l.Words {
- if w.Class != "ocrx_word" {
- continue
- }
- linetext += w.Text + " "
- }
- }
- if noText(linetext) {
- linetext = ""
- for _, w := range l.Words {
- if w.Class != "ocrx_word" {
- continue
- }
- for _, c := range w.Chars {
- if c.Class != "ocrx_cinfo" {
- continue
- }
- linetext += c.Text
- }
- linetext += " "
- }
- }
- linetext = strings.TrimRight(linetext, " ")
- linetext += "\n"
- return linetext
-}
-
-func parseLineDetails(h Hocr, i image.Image, name string) (line.Details, error) {
- lines := make(line.Details, 0)
-
- for _, l := range h.Lines {
- totalconf := float64(0)
- num := 0
- for _, w := range l.Words {
- c, err := wordConf(w.Title)
- if err != nil {
- return lines, err
- }
- num++
- totalconf += c
- }
-
- coords, err := boxCoords(l.Title)
- if err != nil {
- return lines, err
- }
-
- var ln line.Detail
- ln.Name = l.Id
- ln.Avgconf = (totalconf / float64(num)) / 100
- ln.Text = getLineText(l)
- ln.OcrName = name
- if i != nil {
- var imgd line.ImgDirect
- imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))
- ln.Img = imgd
- }
- lines = append(lines, ln)
- }
- return lines, nil
-}
-
-func GetLineDetails(hocrfn string) (line.Details, error) {
- var newlines line.Details
-
- file, err := ioutil.ReadFile(hocrfn)
- if err != nil {
- return newlines, err
- }
-
- h, err := Parse(file)
- if err != nil {
- return newlines, err
- }
-
- var img image.Image
- pngfn := strings.Replace(hocrfn, ".hocr", ".png", 1)
- pngf, err := os.Open(pngfn)
- if err != nil {
- log.Println("Warning: can't open image %s\n", pngfn)
- } else {
- defer pngf.Close()
- img, err = png.Decode(pngf)
- if err != nil {
- log.Println("Warning: can't load image %s\n", pngfn)
- }
- }
-
- n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
- return parseLineDetails(h, img, n)
-}
-
-func GetLineBasics(hocrfn string) (line.Details, error) {
- var newlines line.Details
-
- file, err := ioutil.ReadFile(hocrfn)
- if err != nil {
- return newlines, err
- }
-
- h, err := Parse(file)
- if err != nil {
- return newlines, err
- }
-
- n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
- return parseLineDetails(h, nil, n)
-}
diff --git a/lib/line/line.go b/lib/line/line.go
deleted file mode 100644
index d4e3e44..0000000
--- a/lib/line/line.go
+++ /dev/null
@@ -1,57 +0,0 @@
-package line
-
-import (
- "image"
- "image/png"
- "io"
- "os"
-)
-
-type Detail struct {
- Name string
- Avgconf float64
- Img CopyableImg
- Text string
- OcrName string
-}
-
-type CopyableImg interface {
- CopyLineTo(io.Writer) error
-}
-
-type Details []Detail
-
-func (l Details) Len() int { return len(l) }
-func (l Details) Less(i, j int) bool { return l[i].Avgconf < l[j].Avgconf }
-func (l Details) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
-
-// This is an implementation of the CopyableImg interface that
-// stores the image directly as an image.Image
-type ImgDirect struct {
- Img image.Image
-}
-
-func (i ImgDirect) CopyLineTo(w io.Writer) error {
- err := png.Encode(w, i.Img)
- if err != nil {
- return err
- }
- return nil
-}
-
-// This is an implementation of the CopyableImg interface that
-// stores the path of an image
-type ImgPath struct {
- Path string
-}
-
-func (i ImgPath) CopyLineTo(w io.Writer) error {
- f, err := os.Open(i.Path)
- if err != nil {
- return err
- }
- defer f.Close()
-
- _, err = io.Copy(w, f)
- return err
-}
diff --git a/lib/prob/prob.go b/lib/prob/prob.go
deleted file mode 100644
index 31a484d..0000000
--- a/lib/prob/prob.go
+++ /dev/null
@@ -1,69 +0,0 @@
-package prob
-
-import (
- "io/ioutil"
- "path/filepath"
- "strconv"
- "strings"
-
- "rescribe.xyz/go.git/lib/line"
-)
-
-func getLineAvg(f string) (float64, error) {
- totalconf := float64(0)
- num := 0
-
- prob, err := ioutil.ReadFile(f)
- if err != nil {
- return 0, err
- }
-
- for _, l := range strings.Split(string(prob), "\n") {
- fields := strings.Fields(l)
-
- if len(fields) == 2 {
- conf, err := strconv.ParseFloat(fields[1], 64)
- if err != nil {
- continue
- }
- totalconf += conf
- num += 1
- }
- }
- if num <= 0 {
- return 0, nil
- }
- avg := totalconf / float64(num)
- return avg, nil
-}
-
-// Note this only processes one line at a time
-func GetLineDetails(probfn string) (line.Details, error) {
- var l line.Detail
- lines := make(line.Details, 0)
-
- avg, err := getLineAvg(probfn)
- if err != nil {
- return lines, err
- }
-
- filebase := strings.Replace(probfn, ".prob", "", 1)
-
- txt, err := ioutil.ReadFile(filebase + ".txt")
- if err != nil {
- return lines, err
- }
-
- l.Name = filepath.Base(filebase)
- l.Avgconf = avg
- l.Text = string(txt)
- l.OcrName = filepath.Base(filepath.Dir(filebase))
-
- var imgfn line.ImgPath
- imgfn.Path = filebase + ".bin.png"
- l.Img = imgfn
-
- lines = append(lines, l)
-
- return lines, nil
-}
diff --git a/pgconf/main.go b/pgconf/main.go
deleted file mode 100644
index bc09c23..0000000
--- a/pgconf/main.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package main
-
-import (
- "flag"
- "fmt"
- "log"
- "os"
-
- "rescribe.xyz/go.git/lib/hocr"
-)
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: pgconf hocr\n")
- fmt.Fprintf(os.Stderr, "Prints the total confidence for a page, as an average of the confidence of each word.\n")
- flag.PrintDefaults()
- }
- flag.Parse()
- if flag.NArg() != 1 {
- flag.Usage()
- os.Exit(1)
- }
-
- avg, err := hocr.GetAvgConf(flag.Arg(0))
- if err != nil {
- log.Fatalf("Error retreiving confidence for %s: %v\n", flag.Arg(0), err)
- }
-
- fmt.Printf("%0.0f\n", avg)
-}
diff --git a/preproc/cmd/binarize/main.go b/preproc/cmd/binarize/main.go
deleted file mode 100644
index e7f677e..0000000
--- a/preproc/cmd/binarize/main.go
+++ /dev/null
@@ -1,78 +0,0 @@
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/go.git/preproc"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
- return bounds.Dx() / 60
-}
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n")
- flag.PrintDefaults()
- }
- wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.")
- ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
- btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if *wsize == 0 {
- *wsize = autowsize(b)
- log.Printf("Set window size to %d\n", *wsize)
- }
-
- if *wsize%2 == 0 {
- *wsize++
- }
-
- // TODO: come up with a way to set a good ksize automatically
-
- var thresh image.Image
- thresh = preproc.IntegralSauvola(gray, *ksize, *wsize)
-
- if *btype == "zeroinv" {
- thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- log.Fatal(err)
- }
- }
-
- f, err = os.Create(flag.Arg(1))
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
- }
- defer f.Close()
- err = png.Encode(f, thresh)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
-}
diff --git a/preproc/cmd/preproc/main.go b/preproc/cmd/preproc/main.go
deleted file mode 100644
index 1c248e0..0000000
--- a/preproc/cmd/preproc/main.go
+++ /dev/null
@@ -1,90 +0,0 @@
-package main
-
-// TODO: come up with a way to set a good ksize automatically
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/go.git/preproc"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
- return bounds.Dx() / 60
-}
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-m minperc] [-nowipe] [-wt wipethresh] [-ws wipesize] inimg outimg\n")
- fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n")
- flag.PrintDefaults()
- }
- binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
- ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).")
- btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
- min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
- nowipe := flag.Bool("nowipe", false, "Disable wiping completely.")
- wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
- thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if *binwsize == 0 {
- *binwsize = autowsize(b)
- }
-
- if *binwsize%2 == 0 {
- *binwsize++
- }
-
- log.Print("Binarising")
- var clean, threshimg image.Image
- threshimg = preproc.IntegralSauvola(gray, *ksize, *binwsize)
-
- if *btype == "zeroinv" {
- threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- log.Fatal(err)
- }
- }
-
- if !*nowipe {
- log.Print("Wiping sides")
- clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min)
- } else {
- clean = threshimg
- }
-
- f, err = os.Create(flag.Arg(1))
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
-}
diff --git a/preproc/cmd/preprocmulti/main.go b/preproc/cmd/preprocmulti/main.go
deleted file mode 100644
index c6c9fe4..0000000
--- a/preproc/cmd/preprocmulti/main.go
+++ /dev/null
@@ -1,101 +0,0 @@
-package main
-
-// TODO: come up with a way to set a good ksize automatically
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/go.git/integralimg"
- "rescribe.xyz/go.git/preproc"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
- return bounds.Dx() / 60
-}
-
-func main() {
- ksizes := []float64{0.1, 0.2, 0.4, 0.5}
-
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: preprocmulti [-bt bintype] [-bw winsize] [-m minperc] [-nowipe] [-ws wipesize] inimg outbase\n")
- fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n")
- fmt.Fprintf(os.Stderr, "saving images to outbase_bin{k}.png.\n")
- fmt.Fprintf(os.Stderr, "Binarises with these levels for k: %v.\n", ksizes)
- flag.PrintDefaults()
- }
- binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.")
- btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.")
- min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
- nowipe := flag.Bool("nowipe", false, "Disable wiping completely.")
- wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- log.Printf("Opening %s\n", flag.Arg(0))
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if *binwsize == 0 {
- *binwsize = autowsize(b)
- }
-
- if *binwsize%2 == 0 {
- *binwsize++
- }
-
- var clean, threshimg image.Image
- log.Print("Precalculating integral images")
- integrals := integralimg.ToAllIntegralImg(gray)
-
- for _, k := range ksizes {
- log.Print("Binarising")
- threshimg = preproc.PreCalcedSauvola(integrals, gray, k, *binwsize)
-
- if *btype == "zeroinv" {
- threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- log.Fatal(err)
- }
- }
-
- if !*nowipe {
- log.Print("Wiping sides")
- clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, k*0.02, *min)
- } else {
- clean = threshimg
- }
-
- savefn := fmt.Sprintf("%s_bin%0.1f.png", flag.Arg(1), k)
- log.Printf("Saving %s\n", savefn)
- f, err = os.Create(savefn)
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", savefn, err)
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
- }
-}
diff --git a/preproc/cmd/wipe/main.go b/preproc/cmd/wipe/main.go
deleted file mode 100644
index e5c039d..0000000
--- a/preproc/cmd/wipe/main.go
+++ /dev/null
@@ -1,55 +0,0 @@
-package main
-
-import (
- "flag"
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "log"
- "os"
-
- "rescribe.xyz/go.git/preproc"
-)
-
-func main() {
- flag.Usage = func() {
- fmt.Fprintf(os.Stderr, "Usage: wipe [-m minperc] [-t thresh] [-w winsize] inimg outimg\n")
- fmt.Fprintf(os.Stderr, "Wipes the sections of an image which are outside the content area.\n")
- flag.PrintDefaults()
- }
- min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.")
- thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge. Higher means more aggressive wiping.")
- wsize := flag.Int("w", 5, "Window size for mask finding algorithm.")
- flag.Parse()
- if flag.NArg() < 2 {
- flag.Usage()
- os.Exit(1)
- }
-
- f, err := os.Open(flag.Arg(0))
- defer f.Close()
- if err != nil {
- log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err)
- }
- img, _, err := image.Decode(f)
- if err != nil {
- log.Fatalf("Could not decode image: %v\n", err)
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- clean := preproc.Wipe(gray, *wsize, *thresh, *min)
-
- f, err = os.Create(flag.Arg(1))
- if err != nil {
- log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err)
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- log.Fatalf("Could not encode image: %v\n", err)
- }
-}
diff --git a/preproc/preprocmulti.go b/preproc/preprocmulti.go
deleted file mode 100644
index 2e7cb06..0000000
--- a/preproc/preprocmulti.go
+++ /dev/null
@@ -1,94 +0,0 @@
-package preproc
-
-// TODO: come up with a way to set a good ksize automatically
-
-import (
- "fmt"
- "image"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "os"
- "strings"
-
- "rescribe.xyz/go.git/integralimg"
-)
-
-// TODO: do more testing to see how good this assumption is
-func autowsize(bounds image.Rectangle) int {
- return bounds.Dx() / 60
-}
-
-// PreProcMulti binarizes and preprocesses an image with multiple binarisation levels.
-// inPath: Path of input image.
-// ksizes: Slice of k values to pass to Sauvola algorithm
-// binType: Type of binarization threshold. binary or zeroinv are currently implemented.
-// binWsize: Window size for sauvola binarization algorithm. Set automatically based on resolution if 0.
-// wipe: Whether to wipe (clear sides) the image
-// wipeWsize: Window size for wiping algorithm
-// wipeMinWidthPerc: Minimum percentage of the image width for the content width calculation to be considered valid
-// Note: copied from cmd/preprocmulti/main.go, should think about the best way
-// to organise this code later.
-// TODO: return errors that encapsulate the err describing where it was encountered
-// TODO: do the post-integral image stuff in separate goroutines for speed
-func PreProcMulti(inPath string, ksizes []float64, binType string, binWsize int, wipe bool, wipeWsize int, wipeMinWidthPerc int) ([]string, error) {
- // Make outBase inPath up to final .
- s := strings.Split(inPath, ".")
- outBase := strings.Join(s[:len(s)-1], "")
-
- var donePaths []string
-
- f, err := os.Open(inPath)
- if err != nil {
- return donePaths, err
- }
- defer f.Close()
- img, _, err := image.Decode(f)
- if err != nil {
- return donePaths, err
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- if binWsize == 0 {
- binWsize = autowsize(b)
- }
-
- if binWsize%2 == 0 {
- binWsize++
- }
-
- var clean, threshimg image.Image
- integrals := integralimg.ToAllIntegralImg(gray)
-
- for _, k := range ksizes {
- threshimg = PreCalcedSauvola(integrals, gray, k, binWsize)
-
- if binType == "zeroinv" {
- threshimg, err = BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA))
- if err != nil {
- return donePaths, err
- }
- }
-
- if wipe {
- clean = Wipe(threshimg.(*image.Gray), wipeWsize, k*0.02, wipeMinWidthPerc)
- } else {
- clean = threshimg
- }
-
- savefn := fmt.Sprintf("%s_bin%0.1f.png", outBase, k)
- f, err = os.Create(savefn)
- if err != nil {
- return donePaths, err
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- return donePaths, err
- }
- donePaths = append(donePaths, savefn)
- }
- return donePaths, nil
-}
diff --git a/preproc/sauvola.go b/preproc/sauvola.go
deleted file mode 100644
index 046bb7d..0000000
--- a/preproc/sauvola.go
+++ /dev/null
@@ -1,76 +0,0 @@
-package preproc
-
-import (
- "image"
- "image/color"
-
- "rescribe.xyz/go.git/integralimg"
-)
-
-// Implements Sauvola's algorithm for text binarization, see paper
-// "Adaptive document image binarization" (2000)
-func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- window := surrounding(img, x, y, windowsize)
- m, dev := meanstddev(window)
- threshold := m * (1 + ksize*((dev/128)-1))
- if img.GrayAt(x, y).Y < uint8(threshold) {
- new.SetGray(x, y, color.Gray{0})
- } else {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- }
-
- return new
-}
-
-// Implements Sauvola's algorithm using Integral Images, see paper
-// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images"
-// and
-// https://stackoverflow.com/questions/13110733/computing-image-integral
-func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- integrals := integralimg.ToAllIntegralImg(img)
-
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- m, dev := integrals.MeanStdDevWindow(x, y, windowsize)
- threshold := m * (1 + ksize*((dev/128)-1))
- if img.GrayAt(x, y).Y < uint8(threshold) {
- new.SetGray(x, y, color.Gray{0})
- } else {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- }
-
- return new
-}
-
-// PreCalcedSauvola Implements Sauvola's algorithm using precalculated Integral Images
-// TODO: have this be the root function that the other two reference
-func PreCalcedSauvola(integrals integralimg.WithSq, img *image.Gray, ksize float64, windowsize int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- m, dev := integrals.MeanStdDevWindow(x, y, windowsize)
- threshold := m * (1 + ksize*((dev/128)-1))
- if img.GrayAt(x, y).Y < uint8(threshold) {
- new.SetGray(x, y, color.Gray{0})
- } else {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- }
-
- return new
-}
diff --git a/preproc/sauvola_test.go b/preproc/sauvola_test.go
deleted file mode 100644
index 2331e10..0000000
--- a/preproc/sauvola_test.go
+++ /dev/null
@@ -1,70 +0,0 @@
-package preproc
-
-import (
- "flag"
- "fmt"
- "image"
- "image/png"
- "os"
- "testing"
-)
-
-func TestBinarization(t *testing.T) {
- var slow = flag.Bool("slow", false, "include slow tests")
- var update = flag.Bool("updatesauvola", false, "update golden files")
-
- cases := []struct {
- name string
- orig string
- golden string
- ksize float64
- wsize int
- }{
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41},
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19},
- {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19},
- {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19},
- }
-
- for _, c := range cases {
- t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) {
- var actual *image.Gray
- orig, err := decode(c.orig)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.orig, err)
- }
- switch c.name {
- case "integralsauvola":
- actual = IntegralSauvola(orig, c.ksize, c.wsize)
- case "sauvola":
- if *slow {
- actual = Sauvola(orig, c.ksize, c.wsize)
- } else {
- t.Skip("Skipping slow test; use -slow to run it.\n")
- }
- default:
- t.Fatalf("No method %s\n", c.name)
- }
- if *update {
- f, err := os.Create(c.golden)
- defer f.Close()
- if err != nil {
- t.Fatalf("Could not open file %s to update: %v\n", c.golden, err)
- }
- err = png.Encode(f, actual)
- if err != nil {
- t.Fatalf("Could not encode update of %s: %v\n", c.golden, err)
- }
- }
- golden, err := decode(c.golden)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.golden, err)
- }
- if !imgsequal(golden, actual) {
- t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden)
- }
- })
- }
-}
diff --git a/preproc/test_helpers.go b/preproc/test_helpers.go
deleted file mode 100644
index 20de5b1..0000000
--- a/preproc/test_helpers.go
+++ /dev/null
@@ -1,53 +0,0 @@
-package preproc
-
-// TODO: add different pages as test cases
-// TODO: test non integral img version
-
-import (
- "image"
- "image/draw"
- "image/png"
- "os"
-)
-
-func decode(s string) (*image.Gray, error) {
- f, err := os.Open(s)
- defer f.Close()
- if err != nil {
- return nil, err
- }
- img, err := png.Decode(f)
- if err != nil {
- return nil, err
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
- return gray, nil
-}
-
-func imgsequal(img1 *image.Gray, img2 *image.Gray) bool {
- b := img1.Bounds()
- if !b.Eq(img2.Bounds()) {
- return false
- }
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- r0, g0, b0, a0 := img1.At(x, y).RGBA()
- r1, g1, b1, a1 := img2.At(x, y).RGBA()
- if r0 != r1 {
- return false
- }
- if g0 != g1 {
- return false
- }
- if b0 != b1 {
- return false
- }
- if a0 != a1 {
- return false
- }
- }
- }
- return true
-}
diff --git a/preproc/testdata/pg1.png b/preproc/testdata/pg1.png
deleted file mode 100644
index 2bcc4b1..0000000
--- a/preproc/testdata/pg1.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg1_integralsauvola_k0.3_w19.png b/preproc/testdata/pg1_integralsauvola_k0.3_w19.png
deleted file mode 100644
index bdf5712..0000000
--- a/preproc/testdata/pg1_integralsauvola_k0.3_w19.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w19.png b/preproc/testdata/pg1_integralsauvola_k0.5_w19.png
deleted file mode 100644
index 5db2d9a..0000000
--- a/preproc/testdata/pg1_integralsauvola_k0.5_w19.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w41.png b/preproc/testdata/pg1_integralsauvola_k0.5_w41.png
deleted file mode 100644
index 050d037..0000000
--- a/preproc/testdata/pg1_integralsauvola_k0.5_w41.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg1_sauvola_k0.3_w19.png b/preproc/testdata/pg1_sauvola_k0.3_w19.png
deleted file mode 100644
index bcd595f..0000000
--- a/preproc/testdata/pg1_sauvola_k0.3_w19.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg1_sauvola_k0.5_w19.png b/preproc/testdata/pg1_sauvola_k0.5_w19.png
deleted file mode 100644
index 8de596c..0000000
--- a/preproc/testdata/pg1_sauvola_k0.5_w19.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg1_sauvola_k0.5_w41.png b/preproc/testdata/pg1_sauvola_k0.5_w41.png
deleted file mode 100644
index b8f50e0..0000000
--- a/preproc/testdata/pg1_sauvola_k0.5_w41.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg2.png b/preproc/testdata/pg2.png
deleted file mode 100644
index c7c4249..0000000
--- a/preproc/testdata/pg2.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg2_integralwipesides_t0.02_w5.png b/preproc/testdata/pg2_integralwipesides_t0.02_w5.png
deleted file mode 100644
index 6b4ccb2..0000000
--- a/preproc/testdata/pg2_integralwipesides_t0.02_w5.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg2_integralwipesides_t0.05_w25.png b/preproc/testdata/pg2_integralwipesides_t0.05_w25.png
deleted file mode 100644
index 39dc88d..0000000
--- a/preproc/testdata/pg2_integralwipesides_t0.05_w25.png
+++ /dev/null
Binary files differ
diff --git a/preproc/testdata/pg2_integralwipesides_t0.05_w5.png b/preproc/testdata/pg2_integralwipesides_t0.05_w5.png
deleted file mode 100644
index 50df855..0000000
--- a/preproc/testdata/pg2_integralwipesides_t0.05_w5.png
+++ /dev/null
Binary files differ
diff --git a/preproc/util.go b/preproc/util.go
deleted file mode 100644
index e23829d..0000000
--- a/preproc/util.go
+++ /dev/null
@@ -1,95 +0,0 @@
-package preproc
-
-import (
- "errors"
- "image"
- "math"
-)
-
-// TODO: name better; maybe verb, x-er
-// TODO: implement these for regular image, and use them to make
-// image functions generic for integral and non- images
-type UsefulImg interface {
- MeanWindow()
- MeanStdDevWindow()
-}
-
-func mean(i []int) float64 {
- sum := 0
- for _, n := range i {
- sum += n
- }
- return float64(sum) / float64(len(i))
-}
-
-func stddev(i []int) float64 {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := sum / float64(len(i)-1)
- return math.Sqrt(variance)
-}
-
-func meanstddev(i []int) (float64, float64) {
- m := mean(i)
-
- var sum float64
- for _, n := range i {
- sum += (float64(n) - m) * (float64(n) - m)
- }
- variance := float64(sum) / float64(len(i)-1)
- return m, math.Sqrt(variance)
-}
-
-// gets the pixel values surrounding a point in the image
-func surrounding(img *image.Gray, x int, y int, size int) []int {
- b := img.Bounds()
- step := size / 2
-
- miny := y - step
- if miny < b.Min.Y {
- miny = b.Min.Y
- }
- minx := x - step
- if minx < b.Min.X {
- minx = b.Min.X
- }
- maxy := y + step
- if maxy > b.Max.Y {
- maxy = b.Max.Y
- }
- maxx := x + step
- if maxx > b.Max.X {
- maxx = b.Max.X
- }
-
- var s []int
- for yi := miny; yi <= maxy; yi++ {
- for xi := minx; xi <= maxx; xi++ {
- s = append(s, int(img.GrayAt(xi, yi).Y))
- }
- }
- return s
-}
-
-func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) {
- b := bin.Bounds()
- if !b.Eq(orig.Bounds()) {
- return orig, errors.New("bin and orig images need to be the same dimensions")
- }
- newimg := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy()))
- for y := b.Min.Y; y < b.Max.Y; y++ {
- for x := b.Min.X; x < b.Max.X; x++ {
- if bin.GrayAt(x, y).Y == 255 {
- newimg.Set(x, y, bin.GrayAt(x, y))
- } else {
- newimg.Set(x, y, orig.At(x, y))
- }
- }
- }
-
- return newimg, nil
-}
diff --git a/preproc/wipesides.go b/preproc/wipesides.go
deleted file mode 100644
index 3d08053..0000000
--- a/preproc/wipesides.go
+++ /dev/null
@@ -1,160 +0,0 @@
-package preproc
-
-// TODO: add minimum size variable (default ~30%?)
-// TODO: switch to an interface rather than integralimg.I
-
-import (
- "errors"
- "fmt"
- "image"
- "image/color"
- "image/draw"
- _ "image/jpeg"
- "image/png"
- "os"
-
- "rescribe.xyz/go.git/integralimg"
-)
-
-// returns the proportion of the given window that is black pixels
-func proportion(i integralimg.I, x int, size int) float64 {
- w := i.GetVerticalWindow(x, size)
- return w.Proportion()
-}
-
-// findbestedge goes through every vertical line from x to x+w to
-// find the one with the lowest proportion of black pixels.
-func findbestedge(img integralimg.I, x int, w int) int {
- var bestx int
- var best float64
-
- if w == 1 {
- return x
- }
-
- right := x + w
- for ; x < right; x++ {
- prop := proportion(img, x, 1)
- if prop > best {
- best = prop
- bestx = x
- }
- }
-
- return bestx
-}
-
-// findedges finds the edges of the main content, by moving a window of wsize
-// from near the middle of the image to the left and right, stopping when it reaches
-// a point at which there is a lower proportion of black pixels than thresh.
-func findedges(img integralimg.I, wsize int, thresh float64) (int, int) {
- maxx := len(img[0]) - 1
- var lowedge, highedge int = 0, maxx
-
- // don't start at the middle, as this will fail for 2 column layouts,
- // start 10% left or right of the middle
- notcentre := maxx / 10
-
- for x := maxx/2 + notcentre; x < maxx-wsize; x++ {
- if proportion(img, x, wsize) <= thresh {
- highedge = findbestedge(img, x, wsize)
- break
- }
- }
-
- for x := maxx/2 - notcentre; x > 0; x-- {
- if proportion(img, x, wsize) <= thresh {
- lowedge = findbestedge(img, x, wsize)
- break
- }
- }
-
- return lowedge, highedge
-}
-
-// wipesides fills the sections of image not within the boundaries
-// of lowedge and highedge with white
-func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray {
- b := img.Bounds()
- new := image.NewGray(b)
-
- // set left edge white
- for x := b.Min.X; x < lowedge; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, color.Gray{255})
- }
- }
- // copy middle
- for x := lowedge; x < highedge; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, img.GrayAt(x, y))
- }
- }
- // set right edge white
- for x := highedge; x < b.Max.X; x++ {
- for y := b.Min.Y; y < b.Max.Y; y++ {
- new.SetGray(x, y, color.Gray{255})
- }
- }
-
- return new
-}
-
-// toonarrow checks whether the area between lowedge and highedge is
-// less than min % of the total image width
-func toonarrow(img *image.Gray, lowedge int, highedge int, min int) bool {
- b := img.Bounds()
- imgw := b.Max.X - b.Min.X
- wipew := highedge - lowedge
- if float64(wipew)/float64(imgw)*100 < float64(min) {
- return true
- }
- return false
-}
-
-// Wipe fills the sections of image which fall outside the content
-// area with white, providing the content area is above min %
-func Wipe(img *image.Gray, wsize int, thresh float64, min int) *image.Gray {
- integral := integralimg.ToIntegralImg(img)
- lowedge, highedge := findedges(integral, wsize, thresh)
- if toonarrow(img, lowedge, highedge, min) {
- return img
- }
- return wipesides(img, lowedge, highedge)
-}
-
-// WipeFile wipes an image file, filling the sections of the image
-// which fall outside the content area with white, providing the
-// content area is above min %.
-// inPath: path of the input image.
-// outPath: path to save the output image.
-// wsize: window size for wipe algorithm.
-// thresh: threshold for wipe algorithm.
-// min: minimum % of content area width to consider valid.
-func WipeFile(inPath string, outPath string, wsize int, thresh float64, min int) error {
- f, err := os.Open(inPath)
- defer f.Close()
- if err != nil {
- return errors.New(fmt.Sprintf("Could not open file %s: %v", inPath, err))
- }
- img, _, err := image.Decode(f)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not decode image: %v", err))
- }
- b := img.Bounds()
- gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy()))
- draw.Draw(gray, b, img, b.Min, draw.Src)
-
- clean := Wipe(gray, wsize, thresh, min)
-
- f, err = os.Create(outPath)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not create file %s: %v", outPath, err))
- }
- defer f.Close()
- err = png.Encode(f, clean)
- if err != nil {
- return errors.New(fmt.Sprintf("Could not encode image: %v", err))
- }
- return nil
-}
diff --git a/preproc/wipesides_test.go b/preproc/wipesides_test.go
deleted file mode 100644
index d5464e0..0000000
--- a/preproc/wipesides_test.go
+++ /dev/null
@@ -1,57 +0,0 @@
-package preproc
-
-// TODO: add different pages as test cases
-// TODO: test non integral img version
-
-import (
- "flag"
- "fmt"
- "image"
- "image/png"
- "os"
- "testing"
-)
-
-func TestWipeSides(t *testing.T) {
- var update = flag.Bool("updatewipe", false, "update golden files")
- cases := []struct {
- name string
- orig string
- golden string
- thresh float64
- wsize int
- }{
- {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.02_w5.png", 0.02, 5},
- {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w5.png", 0.05, 5},
- {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w25.png", 0.05, 25},
- }
-
- for _, c := range cases {
- t.Run(fmt.Sprintf("%s_%0.2f_%d", c.name, c.thresh, c.wsize), func(t *testing.T) {
- var actual *image.Gray
- orig, err := decode(c.orig)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.orig, err)
- }
- actual = Wipe(orig, c.wsize, c.thresh)
- if *update {
- f, err := os.Create(c.golden)
- defer f.Close()
- if err != nil {
- t.Fatalf("Could not open file %s to update: %v\n", c.golden, err)
- }
- err = png.Encode(f, actual)
- if err != nil {
- t.Fatalf("Could not encode update of %s: %v\n", c.golden, err)
- }
- }
- golden, err := decode(c.golden)
- if err != nil {
- t.Fatalf("Could not open file %s: %v\n", c.golden, err)
- }
- if !imgsequal(golden, actual) {
- t.Errorf("Processed %s differs to %s\n", c.orig, c.golden)
- }
- })
- }
-}