diff options
43 files changed, 4 insertions, 2096 deletions
diff --git a/avg-lines/html.go b/avg-lines/html.go deleted file mode 100644 index 443cc4a..0000000 --- a/avg-lines/html.go +++ /dev/null @@ -1,61 +0,0 @@ -package main - -import ( - "fmt" - "os" - "path/filepath" - - "rescribe.xyz/go.git/lib/line" -) - -func copylineimg(fn string, l line.Detail) error { - f, err := os.Create(fn) - if err != nil { - return err - } - defer f.Close() - - return l.Img.CopyLineTo(f) -} - -func htmlout(dir string, lines line.Details) error { - err := os.MkdirAll(dir, 0700) - if err != nil { - return err - } - - fn := filepath.Join(dir, "index.html") - f, err := os.Create(fn) - if err != nil { - return err - } - defer f.Close() - - _, err = fmt.Fprintf(f, "<!DOCTYPE html><html><head><meta charset='UTF-8'><title></title>"+ - "<style>td {border: 1px solid #444}</style></head><body>\n<table>\n") - if err != nil { - return err - } - for _, l := range lines { - fn = filepath.Base(l.OcrName) + "_" + l.Name + ".png" - err = copylineimg(filepath.Join(dir, fn), l) - if err != nil { - return err - } - _, err = fmt.Fprintf(f, "<tr>\n"+ - "<td><h1>%.4f%%</h1></td>\n"+ - "<td>%s %s</td>\n"+ - "<td><img src='%s' width='100%%' /><br />%s</td>\n"+ - "</tr>\n", - l.Avgconf, l.OcrName, l.Name, fn, l.Text) - if err != nil { - return err - } - } - _, err = fmt.Fprintf(f, "</table>\n</body></html>\n") - if err != nil { - return err - } - - return nil -} diff --git a/avg-lines/main.go b/avg-lines/main.go deleted file mode 100644 index 14b21bd..0000000 --- a/avg-lines/main.go +++ /dev/null @@ -1,69 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - "os" - "path/filepath" - "sort" - - "rescribe.xyz/go.git/lib/hocr" - "rescribe.xyz/go.git/lib/line" - "rescribe.xyz/go.git/lib/prob" -) - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: avg-lines [-html dir] [-nosort] [prob1] [hocr1] [prob2] [...]\n") - fmt.Fprintf(os.Stderr, "Prints a report of the average confidence for each line, sorted\n") - fmt.Fprintf(os.Stderr, "from worst to best.\n") - fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n") - fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n") - fmt.Fprintf(os.Stderr, "The .prob files are generated using ocropy-rpred's --probabilities\n") - fmt.Fprintf(os.Stderr, "option.\n\n") - flag.PrintDefaults() - } - var html = flag.String("html", "", "Output in html format to the specified directory") - var nosort = flag.Bool("nosort", false, "Don't sort lines by confidence") - flag.Parse() - if flag.NArg() < 1 { - flag.Usage() - os.Exit(1) - } - - var err error - lines := make(line.Details, 0) - - for _, f := range flag.Args() { - var newlines line.Details - switch ext := filepath.Ext(f); ext { - case ".prob": - newlines, err = prob.GetLineDetails(f) - case ".hocr": - newlines, err = hocr.GetLineDetails(f) - default: - log.Printf("Skipping file '%s' as it isn't a .prob or .hocr\n", f) - continue - } - if err != nil { - log.Fatal(err) - } - - for _, l := range newlines { - lines = append(lines, l) - } - } - - if *nosort == false { - sort.Sort(lines) - } - - if *html == "" { - for _, l := range lines { - fmt.Printf("%s %s: %.2f%%\n", l.OcrName, l.Name, l.Avgconf) - } - } else { - htmlout(*html, lines) - } -} diff --git a/bookpipeline/aws.go b/aws.go index 0127d6e..0127d6e 100644 --- a/bookpipeline/aws.go +++ b/aws.go diff --git a/bucket-lines/bucket.go b/bucket-lines/bucket.go deleted file mode 100644 index 9f98887..0000000 --- a/bucket-lines/bucket.go +++ /dev/null @@ -1,131 +0,0 @@ -package main - -import ( - "fmt" - "io" - "os" - "path/filepath" - "sort" - "strconv" - - "rescribe.xyz/go.git/lib/line" -) - -type BucketSpec struct { - Min float64 - Name string -} -type BucketSpecs []BucketSpec - -func (b BucketSpecs) Len() int { return len(b) } -func (b BucketSpecs) Swap(i, j int) { b[i], b[j] = b[j], b[i] } -func (b BucketSpecs) Less(i, j int) bool { return b[i].Min < b[j].Min } - -type BucketStat struct { - name string - num int -} -type BucketStats []BucketStat - -func (b BucketStats) Len() int { return len(b) } -func (b BucketStats) Swap(i, j int) { b[i], b[j] = b[j], b[i] } -func (b BucketStats) Less(i, j int) bool { return b[i].num < b[j].num } - -// Copies the image and text for a line into a directory based on -// the line confidence, as defined by the buckets struct -func bucketLine(l line.Detail, buckets BucketSpecs, dirname string) (string, error) { - var bucket string - - todir := "" - for _, b := range buckets { - if l.Avgconf >= b.Min { - todir = b.Name - bucket = b.Name - } - } - - if todir == "" { - return bucket, nil - } - - avgstr := strconv.FormatFloat(l.Avgconf, 'G', -1, 64) - if len(avgstr) > 2 { - avgstr = avgstr[2:] - } - - base := filepath.Join(dirname, todir, l.OcrName+"_"+l.Name+"_"+avgstr) - - err := os.MkdirAll(filepath.Join(dirname, todir), 0700) - if err != nil { - return bucket, err - } - - f, err := os.Create(base + ".png") - if err != nil { - return bucket, err - } - defer f.Close() - - err = l.Img.CopyLineTo(f) - if err != nil { - return bucket, err - } - - f, err = os.Create(base + ".txt") - if err != nil { - return bucket, err - } - defer f.Close() - - _, err = io.WriteString(f, l.Text) - if err != nil { - return bucket, err - } - - return bucket, err -} - -// Copies line images and text into directories based on their -// confidence, as defined by the buckets struct, and returns -// statistics of whire lines went in the process. -func BucketUp(lines line.Details, buckets BucketSpecs, dirname string) (BucketStats, error) { - var all []string - var stats BucketStats - - sort.Sort(lines) - sort.Sort(buckets) - for _, l := range lines { - bname, err := bucketLine(l, buckets, dirname) - if err != nil { - return stats, err - } - all = append(all, bname) - } - - for _, b := range all { - i := sort.Search(len(stats), func(i int) bool { return stats[i].name == b }) - if i == len(stats) { - newstat := BucketStat{b, 0} - stats = append(stats, newstat) - i = len(stats) - 1 - } - stats[i].num++ - } - - return stats, nil -} - -// Prints statistics of where lines went when bucketing -func PrintBucketStats(w io.Writer, stats BucketStats) { - var total int - for _, s := range stats { - total += s.num - } - - fmt.Fprintf(w, "Copied %d lines\n", total) - fmt.Fprintf(w, "---------------------------------\n") - sort.Sort(stats) - for _, s := range stats { - fmt.Fprintf(w, "Lines in %7s: %2d%%\n", s.name, 100*s.num/total) - } -} diff --git a/bucket-lines/main.go b/bucket-lines/main.go deleted file mode 100644 index 990e84c..0000000 --- a/bucket-lines/main.go +++ /dev/null @@ -1,87 +0,0 @@ -package main - -import ( - "encoding/json" - "flag" - "fmt" - "io/ioutil" - "log" - "os" - "path/filepath" - - "rescribe.xyz/go.git/lib/hocr" - "rescribe.xyz/go.git/lib/line" - "rescribe.xyz/go.git/lib/prob" -) - -func main() { - b := BucketSpecs{ - // minimum confidence, name - {0, "bad"}, - {0.95, "95to98"}, - {0.98, "98plus"}, - } - - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: bucket-lines [-d dir] [-s specs.json] [hocr1] [prob1] [hocr2] [...]\n") - fmt.Fprintf(os.Stderr, "Copies image-text line pairs into different directories according\n") - fmt.Fprintf(os.Stderr, "to the average character probability for the line.\n") - fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n") - fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n") - fmt.Fprintf(os.Stderr, "The .prob files are generated using ocropy-rpred's --probabilities\n") - fmt.Fprintf(os.Stderr, "option.\n") - fmt.Fprintf(os.Stderr, "The .prob and .hocr files are assumed to be in the same directory\n") - fmt.Fprintf(os.Stderr, "as the line's image and text files.\n\n") - flag.PrintDefaults() - fmt.Fprintf(os.Stderr, "\nAn example specs.json file would be the following:\n") - fmt.Fprintf(os.Stderr, "[{\"min\": 0, \"name\": \"terrible\"}, {\"min\": 0.80, \"name\": \"ok\"}, {\"min\": 0.98, \"name\": \"great\"}]\n") - } - dir := flag.String("d", "buckets", "Directory to store the buckets") - specs := flag.String("s", "", "JSON file describing specs to bucket into") - flag.Parse() - if flag.NArg() < 1 { - flag.Usage() - os.Exit(1) - } - - if *specs != "" { - js, err := ioutil.ReadFile(*specs) - if err != nil { - log.Fatal(err) - } - err = json.Unmarshal(js, &b) - if err != nil { - log.Fatal(err) - } - } - - var err error - lines := make(line.Details, 0) - - for _, f := range flag.Args() { - var newlines line.Details - switch ext := filepath.Ext(f); ext { - case ".prob": - newlines, err = prob.GetLineDetails(f) - case ".hocr": - newlines, err = hocr.GetLineDetails(f) - default: - log.Printf("Skipping file '%s' as it isn't a .prob or .hocr\n", f) - continue - } - if err != nil { - log.Fatal(err) - } - - for _, l := range newlines { - lines = append(lines, l) - } - } - - stats, err := BucketUp(lines, b, *dir) - if err != nil { - log.Fatal(err) - } - - PrintBucketStats(os.Stdout, stats) -} diff --git a/bookpipeline/cmd/bookpipeline/main.go b/cmd/bookpipeline/main.go index 59ece72..f445547 100644 --- a/bookpipeline/cmd/bookpipeline/main.go +++ b/cmd/bookpipeline/main.go @@ -1,7 +1,5 @@ package main -// TODO: check if images are prebinarised and if so skip multiple binarisation - import ( "errors" "flag" @@ -14,7 +12,7 @@ import ( "strings" "time" - "rescribe.xyz/go.git/bookpipeline" + "rescribe.xyz/bookpipeline" "rescribe.xyz/go.git/lib/hocr" "rescribe.xyz/go.git/preproc" ) diff --git a/bookpipeline/cmd/booktopipeline/main.go b/cmd/booktopipeline/main.go index 6d9f146..6d9f146 100644 --- a/bookpipeline/cmd/booktopipeline/main.go +++ b/cmd/booktopipeline/main.go diff --git a/bookpipeline/cmd/confgraph/main.go b/cmd/confgraph/main.go index b60821e..474c0a2 100644 --- a/bookpipeline/cmd/confgraph/main.go +++ b/cmd/confgraph/main.go @@ -8,7 +8,7 @@ import ( "path/filepath" "strings" - "rescribe.xyz/go.git/bookpipeline" + "rescribe.xyz/bookpipeline" "rescribe.xyz/go.git/lib/hocr" ) diff --git a/bookpipeline/cmd/getpipelinebook/main.go b/cmd/getpipelinebook/main.go index 66e3f70..9e900bf 100644 --- a/bookpipeline/cmd/getpipelinebook/main.go +++ b/cmd/getpipelinebook/main.go @@ -8,7 +8,7 @@ import ( "os" "path/filepath" - "rescribe.xyz/go.git/bookpipeline" + "rescribe.xyz/bookpipeline" ) const usage = "Usage: getpipelinebook [-a] [-v] bookname\n\nDownloads the pipeline results for a book.\n" diff --git a/bookpipeline/cmd/lspipeline/main.go b/cmd/lspipeline/main.go index 46a1d63..0e1ebb0 100644 --- a/bookpipeline/cmd/lspipeline/main.go +++ b/cmd/lspipeline/main.go @@ -7,7 +7,7 @@ import ( "os/exec" "strings" - "rescribe.xyz/go.git/bookpipeline" + "rescribe.xyz/bookpipeline" ) const usage = `Usage: lspipeline [-i key] [-n num] diff --git a/bookpipeline/cmd/mkpipeline/main.go b/cmd/mkpipeline/main.go index e37a56d..e37a56d 100644 --- a/bookpipeline/cmd/mkpipeline/main.go +++ b/cmd/mkpipeline/main.go diff --git a/dehyphenate/main.go b/dehyphenate/main.go deleted file mode 100644 index 4393c8f..0000000 --- a/dehyphenate/main.go +++ /dev/null @@ -1,63 +0,0 @@ -package main - -import ( - "encoding/xml" - "flag" - "fmt" - "io/ioutil" - "log" - "os" - - "rescribe.xyz/go.git/lib/hocr" -) - -// BUGS: -// - loses all elements not captured in hocr structure such as html headings -// might be best to copy the header and footer separately and put the hocr in between, but would still need to ensure all elements are captured -// - loses any formatting; doesn't need to be identical, but e.g. linebreaks after elements would be handy -// - need to handle OcrChar - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: dehyphenate hocrin hocrout\n") - fmt.Fprintf(os.Stderr, "Dehyphenates a hocr file.\n") - flag.PrintDefaults() - } - flag.Parse() - if flag.NArg() != 2 { - flag.Usage() - os.Exit(1) - } - - in, err := ioutil.ReadFile(flag.Arg(0)) - if err != nil { - log.Fatalf("Error reading %s: %v", flag.Arg(1), err) - } - h, err := hocr.Parse(in) - if err != nil { - log.Fatal(err) - } - - for i, l := range h.Lines { - w := l.Words[len(l.Words)-1] - if len(w.Chars) == 0 { - if len(w.Text) > 0 && w.Text[len(w.Text) - 1] == '-' { - h.Lines[i].Words[len(l.Words)-1].Text = w.Text[0:len(w.Text)-1] + h.Lines[i+1].Words[0].Text - h.Lines[i+1].Words[0].Text = "" - } - } else { - log.Printf("TODO: handle OcrChar") - } - } - - f, err := os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Error creating file %s: %v", flag.Arg(1), err) - } - defer f.Close() - e := xml.NewEncoder(f) - err = e.Encode(h) - if err != nil { - log.Fatalf("Error encoding XML: %v", err) - } -} diff --git a/eeboxmltohocr/main.go b/eeboxmltohocr/main.go deleted file mode 100644 index 2761cd9..0000000 --- a/eeboxmltohocr/main.go +++ /dev/null @@ -1,135 +0,0 @@ -package main - -import ( - "bufio" - "flag" - "fmt" - "io" - "log" - "os" - "regexp" - "strconv" - "strings" -) - -// splitByPb is a split function for the scanner that splits by the -// '<pb' token. -func splitByPb(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - if i := strings.Index(string(data[:]), "<pb"); i >= 0 { - return i + 1, data[0:i], nil - } - // If we're at EOF, we have a final section, so just return the lot. - if atEOF { - return len(data), data, nil - } - // Request more data. - return 0, nil, nil -} - -type Page struct { - number int - text string -} - -func addPage(pgs *[]Page, number int, text string) { - added := 0 - for i, pg := range *pgs { - if pg.number == number { - (*pgs)[i].text = pg.text + text - added = 1 - } - } - if added == 0 { - newpg := Page{number, text} - *pgs = append(*pgs, newpg) - } -} - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: eeboxmltohocr in.xml outbase\n") - flag.PrintDefaults() - } - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - scanner := bufio.NewScanner(f) - - scanner.Split(splitByPb) - - var pgs []Page - - for scanner.Scan() { - t := scanner.Text() - r := regexp.MustCompile(`pb [^>]*facs="tcp:.*?:(.*?)"`).FindStringSubmatch(t) - if len(r) <= 1 { - continue - } - pgnum, err := strconv.Atoi(r[1]) - if err != nil { - continue - } - - content := t[strings.Index(t, ">")+1:] - ungap := regexp.MustCompile(`(?s)<gap[ >].+?</gap>`).ReplaceAllString(content, "") - unxml := regexp.MustCompile(`<.+?>`).ReplaceAllString(ungap, "") - - finaltxt := strings.TrimLeft(unxml, " \n") - if len(finaltxt) == 0 { - continue - } - - addPage(&pgs, pgnum, finaltxt) - } - - for _, pg := range pgs { - fn := fmt.Sprintf("%s-%03d.hocr", flag.Arg(1), pg.number - 1) - f, err := os.Create(fn) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", fn, err) - } - defer f.Close() - - _, err = io.WriteString(f, hocrHeader + pg.text + hocrFooter) - if err != nil { - log.Fatalf("Could not write file %s: %v\n", fn, err) - } - } -} - -const hocrHeader = `<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> - <head> - <title></title> - <meta http-equiv="Content-Type" content="text/html;charset=utf-8"/> - <meta name='ocr-system' content='tesseract 4.0.0' /> - <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par ocr_line ocrx_word ocrp_wconf'/> - </head> - <body> - <div class='ocr_page' id='page_1' title='bbox 0 0 600 1200'> - <div class='ocr_carea' id='block_1_1' title="bbox 0 0 600 1200"> - <p class='ocr_par' id='par_1_1' lang='lat' title="bbox 0 0 600 1200"> - <span class='ocr_line' id='line_1_1' title="bbox 0 0 600 1200" -> - <span class='ocrx_word' id='word_1_1' title='bbox 0 0 600 1200'>` - -const hocrFooter = `</span> - </span> - </p> - </div> - </div> - </body> -</html>` diff --git a/bookpipeline/graph.go b/graph.go index 955abbd..955abbd 100644 --- a/bookpipeline/graph.go +++ b/graph.go diff --git a/hocrtotxt/main.go b/hocrtotxt/main.go deleted file mode 100644 index 6821a9e..0000000 --- a/hocrtotxt/main.go +++ /dev/null @@ -1,30 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - "os" - - "rescribe.xyz/go.git/lib/hocr" -) - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: hocrtotxt hocrfile\n") - fmt.Fprintf(os.Stderr, "Prints the text from a hocr file.\n") - flag.PrintDefaults() - } - flag.Parse() - if flag.NArg() != 1 { - flag.Usage() - os.Exit(1) - } - - text, err := hocr.GetText(flag.Arg(0)) - if err != nil { - log.Fatal(err) - } - - fmt.Printf("%s\n", text) -} diff --git a/integralimg/integralimg.go b/integralimg/integralimg.go deleted file mode 100644 index 406ed61..0000000 --- a/integralimg/integralimg.go +++ /dev/null @@ -1,169 +0,0 @@ -package integralimg - -import ( - "image" - "math" -) - -// I is the Integral Image -type I [][]uint64 - -// Sq contains an Integral Image and its Square -type WithSq struct { - Img I - Sq I -} - -// Window is a part of an Integral Image -type Window struct { - topleft uint64 - topright uint64 - bottomleft uint64 - bottomright uint64 - width int - height int -} - -// ToIntegralImg creates an integral image -func ToIntegralImg(img *image.Gray) I { - var integral I - var oldy, oldx, oldxy uint64 - b := img.Bounds() - for y := b.Min.Y; y < b.Max.Y; y++ { - newrow := []uint64{} - for x := b.Min.X; x < b.Max.X; x++ { - oldx, oldy, oldxy = 0, 0, 0 - if x > 0 { - oldx = newrow[x-1] - } - if y > 0 { - oldy = integral[y-1][x] - } - if x > 0 && y > 0 { - oldxy = integral[y-1][x-1] - } - pixel := uint64(img.GrayAt(x, y).Y) - i := pixel + oldx + oldy - oldxy - newrow = append(newrow, i) - } - integral = append(integral, newrow) - } - return integral -} - -// ToSqIntegralImg creates an integral image of the square of all -// pixel values -func ToSqIntegralImg(img *image.Gray) I { - var integral I - var oldy, oldx, oldxy uint64 - b := img.Bounds() - for y := b.Min.Y; y < b.Max.Y; y++ { - newrow := []uint64{} - for x := b.Min.X; x < b.Max.X; x++ { - oldx, oldy, oldxy = 0, 0, 0 - if x > 0 { - oldx = newrow[x-1] - } - if y > 0 { - oldy = integral[y-1][x] - } - if x > 0 && y > 0 { - oldxy = integral[y-1][x-1] - } - pixel := uint64(img.GrayAt(x, y).Y) - i := pixel * pixel + oldx + oldy - oldxy - newrow = append(newrow, i) - } - integral = append(integral, newrow) - } - return integral -} - -// ToAllIntegralImg creates a WithSq containing a regular and -// squared Integral Image -func ToAllIntegralImg(img *image.Gray) WithSq { - var s WithSq - s.Img = ToIntegralImg(img) - s.Sq = ToSqIntegralImg(img) - return s -} - - -// GetWindow gets the values of the corners of a square part of an -// Integral Image, plus the dimensions of the part, which can -// be used to quickly calculate the mean of the area -func (i I) GetWindow(x, y, size int) Window { - step := size / 2 - - minx, miny := 0, 0 - maxy := len(i)-1 - maxx := len(i[0])-1 - - if y > (step+1) { - miny = y - step - 1 - } - if x > (step+1) { - minx = x - step - 1 - } - - if maxy > (y + step) { - maxy = y + step - } - if maxx > (x + step) { - maxx = x + step - } - - return Window { i[miny][minx], i[miny][maxx], i[maxy][minx], i[maxy][maxx], maxx-minx, maxy-miny} -} - -// GetVerticalWindow gets the values of the corners of a vertical -// slice of an Integral Image, starting at x -func (i I) GetVerticalWindow(x, width int) Window { - maxy := len(i) - 1 - maxx := x + width - if maxx > len(i[0])-1 { - maxx = len(i[0]) - 1 - } - - return Window { i[0][x], i[0][maxx], i[maxy][x], i[maxy][maxx], width, maxy } -} - -// Sum returns the sum of all pixels in a Window -func (w Window) Sum() uint64 { - return w.bottomright + w.topleft - w.topright - w.bottomleft -} - -// Size returns the total size of a Window -func (w Window) Size() int { - return w.width * w.height -} - -// Mean returns the average value of pixels in a Window -func (w Window) Mean() float64 { - return float64(w.Sum()) / float64(w.Size()) -} - -// Proportion returns the proportion of pixels which are on -func (w Window) Proportion() float64 { - area := w.width * w.height - // divide by 255 as each on pixel has the value of 255 - sum := float64(w.Sum()) / 255 - return float64(area) / sum - 1 -} - -// MeanWindow calculates the mean value of a section of an Integral -// Image -func (i I) MeanWindow(x, y, size int) float64 { - return i.GetWindow(x, y, size).Mean() -} - -// MeanStdDevWindow calculates the mean and standard deviation of -// a section on an Integral Image -func (i WithSq) MeanStdDevWindow(x, y, size int) (float64, float64) { - imean := i.Img.GetWindow(x, y, size).Mean() - smean := i.Sq.GetWindow(x, y, size).Mean() - - variance := smean - (imean * imean) - - return imean, math.Sqrt(variance) -} diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go deleted file mode 100644 index dcd0494..0000000 --- a/lib/hocr/hocr.go +++ /dev/null @@ -1,129 +0,0 @@ -package hocr - -import ( - "encoding/xml" - "errors" - "io/ioutil" - "regexp" - "strconv" - "strings" -) - -type Hocr struct { - Lines []OcrLine `xml:"body>div>div>p>span"` -} - -type OcrLine struct { - Class string `xml:"class,attr"` - Id string `xml:"id,attr"` - Title string `xml:"title,attr"` - Words []OcrWord `xml:"span"` - Text string `xml:",chardata"` -} - -type OcrWord struct { - Class string `xml:"class,attr"` - Id string `xml:"id,attr"` - Title string `xml:"title,attr"` - Chars []OcrChar `xml:"span"` - Text string `xml:",chardata"` -} - -type OcrChar struct { - Class string `xml:"class,attr"` - Id string `xml:"id,attr"` - Title string `xml:"title,attr"` - Chars []OcrChar `xml:"span"` - Text string `xml:",chardata"` -} - -// Returns the confidence for a word based on its x_wconf value -func wordConf(s string) (float64, error) { - re, err := regexp.Compile(`x_wconf ([0-9.]+)`) - if err != nil { - return 0.0, err - } - conf := re.FindStringSubmatch(s) - return strconv.ParseFloat(conf[1], 64) -} - -func boxCoords(s string) ([4]int, error) { - var coords [4]int - re, err := regexp.Compile(`bbox ([0-9]+) ([0-9]+) ([0-9]+) ([0-9]+)`) - if err != nil { - return coords, err - } - coordstr := re.FindStringSubmatch(s) - for i := range coords { - c, err := strconv.Atoi(coordstr[i+1]) - if err != nil { - return coords, err - } - coords[i] = c - } - return coords, nil -} - -func noText(s string) bool { - t := strings.Trim(s, " \n") - return len(t) == 0 -} - -func Parse(b []byte) (Hocr, error) { - var hocr Hocr - - err := xml.Unmarshal(b, &hocr) - if err != nil { - return hocr, err - } - - return hocr, nil -} - -func GetText(hocrfn string) (string, error) { - var s string - - file, err := ioutil.ReadFile(hocrfn) - if err != nil { - return s, err - } - - h, err := Parse(file) - if err != nil { - return s, err - } - - - for _, l := range h.Lines { - s += getLineText(l) - } - return s, nil -} - -func GetAvgConf(hocrfn string) (float64, error) { - file, err := ioutil.ReadFile(hocrfn) - if err != nil { - return 0, err - } - - h, err := Parse(file) - if err != nil { - return 0, err - } - - var total, num float64 - for _, l := range h.Lines { - for _, w := range l.Words { - c, err := wordConf(w.Title) - if err != nil { - return 0, err - } - total += c - num++ - } - } - if num == 0 { - return 0, errors.New("No words found") - } - return total / num, nil -} diff --git a/lib/hocr/lines.go b/lib/hocr/lines.go deleted file mode 100644 index 74e8f9a..0000000 --- a/lib/hocr/lines.go +++ /dev/null @@ -1,131 +0,0 @@ -package hocr - -// TODO: Parse line name to zero pad line numbers, so they can -// be sorted easily - -import ( - "image" - "image/png" - "io/ioutil" - "log" - "os" - "path/filepath" - "strings" - - "rescribe.xyz/go.git/lib/line" -) - -func getLineText(l OcrLine) (string) { - linetext := "" - - linetext = l.Text - if noText(linetext) { - linetext = "" - for _, w := range l.Words { - if w.Class != "ocrx_word" { - continue - } - linetext += w.Text + " " - } - } - if noText(linetext) { - linetext = "" - for _, w := range l.Words { - if w.Class != "ocrx_word" { - continue - } - for _, c := range w.Chars { - if c.Class != "ocrx_cinfo" { - continue - } - linetext += c.Text - } - linetext += " " - } - } - linetext = strings.TrimRight(linetext, " ") - linetext += "\n" - return linetext -} - -func parseLineDetails(h Hocr, i image.Image, name string) (line.Details, error) { - lines := make(line.Details, 0) - - for _, l := range h.Lines { - totalconf := float64(0) - num := 0 - for _, w := range l.Words { - c, err := wordConf(w.Title) - if err != nil { - return lines, err - } - num++ - totalconf += c - } - - coords, err := boxCoords(l.Title) - if err != nil { - return lines, err - } - - var ln line.Detail - ln.Name = l.Id - ln.Avgconf = (totalconf / float64(num)) / 100 - ln.Text = getLineText(l) - ln.OcrName = name - if i != nil { - var imgd line.ImgDirect - imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3])) - ln.Img = imgd - } - lines = append(lines, ln) - } - return lines, nil -} - -func GetLineDetails(hocrfn string) (line.Details, error) { - var newlines line.Details - - file, err := ioutil.ReadFile(hocrfn) - if err != nil { - return newlines, err - } - - h, err := Parse(file) - if err != nil { - return newlines, err - } - - var img image.Image - pngfn := strings.Replace(hocrfn, ".hocr", ".png", 1) - pngf, err := os.Open(pngfn) - if err != nil { - log.Println("Warning: can't open image %s\n", pngfn) - } else { - defer pngf.Close() - img, err = png.Decode(pngf) - if err != nil { - log.Println("Warning: can't load image %s\n", pngfn) - } - } - - n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1) - return parseLineDetails(h, img, n) -} - -func GetLineBasics(hocrfn string) (line.Details, error) { - var newlines line.Details - - file, err := ioutil.ReadFile(hocrfn) - if err != nil { - return newlines, err - } - - h, err := Parse(file) - if err != nil { - return newlines, err - } - - n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1) - return parseLineDetails(h, nil, n) -} diff --git a/lib/line/line.go b/lib/line/line.go deleted file mode 100644 index d4e3e44..0000000 --- a/lib/line/line.go +++ /dev/null @@ -1,57 +0,0 @@ -package line - -import ( - "image" - "image/png" - "io" - "os" -) - -type Detail struct { - Name string - Avgconf float64 - Img CopyableImg - Text string - OcrName string -} - -type CopyableImg interface { - CopyLineTo(io.Writer) error -} - -type Details []Detail - -func (l Details) Len() int { return len(l) } -func (l Details) Less(i, j int) bool { return l[i].Avgconf < l[j].Avgconf } -func (l Details) Swap(i, j int) { l[i], l[j] = l[j], l[i] } - -// This is an implementation of the CopyableImg interface that -// stores the image directly as an image.Image -type ImgDirect struct { - Img image.Image -} - -func (i ImgDirect) CopyLineTo(w io.Writer) error { - err := png.Encode(w, i.Img) - if err != nil { - return err - } - return nil -} - -// This is an implementation of the CopyableImg interface that -// stores the path of an image -type ImgPath struct { - Path string -} - -func (i ImgPath) CopyLineTo(w io.Writer) error { - f, err := os.Open(i.Path) - if err != nil { - return err - } - defer f.Close() - - _, err = io.Copy(w, f) - return err -} diff --git a/lib/prob/prob.go b/lib/prob/prob.go deleted file mode 100644 index 31a484d..0000000 --- a/lib/prob/prob.go +++ /dev/null @@ -1,69 +0,0 @@ -package prob - -import ( - "io/ioutil" - "path/filepath" - "strconv" - "strings" - - "rescribe.xyz/go.git/lib/line" -) - -func getLineAvg(f string) (float64, error) { - totalconf := float64(0) - num := 0 - - prob, err := ioutil.ReadFile(f) - if err != nil { - return 0, err - } - - for _, l := range strings.Split(string(prob), "\n") { - fields := strings.Fields(l) - - if len(fields) == 2 { - conf, err := strconv.ParseFloat(fields[1], 64) - if err != nil { - continue - } - totalconf += conf - num += 1 - } - } - if num <= 0 { - return 0, nil - } - avg := totalconf / float64(num) - return avg, nil -} - -// Note this only processes one line at a time -func GetLineDetails(probfn string) (line.Details, error) { - var l line.Detail - lines := make(line.Details, 0) - - avg, err := getLineAvg(probfn) - if err != nil { - return lines, err - } - - filebase := strings.Replace(probfn, ".prob", "", 1) - - txt, err := ioutil.ReadFile(filebase + ".txt") - if err != nil { - return lines, err - } - - l.Name = filepath.Base(filebase) - l.Avgconf = avg - l.Text = string(txt) - l.OcrName = filepath.Base(filepath.Dir(filebase)) - - var imgfn line.ImgPath - imgfn.Path = filebase + ".bin.png" - l.Img = imgfn - - lines = append(lines, l) - - return lines, nil -} diff --git a/pgconf/main.go b/pgconf/main.go deleted file mode 100644 index bc09c23..0000000 --- a/pgconf/main.go +++ /dev/null @@ -1,30 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - "os" - - "rescribe.xyz/go.git/lib/hocr" -) - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: pgconf hocr\n") - fmt.Fprintf(os.Stderr, "Prints the total confidence for a page, as an average of the confidence of each word.\n") - flag.PrintDefaults() - } - flag.Parse() - if flag.NArg() != 1 { - flag.Usage() - os.Exit(1) - } - - avg, err := hocr.GetAvgConf(flag.Arg(0)) - if err != nil { - log.Fatalf("Error retreiving confidence for %s: %v\n", flag.Arg(0), err) - } - - fmt.Printf("%0.0f\n", avg) -} diff --git a/preproc/cmd/binarize/main.go b/preproc/cmd/binarize/main.go deleted file mode 100644 index e7f677e..0000000 --- a/preproc/cmd/binarize/main.go +++ /dev/null @@ -1,78 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/go.git/preproc" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: binarize [-k num] [-t type] [-w num] inimg outimg\n") - flag.PrintDefaults() - } - wsize := flag.Int("w", 0, "Window size for sauvola algorithm. Set automatically based on resolution if not set.") - ksize := flag.Float64("k", 0.5, "K for sauvola algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).") - btype := flag.String("t", "binary", "Type of threshold. binary or zeroinv are currently implemented.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if *wsize == 0 { - *wsize = autowsize(b) - log.Printf("Set window size to %d\n", *wsize) - } - - if *wsize%2 == 0 { - *wsize++ - } - - // TODO: come up with a way to set a good ksize automatically - - var thresh image.Image - thresh = preproc.IntegralSauvola(gray, *ksize, *wsize) - - if *btype == "zeroinv" { - thresh, err = preproc.BinToZeroInv(thresh.(*image.Gray), img.(*image.RGBA)) - if err != nil { - log.Fatal(err) - } - } - - f, err = os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) - } - defer f.Close() - err = png.Encode(f, thresh) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } -} diff --git a/preproc/cmd/preproc/main.go b/preproc/cmd/preproc/main.go deleted file mode 100644 index 1c248e0..0000000 --- a/preproc/cmd/preproc/main.go +++ /dev/null @@ -1,90 +0,0 @@ -package main - -// TODO: come up with a way to set a good ksize automatically - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/go.git/preproc" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: preproc [-bt bintype] [-bw winsize] [-k num] [-m minperc] [-nowipe] [-wt wipethresh] [-ws wipesize] inimg outimg\n") - fmt.Fprintf(os.Stderr, "Binarize and preprocess an image\n") - flag.PrintDefaults() - } - binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.") - ksize := flag.Float64("k", 0.5, "K for sauvola binarization algorithm. This controls the overall threshold level. Set it lower for very light text (try 0.1 or 0.2).") - btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.") - min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.") - nowipe := flag.Bool("nowipe", false, "Disable wiping completely.") - wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.") - thresh := flag.Float64("wt", 0.05, "Threshold for the wiping algorithm to determine the proportion of black pixels below which a window is determined to be the edge.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if *binwsize == 0 { - *binwsize = autowsize(b) - } - - if *binwsize%2 == 0 { - *binwsize++ - } - - log.Print("Binarising") - var clean, threshimg image.Image - threshimg = preproc.IntegralSauvola(gray, *ksize, *binwsize) - - if *btype == "zeroinv" { - threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA)) - if err != nil { - log.Fatal(err) - } - } - - if !*nowipe { - log.Print("Wiping sides") - clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, *thresh, *min) - } else { - clean = threshimg - } - - f, err = os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } -} diff --git a/preproc/cmd/preprocmulti/main.go b/preproc/cmd/preprocmulti/main.go deleted file mode 100644 index c6c9fe4..0000000 --- a/preproc/cmd/preprocmulti/main.go +++ /dev/null @@ -1,101 +0,0 @@ -package main - -// TODO: come up with a way to set a good ksize automatically - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/go.git/integralimg" - "rescribe.xyz/go.git/preproc" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -func main() { - ksizes := []float64{0.1, 0.2, 0.4, 0.5} - - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: preprocmulti [-bt bintype] [-bw winsize] [-m minperc] [-nowipe] [-ws wipesize] inimg outbase\n") - fmt.Fprintf(os.Stderr, "Binarize and preprocess an image, with multiple binarisation levels,\n") - fmt.Fprintf(os.Stderr, "saving images to outbase_bin{k}.png.\n") - fmt.Fprintf(os.Stderr, "Binarises with these levels for k: %v.\n", ksizes) - flag.PrintDefaults() - } - binwsize := flag.Int("bw", 0, "Window size for sauvola binarization algorithm. Set automatically based on resolution if not set.") - btype := flag.String("bt", "binary", "Type of binarization threshold. binary or zeroinv are currently implemented.") - min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.") - nowipe := flag.Bool("nowipe", false, "Disable wiping completely.") - wipewsize := flag.Int("ws", 5, "Window size for wiping algorithm.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - log.Printf("Opening %s\n", flag.Arg(0)) - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if *binwsize == 0 { - *binwsize = autowsize(b) - } - - if *binwsize%2 == 0 { - *binwsize++ - } - - var clean, threshimg image.Image - log.Print("Precalculating integral images") - integrals := integralimg.ToAllIntegralImg(gray) - - for _, k := range ksizes { - log.Print("Binarising") - threshimg = preproc.PreCalcedSauvola(integrals, gray, k, *binwsize) - - if *btype == "zeroinv" { - threshimg, err = preproc.BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA)) - if err != nil { - log.Fatal(err) - } - } - - if !*nowipe { - log.Print("Wiping sides") - clean = preproc.Wipe(threshimg.(*image.Gray), *wipewsize, k*0.02, *min) - } else { - clean = threshimg - } - - savefn := fmt.Sprintf("%s_bin%0.1f.png", flag.Arg(1), k) - log.Printf("Saving %s\n", savefn) - f, err = os.Create(savefn) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", savefn, err) - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } - } -} diff --git a/preproc/cmd/wipe/main.go b/preproc/cmd/wipe/main.go deleted file mode 100644 index e5c039d..0000000 --- a/preproc/cmd/wipe/main.go +++ /dev/null @@ -1,55 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "log" - "os" - - "rescribe.xyz/go.git/preproc" -) - -func main() { - flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: wipe [-m minperc] [-t thresh] [-w winsize] inimg outimg\n") - fmt.Fprintf(os.Stderr, "Wipes the sections of an image which are outside the content area.\n") - flag.PrintDefaults() - } - min := flag.Int("m", 30, "Minimum percentage of the image width for the content width calculation to be considered valid.") - thresh := flag.Float64("t", 0.05, "Threshold for the proportion of black pixels below which a window is determined to be the edge. Higher means more aggressive wiping.") - wsize := flag.Int("w", 5, "Window size for mask finding algorithm.") - flag.Parse() - if flag.NArg() < 2 { - flag.Usage() - os.Exit(1) - } - - f, err := os.Open(flag.Arg(0)) - defer f.Close() - if err != nil { - log.Fatalf("Could not open file %s: %v\n", flag.Arg(0), err) - } - img, _, err := image.Decode(f) - if err != nil { - log.Fatalf("Could not decode image: %v\n", err) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - clean := preproc.Wipe(gray, *wsize, *thresh, *min) - - f, err = os.Create(flag.Arg(1)) - if err != nil { - log.Fatalf("Could not create file %s: %v\n", flag.Arg(1), err) - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - log.Fatalf("Could not encode image: %v\n", err) - } -} diff --git a/preproc/preprocmulti.go b/preproc/preprocmulti.go deleted file mode 100644 index 2e7cb06..0000000 --- a/preproc/preprocmulti.go +++ /dev/null @@ -1,94 +0,0 @@ -package preproc - -// TODO: come up with a way to set a good ksize automatically - -import ( - "fmt" - "image" - "image/draw" - _ "image/jpeg" - "image/png" - "os" - "strings" - - "rescribe.xyz/go.git/integralimg" -) - -// TODO: do more testing to see how good this assumption is -func autowsize(bounds image.Rectangle) int { - return bounds.Dx() / 60 -} - -// PreProcMulti binarizes and preprocesses an image with multiple binarisation levels. -// inPath: Path of input image. -// ksizes: Slice of k values to pass to Sauvola algorithm -// binType: Type of binarization threshold. binary or zeroinv are currently implemented. -// binWsize: Window size for sauvola binarization algorithm. Set automatically based on resolution if 0. -// wipe: Whether to wipe (clear sides) the image -// wipeWsize: Window size for wiping algorithm -// wipeMinWidthPerc: Minimum percentage of the image width for the content width calculation to be considered valid -// Note: copied from cmd/preprocmulti/main.go, should think about the best way -// to organise this code later. -// TODO: return errors that encapsulate the err describing where it was encountered -// TODO: do the post-integral image stuff in separate goroutines for speed -func PreProcMulti(inPath string, ksizes []float64, binType string, binWsize int, wipe bool, wipeWsize int, wipeMinWidthPerc int) ([]string, error) { - // Make outBase inPath up to final . - s := strings.Split(inPath, ".") - outBase := strings.Join(s[:len(s)-1], "") - - var donePaths []string - - f, err := os.Open(inPath) - if err != nil { - return donePaths, err - } - defer f.Close() - img, _, err := image.Decode(f) - if err != nil { - return donePaths, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - if binWsize == 0 { - binWsize = autowsize(b) - } - - if binWsize%2 == 0 { - binWsize++ - } - - var clean, threshimg image.Image - integrals := integralimg.ToAllIntegralImg(gray) - - for _, k := range ksizes { - threshimg = PreCalcedSauvola(integrals, gray, k, binWsize) - - if binType == "zeroinv" { - threshimg, err = BinToZeroInv(threshimg.(*image.Gray), img.(*image.RGBA)) - if err != nil { - return donePaths, err - } - } - - if wipe { - clean = Wipe(threshimg.(*image.Gray), wipeWsize, k*0.02, wipeMinWidthPerc) - } else { - clean = threshimg - } - - savefn := fmt.Sprintf("%s_bin%0.1f.png", outBase, k) - f, err = os.Create(savefn) - if err != nil { - return donePaths, err - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - return donePaths, err - } - donePaths = append(donePaths, savefn) - } - return donePaths, nil -} diff --git a/preproc/sauvola.go b/preproc/sauvola.go deleted file mode 100644 index 046bb7d..0000000 --- a/preproc/sauvola.go +++ /dev/null @@ -1,76 +0,0 @@ -package preproc - -import ( - "image" - "image/color" - - "rescribe.xyz/go.git/integralimg" -) - -// Implements Sauvola's algorithm for text binarization, see paper -// "Adaptive document image binarization" (2000) -func Sauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - window := surrounding(img, x, y, windowsize) - m, dev := meanstddev(window) - threshold := m * (1 + ksize*((dev/128)-1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} - -// Implements Sauvola's algorithm using Integral Images, see paper -// "Efficient Implementation of Local Adaptive Thresholding Techniques Using Integral Images" -// and -// https://stackoverflow.com/questions/13110733/computing-image-integral -func IntegralSauvola(img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - integrals := integralimg.ToAllIntegralImg(img) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - m, dev := integrals.MeanStdDevWindow(x, y, windowsize) - threshold := m * (1 + ksize*((dev/128)-1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} - -// PreCalcedSauvola Implements Sauvola's algorithm using precalculated Integral Images -// TODO: have this be the root function that the other two reference -func PreCalcedSauvola(integrals integralimg.WithSq, img *image.Gray, ksize float64, windowsize int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - m, dev := integrals.MeanStdDevWindow(x, y, windowsize) - threshold := m * (1 + ksize*((dev/128)-1)) - if img.GrayAt(x, y).Y < uint8(threshold) { - new.SetGray(x, y, color.Gray{0}) - } else { - new.SetGray(x, y, color.Gray{255}) - } - } - } - - return new -} diff --git a/preproc/sauvola_test.go b/preproc/sauvola_test.go deleted file mode 100644 index 2331e10..0000000 --- a/preproc/sauvola_test.go +++ /dev/null @@ -1,70 +0,0 @@ -package preproc - -import ( - "flag" - "fmt" - "image" - "image/png" - "os" - "testing" -) - -func TestBinarization(t *testing.T) { - var slow = flag.Bool("slow", false, "include slow tests") - var update = flag.Bool("updatesauvola", false, "update golden files") - - cases := []struct { - name string - orig string - golden string - ksize float64 - wsize int - }{ - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w41.png", 0.5, 41}, - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.5_w19.png", 0.5, 19}, - {"integralsauvola", "testdata/pg1.png", "testdata/pg1_integralsauvola_k0.3_w19.png", 0.3, 19}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w41.png", 0.5, 41}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.5_w19.png", 0.5, 19}, - {"sauvola", "testdata/pg1.png", "testdata/pg1_sauvola_k0.3_w19.png", 0.3, 19}, - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%s_%0.1f_%d", c.name, c.ksize, c.wsize), func(t *testing.T) { - var actual *image.Gray - orig, err := decode(c.orig) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.orig, err) - } - switch c.name { - case "integralsauvola": - actual = IntegralSauvola(orig, c.ksize, c.wsize) - case "sauvola": - if *slow { - actual = Sauvola(orig, c.ksize, c.wsize) - } else { - t.Skip("Skipping slow test; use -slow to run it.\n") - } - default: - t.Fatalf("No method %s\n", c.name) - } - if *update { - f, err := os.Create(c.golden) - defer f.Close() - if err != nil { - t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) - } - err = png.Encode(f, actual) - if err != nil { - t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) - } - } - golden, err := decode(c.golden) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.golden, err) - } - if !imgsequal(golden, actual) { - t.Errorf("Binarized %s differs to %s\n", c.orig, c.golden) - } - }) - } -} diff --git a/preproc/test_helpers.go b/preproc/test_helpers.go deleted file mode 100644 index 20de5b1..0000000 --- a/preproc/test_helpers.go +++ /dev/null @@ -1,53 +0,0 @@ -package preproc - -// TODO: add different pages as test cases -// TODO: test non integral img version - -import ( - "image" - "image/draw" - "image/png" - "os" -) - -func decode(s string) (*image.Gray, error) { - f, err := os.Open(s) - defer f.Close() - if err != nil { - return nil, err - } - img, err := png.Decode(f) - if err != nil { - return nil, err - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - return gray, nil -} - -func imgsequal(img1 *image.Gray, img2 *image.Gray) bool { - b := img1.Bounds() - if !b.Eq(img2.Bounds()) { - return false - } - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - r0, g0, b0, a0 := img1.At(x, y).RGBA() - r1, g1, b1, a1 := img2.At(x, y).RGBA() - if r0 != r1 { - return false - } - if g0 != g1 { - return false - } - if b0 != b1 { - return false - } - if a0 != a1 { - return false - } - } - } - return true -} diff --git a/preproc/testdata/pg1.png b/preproc/testdata/pg1.png Binary files differdeleted file mode 100644 index 2bcc4b1..0000000 --- a/preproc/testdata/pg1.png +++ /dev/null diff --git a/preproc/testdata/pg1_integralsauvola_k0.3_w19.png b/preproc/testdata/pg1_integralsauvola_k0.3_w19.png Binary files differdeleted file mode 100644 index bdf5712..0000000 --- a/preproc/testdata/pg1_integralsauvola_k0.3_w19.png +++ /dev/null diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w19.png b/preproc/testdata/pg1_integralsauvola_k0.5_w19.png Binary files differdeleted file mode 100644 index 5db2d9a..0000000 --- a/preproc/testdata/pg1_integralsauvola_k0.5_w19.png +++ /dev/null diff --git a/preproc/testdata/pg1_integralsauvola_k0.5_w41.png b/preproc/testdata/pg1_integralsauvola_k0.5_w41.png Binary files differdeleted file mode 100644 index 050d037..0000000 --- a/preproc/testdata/pg1_integralsauvola_k0.5_w41.png +++ /dev/null diff --git a/preproc/testdata/pg1_sauvola_k0.3_w19.png b/preproc/testdata/pg1_sauvola_k0.3_w19.png Binary files differdeleted file mode 100644 index bcd595f..0000000 --- a/preproc/testdata/pg1_sauvola_k0.3_w19.png +++ /dev/null diff --git a/preproc/testdata/pg1_sauvola_k0.5_w19.png b/preproc/testdata/pg1_sauvola_k0.5_w19.png Binary files differdeleted file mode 100644 index 8de596c..0000000 --- a/preproc/testdata/pg1_sauvola_k0.5_w19.png +++ /dev/null diff --git a/preproc/testdata/pg1_sauvola_k0.5_w41.png b/preproc/testdata/pg1_sauvola_k0.5_w41.png Binary files differdeleted file mode 100644 index b8f50e0..0000000 --- a/preproc/testdata/pg1_sauvola_k0.5_w41.png +++ /dev/null diff --git a/preproc/testdata/pg2.png b/preproc/testdata/pg2.png Binary files differdeleted file mode 100644 index c7c4249..0000000 --- a/preproc/testdata/pg2.png +++ /dev/null diff --git a/preproc/testdata/pg2_integralwipesides_t0.02_w5.png b/preproc/testdata/pg2_integralwipesides_t0.02_w5.png Binary files differdeleted file mode 100644 index 6b4ccb2..0000000 --- a/preproc/testdata/pg2_integralwipesides_t0.02_w5.png +++ /dev/null diff --git a/preproc/testdata/pg2_integralwipesides_t0.05_w25.png b/preproc/testdata/pg2_integralwipesides_t0.05_w25.png Binary files differdeleted file mode 100644 index 39dc88d..0000000 --- a/preproc/testdata/pg2_integralwipesides_t0.05_w25.png +++ /dev/null diff --git a/preproc/testdata/pg2_integralwipesides_t0.05_w5.png b/preproc/testdata/pg2_integralwipesides_t0.05_w5.png Binary files differdeleted file mode 100644 index 50df855..0000000 --- a/preproc/testdata/pg2_integralwipesides_t0.05_w5.png +++ /dev/null diff --git a/preproc/util.go b/preproc/util.go deleted file mode 100644 index e23829d..0000000 --- a/preproc/util.go +++ /dev/null @@ -1,95 +0,0 @@ -package preproc - -import ( - "errors" - "image" - "math" -) - -// TODO: name better; maybe verb, x-er -// TODO: implement these for regular image, and use them to make -// image functions generic for integral and non- images -type UsefulImg interface { - MeanWindow() - MeanStdDevWindow() -} - -func mean(i []int) float64 { - sum := 0 - for _, n := range i { - sum += n - } - return float64(sum) / float64(len(i)) -} - -func stddev(i []int) float64 { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := sum / float64(len(i)-1) - return math.Sqrt(variance) -} - -func meanstddev(i []int) (float64, float64) { - m := mean(i) - - var sum float64 - for _, n := range i { - sum += (float64(n) - m) * (float64(n) - m) - } - variance := float64(sum) / float64(len(i)-1) - return m, math.Sqrt(variance) -} - -// gets the pixel values surrounding a point in the image -func surrounding(img *image.Gray, x int, y int, size int) []int { - b := img.Bounds() - step := size / 2 - - miny := y - step - if miny < b.Min.Y { - miny = b.Min.Y - } - minx := x - step - if minx < b.Min.X { - minx = b.Min.X - } - maxy := y + step - if maxy > b.Max.Y { - maxy = b.Max.Y - } - maxx := x + step - if maxx > b.Max.X { - maxx = b.Max.X - } - - var s []int - for yi := miny; yi <= maxy; yi++ { - for xi := minx; xi <= maxx; xi++ { - s = append(s, int(img.GrayAt(xi, yi).Y)) - } - } - return s -} - -func BinToZeroInv(bin *image.Gray, orig *image.RGBA) (*image.RGBA, error) { - b := bin.Bounds() - if !b.Eq(orig.Bounds()) { - return orig, errors.New("bin and orig images need to be the same dimensions") - } - newimg := image.NewRGBA(image.Rect(0, 0, b.Dx(), b.Dy())) - for y := b.Min.Y; y < b.Max.Y; y++ { - for x := b.Min.X; x < b.Max.X; x++ { - if bin.GrayAt(x, y).Y == 255 { - newimg.Set(x, y, bin.GrayAt(x, y)) - } else { - newimg.Set(x, y, orig.At(x, y)) - } - } - } - - return newimg, nil -} diff --git a/preproc/wipesides.go b/preproc/wipesides.go deleted file mode 100644 index 3d08053..0000000 --- a/preproc/wipesides.go +++ /dev/null @@ -1,160 +0,0 @@ -package preproc - -// TODO: add minimum size variable (default ~30%?) -// TODO: switch to an interface rather than integralimg.I - -import ( - "errors" - "fmt" - "image" - "image/color" - "image/draw" - _ "image/jpeg" - "image/png" - "os" - - "rescribe.xyz/go.git/integralimg" -) - -// returns the proportion of the given window that is black pixels -func proportion(i integralimg.I, x int, size int) float64 { - w := i.GetVerticalWindow(x, size) - return w.Proportion() -} - -// findbestedge goes through every vertical line from x to x+w to -// find the one with the lowest proportion of black pixels. -func findbestedge(img integralimg.I, x int, w int) int { - var bestx int - var best float64 - - if w == 1 { - return x - } - - right := x + w - for ; x < right; x++ { - prop := proportion(img, x, 1) - if prop > best { - best = prop - bestx = x - } - } - - return bestx -} - -// findedges finds the edges of the main content, by moving a window of wsize -// from near the middle of the image to the left and right, stopping when it reaches -// a point at which there is a lower proportion of black pixels than thresh. -func findedges(img integralimg.I, wsize int, thresh float64) (int, int) { - maxx := len(img[0]) - 1 - var lowedge, highedge int = 0, maxx - - // don't start at the middle, as this will fail for 2 column layouts, - // start 10% left or right of the middle - notcentre := maxx / 10 - - for x := maxx/2 + notcentre; x < maxx-wsize; x++ { - if proportion(img, x, wsize) <= thresh { - highedge = findbestedge(img, x, wsize) - break - } - } - - for x := maxx/2 - notcentre; x > 0; x-- { - if proportion(img, x, wsize) <= thresh { - lowedge = findbestedge(img, x, wsize) - break - } - } - - return lowedge, highedge -} - -// wipesides fills the sections of image not within the boundaries -// of lowedge and highedge with white -func wipesides(img *image.Gray, lowedge int, highedge int) *image.Gray { - b := img.Bounds() - new := image.NewGray(b) - - // set left edge white - for x := b.Min.X; x < lowedge; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, color.Gray{255}) - } - } - // copy middle - for x := lowedge; x < highedge; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, img.GrayAt(x, y)) - } - } - // set right edge white - for x := highedge; x < b.Max.X; x++ { - for y := b.Min.Y; y < b.Max.Y; y++ { - new.SetGray(x, y, color.Gray{255}) - } - } - - return new -} - -// toonarrow checks whether the area between lowedge and highedge is -// less than min % of the total image width -func toonarrow(img *image.Gray, lowedge int, highedge int, min int) bool { - b := img.Bounds() - imgw := b.Max.X - b.Min.X - wipew := highedge - lowedge - if float64(wipew)/float64(imgw)*100 < float64(min) { - return true - } - return false -} - -// Wipe fills the sections of image which fall outside the content -// area with white, providing the content area is above min % -func Wipe(img *image.Gray, wsize int, thresh float64, min int) *image.Gray { - integral := integralimg.ToIntegralImg(img) - lowedge, highedge := findedges(integral, wsize, thresh) - if toonarrow(img, lowedge, highedge, min) { - return img - } - return wipesides(img, lowedge, highedge) -} - -// WipeFile wipes an image file, filling the sections of the image -// which fall outside the content area with white, providing the -// content area is above min %. -// inPath: path of the input image. -// outPath: path to save the output image. -// wsize: window size for wipe algorithm. -// thresh: threshold for wipe algorithm. -// min: minimum % of content area width to consider valid. -func WipeFile(inPath string, outPath string, wsize int, thresh float64, min int) error { - f, err := os.Open(inPath) - defer f.Close() - if err != nil { - return errors.New(fmt.Sprintf("Could not open file %s: %v", inPath, err)) - } - img, _, err := image.Decode(f) - if err != nil { - return errors.New(fmt.Sprintf("Could not decode image: %v", err)) - } - b := img.Bounds() - gray := image.NewGray(image.Rect(0, 0, b.Dx(), b.Dy())) - draw.Draw(gray, b, img, b.Min, draw.Src) - - clean := Wipe(gray, wsize, thresh, min) - - f, err = os.Create(outPath) - if err != nil { - return errors.New(fmt.Sprintf("Could not create file %s: %v", outPath, err)) - } - defer f.Close() - err = png.Encode(f, clean) - if err != nil { - return errors.New(fmt.Sprintf("Could not encode image: %v", err)) - } - return nil -} diff --git a/preproc/wipesides_test.go b/preproc/wipesides_test.go deleted file mode 100644 index d5464e0..0000000 --- a/preproc/wipesides_test.go +++ /dev/null @@ -1,57 +0,0 @@ -package preproc - -// TODO: add different pages as test cases -// TODO: test non integral img version - -import ( - "flag" - "fmt" - "image" - "image/png" - "os" - "testing" -) - -func TestWipeSides(t *testing.T) { - var update = flag.Bool("updatewipe", false, "update golden files") - cases := []struct { - name string - orig string - golden string - thresh float64 - wsize int - }{ - {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.02_w5.png", 0.02, 5}, - {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w5.png", 0.05, 5}, - {"integralwipesides", "testdata/pg2.png", "testdata/pg2_integralwipesides_t0.05_w25.png", 0.05, 25}, - } - - for _, c := range cases { - t.Run(fmt.Sprintf("%s_%0.2f_%d", c.name, c.thresh, c.wsize), func(t *testing.T) { - var actual *image.Gray - orig, err := decode(c.orig) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.orig, err) - } - actual = Wipe(orig, c.wsize, c.thresh) - if *update { - f, err := os.Create(c.golden) - defer f.Close() - if err != nil { - t.Fatalf("Could not open file %s to update: %v\n", c.golden, err) - } - err = png.Encode(f, actual) - if err != nil { - t.Fatalf("Could not encode update of %s: %v\n", c.golden, err) - } - } - golden, err := decode(c.golden) - if err != nil { - t.Fatalf("Could not open file %s: %v\n", c.golden, err) - } - if !imgsequal(golden, actual) { - t.Errorf("Processed %s differs to %s\n", c.orig, c.golden) - } - }) - } -} |