summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.name>2019-01-23 18:22:49 +0000
committerNick White <git@njw.name>2019-01-23 18:22:49 +0000
commit2e5f7a80673c29b778f76f8f17896440ab75b4d4 (patch)
tree8b753f3c06a7238e0f4ac7576d49fac8527b162f
parent93a02d75156148ee69e223b6440e07ca581b643c (diff)
Start separating out functionality into separate package; working, but more to do, see TODO in hocr.go
-rw-r--r--hocr/hocr.go170
-rw-r--r--line-conf-buckets-tess/line-conf-buckets-tess.go204
2 files changed, 189 insertions, 185 deletions
diff --git a/hocr/hocr.go b/hocr/hocr.go
new file mode 100644
index 0000000..4384bad
--- /dev/null
+++ b/hocr/hocr.go
@@ -0,0 +1,170 @@
+package hocr
+
+// TODO: separate out linedetail to a general structure that can incorporate
+// line-conf-buckets too, in a different file (and rename package to
+// something more generic). Try to use interfaces to fill it with hocr
+// or ocropy data simply. Could design it so LineDetail is an interface,
+// that can do CopyLines() and BucketUp(). Actually only BucketUp() would
+// be needed, as copylines can be internal to that. Then can create more
+// methods as and when needed. BucketUp() can take some form of arguments
+// that can make it flexible, like []struct { dirname string, minconf float64 }
+// TODO: Parse line name to zero pad line numbers, so they come out in the correct order
+
+import (
+ "encoding/xml"
+ "image"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+type LineDetail struct {
+ Name string
+ Avgconf float64
+ Img image.Image
+ Text string
+ Hocrname string
+}
+
+type LineDetails []LineDetail
+
+// Used by sort.Sort.
+func (l LineDetails) Len() int { return len(l) }
+
+// Used by sort.Sort.
+func (l LineDetails) Less(i, j int) bool {
+ return l[i].Avgconf < l[j].Avgconf
+}
+
+// Used by sort.Sort.
+func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
+
+type Hocr struct {
+ Lines []OcrLine `xml:"body>div>div>p>span"`
+}
+
+type OcrLine struct {
+ Class string `xml:"class,attr"`
+ Id string `xml:"id,attr"`
+ Title string `xml:"title,attr"`
+ Words []OcrWord `xml:"span"`
+ Text string `xml:",chardata"`
+}
+
+type OcrWord struct {
+ Class string `xml:"class,attr"`
+ Id string `xml:"id,attr"`
+ Title string `xml:"title,attr"`
+ Chars []OcrChar `xml:"span"`
+ Text string `xml:",chardata"`
+}
+
+type OcrChar struct {
+ Class string `xml:"class,attr"`
+ Id string `xml:"id,attr"`
+ Title string `xml:"title,attr"`
+ Chars []OcrChar `xml:"span"`
+ Text string `xml:",chardata"`
+}
+
+// Returns the confidence for a word based on its x_wconf value
+func wordConf(s string) (float64, error) {
+ re, err := regexp.Compile(`x_wconf ([0-9.]+)`)
+ if err != nil {
+ return 0.0, err
+ }
+ conf := re.FindStringSubmatch(s)
+ return strconv.ParseFloat(conf[1], 64)
+}
+
+func boxCoords(s string) ([4]int, error) {
+ var coords [4]int
+ re, err := regexp.Compile(`bbox ([0-9]+) ([0-9]+) ([0-9]+) ([0-9]+)`)
+ if err != nil {
+ return coords, err
+ }
+ coordstr := re.FindStringSubmatch(s)
+ for i := range coords {
+ c, err := strconv.Atoi(coordstr[i+1])
+ if err != nil {
+ return coords, err
+ }
+ coords[i] = c
+ }
+ return coords, nil
+}
+
+func noText(s string) bool {
+ t := strings.Trim(s, " \n")
+ return len(t) == 0
+}
+
+func Parse(b []byte) (Hocr, error) {
+ var hocr Hocr
+
+ err := xml.Unmarshal(b, &hocr)
+ if err != nil {
+ return hocr, err
+ }
+
+ return hocr, nil
+}
+
+func GetLineDetails(h Hocr, i image.Image, name string) (LineDetails, error) {
+ lines := make(LineDetails, 0)
+
+ for _, l := range h.Lines {
+ totalconf := float64(0)
+ num := 0
+ for _, w := range l.Words {
+ c, err := wordConf(w.Title)
+ if err != nil {
+ return lines, err
+ }
+ num++
+ totalconf += c
+ }
+
+ coords, err := boxCoords(l.Title)
+ if err != nil {
+ return lines, err
+ }
+
+ var line LineDetail
+ line.Name = l.Id
+ line.Avgconf = (totalconf/float64(num)) / 100
+ linetext := ""
+
+ linetext = l.Text
+ if(noText(linetext)) {
+ linetext = ""
+ for _, w := range l.Words {
+ if(w.Class != "ocrx_word") {
+ continue
+ }
+ linetext += w.Text + " "
+ }
+ }
+ if(noText(linetext)) {
+ linetext = ""
+ for _, w := range l.Words {
+ if(w.Class != "ocrx_word") {
+ continue
+ }
+ for _, c := range w.Chars {
+ if(c.Class != "ocrx_cinfo") {
+ continue
+ }
+ linetext += c.Text
+ }
+ linetext += " "
+ }
+ }
+ line.Text = strings.TrimRight(linetext, " ")
+ line.Text += "\n"
+ line.Hocrname = name
+ line.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))
+ lines = append(lines, line)
+ }
+ return lines, nil
+}
diff --git a/line-conf-buckets-tess/line-conf-buckets-tess.go b/line-conf-buckets-tess/line-conf-buckets-tess.go
index 3e33b58..1af8360 100644
--- a/line-conf-buckets-tess/line-conf-buckets-tess.go
+++ b/line-conf-buckets-tess/line-conf-buckets-tess.go
@@ -1,142 +1,24 @@
package main
-// TODO: combine this with line-conf-buckets, separating the parsing
-// out to a separate library, probably
-// see https://github.com/OCR-D/ocrd-train/issues/7 and https://github.com/OCR-D/ocrd-train/
+// TODO: see TODO in hocr package
//
// TODO: Simplify things into functions more; this works well, but is a bit of a rush job
-// TODO: Parse line name to zero pad line numbers, so they come out in the correct order
import (
- "encoding/xml"
"flag"
"fmt"
- "image"
"image/png"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
- "regexp"
"sort"
"strconv"
"strings"
-)
-
-type LineDetail struct {
- name string
- avgconf float64
- img image.Image
- text string
- hocrname string
-}
-
-type LineDetails []LineDetail
-
-// Used by sort.Sort.
-func (l LineDetails) Len() int { return len(l) }
-
-// Used by sort.Sort.
-func (l LineDetails) Less(i, j int) bool {
- return l[i].avgconf < l[j].avgconf
-}
-
-// Used by sort.Sort.
-func (l LineDetails) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
-
-func copyline(filebase string, dirname string, basename string, avgconf string, outdir string, todir string) (err error) {
- outname := filepath.Join(outdir, todir, filepath.Base(dirname) + "_" + basename + "_" + avgconf)
- //log.Fatalf("I'd use '%s' as outname, and '%s' as filebase\n", outname, filebase)
-
- for _, extn := range []string{".bin.png", ".txt"} {
- infile, err := os.Open(filebase + extn)
- if err != nil {
- fmt.Fprintf(os.Stderr, "Failed to open %s\n", filebase + extn)
- return err
- }
- defer infile.Close()
-
- err = os.MkdirAll(filepath.Join(outdir, todir), 0700)
- if err != nil {
- return err
- }
- outfile, err := os.Create(outname + extn)
- if err != nil {
- fmt.Fprintf(os.Stderr, "Failed to create %s\n", outname + extn)
- return err
- }
- defer outfile.Close()
-
- _, err = io.Copy(outfile, infile)
- if err != nil {
- return err
- }
- }
-
- return err
-}
-
-type Hocr struct {
- Lines []OcrLine `xml:"body>div>div>p>span"`
-}
-
-type OcrLine struct {
- Class string `xml:"class,attr"`
- Id string `xml:"id,attr"`
- Title string `xml:"title,attr"`
- Words []OcrWord `xml:"span"`
- Text string `xml:",chardata"`
-}
-
-type OcrWord struct {
- Class string `xml:"class,attr"`
- Id string `xml:"id,attr"`
- Title string `xml:"title,attr"`
- Chars []OcrChar `xml:"span"`
- Text string `xml:",chardata"`
-}
-
-type OcrChar struct {
- Class string `xml:"class,attr"`
- Id string `xml:"id,attr"`
- Title string `xml:"title,attr"`
- Chars []OcrChar `xml:"span"`
- Text string `xml:",chardata"`
-}
-
-// Returns the confidence for a word based on its x_wconf value
-func wordConf(s string) (float64, error) {
- re, err := regexp.Compile(`x_wconf ([0-9.]+)`)
- if err != nil {
- return 0.0, err
- }
- conf := re.FindStringSubmatch(s)
- return strconv.ParseFloat(conf[1], 64)
-}
-
-func boxCoords(s string) ([4]int, error) {
- var coords [4]int
- re, err := regexp.Compile(`bbox ([0-9]+) ([0-9]+) ([0-9]+) ([0-9]+)`)
- if err != nil {
- return coords, err
- }
- coordstr := re.FindStringSubmatch(s)
- for i := range coords {
- c, err := strconv.Atoi(coordstr[i+1])
- if err != nil {
- return coords, err
- }
- coords[i] = c
- }
- return coords, nil
-}
-
-func noText(s string) bool {
- t := strings.Trim(s, " \n")
- return len(t) == 0
-}
+ "git.rescribe.xyz/testingtools/hocr"
+)
func main() {
flag.Usage = func() {
@@ -151,9 +33,7 @@ func main() {
os.Exit(1)
}
- lines := make(LineDetails, 0)
-
- var hocr Hocr
+ lines := make(hocr.LineDetails, 0)
for _, f := range flag.Args() {
file, err := ioutil.ReadFile(f)
@@ -161,7 +41,7 @@ func main() {
log.Fatal(err)
}
- err = xml.Unmarshal(file, &hocr)
+ h, err := hocr.Parse(file)
if err != nil {
log.Fatal(err)
}
@@ -177,58 +57,13 @@ func main() {
log.Fatal(err)
}
- for _, l := range hocr.Lines {
- totalconf := float64(0)
- num := 0
- for _, w := range l.Words {
- c, err := wordConf(w.Title)
- if err != nil {
- log.Fatal(err)
- }
- num++
- totalconf += c
- }
-
- coords, err := boxCoords(l.Title)
- if err != nil {
- log.Fatal(err)
- }
-
- var line LineDetail
- line.name = l.Id
- line.avgconf = (totalconf/float64(num)) / 100
- linetext := ""
-
- linetext = l.Text
- if(noText(linetext)) {
- linetext = ""
- for _, w := range l.Words {
- if(w.Class != "ocrx_word") {
- continue
- }
- linetext += w.Text + " "
- }
- }
- if(noText(linetext)) {
- linetext = ""
- for _, w := range l.Words {
- if(w.Class != "ocrx_word") {
- continue
- }
- for _, c := range w.Chars {
- if(c.Class != "ocrx_cinfo") {
- continue
- }
- linetext += c.Text
- }
- linetext += " "
- }
- }
- line.text = strings.TrimRight(linetext, " ")
- line.text += "\n"
- line.hocrname = strings.Replace(filepath.Base(f), ".hocr", "", 1)
- line.img = img.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))
- lines = append(lines, line)
+ n := strings.Replace(filepath.Base(f), ".hocr", "", 1)
+ newlines, err := hocr.GetLineDetails(h, img, n)
+ if err != nil {
+ log.Fatal(err)
+ }
+ for _, l := range newlines {
+ lines = append(lines, l)
}
}
@@ -243,10 +78,10 @@ func main() {
for _, l := range lines {
switch {
- case l.avgconf < 0.95:
+ case l.Avgconf < 0.95:
todir = "bad"
worstnum++
- case l.avgconf < 0.98:
+ case l.Avgconf < 0.98:
todir = "95to98"
mediumnum++
default:
@@ -254,10 +89,9 @@ func main() {
bestnum++
}
- avgstr := strconv.FormatFloat(l.avgconf, 'f', 5, 64)
+ avgstr := strconv.FormatFloat(l.Avgconf, 'f', 5, 64)
avgstr = avgstr[2:]
- fmt.Printf("Line: %s, avg: %f, avgstr: %s\n", l.name, l.avgconf, avgstr)
- outname := filepath.Join(outdir, todir, l.hocrname + "_" + l.name + "_" + avgstr + ".png")
+ outname := filepath.Join(outdir, todir, l.Hocrname + "_" + l.Name + "_" + avgstr + ".png")
err := os.MkdirAll(filepath.Join(outdir, todir), 0700)
if err != nil {
@@ -271,12 +105,12 @@ func main() {
}
defer outfile.Close()
- err = png.Encode(outfile, l.img)
+ err = png.Encode(outfile, l.Img)
if err != nil {
log.Fatal(err)
}
- outname = filepath.Join(outdir, todir, l.hocrname + "_" + l.name + "_" + avgstr + ".txt")
+ outname = filepath.Join(outdir, todir, l.Hocrname + "_" + l.Name + "_" + avgstr + ".txt")
outfile, err = os.Create(outname)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to create %s\n", outname)
@@ -284,7 +118,7 @@ func main() {
}
defer outfile.Close()
- _, err = io.WriteString(outfile, l.text)
+ _, err = io.WriteString(outfile, l.Text)
if err != nil {
log.Fatal(err)
}