summary refs log tree commit diff
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r-- pkg/hocr/hocr.go 129
-rw-r--r-- pkg/hocr/lines.go 131
-rw-r--r-- pkg/line/line.go 57
-rw-r--r-- pkg/prob/prob.go 69
4 files changed, 386 insertions, 0 deletions
diff --git a/pkg/hocr/hocr.go b/pkg/hocr/hocr.go
new file mode 100644
index 0000000..dcd0494
--- /dev/null
+++ b/pkg/hocr/hocr.go
@@ -0,0 +1,129 @@
+package hocr
+
+import (
+ "encoding/xml"
+ "errors"
+ "io/ioutil"
+ "regexp"
+ "strconv"
+ "strings"
+)
+
+// Hocr holds the lines decoded from a hOCR document. Lines is filled from
+// the span elements found at the body>div>div>p>span path.
+type Hocr struct {
+ Lines []OcrLine `xml:"body>div>div>p>span"`
+}
+
+// OcrLine is one line-level span element. It is decoded from the element's
+// class, id and title attributes, its child span elements (Words) and its
+// own character data (Text).
+type OcrLine struct {
+ Class string `xml:"class,attr"`
+ Id string `xml:"id,attr"`
+ // Title is the raw title attribute; parseLineDetails reads the bbox
+ // coordinates from it.
+ Title string `xml:"title,attr"`
+ Words []OcrWord `xml:"span"`
+ Text string `xml:",chardata"`
+}
+
+// OcrWord is a span element nested directly inside a line. It is decoded
+// from the element's class, id and title attributes, its child span
+// elements (Chars) and its own character data (Text).
+type OcrWord struct {
+ Class string `xml:"class,attr"`
+ Id string `xml:"id,attr"`
+ // Title is the raw title attribute; GetAvgConf and parseLineDetails
+ // read the x_wconf value from it.
+ Title string `xml:"title,attr"`
+ Chars []OcrChar `xml:"span"`
+ Text string `xml:",chardata"`
+}
+
+// OcrChar is a span element nested inside a word. getLineText uses the Text
+// of those whose Class is "ocrx_cinfo". An OcrChar may itself contain
+// further nested span elements, which are decoded into Chars.
+type OcrChar struct {
+ Class string `xml:"class,attr"`
+ Id string `xml:"id,attr"`
+ Title string `xml:"title,attr"`
+ Chars []OcrChar `xml:"span"`
+ Text string `xml:",chardata"`
+}
+
+// wordConfRe matches the x_wconf property of a title attribute and captures
+// its numeric value. It is compiled once rather than on every call.
+var wordConfRe = regexp.MustCompile(`x_wconf ([0-9.]+)`)
+
+// wordConf returns the confidence for a word based on the x_wconf value in
+// its title attribute s. It returns an error if s contains no x_wconf
+// property, or if the captured value cannot be parsed as a float64.
+func wordConf(s string) (float64, error) {
+	m := wordConfRe.FindStringSubmatch(s)
+	if m == nil {
+		// FindStringSubmatch returns nil when nothing matches; indexing
+		// the result without this check would panic.
+		return 0, errors.New("no x_wconf found in title: " + s)
+	}
+	return strconv.ParseFloat(m[1], 64)
+}
+
+// bboxRe matches the bbox property of a title attribute and captures its
+// four integer values. It is compiled once rather than on every call.
+var bboxRe = regexp.MustCompile(`bbox ([0-9]+) ([0-9]+) ([0-9]+) ([0-9]+)`)
+
+// boxCoords returns the four integers of the bbox property in the title
+// attribute s, in the order they appear. It returns an error if s contains
+// no bbox property, or if one of the values does not fit in an int.
+func boxCoords(s string) ([4]int, error) {
+	var coords [4]int
+	m := bboxRe.FindStringSubmatch(s)
+	if m == nil {
+		// FindStringSubmatch returns nil when nothing matches; indexing
+		// the result without this check would panic.
+		return coords, errors.New("no bbox found in title: " + s)
+	}
+	for i := range coords {
+		// m[0] is the whole match, so the captured values start at m[1].
+		c, err := strconv.Atoi(m[i+1])
+		if err != nil {
+			return coords, err
+		}
+		coords[i] = c
+	}
+	return coords, nil
+}
+
+// noText reports whether nothing is left of s once leading and trailing
+// spaces and newlines have been removed.
+func noText(s string) bool {
+	return strings.Trim(s, " \n") == ""
+}
+
+// Parse decodes the XML document in b into a Hocr. The Hocr is returned
+// together with any error reported by xml.Unmarshal.
+func Parse(b []byte) (Hocr, error) {
+	var doc Hocr
+	err := xml.Unmarshal(b, &doc)
+	return doc, err
+}
+
+// GetText reads the hOCR file hocrfn and returns the text of all of its
+// lines concatenated, each line as produced by getLineText. It returns an
+// empty string and the error if the file cannot be read or parsed.
+func GetText(hocrfn string) (string, error) {
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return "", err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return "", err
+	}
+
+	// A Builder avoids re-copying the accumulated text for every line.
+	var b strings.Builder
+	for _, l := range h.Lines {
+		b.WriteString(getLineText(l))
+	}
+	return b.String(), nil
+}
+
+// GetAvgConf reads the hOCR file hocrfn and returns the mean of the x_wconf
+// values of every word in every line. The mean is returned as is, without
+// rescaling. It returns an error if the file cannot be read or parsed, if
+// wordConf fails for any word, or if the document contains no words.
+func GetAvgConf(hocrfn string) (float64, error) {
+	data, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return 0, err
+	}
+
+	doc, err := Parse(data)
+	if err != nil {
+		return 0, err
+	}
+
+	sum, count := 0.0, 0.0
+	for i := range doc.Lines {
+		words := doc.Lines[i].Words
+		for j := range words {
+			conf, err := wordConf(words[j].Title)
+			if err != nil {
+				return 0, err
+			}
+			sum += conf
+			count++
+		}
+	}
+
+	if count == 0 {
+		return 0, errors.New("No words found")
+	}
+	return sum / count, nil
+}
diff --git a/pkg/hocr/lines.go b/pkg/hocr/lines.go
new file mode 100644
index 0000000..e90b0a8
--- /dev/null
+++ b/pkg/hocr/lines.go
@@ -0,0 +1,131 @@
+package hocr
+
+// TODO: Parse line name to zero pad line numbers, so they can
+// be sorted easily
+
+import (
+ "image"
+ "image/png"
+ "io/ioutil"
+ "log"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "rescribe.xyz/utils/pkg/line"
+)
+
+// getLineText returns the text of line l with trailing spaces removed and a
+// newline appended. The line's own character data is used if it contains
+// any text. Otherwise the text of its "ocrx_word" words is joined, each
+// followed by a space. If that is still empty, the text of the "ocrx_cinfo"
+// characters of each such word is joined, with a space after every word.
+func getLineText(l OcrLine) string {
+	text := l.Text
+
+	// First fallback: the character data of each word.
+	if noText(text) {
+		var b strings.Builder
+		for _, w := range l.Words {
+			if w.Class == "ocrx_word" {
+				b.WriteString(w.Text)
+				b.WriteString(" ")
+			}
+		}
+		text = b.String()
+	}
+
+	// Second fallback: the character data of each word's characters.
+	if noText(text) {
+		var b strings.Builder
+		for _, w := range l.Words {
+			if w.Class != "ocrx_word" {
+				continue
+			}
+			for _, c := range w.Chars {
+				if c.Class == "ocrx_cinfo" {
+					b.WriteString(c.Text)
+				}
+			}
+			b.WriteString(" ")
+		}
+		text = b.String()
+	}
+
+	return strings.TrimRight(text, " ") + "\n"
+}
+
+// unsupportedImageError is returned by parseLineDetails when the page image
+// has no SubImage method and so cannot be cropped to a line.
+type unsupportedImageError struct{}
+
+func (unsupportedImageError) Error() string {
+	return "hocr: image type does not support SubImage"
+}
+
+// parseLineDetails builds a line.Detail for every line in h.
+//
+// For each line, Name is the line's id, Text comes from getLineText,
+// OcrName is name, and Avgconf is the mean x_wconf of the line's words
+// divided by 100. A line with no words gets an Avgconf of 0 rather than the
+// NaN that dividing by zero would produce. If i is not nil, Img is set to
+// the part of i inside the line's bbox; i may be any image type that has a
+// SubImage method.
+//
+// It returns the lines built so far and an error if a word has no valid
+// x_wconf, a line has no valid bbox, or i cannot be cropped.
+func parseLineDetails(h Hocr, i image.Image, name string) (line.Details, error) {
+	// subImager is satisfied by the concrete image types in the image
+	// package, so cropping is not limited to *image.Gray.
+	type subImager interface {
+		SubImage(r image.Rectangle) image.Image
+	}
+
+	lines := make(line.Details, 0, len(h.Lines))
+
+	for _, l := range h.Lines {
+		totalconf := float64(0)
+		num := 0
+		for _, w := range l.Words {
+			c, err := wordConf(w.Title)
+			if err != nil {
+				return lines, err
+			}
+			num++
+			totalconf += c
+		}
+
+		coords, err := boxCoords(l.Title)
+		if err != nil {
+			return lines, err
+		}
+
+		var ln line.Detail
+		ln.Name = l.Id
+		if num > 0 {
+			ln.Avgconf = (totalconf / float64(num)) / 100
+		}
+		ln.Text = getLineText(l)
+		ln.OcrName = name
+		if i != nil {
+			sub, ok := i.(subImager)
+			if !ok {
+				return lines, unsupportedImageError{}
+			}
+			ln.Img = line.ImgDirect{
+				Img: sub.SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3])),
+			}
+		}
+		lines = append(lines, ln)
+	}
+	return lines, nil
+}
+
+// GetLineDetails reads the hOCR file hocrfn and returns the details of each
+// of its lines, including a cropped image of the line where possible.
+//
+// The page image is looked for at the path made by replacing the first
+// ".hocr" in hocrfn with ".png". If it cannot be opened or decoded, a
+// warning is logged and the lines are returned without images. The OcrName
+// of each line is the base name of hocrfn with the first ".hocr" removed.
+func GetLineDetails(hocrfn string) (line.Details, error) {
+	var newlines line.Details
+
+	file, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return newlines, err
+	}
+
+	h, err := Parse(file)
+	if err != nil {
+		return newlines, err
+	}
+
+	var img image.Image
+	pngfn := strings.Replace(hocrfn, ".hocr", ".png", 1)
+	pngf, err := os.Open(pngfn)
+	if err != nil {
+		// Printf, not Println: the message contains a %s verb.
+		log.Printf("Warning: can't open image %s\n", pngfn)
+	} else {
+		defer pngf.Close()
+		img, err = png.Decode(pngf)
+		if err != nil {
+			log.Printf("Warning: can't load image %s\n", pngfn)
+			// Make sure no image is passed on after a failed decode.
+			img = nil
+		}
+	}
+
+	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
+	return parseLineDetails(h, img, n)
+}
+
+// GetLineBasics reads the hOCR file hocrfn and returns the details of each
+// of its lines without any line images. The OcrName of each line is the
+// base name of hocrfn with the first ".hocr" removed.
+func GetLineBasics(hocrfn string) (line.Details, error) {
+	data, err := ioutil.ReadFile(hocrfn)
+	if err != nil {
+		return nil, err
+	}
+
+	doc, err := Parse(data)
+	if err != nil {
+		return nil, err
+	}
+
+	base := filepath.Base(hocrfn)
+	ocrName := strings.Replace(base, ".hocr", "", 1)
+	// Passing a nil image makes parseLineDetails leave Img unset.
+	return parseLineDetails(doc, nil, ocrName)
+}
diff --git a/pkg/line/line.go b/pkg/line/line.go
new file mode 100644
index 0000000..d4e3e44
--- /dev/null
+++ b/pkg/line/line.go
@@ -0,0 +1,57 @@
+package line
+
+import (
+ "image"
+ "image/png"
+ "io"
+ "os"
+)
+
+// Detail holds the information recorded about a single line.
+type Detail struct {
+ Name string
+ // Avgconf is the average confidence of the line; Details.Less orders
+ // lines by it.
+ Avgconf float64
+ // Img gives access to the image of the line through CopyLineTo.
+ Img CopyableImg
+ Text string
+ OcrName string
+}
+
+// CopyableImg is implemented by types that can write a line image to an
+// io.Writer. ImgDirect and ImgPath implement it.
+type CopyableImg interface {
+ CopyLineTo(io.Writer) error
+}
+
+// Details is a list of lines. Its Len, Less and Swap methods are those
+// required by sort.Interface and order the lines by ascending Avgconf.
+type Details []Detail
+
+// Len returns the number of lines in l.
+func (l Details) Len() int {
+	return len(l)
+}
+
+// Less reports whether line i has a lower Avgconf than line j.
+func (l Details) Less(i, j int) bool {
+	return l[j].Avgconf > l[i].Avgconf
+}
+
+// Swap exchanges lines i and j.
+func (l Details) Swap(i, j int) {
+	tmp := l[i]
+	l[i] = l[j]
+	l[j] = tmp
+}
+
+// ImgDirect is an implementation of the CopyableImg interface that stores
+// the image directly as an image.Image.
+type ImgDirect struct {
+	Img image.Image
+}
+
+// CopyLineTo writes the stored image to w encoded as PNG.
+func (i ImgDirect) CopyLineTo(w io.Writer) error {
+	return png.Encode(w, i.Img)
+}
+
+// ImgPath is an implementation of the CopyableImg interface that stores the
+// path of an image.
+type ImgPath struct {
+	Path string
+}
+
+// CopyLineTo opens the file at Path and copies its contents to w unchanged.
+func (i ImgPath) CopyLineTo(w io.Writer) error {
+	src, err := os.Open(i.Path)
+	if err != nil {
+		return err
+	}
+	defer src.Close()
+
+	if _, err := io.Copy(w, src); err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/pkg/prob/prob.go b/pkg/prob/prob.go
new file mode 100644
index 0000000..8bdb3d5
--- /dev/null
+++ b/pkg/prob/prob.go
@@ -0,0 +1,69 @@
+package prob
+
+import (
+ "io/ioutil"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "rescribe.xyz/utils/pkg/line"
+)
+
+// getLineAvg reads the file f and returns the mean of the second field of
+// every row that has exactly two whitespace-separated fields. Rows whose
+// second field is not a valid float are skipped. It returns 0 if no row
+// contributes a value, and an error only if the file cannot be read.
+func getLineAvg(f string) (float64, error) {
+	data, err := ioutil.ReadFile(f)
+	if err != nil {
+		return 0, err
+	}
+
+	var sum float64
+	var count int
+	for _, row := range strings.Split(string(data), "\n") {
+		cols := strings.Fields(row)
+		if len(cols) != 2 {
+			continue
+		}
+		v, err := strconv.ParseFloat(cols[1], 64)
+		if err != nil {
+			continue
+		}
+		sum += v
+		count++
+	}
+
+	if count <= 0 {
+		return 0, nil
+	}
+	return sum / float64(count), nil
+}
+
+// GetLineDetails returns the details of the single line described by the
+// file probfn. Note this only processes one line at a time.
+//
+// The file base is probfn with the first ".prob" removed. The line's text
+// is read from the base plus ".txt", its image path is the base plus
+// ".bin.png", its Name is the last element of the base and its OcrName is
+// the name of the directory containing it. Avgconf comes from getLineAvg.
+func GetLineDetails(probfn string) (line.Details, error) {
+	out := make(line.Details, 0)
+
+	avg, err := getLineAvg(probfn)
+	if err != nil {
+		return out, err
+	}
+
+	filebase := strings.Replace(probfn, ".prob", "", 1)
+	txt, err := ioutil.ReadFile(filebase + ".txt")
+	if err != nil {
+		return out, err
+	}
+
+	detail := line.Detail{
+		Name:    filepath.Base(filebase),
+		Avgconf: avg,
+		Img:     line.ImgPath{Path: filebase + ".bin.png"},
+		Text:    string(txt),
+		OcrName: filepath.Base(filepath.Dir(filebase)),
+	}
+	return append(out, detail), nil
+}