1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
|
package hocr
// TODO: Parse the line name and zero-pad its line number, so that
// lines can be sorted easily.
// TODO: Use the same filename format as .prob uses: include the base
// dirname, and omit line numbers if there is only one line in the
// hocr.
import (
"image"
"image/png"
"io/ioutil"
"os"
"path/filepath"
"strings"
"git.rescribe.xyz/testingtools/lib/line"
)
// lineImageError is the error parseLineDetails returns when the page image
// cannot be used to cut out per-line images.
type lineImageError string

// Error implements the error interface.
func (e lineImageError) Error() string { return string(e) }

// parseLineDetails builds one line.Detail for every line in h.
//
// For each line it records:
//   - Name: the line's Id.
//   - OcrName: name, as passed by the caller.
//   - Avgconf: the mean of wordConf over the line's words, divided by 100.
//     A line with no words gets 0 rather than NaN (0/0).
//   - Text: the line's own Text; if noText reports that as empty, the Text
//     of its "ocrx_word" words joined by spaces; if that is still empty, the
//     Text of the "ocrx_cinfo" characters of those words, one space between
//     words. Trailing spaces are trimmed and a newline is appended.
//   - Img: the region of i given by the line's bounding box (boxCoords).
//
// i must be an *image.Gray; otherwise an error is returned as soon as a line
// needs to be cropped. Errors from wordConf and boxCoords are returned as-is.
// On error the lines collected so far are returned alongside it.
func parseLineDetails(h Hocr, i image.Image, name string) (line.Details, error) {
	lines := make(line.Details, 0, len(h.Lines))

	// Comma-ok form: a non-grayscale (or nil) image must surface as an error
	// below instead of panicking inside library code.
	gray, isGray := i.(*image.Gray)

	for _, l := range h.Lines {
		var totalconf float64
		num := 0
		for _, w := range l.Words {
			c, err := wordConf(w.Title)
			if err != nil {
				return lines, err
			}
			num++
			totalconf += c
		}

		coords, err := boxCoords(l.Title)
		if err != nil {
			return lines, err
		}
		// Checked per line (not before the loop) so that a document with no
		// lines still succeeds regardless of the image type.
		if !isGray {
			return lines, lineImageError("hocr: page image is not an *image.Gray")
		}

		var ln line.Detail
		ln.Name = l.Id
		ln.OcrName = name
		// Guard against 0/0: without words there is nothing to average, and
		// a NaN confidence would poison later comparisons and sorting.
		if num > 0 {
			ln.Avgconf = (totalconf / float64(num)) / 100
		}

		linetext := l.Text
		if noText(linetext) {
			// No line-level text: fall back to word-level text.
			var b strings.Builder
			for _, w := range l.Words {
				if w.Class != "ocrx_word" {
					continue
				}
				b.WriteString(w.Text)
				b.WriteByte(' ')
			}
			linetext = b.String()
		}
		if noText(linetext) {
			// No word-level text either: fall back to character-level text.
			var b strings.Builder
			for _, w := range l.Words {
				if w.Class != "ocrx_word" {
					continue
				}
				for _, c := range w.Chars {
					if c.Class != "ocrx_cinfo" {
						continue
					}
					b.WriteString(c.Text)
				}
				b.WriteByte(' ')
			}
			linetext = b.String()
		}
		ln.Text = strings.TrimRight(linetext, " ") + "\n"

		var imgd line.ImgDirect
		imgd.Img = gray.SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))
		ln.Img = imgd

		lines = append(lines, ln)
	}
	return lines, nil
}
// GetLineDetails reads the hOCR file hocrfn, parses it with Parse, decodes
// the PNG image stored next to it, and returns the per-line details produced
// by parseLineDetails.
//
// The PNG path is hocrfn with a trailing ".hocr" replaced by ".png", and the
// name passed on to parseLineDetails is the base name of hocrfn without a
// trailing ".hocr". Only the extension at the end of the path is touched, so
// a ".hocr" appearing earlier in the path (for example in a directory name)
// is left alone.
//
// Any error from reading, parsing, opening or decoding is returned unchanged
// together with a zero-value line.Details.
func GetLineDetails(hocrfn string) (line.Details, error) {
	const ext = ".hocr"

	var newlines line.Details

	file, err := ioutil.ReadFile(hocrfn)
	if err != nil {
		return newlines, err
	}

	h, err := Parse(file)
	if err != nil {
		return newlines, err
	}

	// TrimSuffix rather than Replace: Replace would rewrite the first ".hocr"
	// anywhere in the path, not necessarily the file extension.
	pngfn := strings.TrimSuffix(hocrfn, ext) + ".png"
	pngf, err := os.Open(pngfn)
	if err != nil {
		return newlines, err
	}
	defer pngf.Close()

	img, err := png.Decode(pngf)
	if err != nil {
		return newlines, err
	}

	n := strings.TrimSuffix(filepath.Base(hocrfn), ext)
	return parseLineDetails(h, img, n)
}
|