From 7482157a03ed3e9d7f45e54a126b391001f34948 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 8 Oct 2019 12:52:33 +0100 Subject: Separate out bookpipeline from catch-all go.git repo, and rename to rescribe.xyz/bookpipeline The dependencies from the go.git repo will follow in due course. --- lib/hocr/lines.go | 131 ------------------------------------------------------ 1 file changed, 131 deletions(-) delete mode 100644 lib/hocr/lines.go (limited to 'lib/hocr/lines.go') diff --git a/lib/hocr/lines.go b/lib/hocr/lines.go deleted file mode 100644 index 74e8f9a..0000000 --- a/lib/hocr/lines.go +++ /dev/null @@ -1,131 +0,0 @@ -package hocr - -// TODO: Parse line name to zero pad line numbers, so they can -// be sorted easily - -import ( - "image" - "image/png" - "io/ioutil" - "log" - "os" - "path/filepath" - "strings" - - "rescribe.xyz/go.git/lib/line" -) - -func getLineText(l OcrLine) (string) { - linetext := "" - - linetext = l.Text - if noText(linetext) { - linetext = "" - for _, w := range l.Words { - if w.Class != "ocrx_word" { - continue - } - linetext += w.Text + " " - } - } - if noText(linetext) { - linetext = "" - for _, w := range l.Words { - if w.Class != "ocrx_word" { - continue - } - for _, c := range w.Chars { - if c.Class != "ocrx_cinfo" { - continue - } - linetext += c.Text - } - linetext += " " - } - } - linetext = strings.TrimRight(linetext, " ") - linetext += "\n" - return linetext -} - -func parseLineDetails(h Hocr, i image.Image, name string) (line.Details, error) { - lines := make(line.Details, 0) - - for _, l := range h.Lines { - totalconf := float64(0) - num := 0 - for _, w := range l.Words { - c, err := wordConf(w.Title) - if err != nil { - return lines, err - } - num++ - totalconf += c - } - - coords, err := boxCoords(l.Title) - if err != nil { - return lines, err - } - - var ln line.Detail - ln.Name = l.Id - ln.Avgconf = (totalconf / float64(num)) / 100 - ln.Text = getLineText(l) - ln.OcrName = name - if i != nil { - var imgd line.ImgDirect - imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3])) - ln.Img = imgd - } - lines = append(lines, ln) - } - return lines, nil -} - -func GetLineDetails(hocrfn string) (line.Details, error) { - var newlines line.Details - - file, err := ioutil.ReadFile(hocrfn) - if err != nil { - return newlines, err - } - - h, err := Parse(file) - if err != nil { - return newlines, err - } - - var img image.Image - pngfn := strings.Replace(hocrfn, ".hocr", ".png", 1) - pngf, err := os.Open(pngfn) - if err != nil { - log.Println("Warning: can't open image %s\n", pngfn) - } else { - defer pngf.Close() - img, err = png.Decode(pngf) - if err != nil { - log.Println("Warning: can't load image %s\n", pngfn) - } - } - - n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1) - return parseLineDetails(h, img, n) -} - -func GetLineBasics(hocrfn string) (line.Details, error) { - var newlines line.Details - - file, err := ioutil.ReadFile(hocrfn) - if err != nil { - return newlines, err - } - - h, err := Parse(file) - if err != nil { - return newlines, err - } - - n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1) - return parseLineDetails(h, nil, n) -} -- cgit v1.2.1-24-ge1ad