From 69aae6b93dcadd9e4895f86fe661ee80e79dcf9e Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Tue, 8 Oct 2019 15:49:52 +0100
Subject: Remove parts that have been moved elsewhere, and rename to
 rescribe.xyz/utils

bookpipeline is now at rescribe.xyz/bookpipeline
preproc is now at rescribe.xyz/preproc
integralimg is now at rescribe.xyz/preproc/integralimg
---
 lib/hocr/hocr.go  | 129 -----------------------------------------------------
 lib/hocr/lines.go | 131 ------------------------------------------------------
 2 files changed, 260 deletions(-)
 delete mode 100644 lib/hocr/hocr.go
 delete mode 100644 lib/hocr/lines.go

(limited to 'lib/hocr')

diff --git a/lib/hocr/hocr.go b/lib/hocr/hocr.go
deleted file mode 100644
index dcd0494..0000000
--- a/lib/hocr/hocr.go
+++ /dev/null
@@ -1,129 +0,0 @@
-package hocr
-
-import (
-	"encoding/xml"
-	"errors"
-	"io/ioutil"
-	"regexp"
-	"strconv"
-	"strings"
-)
-
-type Hocr struct {
-	Lines []OcrLine `xml:"body>div>div>p>span"`
-}
-
-type OcrLine struct {
-	Class string    `xml:"class,attr"`
-	Id    string    `xml:"id,attr"`
-	Title string    `xml:"title,attr"`
-	Words []OcrWord `xml:"span"`
-	Text  string    `xml:",chardata"`
-}
-
-type OcrWord struct {
-	Class string    `xml:"class,attr"`
-	Id    string    `xml:"id,attr"`
-	Title string    `xml:"title,attr"`
-	Chars []OcrChar `xml:"span"`
-	Text  string    `xml:",chardata"`
-}
-
-type OcrChar struct {
-	Class string    `xml:"class,attr"`
-	Id    string    `xml:"id,attr"`
-	Title string    `xml:"title,attr"`
-	Chars []OcrChar `xml:"span"`
-	Text  string    `xml:",chardata"`
-}
-
-// Returns the confidence for a word based on its x_wconf value
-func wordConf(s string) (float64, error) {
-	re, err := regexp.Compile(`x_wconf ([0-9.]+)`)
-	if err != nil {
-		return 0.0, err
-	}
-	conf := re.FindStringSubmatch(s)
-	return strconv.ParseFloat(conf[1], 64)
-}
-
-func boxCoords(s string) ([4]int, error) {
-	var coords [4]int
-	re, err := regexp.Compile(`bbox ([0-9]+) ([0-9]+) ([0-9]+) ([0-9]+)`)
-	if err != nil {
-		return coords, err
-	}
-	coordstr := re.FindStringSubmatch(s)
-	for i := range coords {
-		c, err := strconv.Atoi(coordstr[i+1])
-		if err != nil {
-			return coords, err
-		}
-		coords[i] = c
-	}
-	return coords, nil
-}
-
-func noText(s string) bool {
-	t := strings.Trim(s, " \n")
-	return len(t) == 0
-}
-
-func Parse(b []byte) (Hocr, error) {
-	var hocr Hocr
-
-	err := xml.Unmarshal(b, &hocr)
-	if err != nil {
-		return hocr, err
-	}
-
-	return hocr, nil
-}
-
-func GetText(hocrfn string) (string, error) {
-	var s string
-
-	file, err := ioutil.ReadFile(hocrfn)
-	if err != nil {
-		return s, err
-	}
-
-	h, err := Parse(file)
-	if err != nil {
-		return s, err
-	}
-
-
-	for _, l := range h.Lines {
-		s += getLineText(l)
-	}
-	return s, nil
-}
-
-func GetAvgConf(hocrfn string) (float64, error) {
-	file, err := ioutil.ReadFile(hocrfn)
-	if err != nil {
-		return 0, err
-	}
-
-	h, err := Parse(file)
-	if err != nil {
-		return 0, err
-	}
-
-	var total, num float64
-	for _, l := range h.Lines {
-		for _, w := range l.Words {
-			c, err := wordConf(w.Title)
-			if err != nil {
-				return 0, err
-			}
-			total += c
-			num++
-		}
-	}
-	if num == 0 {
-		return 0, errors.New("No words found")
-	}
-	return total / num, nil
-}
diff --git a/lib/hocr/lines.go b/lib/hocr/lines.go
deleted file mode 100644
index 74e8f9a..0000000
--- a/lib/hocr/lines.go
+++ /dev/null
@@ -1,131 +0,0 @@
-package hocr
-
-// TODO: Parse line name to zero pad line numbers, so they can
-//       be sorted easily
-
-import (
-	"image"
-	"image/png"
-	"io/ioutil"
-	"log"
-	"os"
-	"path/filepath"
-	"strings"
-
-	"rescribe.xyz/go.git/lib/line"
-)
-
-func getLineText(l OcrLine) (string) {
-	linetext := ""
-
-	linetext = l.Text
-	if noText(linetext) {
-		linetext = ""
-		for _, w := range l.Words {
-			if w.Class != "ocrx_word" {
-				continue
-			}
-			linetext += w.Text + " "
-		}
-	}
-	if noText(linetext) {
-		linetext = ""
-		for _, w := range l.Words {
-			if w.Class != "ocrx_word" {
-				continue
-			}
-			for _, c := range w.Chars {
-				if c.Class != "ocrx_cinfo" {
-					continue
-				}
-				linetext += c.Text
-			}
-			linetext += " "
-		}
-	}
-	linetext = strings.TrimRight(linetext, " ")
-	linetext += "\n"
-	return linetext
-}
-
-func parseLineDetails(h Hocr, i image.Image, name string) (line.Details, error) {
-	lines := make(line.Details, 0)
-
-	for _, l := range h.Lines {
-		totalconf := float64(0)
-		num := 0
-		for _, w := range l.Words {
-			c, err := wordConf(w.Title)
-			if err != nil {
-				return lines, err
-			}
-			num++
-			totalconf += c
-		}
-
-		coords, err := boxCoords(l.Title)
-		if err != nil {
-			return lines, err
-		}
-
-		var ln line.Detail
-		ln.Name = l.Id
-		ln.Avgconf = (totalconf / float64(num)) / 100
-		ln.Text = getLineText(l)
-		ln.OcrName = name
-		if i != nil {
-			var imgd line.ImgDirect
-			imgd.Img = i.(*image.Gray).SubImage(image.Rect(coords[0], coords[1], coords[2], coords[3]))
-			ln.Img = imgd
-		}
-		lines = append(lines, ln)
-	}
-	return lines, nil
-}
-
-func GetLineDetails(hocrfn string) (line.Details, error) {
-	var newlines line.Details
-
-	file, err := ioutil.ReadFile(hocrfn)
-	if err != nil {
-		return newlines, err
-	}
-
-	h, err := Parse(file)
-	if err != nil {
-		return newlines, err
-	}
-
-	var img image.Image
-	pngfn := strings.Replace(hocrfn, ".hocr", ".png", 1)
-	pngf, err := os.Open(pngfn)
-	if err != nil {
-		log.Println("Warning: can't open image %s\n", pngfn)
-	} else {
-		defer pngf.Close()
-		img, err = png.Decode(pngf)
-		if err != nil {
-			log.Println("Warning: can't load image %s\n", pngfn)
-		}
-	}
-
-	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
-	return parseLineDetails(h, img, n)
-}
-
-func GetLineBasics(hocrfn string) (line.Details, error) {
-	var newlines line.Details
-
-	file, err := ioutil.ReadFile(hocrfn)
-	if err != nil {
-		return newlines, err
-	}
-
-	h, err := Parse(file)
-	if err != nil {
-		return newlines, err
-	}
-
-	n := strings.Replace(filepath.Base(hocrfn), ".hocr", "", 1)
-	return parseLineDetails(h, nil, n)
-}
-- 
cgit v1.2.1-24-ge1ad