From 40c1e4956c0f4cd2c5f139aba7cb7bc04c57fc0f Mon Sep 17 00:00:00 2001
From: Nick White <git@njw.name>
Date: Fri, 25 Jan 2019 17:26:43 +0000
Subject: Add html output including all images, by writing them to an html
 directory

---
 avg-lines/avg-lines.go | 81 --------------------------------------------------
 avg-lines/html.go      | 61 +++++++++++++++++++++++++++++++++++++
 avg-lines/main.go      | 69 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 130 insertions(+), 81 deletions(-)
 delete mode 100644 avg-lines/avg-lines.go
 create mode 100644 avg-lines/html.go
 create mode 100644 avg-lines/main.go

(limited to 'avg-lines')

diff --git a/avg-lines/avg-lines.go b/avg-lines/avg-lines.go
deleted file mode 100644
index 160dd7c..0000000
--- a/avg-lines/avg-lines.go
+++ /dev/null
@@ -1,81 +0,0 @@
-package main
-
-import (
-	"flag"
-	"fmt"
-	"log"
-	"os"
-	"path/filepath"
-	"sort"
-
-	"rescribe.xyz/go.git/lib/line"
-	"rescribe.xyz/go.git/lib/hocr"
-	"rescribe.xyz/go.git/lib/prob"
-)
-
-func main() {
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: avg-lines [-html] [-nosort] [prob1] [hocr1] [prob2] [...]\n")
-		fmt.Fprintf(os.Stderr, "Prints a report of the average confidence for each line, sorted\n")
-		fmt.Fprintf(os.Stderr, "from worst to best.\n")
-		fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n")
-		fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n")
-		fmt.Fprintf(os.Stderr, "The .prob files are generated using ocropy-rpred's --probabilities\n")
-		fmt.Fprintf(os.Stderr, "option.\n\n")
-		flag.PrintDefaults()
-	}
-	var usehtml = flag.Bool("html", false, "Output html page")
-	var nosort = flag.Bool("nosort", false, "Don't sort lines by confidence")
-	flag.Parse()
-	if flag.NArg() < 1 {
-		flag.Usage()
-		os.Exit(1)
-	}
-
-	var err error
-	lines := make(line.Details, 0)
-
-	for _, f := range flag.Args() {
-		var newlines line.Details
-		switch ext := filepath.Ext(f); ext {
-			case ".prob":
-				newlines, err = prob.GetLineDetails(f)
-			case ".hocr":
-				newlines, err = hocr.GetLineDetails(f)
-			default:
-				log.Printf("Skipping file '%s' as it isn't a .prob or .hocr\n", f)
-				continue
-		}
-		if err != nil {
-			log.Fatal(err)
-		}
-
-		for _, l := range newlines {
-			lines = append(lines, l)
-		}
-	}
-
-	if *nosort == false {
-		sort.Sort(lines)
-	}
-
-	if *usehtml == false {
-		for _, l := range lines {
-			fmt.Printf("%s %s: %.2f%%\n", l.OcrName, l.Name, l.Avgconf)
-		}
-	} else {
-		fmt.Printf("<!DOCTYPE html><html><head><meta charset='UTF-8'><title></title><style>td {border: 1px solid #444}</style></head><body>\n")
-		fmt.Printf("<table>\n")
-		for _, l := range lines {
-			fmt.Printf("<tr>\n")
-			fmt.Printf("<td><h1>%.4f%%</h1></td>\n", l.Avgconf)
-			fmt.Printf("<td>%s %s</td>\n", l.OcrName, l.Name)
-			// TODO: think about this, what do we want to do here? if showing imgs is important,
-			//       will need to copy them somewhere, so works with hocr too
-			//fmt.Printf("<td><img src='%s' /><br />%s</td>\n", l.Filebase + ".bin.png", l.Fulltext)
-			fmt.Printf("</tr>\n")
-		}
-		fmt.Printf("</table>\n")
-		fmt.Printf("</body></html>\n")
-	}
-}
diff --git a/avg-lines/html.go b/avg-lines/html.go
new file mode 100644
index 0000000..f299830
--- /dev/null
+++ b/avg-lines/html.go
@@ -0,0 +1,83 @@
+package main
+
+import (
+	"fmt"
+	"html"
+	"net/url"
+	"os"
+	"path/filepath"
+
+	"rescribe.xyz/go.git/lib/line"
+)
+
+// copylineimg creates (or truncates) the file fn and writes the image of
+// line l to it with l.Img.CopyLineTo. It returns the first error met while
+// creating, writing or closing the file.
+func copylineimg(fn string, l line.Detail) (err error) {
+	f, err := os.Create(fn)
+	if err != nil {
+		return err
+	}
+	// Surface a failed Close on the freshly written file instead of
+	// dropping it, unless an earlier error is already being returned.
+	defer func() {
+		if cerr := f.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	return l.Img.CopyLineTo(f)
+}
+
+// htmlout writes an html report of lines into the directory dir, creating
+// the directory if it does not exist. The report is dir/index.html: a table
+// with one row per line holding its average confidence, its OcrName and
+// Name, its image and its text. Each line image is written into dir next to
+// index.html so the page can refer to it by a relative name. It returns the
+// first error met, leaving any output written so far in place.
+func htmlout(dir string, lines line.Details) (err error) {
+	err = os.MkdirAll(dir, 0700)
+	if err != nil {
+		return err
+	}
+
+	f, err := os.Create(filepath.Join(dir, "index.html"))
+	if err != nil {
+		return err
+	}
+	// As in copylineimg, do not lose an error from closing a written file.
+	defer func() {
+		if cerr := f.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	_, err = fmt.Fprint(f, "<!DOCTYPE html><html><head><meta charset='UTF-8'><title></title>"+
+		"<style>td {border: 1px solid #444}</style></head><body>\n<table>\n")
+	if err != nil {
+		return err
+	}
+	for _, l := range lines {
+		imgname := filepath.Base(l.OcrName) + "_" + l.Name + ".png"
+		err = copylineimg(filepath.Join(dir, imgname), l)
+		if err != nil {
+			return err
+		}
+		// The names and text are arbitrary strings as far as this
+		// function knows, so escape them rather than letting characters
+		// such as '<' or '&' be read as markup. The image name is also
+		// percent-encoded, as it is used as a relative URL.
+		_, err = fmt.Fprintf(f, "<tr>\n"+
+			"<td><h1>%.4f%%</h1></td>\n"+
+			"<td>%s %s</td>\n"+
+			"<td><img src='%s' width='100%%' /><br />%s</td>\n"+
+			"</tr>\n",
+			l.Avgconf, html.EscapeString(l.OcrName), html.EscapeString(l.Name),
+			html.EscapeString(url.PathEscape(imgname)), html.EscapeString(l.Text))
+		if err != nil {
+			return err
+		}
+	}
+	_, err = fmt.Fprint(f, "</table>\n</body></html>\n")
+	return err
+}
diff --git a/avg-lines/main.go b/avg-lines/main.go
new file mode 100644
index 0000000..a953598
--- /dev/null
+++ b/avg-lines/main.go
@@ -0,0 +1,75 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+
+	"rescribe.xyz/go.git/lib/hocr"
+	"rescribe.xyz/go.git/lib/line"
+	"rescribe.xyz/go.git/lib/prob"
+)
+
+// main gathers line details from every .prob and .hocr file named on the
+// command line and reports the average confidence of each line, sorted
+// unless -nosort is given. The report is plain text on standard output, or
+// with -html a html page and line images written to the named directory.
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: avg-lines [-html dir] [-nosort] [prob1] [hocr1] [prob2] [...]\n")
+		fmt.Fprintf(os.Stderr, "Prints a report of the average confidence for each line, sorted\n")
+		fmt.Fprintf(os.Stderr, "from worst to best.\n")
+		fmt.Fprintf(os.Stderr, "Both .hocr and .prob files can be processed.\n")
+		fmt.Fprintf(os.Stderr, "For .hocr files, the x_wconf data is used to calculate confidence.\n")
+		fmt.Fprintf(os.Stderr, "The .prob files are generated using ocropy-rpred's --probabilities\n")
+		fmt.Fprintf(os.Stderr, "option.\n\n")
+		flag.PrintDefaults()
+	}
+	var html = flag.String("html", "", "Output in html format to the specified directory")
+	var nosort = flag.Bool("nosort", false, "Don't sort lines by confidence")
+	flag.Parse()
+	if flag.NArg() < 1 {
+		flag.Usage()
+		os.Exit(1)
+	}
+
+	var lines line.Details
+
+	for _, f := range flag.Args() {
+		var newlines line.Details
+		var err error
+		switch filepath.Ext(f) {
+		case ".prob":
+			newlines, err = prob.GetLineDetails(f)
+		case ".hocr":
+			newlines, err = hocr.GetLineDetails(f)
+		default:
+			log.Printf("Skipping file '%s' as it isn't a .prob or .hocr\n", f)
+			continue
+		}
+		if err != nil {
+			log.Fatal(err)
+		}
+
+		lines = append(lines, newlines...)
+	}
+
+	if !*nosort {
+		sort.Sort(lines)
+	}
+
+	if *html == "" {
+		for _, l := range lines {
+			fmt.Printf("%s %s: %.2f%%\n", l.OcrName, l.Name, l.Avgconf)
+		}
+		return
+	}
+	// htmlout can fail part-way through; exit non-zero rather than
+	// letting an incomplete report look like success.
+	if err := htmlout(*html, lines); err != nil {
+		log.Fatal(err)
+	}
+}
-- 
cgit v1.2.1-24-ge1ad