diff options
Diffstat (limited to 'cmd')
-rw-r--r-- | cmd/avg-lines/main.go | 2 | ||||
-rw-r--r-- | cmd/boxtotxt/main.go | 1 | ||||
-rw-r--r-- | cmd/bucket-lines/main.go | 2 | ||||
-rw-r--r-- | cmd/dehyphenate/main.go | 1 | ||||
-rw-r--r-- | cmd/eeboxmltohocr/main.go | 2 | ||||
-rw-r--r-- | cmd/fonttobytes/main.go | 2 | ||||
-rw-r--r-- | cmd/getbests/main.go | 2 | ||||
-rw-r--r-- | cmd/hocrtotxt/main.go | 1 | ||||
-rw-r--r-- | cmd/pare-gt/main.go | 3 | ||||
-rw-r--r-- | cmd/pgconf/main.go | 1 |
10 files changed, 17 insertions, 0 deletions
diff --git a/cmd/avg-lines/main.go b/cmd/avg-lines/main.go
index a32f5ce..c1fd901 100644
--- a/cmd/avg-lines/main.go
+++ b/cmd/avg-lines/main.go
@@ -2,6 +2,8 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// avg-lines prints a report of the average confidence for each line,
+// sorted from worst to best
 package main
 
 import (
diff --git a/cmd/boxtotxt/main.go b/cmd/boxtotxt/main.go
index c8e4c02..b3b18b0 100644
--- a/cmd/boxtotxt/main.go
+++ b/cmd/boxtotxt/main.go
@@ -2,6 +2,7 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// boxtotxt converts a Tesseract .box file to plain text
 package main
 
 import (
diff --git a/cmd/bucket-lines/main.go b/cmd/bucket-lines/main.go
index 64ebffc..fddff21 100644
--- a/cmd/bucket-lines/main.go
+++ b/cmd/bucket-lines/main.go
@@ -2,6 +2,8 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// bucket-lines copies image-text line pairs into different directories
+// according to the average character probability for the line
 package main
 
 import (
diff --git a/cmd/dehyphenate/main.go b/cmd/dehyphenate/main.go
index 58b735e..90a6cda 100644
--- a/cmd/dehyphenate/main.go
+++ b/cmd/dehyphenate/main.go
@@ -2,6 +2,7 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// dehyphenate does basic dehyphenation on a hocr file
 package main
 
 import (
diff --git a/cmd/eeboxmltohocr/main.go b/cmd/eeboxmltohocr/main.go
index aaad3a5..867717f 100644
--- a/cmd/eeboxmltohocr/main.go
+++ b/cmd/eeboxmltohocr/main.go
@@ -2,6 +2,8 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// eeboxmltohocr converts the XML from an EEBO download to hOCR, which
+// can be easily incorporated into a searchable PDF
 package main
 
 import (
diff --git a/cmd/fonttobytes/main.go b/cmd/fonttobytes/main.go
index 52883cb..085003b 100644
--- a/cmd/fonttobytes/main.go
+++ b/cmd/fonttobytes/main.go
@@ -2,6 +2,8 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// fonttobytes outputs a font file as a series of bytes in go format,
+// allowing a font to be easily embedded into a go binary
 package main
 
 import (
diff --git a/cmd/getbests/main.go b/cmd/getbests/main.go
index 2b0c40a..9eca0d8 100644
--- a/cmd/getbests/main.go
+++ b/cmd/getbests/main.go
@@ -2,6 +2,8 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// getbests downloads every 'best' file from a set of OCRed books
+// stored on cloud infrastructure
 package main
 
 import (
diff --git a/cmd/hocrtotxt/main.go b/cmd/hocrtotxt/main.go
index c3eb0f4..f8447e2 100644
--- a/cmd/hocrtotxt/main.go
+++ b/cmd/hocrtotxt/main.go
@@ -2,6 +2,7 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// hocrtotxt prints the text from a hocr file
 package main
 
 import (
diff --git a/cmd/pare-gt/main.go b/cmd/pare-gt/main.go
index 1180607..f5da496 100644
--- a/cmd/pare-gt/main.go
+++ b/cmd/pare-gt/main.go
@@ -2,6 +2,9 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// pare-gt moves some ground truth, ensuring that the same
+// proportions of each ground truth source are represented in the
+// moved section
 package main
 
 import (
diff --git a/cmd/pgconf/main.go b/cmd/pgconf/main.go
index 41b00f0..846a3d8 100644
--- a/cmd/pgconf/main.go
+++ b/cmd/pgconf/main.go
@@ -2,6 +2,7 @@
 // Use of this source code is governed by the GPLv3
 // license that can be found in the LICENSE file.
 
+// pgconf prints the total confidence for a page of hOCR
 package main
 
 import (