summaryrefslogtreecommitdiff
path: root/cmd/rescribe
diff options
context:
space:
mode:
authorNick White <git@njw.name>2024-02-06 19:39:27 +0000
committerNick White <git@njw.name>2024-02-06 19:39:27 +0000
commit47bbea7c7d433976acc0a3380f3fb782a64b1499 (patch)
tree029c09c38cf1431c98c1c124a37c29e46c44a00f /cmd/rescribe
parent7ae64e32b3a4fb4fcb66f287a0ec39215bb11a32 (diff)
Add concatenated text output to rescribe output dir, called bookname.txt
Diffstat (limited to 'cmd/rescribe')
-rw-r--r--cmd/rescribe/main.go31
1 files changed, 31 insertions, 0 deletions
diff --git a/cmd/rescribe/main.go b/cmd/rescribe/main.go
index f532678..8ae29ff 100644
--- a/cmd/rescribe/main.go
+++ b/cmd/rescribe/main.go
@@ -575,6 +575,11 @@ func startProcess(ctx context.Context, logger *log.Logger, tessCommand string, b
return fmt.Errorf("Error looking for .hocr files: %v", err)
}
+ err = addFullTxt(hocrs, bookname)
+ if err != nil {
+ log.Fatalf("Error creating full txt version: %v", err)
+ }
+
for _, v := range hocrs {
err = addTxtVersion(v)
if err != nil {
@@ -660,6 +665,32 @@ func addTxtVersion(hocrfn string) error {
return nil
}
+// addFullTxt writes the text of every hocr file in hocrs, in slice order
+// and separated by a single newline, to a file named bookname.txt in the
+// directory containing the first hocr file. It does nothing and returns
+// nil if hocrs is empty. An error is returned if text cannot be read from
+// any hocr file or if the output file cannot be written.
+func addFullTxt(hocrs []string, bookname string) error {
+	if len(hocrs) == 0 {
+		return nil
+	}
+
+	// Accumulate into a byte slice rather than using string += in the
+	// loop, which recopies all of the text gathered so far for every
+	// file; this also avoids a final string to []byte copy before writing.
+	var full []byte
+	for i, v := range hocrs {
+		t, err := hocr.GetText(v)
+		if err != nil {
+			return fmt.Errorf("Error getting text from hocr file %s: %v", v, err)
+		}
+		if i > 0 {
+			full = append(full, '\n')
+		}
+		full = append(full, t...)
+	}
+
+	// The output goes alongside the first hocr file; indexing hocrs[0] is
+	// safe because of the empty check above.
+	fn := filepath.Join(filepath.Dir(hocrs[0]), bookname+".txt")
+	if err := ioutil.WriteFile(fn, full, 0644); err != nil {
+		return fmt.Errorf("Error creating text file %s: %v", fn, err)
+	}
+
+	return nil
+}
+
func uploadbook(ctx context.Context, dir string, name string, conn Pipeliner, nowipe bool) error {
_, err := os.Stat(dir)
if err != nil && !os.IsExist(err) {