diff options
| author | Nick White <git@njw.name> | 2019-10-08 12:52:33 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2019-10-08 12:52:33 +0100 | 
| commit | 7482157a03ed3e9d7f45e54a126b391001f34948 (patch) | |
| tree | 52f87b9ca159fe4c04a0349de95ea9de82692b3c /dehyphenate | |
| parent | d43c11bf653bfe3c1ad1ed277f1ec08bf155cf98 (diff) | |
Separate out bookpipeline from catch-all go.git repo, and rename to rescribe.xyz/bookpipeline
The dependencies from the go.git repo will follow in due course.
Diffstat (limited to 'dehyphenate')
| -rw-r--r-- | dehyphenate/main.go | 63 | 
1 files changed, 0 insertions, 63 deletions
```diff
diff --git a/dehyphenate/main.go b/dehyphenate/main.go
deleted file mode 100644
index 4393c8f..0000000
--- a/dehyphenate/main.go
+++ /dev/null
@@ -1,63 +0,0 @@
-package main
-
-import (
-	"encoding/xml"
-	"flag"
-	"fmt"
-	"io/ioutil"
-	"log"
-	"os"
-
-	"rescribe.xyz/go.git/lib/hocr"
-)
-
-// BUGS:
-// - loses all elements not captured in hocr structure such as html headings
-//   might be best to copy the header and footer separately and put the hocr in between, but would still need to ensure all elements are captured
-// - loses any formatting; doesn't need to be identical, but e.g. linebreaks after elements would be handy
-// - need to handle OcrChar
-
-func main() {
-	flag.Usage = func() {
-		fmt.Fprintf(os.Stderr, "Usage: dehyphenate hocrin hocrout\n")
-		fmt.Fprintf(os.Stderr, "Dehyphenates a hocr file.\n")
-		flag.PrintDefaults()
-	}
-	flag.Parse()
-	if flag.NArg() != 2 {
-		flag.Usage()
-		os.Exit(1)
-	}
-
-	in, err := ioutil.ReadFile(flag.Arg(0))
-	if err != nil {
-		log.Fatalf("Error reading %s: %v", flag.Arg(1), err)
-	}
-	h, err := hocr.Parse(in)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	for i, l := range h.Lines {
-		w := l.Words[len(l.Words)-1]
-		if len(w.Chars) == 0 {
-			if len(w.Text) > 0 && w.Text[len(w.Text) - 1] == '-' {
-				h.Lines[i].Words[len(l.Words)-1].Text = w.Text[0:len(w.Text)-1] + h.Lines[i+1].Words[0].Text
-				h.Lines[i+1].Words[0].Text = ""
-			}
-		} else {
-			log.Printf("TODO: handle OcrChar")
-		}
-	}
-
-	f, err := os.Create(flag.Arg(1))
-	if err != nil {
-		log.Fatalf("Error creating file %s: %v", flag.Arg(1), err)
-	}
-	defer f.Close()
-	e := xml.NewEncoder(f)
-	err = e.Encode(h)
-	if err != nil {
-		log.Fatalf("Error encoding XML: %v", err)
-	}
-}
```
