// dehyphenateString joins words that were broken across lines with a
// trailing hyphen. For every line that ends in '-', the hyphen is
// dropped, the first space-separated word of the following line is
// appended in its place, and that word is removed from the following
// line (which may leave it empty). Lines are separated by "\n" and the
// number of lines in the result always equals the number in the input.
//
// A hyphen on the very last line of text is left untouched, whether or
// not the input ends with a newline, as there is no following word to
// pull up.
func dehyphenateString(in string) string {
	lines := strings.Split(in, "\n")
	newlines := make([]string, 0, len(lines))

	for i, line := range lines {
		// A following line exists if there is another element after
		// this one, except when that element is just the empty string
		// strings.Split produces after a final newline. Checking this
		// explicitly (rather than assuming a trailing newline) means
		// input without a final newline is handled correctly too.
		hasNext := i+1 < len(lines)
		if i+1 == len(lines)-1 && lines[i+1] == "" {
			hasNext = false
		}

		if hasNext && strings.HasSuffix(line, "-") {
			// Split off only the first word; parts[1], if present, is
			// the untouched remainder of the following line.
			parts := strings.SplitN(lines[i+1], " ", 2)
			line = strings.TrimSuffix(line, "-") + parts[0]

			// Writing back into lines is deliberate: the next
			// iteration reads the shortened line from the slice.
			if len(parts) > 1 {
				lines[i+1] = parts[1]
			} else {
				lines[i+1] = ""
			}
		}

		newlines = append(newlines, line)
	}

	return strings.Join(newlines, "\n")
}
line = line[0:len(line)-1] + nextwords[0] - } - if len(nextwords) > 1 { - lines[i+1] = strings.Join(nextwords[1:], " ") - } else { - lines[i+1] = "" - } - } - newlines = append(newlines, line) - } - finaltxt = strings.Join(newlines, "\n") + finaltxt = dehyphenateString(string(in)) } f, err := os.Create(flag.Arg(1)) -- cgit v1.2.1-24-ge1ad