diff options
| author | Nick White <git@njw.name> | 2020-08-25 16:03:42 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.name> | 2020-08-25 16:03:42 +0100 | 
| commit | 090c9986c9b93f45de70f21f649877536839dc29 (patch) | |
| tree | 0f54d8e393ffe836bc2adfc1502c89fdb07b33c6 | |
| parent | 496fe0af1cb212a6a4af932b6c188bf082d15383 (diff) | |
Move dehyphenate string code into its own function
| -rw-r--r-- | cmd/dehyphenate/main.go | 47 | 
1 file changed, 27 insertions, 20 deletions
diff --git a/cmd/dehyphenate/main.go b/cmd/dehyphenate/main.go index afd1aac..0974368 100644 --- a/cmd/dehyphenate/main.go +++ b/cmd/dehyphenate/main.go @@ -24,6 +24,32 @@ import (  // - loses any formatting; doesn't need to be identical, but e.g. linebreaks after elements would be handy  // - need to handle OcrChar +// dehyphenateString replaces hyphens at the end of a line +// with the first word from the following line, and removes +// that word from its line. +func dehyphenateString(in string) string { +	var newlines []string +	lines := strings.Split(in, "\n") +	for i, line := range lines { +		words := strings.Split(line, " ") +		last := words[len(words)-1] +		// the - 2 here is to account for a trailing newline and counting from zero +		if len(last) > 0 && last[len(last) - 1] == '-' && i < len(lines) - 2 { +			nextwords := strings.Split(lines[i+1], " ") +			if len(nextwords) > 0 { +				line = line[0:len(line)-1] + nextwords[0] +			} +			if len(nextwords) > 1 { +				lines[i+1] = strings.Join(nextwords[1:], " ") +			} else { +				lines[i+1] = "" +			} +		} +		newlines = append(newlines, line) +	} +	return strings.Join(newlines, "\n") +} +  func main() {  	flag.Usage = func() {  		fmt.Fprintf(os.Stderr, "Usage: dehyphenate [-hocr] in out\n") @@ -63,26 +89,7 @@ func main() {  			}  		}  	} else { -		var newlines []string -		lines := strings.Split(string(in), "\n") -		for i, line := range lines { -			words := strings.Split(line, " ") -			last := words[len(words)-1] -			// the - 2 here is to account for a trailing newline and counting from zero -			if len(last) > 0 && last[len(last) - 1] == '-' && i < len(lines) - 2 { -				nextwords := strings.Split(lines[i+1], " ") -				if len(nextwords) > 0 { -					line = line[0:len(line)-1] + nextwords[0] -				} -				if len(nextwords) > 1 { -					lines[i+1] = strings.Join(nextwords[1:], " ") -				} else { -					lines[i+1] = "" -				} -			} -			newlines = append(newlines, line) -		} -		finaltxt = strings.Join(newlines, "\n") +		
finaltxt = dehyphenateString(string(in))  	}  	f, err := os.Create(flag.Arg(1))  | 
