From 6784a99d927fbdfa045afceb976e1165ba7a85da Mon Sep 17 00:00:00 2001 From: Kurt Jung Date: Sat, 12 Apr 2014 20:40:33 -0400 Subject: UTF-8 rune to code page byte translator so that some UTF-8 text can be converted for proper rendering in the PDF document. --- util.go | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'util.go') diff --git a/util.go b/util.go index 22604bd..4d03417 100644 --- a/util.go +++ b/util.go @@ -21,9 +21,11 @@ import ( "compress/zlib" "fmt" // "github.com/davecgh/go-spew/spew" + "bufio" "io" "math" "os" + "path/filepath" ) func round(f float64) int { @@ -146,3 +148,96 @@ func strIf(cnd bool, aStr, bStr string) string { // fl.Close() // } // } + +func repClosure(m map[rune]byte) func(string) string { + var buf bytes.Buffer + return func(str string) string { + var ch byte + var ok bool + buf.Truncate(0) + for _, r := range str { + if r < 0x80 { + ch = byte(r) + } else { + ch, ok = m[r] + if !ok { + ch = byte('.') + } + } + buf.WriteByte(ch) + } + return string(buf.Bytes()) + } +} + +// UnicodeTranslator returns a function that can be used to translate, where +// possible, utf-8 strings to a form that is compatible with the specified code +// page. The returned function accepts a string and returns a string. +// +// r is a reader that should read a buffer made up of content lines that +// pertain to the code page of interest. Each line is made up of three +// whitespace separated fields. The first begins with "!" and is followed by +// two hexadecimal digits that identify the glyph position in the code page of +// interest. The second field begins with "U+" and is followed by the unicode +// code point value. The third is the glyph name. A number of these code page +// map files are packaged with the gfpdf library in the font directory. +// +// An error occurs only if a line is read that does not conform to the expected +// format. +func UnicodeTranslator(r io.Reader) (f func(string) string, err error) { + m := make(map[rune]byte) + var uPos, cPos uint32 + var lineStr, nameStr string + sc := bufio.NewScanner(r) + for sc.Scan() { + lineStr = sc.Text() + if len(lineStr) > 0 { + _, err = fmt.Sscanf(lineStr, "!%2X U+%4X %s", &cPos, &uPos, &nameStr) + if err == nil { + if cPos >= 0x80 { + m[rune(uPos)] = byte(cPos) + } + } + } + } + if err == nil { + f = repClosure(m) + } + return +} + +// UnicodeTranslatorFromFile returns a function that can be used to translate, +// where possible, utf-8 strings to a form that is compatible with the +// specified code page. See UnicodeTranslator for more details. +// +// fileStr identifies a font descriptor file that maps glyph positions to names. +func UnicodeTranslatorFromFile(fileStr string) (f func(string) string, err error) { + var fl *os.File + fl, err = os.Open(fileStr) + if err == nil { + f, err = UnicodeTranslator(fl) + fl.Close() + } + return +} + +// UnicodeTranslatorFromDescriptor returns a function that can be used to +// translate, where possible, utf-8 strings to a form that is compatible with +// the specified code page. See UnicodeTranslator for more details. +// +// cpStr identifies a code page. A descriptor file in the font directory, set +// with the fontDirStr argument in the call to New(), should have this name +// plus the extension ".map". If cpStr is empty, it will be replaced with +// "cp1252", the gofpdf code page default. +// +// See tutorial 23 for an example of this function. +func (f *Fpdf) UnicodeTranslatorFromDescriptor(cpStr string) (rep func(string) string) { + if f.err != nil { + return + } + if len(cpStr) == 0 { + cpStr = "cp1252" + } + rep, f.err = UnicodeTranslatorFromFile(filepath.Join(f.fontpath, cpStr) + ".map") + return +} -- cgit v1.2.1-24-ge1ad