From 6784a99d927fbdfa045afceb976e1165ba7a85da Mon Sep 17 00:00:00 2001 From: Kurt Jung Date: Sat, 12 Apr 2014 20:40:33 -0400 Subject: UTF-8 rune to code page byte translator so that some UTF-8 text can be converted for proper rendering in the PDF document. --- doc.go | 5 ++-- fpdf_test.go | 27 +++++++++++++++++ util.go | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+), 2 deletions(-) diff --git a/doc.go b/doc.go index a3ef6ff..fb2f475 100644 --- a/doc.go +++ b/doc.go @@ -152,9 +152,10 @@ and http://dejavu-fonts.org/. Roadmap -• Handle UTF-8 source text +• Handle UTF-8 source text natively. Until then, automatic translation of +UTF-8 runes to code page bytes is provided. -• Improve test coverage as reported by gocov (https://github.com/axw/gocov‎) +• Improve test coverage as reported by the coverage tool. */ package gofpdf diff --git a/fpdf_test.go b/fpdf_test.go index 303fdc7..7c96411 100644 --- a/fpdf_test.go +++ b/fpdf_test.go @@ -1158,3 +1158,30 @@ func ExampleFpdf_tutorial22() { // Output: // Successfully generated pdf/tutorial22.pdf } + +// This example demonstrates the conversion of UTF-8 strings to an 8-bit font +// encoding. 
// repClosure returns a translator that converts a UTF-8 string to a string of
// single code page bytes. Runes below 0x80 are copied through unchanged, runes
// present in m are replaced with their mapped byte, and every other rune
// (including the replacement rune produced by invalid UTF-8) becomes '.'.
//
// The returned function allocates its own buffer on every call and only reads
// from m, so it keeps no state between calls.
func repClosure(m map[rune]byte) func(string) string {
	return func(str string) string {
		var buf bytes.Buffer
		// The output holds one byte per rune, so it never exceeds len(str).
		buf.Grow(len(str))
		for _, r := range str {
			if r < 0x80 {
				buf.WriteByte(byte(r))
				continue
			}
			ch, ok := m[r]
			if !ok {
				ch = '.'
			}
			buf.WriteByte(ch)
		}
		return buf.String()
	}
}

// UnicodeTranslator returns a function that can be used to translate, where
// possible, utf-8 strings to a form that is compatible with the specified code
// page. The returned function accepts a string and returns a string.
//
// r is a reader that should read a buffer made up of content lines that
// pertain to the code page of interest. Each line is made up of three
// whitespace separated fields. The first begins with "!" and is followed by
// two hexadecimal digits that identify the glyph position in the code page of
// interest. The second field begins with "U+" and is followed by the unicode
// code point value. The third is the glyph name. A number of these code page
// map files are packaged with the gofpdf library in the font directory.
//
// Empty and whitespace-only lines are skipped. Only glyph positions of 0x80
// and above are recorded; lower positions are passed through untranslated.
//
// An error is returned, together with a nil function, as soon as a line is
// read that does not conform to the expected format or if reading from r
// fails.
func UnicodeTranslator(r io.Reader) (f func(string) string, err error) {
	m := make(map[rune]byte)
	var uPos, cPos uint32
	var nameStr string
	sc := bufio.NewScanner(r)
	for sc.Scan() {
		lineStr := string(bytes.TrimSpace(sc.Bytes()))
		if len(lineStr) == 0 {
			continue
		}
		// Stop at the first malformed line; continuing would let a later
		// well-formed line overwrite (and thereby hide) the error.
		if _, err = fmt.Sscanf(lineStr, "!%2X U+%4X %s", &cPos, &uPos, &nameStr); err != nil {
			return nil, fmt.Errorf("malformed code page map line %q: %w", lineStr, err)
		}
		if cPos >= 0x80 {
			m[rune(uPos)] = byte(cPos)
		}
	}
	// Scan reports false for both end of input and read failures; only Err
	// distinguishes the two.
	if err = sc.Err(); err != nil {
		return nil, err
	}
	return repClosure(m), nil
}

// UnicodeTranslatorFromFile returns a function that can be used to translate,
// where possible, utf-8 strings to a form that is compatible with the
// specified code page. See UnicodeTranslator for more details.
//
// fileStr identifies a font descriptor file that maps glyph positions to names.
// An error is returned if the file cannot be opened or if its content is
// rejected by UnicodeTranslator.
func UnicodeTranslatorFromFile(fileStr string) (f func(string) string, err error) {
	fl, err := os.Open(fileStr)
	if err != nil {
		return nil, err
	}
	// The file is only read, so a failure to close carries no information the
	// caller could act on.
	defer fl.Close()
	return UnicodeTranslator(fl)
}

// UnicodeTranslatorFromDescriptor returns a function that can be used to
// translate, where possible, utf-8 strings to a form that is compatible with
// the specified code page. See UnicodeTranslator for more details.
//
// cpStr identifies a code page. A descriptor file in the font directory, set
// with the fontDirStr argument in the call to New(), should have this name
// plus the extension ".map". If cpStr is empty, it will be replaced with
// "cp1252", the gofpdf code page default.
+// +// See tutorial 23 for an example of this function. +func (f *Fpdf) UnicodeTranslatorFromDescriptor(cpStr string) (rep func(string) string) { + if f.err != nil { + return + } + if len(cpStr) == 0 { + cpStr = "cp1252" + } + rep, f.err = UnicodeTranslatorFromFile(filepath.Join(f.fontpath, cpStr) + ".map") + return +} -- cgit v1.2.1-24-ge1ad