summary refs log tree commit diff
diff options
context:
space:
mode:
author Kurt Jung <kurt.w.jung@code.google.com> 2014-04-12 20:40:33 -0400
committer Kurt Jung <kurt.w.jung@code.google.com> 2014-04-12 20:40:33 -0400
commit 6784a99d927fbdfa045afceb976e1165ba7a85da (patch)
tree e302a7ac8f1f332386eb3f60092882d7f672ae02
parent 202a1a1b9f07994138de669f4d8b42b4c0bddcc2 (diff)
UTF-8 rune to code page byte translator so that some UTF-8 text can be converted for proper rendering in the PDF document.
-rw-r--r-- doc.go 5
-rw-r--r-- fpdf_test.go 27
-rw-r--r-- util.go 95
3 files changed, 125 insertions, 2 deletions
diff --git a/doc.go b/doc.go
index a3ef6ff..fb2f475 100644
--- a/doc.go
+++ b/doc.go
@@ -152,9 +152,10 @@ and http://dejavu-fonts.org/.
Roadmap
-• Handle UTF-8 source text
+• Handle UTF-8 source text natively. Until then, automatic translation of
+UTF-8 runes to code page bytes is provided.
-• Improve test coverage as reported by gocov (https://github.com/axw/gocov‎)
+• Improve test coverage as reported by the coverage tool.
*/
package gofpdf
diff --git a/fpdf_test.go b/fpdf_test.go
index 303fdc7..7c96411 100644
--- a/fpdf_test.go
+++ b/fpdf_test.go
@@ -1158,3 +1158,30 @@ func ExampleFpdf_tutorial22() {
// Output:
// Successfully generated pdf/tutorial22.pdf
}
+
+// This example demonstrates the conversion of UTF-8 strings to an 8-bit font
+// encoding.
+func ExampleFpdf_tutorial23() {
+ pdf := gofpdf.New("P", "mm", "A4", cnFontDir) // A4 210.0 x 297.0
+ fontSize := 16.0
+ pdf.SetFont("Helvetica", "", fontSize)
+ ht := pdf.PointConvert(fontSize)
+ tr := pdf.UnicodeTranslatorFromDescriptor("") // Default: "cp1252"
+ write := func(str string) {
+ // fmt.Println(str, tr(str))
+ pdf.CellFormat(190, ht, tr(str), "", 1, "C", false, 0, "")
+ pdf.Ln(ht)
+ }
+ pdf.AddPage()
+ str := ` gofpdf provides a translator that will convert any UTF-8 code point ` +
+ `that is present in the specified code page.`
+ pdf.MultiCell(190, ht, str, "", "L", false)
+ pdf.Ln(2 * ht)
+ write("Voix ambiguë d'un cœur qui au zéphyr préfère les jattes de kiwi.")
+ write("Falsches Üben von Xylophonmusik quält jeden größeren Zwerg.")
+ write("Heizölrückstoßabdämpfung")
+ write("Forårsjævndøgn / Efterårsjævndøgn")
+ pdf.OutputAndClose(docWriter(pdf, 23))
+ // Output:
+ // Successfully generated pdf/tutorial23.pdf
+}
diff --git a/util.go b/util.go
index 22604bd..4d03417 100644
--- a/util.go
+++ b/util.go
@@ -21,9 +21,11 @@ import (
"compress/zlib"
"fmt"
// "github.com/davecgh/go-spew/spew"
+ "bufio"
"io"
"math"
"os"
+ "path/filepath"
)
func round(f float64) int {
@@ -146,3 +148,96 @@ func strIf(cnd bool, aStr, bStr string) string {
// fl.Close()
// }
// }
+
// repClosure returns a function that converts a UTF-8 string to a string of
// single-byte code page characters.
//
// Runes below 0x80 are copied through unchanged. Every other rune is looked up
// in m, which maps a Unicode code point to its code page byte; runes that are
// not present in m are replaced with '.'.
//
// Each call of the returned function works on its own buffer, so the function
// may be called from several goroutines at once as long as m is not modified.
func repClosure(m map[rune]byte) func(string) string {
	return func(str string) string {
		var buf bytes.Buffer
		// Every rune yields exactly one byte, so the output is never longer
		// than the input.
		buf.Grow(len(str))
		for _, r := range str {
			if r < 0x80 {
				buf.WriteByte(byte(r))
				continue
			}
			ch, ok := m[r]
			if !ok {
				ch = '.'
			}
			buf.WriteByte(ch)
		}
		return buf.String()
	}
}
+
+// UnicodeTranslator returns a function that can be used to translate, where
+// possible, utf-8 strings to a form that is compatible with the specified code
+// page. The returned function accepts a string and returns a string.
+//
+// r is a reader that should read a buffer made up of content lines that
+// pertain to the code page of interest. Each line is made up of three
+// whitespace separated fields. The first begins with "!" and is followed by
+// two hexadecimal digits that identify the glyph position in the code page of
+// interest. The second field begins with "U+" and is followed by the unicode
+// code point value. The third is the glyph name. A number of these code page
+// map files are packaged with the gfpdf library in the font directory.
+//
+// An error occurs only if a line is read that does not conform to the expected
+// format.
+func UnicodeTranslator(r io.Reader) (f func(string) string, err error) {
+ m := make(map[rune]byte)
+ var uPos, cPos uint32
+ var lineStr, nameStr string
+ sc := bufio.NewScanner(r)
+ for sc.Scan() {
+ lineStr = sc.Text()
+ if len(lineStr) > 0 {
+ _, err = fmt.Sscanf(lineStr, "!%2X U+%4X %s", &cPos, &uPos, &nameStr)
+ if err == nil {
+ if cPos >= 0x80 {
+ m[rune(uPos)] = byte(cPos)
+ }
+ }
+ }
+ }
+ if err == nil {
+ f = repClosure(m)
+ }
+ return
+}
+
+// UnicodeTranslatorFromFile returns a function that can be used to translate,
+// where possible, utf-8 strings to a form that is compatible with the
+// specified code page. See UnicodeTranslator for more details.
+//
+// fileStr identifies a font descriptor file that maps glyph positions to names.
+func UnicodeTranslatorFromFile(fileStr string) (f func(string) string, err error) {
+ var fl *os.File
+ fl, err = os.Open(fileStr)
+ if err == nil {
+ f, err = UnicodeTranslator(fl)
+ fl.Close()
+ }
+ return
+}
+
+// UnicodeTranslatorFromDescriptor returns a function that can be used to
+// translate, where possible, utf-8 strings to a form that is compatible with
+// the specified code page. See UnicodeTranslator for more details.
+//
+// cpStr identifies a code page. A descriptor file in the font directory, set
+// with the fontDirStr argument in the call to New(), should have this name
+// plus the extension ".map". If cpStr is empty, it will be replaced with
+// "cp1252", the gofpdf code page default.
+//
+// See tutorial 23 for an example of this function.
+func (f *Fpdf) UnicodeTranslatorFromDescriptor(cpStr string) (rep func(string) string) {
+ if f.err != nil {
+ return
+ }
+ if len(cpStr) == 0 {
+ cpStr = "cp1252"
+ }
+ rep, f.err = UnicodeTranslatorFromFile(filepath.Join(f.fontpath, cpStr) + ".map")
+ return
+}