diff options
Diffstat (limited to 'htmlbasic.go')
-rw-r--r-- | htmlbasic.go | 196 |
1 files changed, 196 insertions, 0 deletions
diff --git a/htmlbasic.go b/htmlbasic.go new file mode 100644 index 0000000..b2a05e5 --- /dev/null +++ b/htmlbasic.go @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2014 Kurt Jung (Gmail: kurt.w.jung) + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +package gofpdf + +import ( + "regexp" + "strings" +) + /* HtmlBasicSegmentType describes one element produced by HtmlBasicTokenize: an open tag, a close tag or a run of literal text. */ +type HtmlBasicSegmentType struct { + Cat byte // Segment category: 'O' open tag, 'C' close tag, 'T' text + Str string // Literal text for 'T' segments, lower-case tag name for 'O' and 'C' + Attr map[string]string // Attributes of an open tag keyed by lower-case name; nil for 'C' and 'T' +} + +// HtmlBasicTokenize returns a list of HTML tags and literal elements. This is +// done with regular expressions, so the result is only marginally better than +// useless. 
+func HtmlBasicTokenize(htmlStr string) (list []HtmlBasicSegmentType) { + // This routine is adapted from http://www.fpdf.org/ + list = make([]HtmlBasicSegmentType, 0, 16) + htmlStr = strings.Replace(htmlStr, "\n", " ", -1) + htmlStr = strings.Replace(htmlStr, "\r", "", -1) + tagRe, _ := regexp.Compile(`(?U)<.*>`) + attrRe, _ := regexp.Compile(`([^=]+)=["']?([^"']+)`) + capList := tagRe.FindAllStringIndex(htmlStr, -1) + if capList != nil { + var seg HtmlBasicSegmentType + var parts []string + pos := 0 + for _, cap := range capList { + if pos < cap[0] { + seg.Cat = 'T' + seg.Str = htmlStr[pos:cap[0]] + seg.Attr = nil + list = append(list, seg) + } + if htmlStr[cap[0]+1] == '/' { + seg.Cat = 'C' + seg.Str = strings.ToLower(htmlStr[cap[0]+2 : cap[1]-1]) + seg.Attr = nil + list = append(list, seg) + } else { + // Extract attributes + parts = strings.Split(htmlStr[cap[0]+1:cap[1]-1], " ") + if len(parts) > 0 { + for j, part := range parts { + if j == 0 { + seg.Cat = 'O' + seg.Str = strings.ToLower(parts[0]) + seg.Attr = make(map[string]string) + } else { + attrList := attrRe.FindAllStringSubmatch(part, -1) + if attrList != nil { + for _, attr := range attrList { + seg.Attr[strings.ToLower(attr[1])] = attr[2] + } + } + } + } + list = append(list, seg) + } + } + pos = cap[1] + } + if len(htmlStr) > pos { + seg.Cat = 'T' + seg.Str = htmlStr[pos:] + seg.Attr = nil + list = append(list, seg) + } + } + return +} + +// HtmlBasicType is used for rendering a very basic subset of HTML. It supports +// only the bold, italic and underscore attributes and hyperlinks. In the Link +// structure, the ClrR, ClrG and ClrB fields (0 through 255) define the color +// of hyperlinks. The Bold, Italic and Underscore values (0 for off, 1 for on) +// define the hyperlink style. 
+type HtmlBasicType struct { + pdf *Fpdf + Link struct { + ClrR, ClrG, ClrB int + Bold, Italic, Underscore bool + } +} + +// HtmlBasicNew returns an instance that facilitates writing basic HTML in the +// specified PDF file. +func (f *Fpdf) HtmlBasicNew() (html HtmlBasicType) { + html.pdf = f + html.Link.ClrR, html.Link.ClrG, html.Link.ClrB = 0, 0, 128 + html.Link.Bold, html.Link.Italic, html.Link.Underscore = false, false, true + return +} + +// Write prints text from the current position using the currently selected +// font. The text can be encoded with a basic subset of HTML that includes tags +// for italic (I), bold (B) and underscore (U) attributes and hyperlinks. When +// the right margin is reached a line break occurs and text continues from the +// left margin. Upon method exit, the current position is left at the end of +// the text. +// +// lineHt indicates the line height in the unit of measure specified in New(). +func (html *HtmlBasicType) Write(lineHt float64, htmlStr string) { + var boldLvl, italicLvl, underscoreLvl, linkBold, linkItalic, linkUnderscore int + var textR, textG, textB = html.pdf.GetTextColor() + var hrefStr string + if html.Link.Bold { + linkBold = 1 + } + if html.Link.Italic { + linkItalic = 1 + } + if html.Link.Underscore { + linkUnderscore = 1 + } + setStyle := func(boldAdj, italicAdj, underscoreAdj int) { + styleStr := "" + boldLvl += boldAdj + if boldLvl > 0 { + styleStr += "B" + } + italicLvl += italicAdj + if italicLvl > 0 { + styleStr += "I" + } + underscoreLvl += underscoreAdj + if underscoreLvl > 0 { + styleStr += "U" + } + html.pdf.SetFont("", styleStr, 0) + } + putLink := func(urlStr, txtStr string) { + // Put a hyperlink + html.pdf.SetTextColor(html.Link.ClrR, html.Link.ClrG, html.Link.ClrB) + setStyle(linkBold, linkItalic, linkUnderscore) + html.pdf.WriteLinkString(lineHt, txtStr, urlStr) + setStyle(-linkBold, -linkItalic, -linkUnderscore) + html.pdf.SetTextColor(textR, textG, textB) + } + list := 
HtmlBasicTokenize(htmlStr) + var ok bool + for _, el := range list { + switch el.Cat { + case 'T': + if len(hrefStr) > 0 { + putLink(hrefStr, el.Str) + hrefStr = "" + } else { + html.pdf.Write(lineHt, el.Str) + } + case 'O': + switch el.Str { + case "b": + setStyle(1, 0, 0) + case "i": + setStyle(0, 1, 0) + case "u": + setStyle(0, 0, 1) + case "br": + html.pdf.Ln(lineHt) + case "a": + hrefStr, ok = el.Attr["href"] + if !ok { + hrefStr = "" + } + } + case 'C': + switch el.Str { + case "b": + setStyle(-1, 0, 0) + case "i": + setStyle(0, -1, 0) + case "u": + setStyle(0, 0, -1) + + } + } + } +} |