aboutsummaryrefslogtreecommitdiff
path: root/ui/messages/parser/htmlparser.go
diff options
context:
space:
mode:
author Tulir Asokan <tulir@maunium.net> 2019-04-07 23:21:11 +0300
committer Tulir Asokan <tulir@maunium.net> 2019-04-07 23:21:11 +0300
commit e5db799fa30c5e3c7290d7ead07c84ada11087ac (patch)
tree 536f231aa79c6736a0dc9e721bc51d77719deb12 /ui/messages/parser/htmlparser.go
parent b81ba7b631b5243bd61514db1787a3f9043990e0 (diff)
parent 5d7c1a4caab46f7e981aed7b9cc825b7602b4098 (diff)
Merge branch 'deep-message-rendering'
Diffstat (limited to 'ui/messages/parser/htmlparser.go')
-rw-r--r-- ui/messages/parser/htmlparser.go 339
1 files changed, 200 insertions, 139 deletions
diff --git a/ui/messages/parser/htmlparser.go b/ui/messages/parser/htmlparser.go
index f01d3cb..e658c61 100644
--- a/ui/messages/parser/htmlparser.go
+++ b/ui/messages/parser/htmlparser.go
@@ -17,12 +17,13 @@
package parser
import (
- "fmt"
- "math"
"regexp"
"strconv"
"strings"
+ "github.com/alecthomas/chroma"
+ "github.com/alecthomas/chroma/lexers"
+ "github.com/alecthomas/chroma/styles"
"github.com/lucasb-eyer/go-colorful"
"golang.org/x/net/html"
@@ -30,7 +31,7 @@ import (
"maunium.net/go/tcell"
"maunium.net/go/gomuks/matrix/rooms"
- "maunium.net/go/gomuks/ui/messages/tstring"
+ "maunium.net/go/gomuks/ui/messages"
"maunium.net/go/gomuks/ui/widget"
)
@@ -40,11 +41,6 @@ type htmlParser struct {
room *rooms.Room
}
-type taggedTString struct {
- tstring.TString
- tag string
-}
-
func AdjustStyleBold(style tcell.Style) tcell.Style {
return style.Bold(true)
}
@@ -82,65 +78,53 @@ func (parser *htmlParser) getAttribute(node *html.Node, attribute string) string
return ""
}
-func digits(num int) int {
- if num <= 0 {
- return 0
- }
- return int(math.Floor(math.Log10(float64(num))) + 1)
-}
-
-func (parser *htmlParser) listToTString(node *html.Node, stripLinebreak bool) tstring.TString {
+func (parser *htmlParser) listToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
+ children := parser.nodeToEntities(node.FirstChild, stripLinebreak)
ordered := node.Data == "ol"
- taggedChildren := parser.nodeToTaggedTStrings(node.FirstChild, stripLinebreak)
- counter := 1
- indentLength := 0
+ start := 1
if ordered {
- start := parser.getAttribute(node, "start")
- if len(start) > 0 {
- counter, _ = strconv.Atoi(start)
+ if startRaw := parser.getAttribute(node, "start"); len(startRaw) > 0 {
+ var err error
+ start, err = strconv.Atoi(startRaw)
+ if err != nil {
+ start = 1
+ }
}
-
- longestIndex := (counter - 1) + len(taggedChildren)
- indentLength = digits(longestIndex)
}
- indent := strings.Repeat(" ", indentLength+2)
- var children []tstring.TString
- for _, child := range taggedChildren {
- if child.tag != "li" {
- continue
- }
- var prefix string
- if ordered {
- indexPadding := indentLength - digits(counter)
- prefix = fmt.Sprintf("%d. %s", counter, strings.Repeat(" ", indexPadding))
- } else {
- prefix = "● "
+ listItems := children[:0]
+ for _, child := range children {
+ if child.GetTag() == "li" {
+ listItems = append(listItems, child)
}
- str := child.TString.Prepend(prefix)
- counter++
- parts := str.Split('\n')
- for i, part := range parts[1:] {
- parts[i+1] = part.Prepend(indent)
- }
- str = tstring.Join(parts, "\n")
- children = append(children, str)
}
- return tstring.Join(children, "\n")
+ return messages.NewListEntity(ordered, start, listItems)
}
-func (parser *htmlParser) basicFormatToTString(node *html.Node, stripLinebreak bool) tstring.TString {
- str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
+func (parser *htmlParser) basicFormatToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
+ entity := &messages.BaseHTMLEntity{
+ Tag: node.Data,
+ Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
+ }
switch node.Data {
case "b", "strong":
- str.AdjustStyleFull(AdjustStyleBold)
+ entity.AdjustStyle(AdjustStyleBold)
case "i", "em":
- str.AdjustStyleFull(AdjustStyleItalic)
+ entity.AdjustStyle(AdjustStyleItalic)
case "s", "del":
- str.AdjustStyleFull(AdjustStyleStrikethrough)
+ entity.AdjustStyle(AdjustStyleStrikethrough)
case "u", "ins":
- str.AdjustStyleFull(AdjustStyleUnderline)
+ entity.AdjustStyle(AdjustStyleUnderline)
+ case "font":
+ fgColor, ok := parser.parseColor(node, "data-mx-color", "color")
+ if ok {
+ entity.AdjustStyle(AdjustStyleTextColor(fgColor))
+ }
+ bgColor, ok := parser.parseColor(node, "data-mx-bg-color", "background-color")
+ if ok {
+ entity.AdjustStyle(AdjustStyleBackgroundColor(bgColor))
+ }
}
- return str
+ return entity
}
func (parser *htmlParser) parseColor(node *html.Node, mainName, altName string) (color tcell.Color, ok bool) {
@@ -165,103 +149,200 @@ func (parser *htmlParser) parseColor(node *html.Node, mainName, altName string)
return tcell.NewRGBColor(int32(r), int32(g), int32(b)), true
}
-func (parser *htmlParser) fontToTString(node *html.Node, stripLinebreak bool) tstring.TString {
- str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
- fgColor, ok := parser.parseColor(node, "data-mx-color", "color")
- if ok {
- str.AdjustStyleFull(AdjustStyleTextColor(fgColor))
- }
- bgColor, ok := parser.parseColor(node, "data-mx-bg-color", "background-color")
- if ok {
- str.AdjustStyleFull(AdjustStyleBackgroundColor(bgColor))
- }
- return str
-}
-
-func (parser *htmlParser) headerToTString(node *html.Node, stripLinebreak bool) tstring.TString {
- children := parser.nodeToTStrings(node.FirstChild, stripLinebreak)
+func (parser *htmlParser) headerToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
length := int(node.Data[1] - '0')
prefix := strings.Repeat("#", length) + " "
- return tstring.Join(children, "").Prepend(prefix)
+ return (&messages.BaseHTMLEntity{
+ Tag: node.Data,
+ Text: prefix,
+ Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
+ }).AdjustStyle(AdjustStyleBold)
}
-func (parser *htmlParser) blockquoteToTString(node *html.Node, stripLinebreak bool) tstring.TString {
- str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
- childrenArr := str.TrimSpace().Split('\n')
- for index, child := range childrenArr {
- childrenArr[index] = child.Prepend("> ")
- }
- return tstring.Join(childrenArr, "\n")
+func (parser *htmlParser) blockquoteToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
+ return messages.NewBlockquoteEntity(parser.nodeToEntities(node.FirstChild, stripLinebreak))
}
-func (parser *htmlParser) linkToTString(node *html.Node, stripLinebreak bool) tstring.TString {
- str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
+func (parser *htmlParser) linkToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
+ entity := &messages.BaseHTMLEntity{
+ Tag: "a",
+ Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
+ }
href := parser.getAttribute(node, "href")
if len(href) == 0 {
- return str
+ return entity
}
match := matrixToURL.FindStringSubmatch(href)
if len(match) == 2 {
+ entity.Children = nil
pillTarget := match[1]
+ entity.Text = pillTarget
if pillTarget[0] == '@' {
if member := parser.room.GetMember(pillTarget); member != nil {
- return tstring.NewColorTString(member.Displayname, widget.GetHashColor(pillTarget))
+ entity.Text = member.Displayname
+ entity.Style = entity.Style.Foreground(widget.GetHashColor(pillTarget))
}
}
- return tstring.NewTString(pillTarget)
}
- return str.Append(fmt.Sprintf(" (%s)", href))
+ // TODO add click action and underline on hover for links
+ return entity
}
-func (parser *htmlParser) tagToTString(node *html.Node, stripLinebreak bool) tstring.TString {
+func (parser *htmlParser) imageToEntity(node *html.Node) messages.HTMLEntity {
+ alt := parser.getAttribute(node, "alt")
+ if len(alt) == 0 {
+ alt = parser.getAttribute(node, "title")
+ if len(alt) == 0 {
+ alt = "[inline image]"
+ }
+ }
+ entity := &messages.BaseHTMLEntity{
+ Tag: "img",
+ Text: alt,
+ }
+ // TODO add click action and underline on hover for inline images
+ return entity
+}
+
+func colourToColor(colour chroma.Colour) tcell.Color {
+ if !colour.IsSet() {
+ return tcell.ColorDefault
+ }
+ return tcell.NewRGBColor(int32(colour.Red()), int32(colour.Green()), int32(colour.Blue()))
+}
+
+func styleEntryToStyle(se chroma.StyleEntry) tcell.Style {
+ return tcell.StyleDefault.
+ Bold(se.Bold == chroma.Yes).
+ Italic(se.Italic == chroma.Yes).
+ Underline(se.Underline == chroma.Yes).
+ Foreground(colourToColor(se.Colour)).
+ Background(colourToColor(se.Background))
+}
+
+func (parser *htmlParser) syntaxHighlight(text, language string) messages.HTMLEntity {
+ lexer := lexers.Get(language)
+ if lexer == nil {
+ return nil
+ }
+ iter, err := lexer.Tokenise(nil, text)
+ if err != nil {
+ return nil
+ }
+ style := styles.SolarizedDark
+ tokens := iter.Tokens()
+ children := make([]messages.HTMLEntity, len(tokens))
+ for i, token := range tokens {
+ if token.Value == "\n" {
+ children[i] = &messages.BaseHTMLEntity{Block: true, Tag: "br"}
+ } else {
+ children[i] = &messages.BaseHTMLEntity{
+ Tag: token.Type.String(),
+ Text: token.Value,
+ Style: styleEntryToStyle(style.Get(token.Type)),
+
+ DefaultHeight: 1,
+ }
+ }
+ }
+ return &messages.BaseHTMLEntity{
+ Tag: "pre",
+ Block: true,
+ Children: children,
+ }
+}
+
+func (parser *htmlParser) codeblockToEntity(node *html.Node) messages.HTMLEntity {
+ entity := &messages.BaseHTMLEntity{
+ Tag: "pre",
+ Block: true,
+ }
+ // An empty <pre> has no FirstChild, so nil-check it before inspecting it. TODO allow disabling syntax highlighting
+ if node.FirstChild != nil && node.FirstChild.Type == html.ElementNode && node.FirstChild.Data == "code" {
+ text := (&messages.BaseHTMLEntity{
+ Children: parser.nodeToEntities(node.FirstChild.FirstChild, false),
+ }).PlainText()
+ attr := parser.getAttribute(node.FirstChild, "class")
+ var lang string
+ for _, class := range strings.Split(attr, " ") {
+ if strings.HasPrefix(class, "language-") {
+ lang = class[len("language-"):]
+ break
+ }
+ }
+ if len(lang) != 0 {
+ if parsed := parser.syntaxHighlight(text, lang); parsed != nil {
+ return parsed
+ }
+ }
+ }
+ entity.Children = parser.nodeToEntities(node.FirstChild, false)
+ return entity
+}
+
+func (parser *htmlParser) tagNodeToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
switch node.Data {
case "blockquote":
- return parser.blockquoteToTString(node, stripLinebreak)
+ return parser.blockquoteToEntity(node, stripLinebreak)
case "ol", "ul":
- return parser.listToTString(node, stripLinebreak)
+ return parser.listToEntity(node, stripLinebreak)
case "h1", "h2", "h3", "h4", "h5", "h6":
- return parser.headerToTString(node, stripLinebreak)
+ return parser.headerToEntity(node, stripLinebreak)
case "br":
- return tstring.NewTString("\n")
- case "b", "strong", "i", "em", "s", "del", "u", "ins":
- return parser.basicFormatToTString(node, stripLinebreak)
- case "font":
- return parser.fontToTString(node, stripLinebreak)
+ return messages.NewBreakEntity()
+ case "b", "strong", "i", "em", "s", "del", "u", "ins", "font":
+ return parser.basicFormatToEntity(node, stripLinebreak)
case "a":
- return parser.linkToTString(node, stripLinebreak)
- case "p":
- return parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak).Append("\n")
+ return parser.linkToEntity(node, stripLinebreak)
+ case "img":
+ return parser.imageToEntity(node)
case "pre":
- return parser.nodeToTString(node.FirstChild, false)
+ return parser.codeblockToEntity(node)
default:
- return parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
+ return &messages.BaseHTMLEntity{
+ Tag: node.Data,
+ Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
+ Block: parser.isBlockTag(node.Data),
+ }
}
}
-func (parser *htmlParser) singleNodeToTString(node *html.Node, stripLinebreak bool) taggedTString {
+func (parser *htmlParser) singleNodeToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
switch node.Type {
case html.TextNode:
if stripLinebreak {
node.Data = strings.Replace(node.Data, "\n", "", -1)
}
- return taggedTString{tstring.NewTString(node.Data), "text"}
+ return &messages.BaseHTMLEntity{
+ Tag: "text",
+ Text: node.Data,
+ }
case html.ElementNode:
- return taggedTString{parser.tagToTString(node, stripLinebreak), node.Data}
+ return parser.tagNodeToEntity(node, stripLinebreak)
case html.DocumentNode:
- return taggedTString{parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak), "html"}
+ if node.FirstChild.Data == "html" && node.FirstChild.NextSibling == nil {
+ return parser.singleNodeToEntity(node.FirstChild, stripLinebreak)
+ }
+ return &messages.BaseHTMLEntity{
+ Tag: "html",
+ Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
+ Block: true,
+ }
default:
- return taggedTString{tstring.NewBlankTString(), "unknown"}
+ return nil
}
}
-func (parser *htmlParser) nodeToTaggedTStrings(node *html.Node, stripLinebreak bool) (strs []taggedTString) {
+func (parser *htmlParser) nodeToEntities(node *html.Node, stripLinebreak bool) (entities []messages.HTMLEntity) {
for ; node != nil; node = node.NextSibling {
- strs = append(strs, parser.singleNodeToTString(node, stripLinebreak))
+ if entity := parser.singleNodeToEntity(node, stripLinebreak); entity != nil {
+ entities = append(entities, entity)
+ }
}
return
}
-var BlockTags = []string{"p", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "pre", "blockquote", "div", "hr", "table"}
+var BlockTags = []string{"p", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "li", "pre", "blockquote", "div", "hr", "table"}
func (parser *htmlParser) isBlockTag(tag string) bool {
for _, blockTag := range BlockTags {
@@ -272,51 +353,31 @@ func (parser *htmlParser) isBlockTag(tag string) bool {
return false
}
-func (parser *htmlParser) nodeToTagAwareTString(node *html.Node, stripLinebreak bool) tstring.TString {
- strs := parser.nodeToTaggedTStrings(node, stripLinebreak)
- output := tstring.NewBlankTString()
- for _, str := range strs {
- tstr := str.TString
- if parser.isBlockTag(str.tag) {
- tstr = tstr.Prepend("\n").Append("\n")
- }
- output = output.AppendTString(tstr)
- }
- return output.TrimSpace()
-}
-
-func (parser *htmlParser) nodeToTStrings(node *html.Node, stripLinebreak bool) (strs []tstring.TString) {
- for ; node != nil; node = node.NextSibling {
- strs = append(strs, parser.singleNodeToTString(node, stripLinebreak).TString)
- }
- return
-}
-
-func (parser *htmlParser) nodeToTString(node *html.Node, stripLinebreak bool) tstring.TString {
- return tstring.Join(parser.nodeToTStrings(node, stripLinebreak), "")
-}
-
-func (parser *htmlParser) Parse(htmlData string) tstring.TString {
+func (parser *htmlParser) Parse(htmlData string) messages.HTMLEntity {
node, _ := html.Parse(strings.NewReader(htmlData))
- return parser.nodeToTagAwareTString(node, true)
+ return parser.singleNodeToEntity(node, true)
}
// ParseHTMLMessage parses a HTML-formatted Matrix event into a UIMessage.
-func ParseHTMLMessage(room *rooms.Room, evt *mautrix.Event, senderDisplayname string) tstring.TString {
+func ParseHTMLMessage(room *rooms.Room, evt *mautrix.Event, senderDisplayname string) messages.HTMLEntity {
htmlData := evt.Content.FormattedBody
htmlData = strings.Replace(htmlData, "\t", " ", -1)
parser := htmlParser{room}
- str := parser.Parse(htmlData)
+ root := parser.Parse(htmlData)
+ root.(*messages.BaseHTMLEntity).Block = false
if evt.Content.MsgType == mautrix.MsgEmote {
- str = tstring.Join([]tstring.TString{
- tstring.NewTString("* "),
- tstring.NewColorTString(senderDisplayname, widget.GetHashColor(evt.Sender)),
- tstring.NewTString(" "),
- str,
- }, "")
+ root = &messages.BaseHTMLEntity{
+ Tag: "emote",
+ Children: []messages.HTMLEntity{
+ messages.NewHTMLTextEntity("* "),
+ messages.NewHTMLTextEntity(senderDisplayname).AdjustStyle(AdjustStyleTextColor(widget.GetHashColor(evt.Sender))),
+ messages.NewHTMLTextEntity(" "),
+ root,
+ },
+ }
}
- return str
+ return root
}