aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/htmlparser/htmlparser.go108
-rw-r--r--ui/messages/htmlparser.go218
-rw-r--r--ui/messages/htmltagarray.go118
-rw-r--r--ui/messages/parser.go6
-rw-r--r--ui/messages/tstring/string.go4
5 files changed, 367 insertions, 87 deletions
diff --git a/lib/htmlparser/htmlparser.go b/lib/htmlparser/htmlparser.go
new file mode 100644
index 0000000..fb4e012
--- /dev/null
+++ b/lib/htmlparser/htmlparser.go
@@ -0,0 +1,108 @@
+// gomuks - A terminal Matrix client written in Go.
+// Copyright (C) 2018 Tulir Asokan
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package htmlparser
+
+import (
+ "io"
+ "strings"
+
+ "golang.org/x/net/html"
+)
+
+type HTMLProcessor interface {
+ Preprocess()
+ HandleStartTag(tagName string, attrs map[string]string)
+ HandleSelfClosingTag(tagName string, attrs map[string]string)
+ HandleText(text string)
+ HandleEndTag(tagName string)
+ ReceiveError(err error)
+ Postprocess()
+}
+
+type HTMLParser struct {
+ *html.Tokenizer
+ processor HTMLProcessor
+}
+
+func NewHTMLParserFromTokenizer(z *html.Tokenizer, processor HTMLProcessor) HTMLParser {
+ return HTMLParser{
+ z,
+ processor,
+ }
+}
+
+func NewHTMLParserFromReader(reader io.Reader, processor HTMLProcessor) HTMLParser {
+ return NewHTMLParserFromTokenizer(html.NewTokenizer(reader), processor)
+}
+
+func NewHTMLParserFromString(html string, processor HTMLProcessor) HTMLParser {
+ return NewHTMLParserFromReader(strings.NewReader(html), processor)
+}
+
+var SelfClosingTags = []string{"img", "br", "hr", "area", "base", "basefont", "input", "link", "meta"}
+
+func (parser HTMLParser) mapAttrs() map[string]string {
+ attrs := make(map[string]string)
+ hasMore := true
+ for hasMore {
+ var key, val []byte
+ key, val, hasMore = parser.TagAttr()
+ attrs[string(key)] = string(val)
+ }
+ return attrs
+}
+
+func (parser HTMLParser) isSelfClosing(tag string) bool {
+ for _, selfClosingTag := range SelfClosingTags {
+ if tag == selfClosingTag {
+ return true
+ }
+ }
+ return false
+}
+
+func (parser HTMLParser) Process() {
+ parser.processor.Preprocess()
+Loop:
+ for {
+ tt := parser.Next()
+ switch tt {
+ case html.ErrorToken:
+ parser.processor.ReceiveError(parser.Err())
+ break Loop
+ case html.TextToken:
+ parser.processor.HandleText(string(parser.Text()))
+ case html.StartTagToken, html.SelfClosingTagToken:
+ tagb, _ := parser.TagName()
+ attrs := parser.mapAttrs()
+ tag := string(tagb)
+
+ selfClosing := tt == html.SelfClosingTagToken || parser.isSelfClosing(tag)
+
+ if selfClosing {
+ parser.processor.HandleSelfClosingTag(tag, attrs)
+ } else {
+ parser.processor.HandleStartTag(tag, attrs)
+ }
+ case html.EndTagToken:
+ tagb, _ := parser.TagName()
+ parser.processor.HandleEndTag(string(tagb))
+ }
+ }
+
+ parser.processor.Postprocess()
+}
diff --git a/ui/messages/htmlparser.go b/ui/messages/htmlparser.go
index 0475e7a..aa6211e 100644
--- a/ui/messages/htmlparser.go
+++ b/ui/messages/htmlparser.go
@@ -17,120 +17,170 @@
package messages
import (
+ "fmt"
+ "io"
+ "math"
+ "regexp"
"strings"
- "golang.org/x/net/html"
"maunium.net/go/gomatrix"
"maunium.net/go/gomuks/debug"
+ "maunium.net/go/gomuks/lib/htmlparser"
+ "maunium.net/go/gomuks/matrix/rooms"
"maunium.net/go/gomuks/ui/messages/tstring"
+ "maunium.net/go/gomuks/ui/widget"
"maunium.net/go/tcell"
)
-// TagArray is a reversed queue for remembering what HTML tags are open.
-type TagArray []string
+var matrixToURL = regexp.MustCompile("^(?:https?://)?(?:www\\.)?matrix\\.to/#/([#@!].*)")
-// Pushb converts the given byte array into a string and calls Push().
-func (ta *TagArray) Pushb(tag []byte) {
- ta.Push(string(tag))
+type MatrixHTMLProcessor struct {
+ text tstring.TString
+
+ indent string
+ listType string
+ lineIsNew bool
+ openTags *TagArray
+
+ room *rooms.Room
}
-// Popb converts the given byte array into a string and calls Pop().
-func (ta *TagArray) Popb(tag []byte) {
- ta.Pop(string(tag))
+func (parser *MatrixHTMLProcessor) newline() {
+ if !parser.lineIsNew {
+ parser.text = parser.text.Append("\n" + parser.indent)
+ parser.lineIsNew = true
+ }
}
-// Hasb converts the given byte array into a string and calls Has().
-func (ta *TagArray) Hasb(tag []byte) {
- ta.Has(string(tag))
+func (parser *MatrixHTMLProcessor) Preprocess() {}
+
+func (parser *MatrixHTMLProcessor) HandleText(text string) {
+ style := tcell.StyleDefault
+ for _, tag := range *parser.openTags {
+ switch tag.Tag {
+ case "b", "strong":
+ style = style.Bold(true)
+ case "i", "em":
+ style = style.Italic(true)
+ case "s", "del":
+ style = style.Strikethrough(true)
+ case "u", "ins":
+ style = style.Underline(true)
+ case "a":
+ tag.Text += text
+ return
+ }
+ }
+
+ if parser.openTags.Has("pre", "code") {
+ text = strings.Replace(text, "\n", "", -1)
+ }
+ parser.text = parser.text.AppendStyle(text, style)
+ parser.lineIsNew = false
}
-// HasAfterb converts the given byte array into a string and calls HasAfter().
-func (ta *TagArray) HasAfterb(tag []byte, after int) {
- ta.HasAfter(string(tag), after)
+func (parser *MatrixHTMLProcessor) HandleStartTag(tagName string, attrs map[string]string) {
+ tag := &TagWithMeta{Tag: tagName}
+ switch tag.Tag {
+ case "h1", "h2", "h3", "h4", "h5", "h6":
+ length := int(tag.Tag[1] - '0')
+ parser.text = parser.text.Append(strings.Repeat("#", length) + " ")
+ parser.lineIsNew = false
+ case "a":
+ tag.Meta, _ = attrs["href"]
+ case "ol", "ul":
+ parser.listType = tag.Tag
+ case "li":
+ indentSize := 2
+ if parser.listType == "ol" {
+ list := parser.openTags.Get(parser.listType)
+ list.Counter++
+ parser.text = parser.text.Append(fmt.Sprintf("%d. ", list.Counter))
+ indentSize = int(math.Log10(float64(list.Counter))+1) + len(". ")
+ } else {
+ parser.text = parser.text.Append("* ")
+ }
+ parser.indent += strings.Repeat(" ", indentSize)
+ parser.lineIsNew = false
+ case "blockquote":
+ parser.indent += "> "
+ parser.text = parser.text.Append("> ")
+ parser.lineIsNew = false
+ }
+ parser.openTags.PushMeta(tag)
}
-// Push adds the given tag to the array.
-func (ta *TagArray) Push(tag string) {
- *ta = append(*ta, "")
- copy((*ta)[1:], *ta)
- (*ta)[0] = tag
+func (parser *MatrixHTMLProcessor) HandleSelfClosingTag(tagName string, attrs map[string]string) {
+ if tagName == "br" {
+ parser.newline()
+ }
}
-// Pop removes the given tag from the array.
-func (ta *TagArray) Pop(tag string) {
- if (*ta)[0] == tag {
- // This is the default case and is lighter than append(), so we handle it separately.
- *ta = (*ta)[1:]
- } else if index := ta.Has(tag); index != -1 {
- *ta = append((*ta)[:index], (*ta)[index+1:]...)
+func (parser *MatrixHTMLProcessor) HandleEndTag(tagName string) {
+ tag := parser.openTags.Pop(tagName)
+
+ switch tag.Tag {
+ case "li", "blockquote":
+ indentSize := 2
+ if tag.Tag == "li" && parser.listType == "ol" {
+ list := parser.openTags.Get(parser.listType)
+ indentSize = int(math.Log10(float64(list.Counter))+1) + len(". ")
+ }
+ if len(parser.indent) >= indentSize {
+ parser.indent = parser.indent[0 : len(parser.indent)-indentSize]
+ }
+ // TODO this newline is sometimes not good
+ parser.newline()
+ case "a":
+ match := matrixToURL.FindStringSubmatch(tag.Meta)
+ if len(match) == 2 {
+ pillTarget := match[1]
+ if pillTarget[0] == '@' {
+ if member := parser.room.GetMember(pillTarget); member != nil {
+ parser.text = parser.text.AppendColor(member.DisplayName, widget.GetHashColor(member.DisplayName))
+ } else {
+ parser.text = parser.text.Append(pillTarget)
+ }
+ } else {
+ parser.text = parser.text.Append(pillTarget)
+ }
+ } else {
+ // TODO make text clickable rather than printing URL
+ parser.text = parser.text.Append(fmt.Sprintf("%s (%s)", tag.Text, tag.Meta))
+ }
+ parser.lineIsNew = false
+ case "p", "pre", "ol", "ul", "h1", "h2", "h3", "h4", "h5", "h6", "div":
+ // parser.newline()
}
}
-// Has returns the first index where the given tag is, or -1 if it's not in the list.
-func (ta *TagArray) Has(tag string) int {
- return ta.HasAfter(tag, -1)
+func (parser *MatrixHTMLProcessor) ReceiveError(err error) {
+ if err != io.EOF {
+ debug.Print("Unexpected error parsing HTML:", err)
+ }
}
-// HasAfter returns the first index after the given index where the given tag is,
-// or -1 if the given tag is not on the list after the given index.
-func (ta *TagArray) HasAfter(tag string, after int) int {
- for i := after + 1; i < len(*ta); i++ {
- if (*ta)[i] == tag {
- return i
- }
+func (parser *MatrixHTMLProcessor) Postprocess() {
+ if len(parser.text) > 0 && parser.text[len(parser.text)-1].Char == '\n' {
+ parser.text = parser.text[:len(parser.text)-1]
}
- return -1
}
// ParseHTMLMessage parses a HTML-formatted Matrix event into a UIMessage.
-func ParseHTMLMessage(evt *gomatrix.Event) tstring.TString {
- //textData, _ := evt.Content["body"].(string)
+func ParseHTMLMessage(room *rooms.Room, evt *gomatrix.Event) tstring.TString {
htmlData, _ := evt.Content["formatted_body"].(string)
- z := html.NewTokenizer(strings.NewReader(htmlData))
- text := tstring.NewTString("")
-
- openTags := &TagArray{}
-
-Loop:
- for {
- tt := z.Next()
- switch tt {
- case html.ErrorToken:
- break Loop
- case html.TextToken:
- style := tcell.StyleDefault
- for _, tag := range *openTags {
- switch tag {
- case "b", "strong":
- style = style.Bold(true)
- case "i", "em":
- style = style.Italic(true)
- case "s", "del":
- style = style.Strikethrough(true)
- case "u", "ins":
- style = style.Underline(true)
- }
- }
- text = text.AppendStyle(string(z.Text()), style)
- case html.SelfClosingTagToken, html.StartTagToken:
- tagb, _ := z.TagName()
- tag := string(tagb)
- switch tag {
- case "br":
- debug.Print("BR found")
- debug.Print(text.String())
- text = text.Append("\n")
- default:
- if tt == html.StartTagToken {
- openTags.Push(tag)
- }
- }
- case html.EndTagToken:
- tagb, _ := z.TagName()
- openTags.Popb(tagb)
- }
+ processor := &MatrixHTMLProcessor{
+ room: room,
+ text: tstring.NewBlankTString(),
+ indent: "",
+ listType: "",
+ lineIsNew: true,
+ openTags: &TagArray{},
}
- return text
+ parser := htmlparser.NewHTMLParserFromString(htmlData, processor)
+ parser.Process()
+
+ return processor.text
}
diff --git a/ui/messages/htmltagarray.go b/ui/messages/htmltagarray.go
new file mode 100644
index 0000000..597f0c7
--- /dev/null
+++ b/ui/messages/htmltagarray.go
@@ -0,0 +1,118 @@
+// gomuks - A terminal Matrix client written in Go.
+// Copyright (C) 2018 Tulir Asokan
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+package messages
+
+// TagWithMeta is an open HTML tag with some metadata (e.g. list index, a href value).
+type TagWithMeta struct {
+ Tag string
+ Counter int
+ Meta string
+ Text string
+}
+
+// BlankTag is a blank TagWithMeta object.
+var BlankTag = &TagWithMeta{}
+
+// TagArray is a reversed queue for remembering what HTML tags are open.
+type TagArray []*TagWithMeta
+
+// Pushb converts the given byte array into a string and calls Push().
+func (ta *TagArray) Pushb(tag []byte) {
+ ta.Push(string(tag))
+}
+
+// Popb converts the given byte array into a string and calls Pop().
+func (ta *TagArray) Popb(tag []byte) *TagWithMeta {
+ return ta.Pop(string(tag))
+}
+
+// Indexb converts the given byte array into a string and calls Index().
+func (ta *TagArray) Indexb(tag []byte) {
+ ta.Index(string(tag))
+}
+
+// IndexAfterb converts the given byte array into a string and calls IndexAfter().
+func (ta *TagArray) IndexAfterb(tag []byte, after int) {
+ ta.IndexAfter(string(tag), after)
+}
+
+// Push adds the given tag to the array.
+func (ta *TagArray) Push(tag string) {
+ ta.PushMeta(&TagWithMeta{Tag: tag})
+}
+
+// PushMeta adds the given tag, together with its metadata, to the front of the array.
+func (ta *TagArray) PushMeta(tag *TagWithMeta) {
+ *ta = append(*ta, BlankTag)
+ copy((*ta)[1:], *ta)
+ (*ta)[0] = tag
+}
+
+// Pop removes the first occurrence of the given tag and returns it, or nil if it is not found (the array must be non-empty).
+func (ta *TagArray) Pop(tag string) (removed *TagWithMeta) {
+ if (*ta)[0].Tag == tag {
+ // This is the default case and is lighter than append(), so we handle it separately.
+ removed = (*ta)[0]
+ *ta = (*ta)[1:]
+ } else if index := ta.Index(tag); index != -1 {
+ removed = (*ta)[index]
+ *ta = append((*ta)[:index], (*ta)[index+1:]...)
+ }
+ return
+}
+
+// Index returns the first index where the given tag is, or -1 if it's not in the list.
+func (ta *TagArray) Index(tag string) int {
+ return ta.IndexAfter(tag, -1)
+}
+
+// IndexAfter returns the first index after the given index where the given tag is,
+// or -1 if the given tag is not on the list after the given index.
+func (ta *TagArray) IndexAfter(tag string, after int) int {
+ for i := after + 1; i < len(*ta); i++ {
+ if (*ta)[i].Tag == tag {
+ return i
+ }
+ }
+ return -1
+}
+
+// Get returns the first occurrence of the given tag, or nil if it's not in the list.
+func (ta *TagArray) Get(tag string) *TagWithMeta {
+ return ta.GetAfter(tag, -1)
+}
+
+// GetAfter returns the first occurrence of the given tag after the given index,
+// or nil if the given tag is not on the list after that index.
+func (ta *TagArray) GetAfter(tag string, after int) *TagWithMeta {
+ for i := after + 1; i < len(*ta); i++ {
+ if (*ta)[i].Tag == tag {
+ return (*ta)[i]
+ }
+ }
+ return nil
+}
+
+// Has returns whether or not the list has at least one of the given tags.
+func (ta *TagArray) Has(tags ...string) bool {
+ for _, tag := range tags {
+ if index := ta.Index(tag); index != -1 {
+ return true
+ }
+ }
+ return false
+}
diff --git a/ui/messages/parser.go b/ui/messages/parser.go
index d8069c6..80ce5d6 100644
--- a/ui/messages/parser.go
+++ b/ui/messages/parser.go
@@ -36,7 +36,7 @@ func ParseEvent(gmx ifc.Gomuks, room *rooms.Room, evt *gomatrix.Event) UIMessage
}
switch evt.Type {
case "m.room.message":
- return ParseMessage(gmx, evt)
+ return ParseMessage(gmx, room, evt)
case "m.room.member":
return ParseMembershipEvent(evt)
}
@@ -51,14 +51,14 @@ func unixToTime(unix int64) time.Time {
return timestamp
}
-func ParseMessage(gmx ifc.Gomuks, evt *gomatrix.Event) UIMessage {
+func ParseMessage(gmx ifc.Gomuks, room *rooms.Room, evt *gomatrix.Event) UIMessage {
msgtype, _ := evt.Content["msgtype"].(string)
ts := unixToTime(evt.Timestamp)
switch msgtype {
case "m.text", "m.notice", "m.emote":
format, hasFormat := evt.Content["format"].(string)
if hasFormat && format == "org.matrix.custom.html" {
- text := ParseHTMLMessage(evt)
+ text := ParseHTMLMessage(room, evt)
return NewExpandedTextMessage(evt.ID, evt.Sender, msgtype, text, ts)
} else {
text, _ := evt.Content["body"].(string)
diff --git a/ui/messages/tstring/string.go b/ui/messages/tstring/string.go
index d1ad446..a87d16a 100644
--- a/ui/messages/tstring/string.go
+++ b/ui/messages/tstring/string.go
@@ -25,6 +25,10 @@ import (
type TString []Cell
+func NewBlankTString() TString {
+ return make([]Cell, 0)
+}
+
func NewTString(str string) TString {
newStr := make([]Cell, len(str))
for i, char := range str {