path: root/vendor/gopkg.in/russross/blackfriday.v2/inline.go
diff options
author Tulir Asokan <tulir@maunium.net> 2018-11-14 00:00:35 +0200
committer Tulir Asokan <tulir@maunium.net> 2018-11-14 00:00:35 +0200
commit ba387764ca1590625d349e74eb8a8a64d1849b67 (patch)
tree bc8f02156a63eac99dcddaed38e45b7c312b40c0 /vendor/gopkg.in/russross/blackfriday.v2/inline.go
parent cfb2cc057c32330be0ca0a68cfbd245cb2b8e31b (diff)
Fix things
Diffstat (limited to 'vendor/gopkg.in/russross/blackfriday.v2/inline.go')
1 files changed, 16 insertions, 2 deletions
diff --git a/vendor/gopkg.in/russross/blackfriday.v2/inline.go b/vendor/gopkg.in/russross/blackfriday.v2/inline.go
index 3d63310..4ed2907 100644
--- a/vendor/gopkg.in/russross/blackfriday.v2/inline.go
+++ b/vendor/gopkg.in/russross/blackfriday.v2/inline.go
@@ -23,8 +23,22 @@ var (
urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
- // TODO: improve this regexp to catch all possible entities:
- htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
+ // https://www.w3.org/TR/html5/syntax.html#character-references
+ // highest unicode code point across 17 planes (17 * 2^16 code points):
+ // 0x10FFFF = 1,114,111d = 7 dec digits or 6 hex digits
+ // named entity references can be 2-31 characters with stuff like &lt;
+ // at one end and &CounterClockwiseContourIntegral; at the other. There
+ // are also sometimes numbers at the end, although this isn't inherent
+ // in the specification; there are never numbers anywhere else in
+ // current character references, though; see &frac34; and &blk12;, etc.
+ // https://www.w3.org/TR/html5/syntax.html#named-character-references
+ //
+ // entity := "&" (named group | number ref) ";"
+ // named group := [a-zA-Z]{2,31}[0-9]{0,2}
+ // number ref := "#" (dec ref | hex ref)
+ // dec ref := [0-9]{1,7}
+ // hex ref := ("x" | "X") [0-9a-fA-F]{1,6}
+ htmlEntityRe = regexp.MustCompile(`&([a-zA-Z]{2,31}[0-9]{0,2}|#([0-9]{1,7}|[xX][0-9a-fA-F]{1,6}));`)
// Functions to parse text within a block