From 331597b9f8a7942cbcb233a328301e4d5bf94fb0 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Fri, 11 Jan 2019 23:28:47 +0200 Subject: Switch to Go modules and make other changes --- vendor/golang.org/x/net/html/escape.go | 258 --------------------------------- 1 file changed, 258 deletions(-) delete mode 100644 vendor/golang.org/x/net/html/escape.go (limited to 'vendor/golang.org/x/net/html/escape.go') diff --git a/vendor/golang.org/x/net/html/escape.go b/vendor/golang.org/x/net/html/escape.go deleted file mode 100644 index d856139..0000000 --- a/vendor/golang.org/x/net/html/escape.go +++ /dev/null @@ -1,258 +0,0 @@ -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package html - -import ( - "bytes" - "strings" - "unicode/utf8" -) - -// These replacements permit compatibility with old numeric entities that -// assumed Windows-1252 encoding. -// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference -var replacementTable = [...]rune{ - '\u20AC', // First entry is what 0x80 should be replaced with. - '\u0081', - '\u201A', - '\u0192', - '\u201E', - '\u2026', - '\u2020', - '\u2021', - '\u02C6', - '\u2030', - '\u0160', - '\u2039', - '\u0152', - '\u008D', - '\u017D', - '\u008F', - '\u0090', - '\u2018', - '\u2019', - '\u201C', - '\u201D', - '\u2022', - '\u2013', - '\u2014', - '\u02DC', - '\u2122', - '\u0161', - '\u203A', - '\u0153', - '\u009D', - '\u017E', - '\u0178', // Last entry is 0x9F. - // 0x00->'\uFFFD' is handled programmatically. - // 0x0D->'\u000D' is a no-op. -} - -// unescapeEntity reads an entity like "<" from b[src:] and writes the -// corresponding "<" to b[dst:], returning the incremented dst and src cursors. -// Precondition: b[src] == '&' && dst <= src. -// attribute should be true if parsing an attribute value. -func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) { - // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference - - // i starts at 1 because we already know that s[0] == '&'. - i, s := 1, b[src:] - - if len(s) <= 1 { - b[dst] = b[src] - return dst + 1, src + 1 - } - - if s[i] == '#' { - if len(s) <= 3 { // We need to have at least "&#.". - b[dst] = b[src] - return dst + 1, src + 1 - } - i++ - c := s[i] - hex := false - if c == 'x' || c == 'X' { - hex = true - i++ - } - - x := '\x00' - for i < len(s) { - c = s[i] - i++ - if hex { - if '0' <= c && c <= '9' { - x = 16*x + rune(c) - '0' - continue - } else if 'a' <= c && c <= 'f' { - x = 16*x + rune(c) - 'a' + 10 - continue - } else if 'A' <= c && c <= 'F' { - x = 16*x + rune(c) - 'A' + 10 - continue - } - } else if '0' <= c && c <= '9' { - x = 10*x + rune(c) - '0' - continue - } - if c != ';' { - i-- - } - break - } - - if i <= 3 { // No characters matched. - b[dst] = b[src] - return dst + 1, src + 1 - } - - if 0x80 <= x && x <= 0x9F { - // Replace characters from Windows-1252 with UTF-8 equivalents. - x = replacementTable[x-0x80] - } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF { - // Replace invalid characters with the replacement character. - x = '\uFFFD' - } - - return dst + utf8.EncodeRune(b[dst:], x), src + i - } - - // Consume the maximum number of characters possible, with the - // consumed characters matching one of the named references. - - for i < len(s) { - c := s[i] - i++ - // Lower-cased characters are more common in entities, so we check for them first. - if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { - continue - } - if c != ';' { - i-- - } - break - } - - entityName := string(s[1:i]) - if entityName == "" { - // No-op. - } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' { - // No-op. - } else if x := entity[entityName]; x != 0 { - return dst + utf8.EncodeRune(b[dst:], x), src + i - } else if x := entity2[entityName]; x[0] != 0 { - dst1 := dst + utf8.EncodeRune(b[dst:], x[0]) - return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i - } else if !attribute { - maxLen := len(entityName) - 1 - if maxLen > longestEntityWithoutSemicolon { - maxLen = longestEntityWithoutSemicolon - } - for j := maxLen; j > 1; j-- { - if x := entity[entityName[:j]]; x != 0 { - return dst + utf8.EncodeRune(b[dst:], x), src + j + 1 - } - } - } - - dst1, src1 = dst+i, src+i - copy(b[dst:dst1], b[src:src1]) - return dst1, src1 -} - -// unescape unescapes b's entities in-place, so that "a<b" becomes "a': - esc = ">" - case '"': - // """ is shorter than """. - esc = """ - case '\r': - esc = " " - default: - panic("unrecognized escape character") - } - s = s[i+1:] - if _, err := w.WriteString(esc); err != nil { - return err - } - i = strings.IndexAny(s, escapedChars) - } - _, err := w.WriteString(s) - return err -} - -// EscapeString escapes special characters like "<" to become "<". It -// escapes only five such characters: <, >, &, ' and ". -// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't -// always true. -func EscapeString(s string) string { - if strings.IndexAny(s, escapedChars) == -1 { - return s - } - var buf bytes.Buffer - escape(&buf, s) - return buf.String() -} - -// UnescapeString unescapes entities like "<" to become "<". It unescapes a -// larger range of entities than EscapeString escapes. For example, "á" -// unescapes to "รก", as does "á" and "&xE1;". -// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't -// always true. -func UnescapeString(s string) string { - for _, c := range s { - if c == '&' { - return string(unescape([]byte(s), false)) - } - } - return s -} -- cgit v1.2.3