From 64fa922ec013079f8f0c90fc9e93c56db3611d30 Mon Sep 17 00:00:00 2001 From: Tulir Asokan Date: Sun, 22 Apr 2018 21:25:06 +0300 Subject: Switch to dep --- vendor/github.com/gdamore/encoding/charmap.go | 192 ++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 vendor/github.com/gdamore/encoding/charmap.go (limited to 'vendor/github.com/gdamore/encoding/charmap.go') diff --git a/vendor/github.com/gdamore/encoding/charmap.go b/vendor/github.com/gdamore/encoding/charmap.go new file mode 100644 index 0000000..e64eaed --- /dev/null +++ b/vendor/github.com/gdamore/encoding/charmap.go @@ -0,0 +1,192 @@ +// Copyright 2015 Garrett D'Amore +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use file except in compliance with the License. +// You may obtain a copy of the license at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package encoding + +import ( + "sync" + "unicode/utf8" + + "golang.org/x/text/transform" + "golang.org/x/text/encoding" +) + +const ( + // RuneError is an alias for the UTF-8 replacement rune, '\uFFFD'. + RuneError = '\uFFFD' + + // RuneSelf is the rune below which UTF-8 and the Unicode values are + // identical. Its also the limit for ASCII. + RuneSelf = 0x80 + + // ASCIISub is the ASCII substitution character. + ASCIISub = '\x1a' +) + +// Charmap is a structure for setting up encodings for 8-bit character sets, +// for transforming between UTF8 and that other character set. It has some +// ideas borrowed from golang.org/x/text/encoding/charmap, but it uses a +// different implementation. This implementation uses maps, and supports +// user-defined maps. +// +// We do assume that a character map has a reasonable substitution character, +// and that valid encodings are stable (exactly a 1:1 map) and stateless +// (that is there is no shift character or anything like that.) Hence this +// approach will not work for many East Asian character sets. +// +// Measurement shows little or no measurable difference in the performance of +// the two approaches. The difference was down to a couple of nsec/op, and +// no consistent pattern as to which ran faster. With the conversion to +// UTF-8 the code takes about 25 nsec/op. The conversion in the reverse +// direction takes about 100 nsec/op. (The larger cost for conversion +// from UTF-8 is most likely due to the need to convert the UTF-8 byte stream +// to a rune before conversion. +// +type Charmap struct { + transform.NopResetter + bytes map[rune]byte + runes [256][]byte + once sync.Once + + // The map between bytes and runes. To indicate that a specific + // byte value is invalid for a charcter set, use the rune + // utf8.RuneError. Values that are absent from this map will + // be assumed to have the identity mapping -- that is the default + // is to assume ISO8859-1, where all 8-bit characters have the same + // numeric value as their Unicode runes. (Not to be confused with + // the UTF-8 values, which *will* be different for non-ASCII runes.) + // + // If no values less than RuneSelf are changed (or have non-identity + // mappings), then the character set is assumed to be an ASCII + // superset, and certain assumptions and optimizations become + // available for ASCII bytes. + Map map[byte]rune + + // The ReplacementChar is the byte value to use for substitution. + // It should normally be ASCIISub for ASCII encodings. This may be + // unset (left to zero) for mappings that are strictly ASCII supersets. + // In that case ASCIISub will be assumed instead. + ReplacementChar byte +} + +type cmapDecoder struct { + transform.NopResetter + runes [256][]byte +} + +type cmapEncoder struct { + transform.NopResetter + bytes map[rune]byte + replace byte +} + +// Init initializes internal values of a character map. This should +// be done early, to minimize the cost of allocation of transforms +// later. It is not strictly necessary however, as the allocation +// functions will arrange to call it if it has not already been done. +func (c *Charmap) Init() { + c.once.Do(c.initialize) +} + +func (c *Charmap) initialize() { + c.bytes = make(map[rune]byte) + ascii := true + + for i := 0; i < 256; i++ { + r, ok := c.Map[byte(i)] + if !ok { + r = rune(i) + } + if r < 128 && r != rune(i) { + ascii = false + } + if r != RuneError { + c.bytes[r] = byte(i) + } + utf := make([]byte, utf8.RuneLen(r)) + utf8.EncodeRune(utf, r) + c.runes[i] = utf + } + if ascii && c.ReplacementChar == '\x00' { + c.ReplacementChar = ASCIISub + } +} + +// NewDecoder returns a Decoder the converts from the 8-bit +// character set to UTF-8. Unknown mappings, if any, are mapped +// to '\uFFFD'. +func (c *Charmap) NewDecoder() *encoding.Decoder { + c.Init() + return &encoding.Decoder{Transformer: &cmapDecoder{runes: c.runes}} +} + +// NewEncoder returns a Transformer that converts from UTF8 to the +// 8-bit character set. Unknown mappings are mapped to 0x1A. +func (c *Charmap) NewEncoder() *encoding.Encoder { + c.Init() + return &encoding.Encoder{Transformer: + &cmapEncoder{bytes: c.bytes, replace: c.ReplacementChar}} +} + +func (d *cmapDecoder) Transform(dst, src []byte, atEOF bool) (int, int, error) { + var e error + var ndst, nsrc int + + for _, c := range src { + b := d.runes[c] + l := len(b) + + if ndst+l > len(dst) { + e = transform.ErrShortDst + break + } + for i := 0; i < l; i++ { + dst[ndst] = b[i] + ndst++ + } + nsrc++ + } + return ndst, nsrc, e +} + +func (d *cmapEncoder) Transform(dst, src []byte, atEOF bool) (int, int, error) { + var e error + var ndst, nsrc int + for nsrc < len(src) { + if ndst >= len(dst) { + e = transform.ErrShortDst + break + } + + r, sz := utf8.DecodeRune(src[nsrc:]) + if r == utf8.RuneError && sz == 1 { + // If its inconclusive due to insufficient data in + // in the source, report it + if !atEOF && !utf8.FullRune(src[nsrc:]) { + e = transform.ErrShortSrc + break + } + } + + if c, ok := d.bytes[r]; ok { + dst[ndst] = c + } else { + dst[ndst] = d.replace + } + nsrc += sz + ndst++ + } + + return ndst, nsrc, e +} -- cgit v1.2.3