Checking in vendor folder for ease of using go get.
This commit is contained in:
parent
7a1251853b
commit
cdb4b5a1d0
3554 changed files with 1270116 additions and 0 deletions
58
vendor/golang.org/x/text/internal/triegen/compact.go
generated
vendored
Normal file
58
vendor/golang.org/x/text/internal/triegen/compact.go
generated
vendored
Normal file
|
@ -0,0 +1,58 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package triegen
|
||||
|
||||
// This file defines Compacter and its implementations.
|
||||
|
||||
import "io"
|
||||
|
||||
// A Compacter generates an alternative, more space-efficient way to store a
// trie value block. A trie value block holds all possible values for the last
// byte of a UTF-8 encoded rune. Excluding ASCII characters, a trie value block
// always has 64 values, as a UTF-8 encoding ends with a byte in [0x80, 0xC0).
type Compacter interface {
	// Size reports whether the Compacter is able to encode the given block
	// (ok) and, when it is, the size of the encoded block (sz). The unit of
	// sz is presumably bytes -- confirm against the callers.
	// len(v) is always 64.
	Size(v []uint64) (sz int, ok bool)

	// Store stores the block using the Compacter's compression method.
	// It returns a handle with which the block can be retrieved.
	// len(v) is always 64.
	Store(v []uint64) uint32

	// Print writes the data structures associated to the given store to w.
	Print(w io.Writer) error

	// Handler returns the name of a function that gets called during trie
	// lookup for blocks generated by the Compacter. The function should be of
	// the form func (n uint32, b byte) uint64, where n is the index returned by
	// the Compacter's Store method and b is the last byte of the UTF-8
	// encoding, where 0x80 <= b < 0xC0, for which to do the lookup in the
	// block.
	Handler() string
}
|
||||
|
||||
// simpleCompacter is the default Compacter used by builder. It implements a
|
||||
// normal trie block.
|
||||
type simpleCompacter builder
|
||||
|
||||
func (b *simpleCompacter) Size([]uint64) (sz int, ok bool) {
|
||||
return blockSize * b.ValueSize, true
|
||||
}
|
||||
|
||||
func (b *simpleCompacter) Store(v []uint64) uint32 {
|
||||
h := uint32(len(b.ValueBlocks) - blockOffset)
|
||||
b.ValueBlocks = append(b.ValueBlocks, v)
|
||||
return h
|
||||
}
|
||||
|
||||
func (b *simpleCompacter) Print(io.Writer) error {
|
||||
// Structures are printed in print.go.
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *simpleCompacter) Handler() string {
|
||||
panic("Handler should be special-cased for this Compacter")
|
||||
}
|
875
vendor/golang.org/x/text/internal/triegen/data_test.go
generated
vendored
Normal file
875
vendor/golang.org/x/text/internal/triegen/data_test.go
generated
vendored
Normal file
|
@ -0,0 +1,875 @@
|
|||
// This file is generated with "go test -tags generate". DO NOT EDIT!
|
||||
// +build !generate
|
||||
|
||||
package triegen_test
|
||||
|
||||
// lookup returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func (t *randTrie) lookup(s []byte) (v uint8, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return randValues[c0], 1
|
||||
case c0 < 0xC2:
|
||||
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return 0, 0
|
||||
}
|
||||
i := randIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c1), 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return 0, 0
|
||||
}
|
||||
i := randIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = randIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c2), 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return 0, 0
|
||||
}
|
||||
i := randIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = randIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = randIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return 0, 3 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c3), 4
|
||||
}
|
||||
// Illegal rune
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
|
||||
// s must start with a full and valid UTF-8 encoded rune.
|
||||
func (t *randTrie) lookupUnsafe(s []byte) uint8 {
|
||||
c0 := s[0]
|
||||
if c0 < 0x80 { // is ASCII
|
||||
return randValues[c0]
|
||||
}
|
||||
i := randIndex[c0]
|
||||
if c0 < 0xE0 { // 2-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[1])
|
||||
}
|
||||
i = randIndex[uint32(i)<<6+uint32(s[1])]
|
||||
if c0 < 0xF0 { // 3-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[2])
|
||||
}
|
||||
i = randIndex[uint32(i)<<6+uint32(s[2])]
|
||||
if c0 < 0xF8 { // 4-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[3])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// lookupString returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func (t *randTrie) lookupString(s string) (v uint8, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return randValues[c0], 1
|
||||
case c0 < 0xC2:
|
||||
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return 0, 0
|
||||
}
|
||||
i := randIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c1), 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return 0, 0
|
||||
}
|
||||
i := randIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = randIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c2), 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return 0, 0
|
||||
}
|
||||
i := randIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = randIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = randIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return 0, 3 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c3), 4
|
||||
}
|
||||
// Illegal rune
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
|
||||
// s must start with a full and valid UTF-8 encoded rune.
|
||||
func (t *randTrie) lookupStringUnsafe(s string) uint8 {
|
||||
c0 := s[0]
|
||||
if c0 < 0x80 { // is ASCII
|
||||
return randValues[c0]
|
||||
}
|
||||
i := randIndex[c0]
|
||||
if c0 < 0xE0 { // 2-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[1])
|
||||
}
|
||||
i = randIndex[uint32(i)<<6+uint32(s[1])]
|
||||
if c0 < 0xF0 { // 3-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[2])
|
||||
}
|
||||
i = randIndex[uint32(i)<<6+uint32(s[2])]
|
||||
if c0 < 0xF8 { // 4-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[3])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f.
type randTrie struct{}

// newRandTrie returns a pointer to a randTrie. The argument i is accepted
// but not used.
func newRandTrie(i int) *randTrie {
	return new(randTrie)
}
|
||||
|
||||
// lookupValue determines the type of block n and looks up the value for b.
|
||||
func (t *randTrie) lookupValue(n uint32, b byte) uint8 {
|
||||
switch {
|
||||
default:
|
||||
return uint8(randValues[n<<6+uint32(b)])
|
||||
}
|
||||
}
|
||||
|
||||
// randValues: 56 blocks, 3584 entries, 3584 bytes
|
||||
// The third block is the zero block.
|
||||
var randValues = [3584]uint8{
|
||||
// Block 0x0, offset 0x0
|
||||
// Block 0x1, offset 0x40
|
||||
// Block 0x2, offset 0x80
|
||||
// Block 0x3, offset 0xc0
|
||||
0xc9: 0x0001,
|
||||
// Block 0x4, offset 0x100
|
||||
0x100: 0x0001,
|
||||
// Block 0x5, offset 0x140
|
||||
0x155: 0x0001,
|
||||
// Block 0x6, offset 0x180
|
||||
0x196: 0x0001,
|
||||
// Block 0x7, offset 0x1c0
|
||||
0x1ef: 0x0001,
|
||||
// Block 0x8, offset 0x200
|
||||
0x206: 0x0001,
|
||||
// Block 0x9, offset 0x240
|
||||
0x258: 0x0001,
|
||||
// Block 0xa, offset 0x280
|
||||
0x288: 0x0001,
|
||||
// Block 0xb, offset 0x2c0
|
||||
0x2f2: 0x0001,
|
||||
// Block 0xc, offset 0x300
|
||||
0x304: 0x0001,
|
||||
// Block 0xd, offset 0x340
|
||||
0x34b: 0x0001,
|
||||
// Block 0xe, offset 0x380
|
||||
0x3ba: 0x0001,
|
||||
// Block 0xf, offset 0x3c0
|
||||
0x3f5: 0x0001,
|
||||
// Block 0x10, offset 0x400
|
||||
0x41d: 0x0001,
|
||||
// Block 0x11, offset 0x440
|
||||
0x442: 0x0001,
|
||||
// Block 0x12, offset 0x480
|
||||
0x4bb: 0x0001,
|
||||
// Block 0x13, offset 0x4c0
|
||||
0x4e9: 0x0001,
|
||||
// Block 0x14, offset 0x500
|
||||
0x53e: 0x0001,
|
||||
// Block 0x15, offset 0x540
|
||||
0x55f: 0x0001,
|
||||
// Block 0x16, offset 0x580
|
||||
0x5b7: 0x0001,
|
||||
// Block 0x17, offset 0x5c0
|
||||
0x5d9: 0x0001,
|
||||
// Block 0x18, offset 0x600
|
||||
0x60e: 0x0001,
|
||||
// Block 0x19, offset 0x640
|
||||
0x652: 0x0001,
|
||||
// Block 0x1a, offset 0x680
|
||||
0x68f: 0x0001,
|
||||
// Block 0x1b, offset 0x6c0
|
||||
0x6dc: 0x0001,
|
||||
// Block 0x1c, offset 0x700
|
||||
0x703: 0x0001,
|
||||
// Block 0x1d, offset 0x740
|
||||
0x741: 0x0001,
|
||||
// Block 0x1e, offset 0x780
|
||||
0x79b: 0x0001,
|
||||
// Block 0x1f, offset 0x7c0
|
||||
0x7f1: 0x0001,
|
||||
// Block 0x20, offset 0x800
|
||||
0x833: 0x0001,
|
||||
// Block 0x21, offset 0x840
|
||||
0x853: 0x0001,
|
||||
// Block 0x22, offset 0x880
|
||||
0x8a2: 0x0001,
|
||||
// Block 0x23, offset 0x8c0
|
||||
0x8f8: 0x0001,
|
||||
// Block 0x24, offset 0x900
|
||||
0x917: 0x0001,
|
||||
// Block 0x25, offset 0x940
|
||||
0x945: 0x0001,
|
||||
// Block 0x26, offset 0x980
|
||||
0x99e: 0x0001,
|
||||
// Block 0x27, offset 0x9c0
|
||||
0x9fd: 0x0001,
|
||||
// Block 0x28, offset 0xa00
|
||||
0xa0d: 0x0001,
|
||||
// Block 0x29, offset 0xa40
|
||||
0xa66: 0x0001,
|
||||
// Block 0x2a, offset 0xa80
|
||||
0xaab: 0x0001,
|
||||
// Block 0x2b, offset 0xac0
|
||||
0xaea: 0x0001,
|
||||
// Block 0x2c, offset 0xb00
|
||||
0xb2d: 0x0001,
|
||||
// Block 0x2d, offset 0xb40
|
||||
0xb54: 0x0001,
|
||||
// Block 0x2e, offset 0xb80
|
||||
0xb90: 0x0001,
|
||||
// Block 0x2f, offset 0xbc0
|
||||
0xbe5: 0x0001,
|
||||
// Block 0x30, offset 0xc00
|
||||
0xc28: 0x0001,
|
||||
// Block 0x31, offset 0xc40
|
||||
0xc7c: 0x0001,
|
||||
// Block 0x32, offset 0xc80
|
||||
0xcbf: 0x0001,
|
||||
// Block 0x33, offset 0xcc0
|
||||
0xcc7: 0x0001,
|
||||
// Block 0x34, offset 0xd00
|
||||
0xd34: 0x0001,
|
||||
// Block 0x35, offset 0xd40
|
||||
0xd61: 0x0001,
|
||||
// Block 0x36, offset 0xd80
|
||||
0xdb9: 0x0001,
|
||||
// Block 0x37, offset 0xdc0
|
||||
0xdda: 0x0001,
|
||||
}
|
||||
|
||||
// randIndex: 89 blocks, 5696 entries, 5696 bytes
|
||||
// Block 0 is the zero block.
|
||||
var randIndex = [5696]uint8{
|
||||
// Block 0x0, offset 0x0
|
||||
// Block 0x1, offset 0x40
|
||||
// Block 0x2, offset 0x80
|
||||
// Block 0x3, offset 0xc0
|
||||
0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04,
|
||||
0xea: 0x05, 0xeb: 0x06, 0xec: 0x07,
|
||||
0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56,
|
||||
// Block 0x4, offset 0x100
|
||||
0x107: 0x01,
|
||||
// Block 0x5, offset 0x140
|
||||
0x16c: 0x02,
|
||||
// Block 0x6, offset 0x180
|
||||
0x19c: 0x03,
|
||||
0x1ae: 0x04,
|
||||
// Block 0x7, offset 0x1c0
|
||||
0x1d8: 0x05,
|
||||
0x1f7: 0x06,
|
||||
// Block 0x8, offset 0x200
|
||||
0x20c: 0x07,
|
||||
// Block 0x9, offset 0x240
|
||||
0x24a: 0x08,
|
||||
// Block 0xa, offset 0x280
|
||||
0x2b6: 0x09,
|
||||
// Block 0xb, offset 0x2c0
|
||||
0x2d5: 0x0a,
|
||||
// Block 0xc, offset 0x300
|
||||
0x31a: 0x0b,
|
||||
// Block 0xd, offset 0x340
|
||||
0x373: 0x0c,
|
||||
// Block 0xe, offset 0x380
|
||||
0x38b: 0x0d,
|
||||
// Block 0xf, offset 0x3c0
|
||||
0x3f0: 0x0e,
|
||||
// Block 0x10, offset 0x400
|
||||
0x433: 0x0f,
|
||||
// Block 0x11, offset 0x440
|
||||
0x45d: 0x10,
|
||||
// Block 0x12, offset 0x480
|
||||
0x491: 0x08, 0x494: 0x09, 0x497: 0x0a,
|
||||
0x49b: 0x0b, 0x49c: 0x0c,
|
||||
0x4a1: 0x0d,
|
||||
0x4ad: 0x0e,
|
||||
0x4ba: 0x0f,
|
||||
// Block 0x13, offset 0x4c0
|
||||
0x4c1: 0x11,
|
||||
// Block 0x14, offset 0x500
|
||||
0x531: 0x12,
|
||||
// Block 0x15, offset 0x540
|
||||
0x546: 0x13,
|
||||
// Block 0x16, offset 0x580
|
||||
0x5ab: 0x14,
|
||||
// Block 0x17, offset 0x5c0
|
||||
0x5d4: 0x11,
|
||||
0x5fe: 0x11,
|
||||
// Block 0x18, offset 0x600
|
||||
0x618: 0x0a,
|
||||
// Block 0x19, offset 0x640
|
||||
0x65b: 0x15,
|
||||
// Block 0x1a, offset 0x680
|
||||
0x6a0: 0x16,
|
||||
// Block 0x1b, offset 0x6c0
|
||||
0x6d2: 0x17,
|
||||
0x6f6: 0x18,
|
||||
// Block 0x1c, offset 0x700
|
||||
0x711: 0x19,
|
||||
// Block 0x1d, offset 0x740
|
||||
0x768: 0x1a,
|
||||
// Block 0x1e, offset 0x780
|
||||
0x783: 0x1b,
|
||||
// Block 0x1f, offset 0x7c0
|
||||
0x7f9: 0x1c,
|
||||
// Block 0x20, offset 0x800
|
||||
0x831: 0x1d,
|
||||
// Block 0x21, offset 0x840
|
||||
0x85e: 0x1e,
|
||||
// Block 0x22, offset 0x880
|
||||
0x898: 0x1f,
|
||||
// Block 0x23, offset 0x8c0
|
||||
0x8c7: 0x18,
|
||||
0x8d5: 0x14,
|
||||
0x8f7: 0x20,
|
||||
0x8fe: 0x1f,
|
||||
// Block 0x24, offset 0x900
|
||||
0x905: 0x21,
|
||||
// Block 0x25, offset 0x940
|
||||
0x966: 0x03,
|
||||
// Block 0x26, offset 0x980
|
||||
0x981: 0x07, 0x983: 0x11,
|
||||
0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15,
|
||||
0x992: 0x16, 0x995: 0x17, 0x996: 0x18,
|
||||
0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c,
|
||||
0x9a3: 0x1d,
|
||||
0x9ad: 0x1e, 0x9af: 0x1f,
|
||||
0x9b0: 0x20, 0x9b1: 0x21,
|
||||
0x9b8: 0x22, 0x9bd: 0x23,
|
||||
// Block 0x27, offset 0x9c0
|
||||
0x9cd: 0x22,
|
||||
// Block 0x28, offset 0xa00
|
||||
0xa0c: 0x08,
|
||||
// Block 0x29, offset 0xa40
|
||||
0xa6f: 0x1c,
|
||||
// Block 0x2a, offset 0xa80
|
||||
0xa90: 0x1a,
|
||||
0xaaf: 0x23,
|
||||
// Block 0x2b, offset 0xac0
|
||||
0xae3: 0x19,
|
||||
0xae8: 0x24,
|
||||
0xafc: 0x25,
|
||||
// Block 0x2c, offset 0xb00
|
||||
0xb13: 0x26,
|
||||
// Block 0x2d, offset 0xb40
|
||||
0xb67: 0x1c,
|
||||
// Block 0x2e, offset 0xb80
|
||||
0xb8f: 0x0b,
|
||||
// Block 0x2f, offset 0xbc0
|
||||
0xbcb: 0x27,
|
||||
0xbe7: 0x26,
|
||||
// Block 0x30, offset 0xc00
|
||||
0xc34: 0x16,
|
||||
// Block 0x31, offset 0xc40
|
||||
0xc62: 0x03,
|
||||
// Block 0x32, offset 0xc80
|
||||
0xcbb: 0x12,
|
||||
// Block 0x33, offset 0xcc0
|
||||
0xcdf: 0x09,
|
||||
// Block 0x34, offset 0xd00
|
||||
0xd34: 0x0a,
|
||||
// Block 0x35, offset 0xd40
|
||||
0xd41: 0x1e,
|
||||
// Block 0x36, offset 0xd80
|
||||
0xd83: 0x28,
|
||||
// Block 0x37, offset 0xdc0
|
||||
0xdc0: 0x15,
|
||||
// Block 0x38, offset 0xe00
|
||||
0xe1a: 0x15,
|
||||
// Block 0x39, offset 0xe40
|
||||
0xe65: 0x29,
|
||||
// Block 0x3a, offset 0xe80
|
||||
0xe86: 0x1f,
|
||||
// Block 0x3b, offset 0xec0
|
||||
0xeec: 0x18,
|
||||
// Block 0x3c, offset 0xf00
|
||||
0xf28: 0x2a,
|
||||
// Block 0x3d, offset 0xf40
|
||||
0xf53: 0x08,
|
||||
// Block 0x3e, offset 0xf80
|
||||
0xfa2: 0x2b,
|
||||
0xfaa: 0x17,
|
||||
// Block 0x3f, offset 0xfc0
|
||||
0xfc0: 0x25, 0xfc2: 0x26,
|
||||
0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29,
|
||||
0xfd5: 0x2a,
|
||||
0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d,
|
||||
0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31,
|
||||
0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35,
|
||||
0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39,
|
||||
0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c,
|
||||
// Block 0x40, offset 0x1000
|
||||
0x102c: 0x2c,
|
||||
// Block 0x41, offset 0x1040
|
||||
0x1074: 0x2c,
|
||||
// Block 0x42, offset 0x1080
|
||||
0x108c: 0x08,
|
||||
0x10a0: 0x2d,
|
||||
// Block 0x43, offset 0x10c0
|
||||
0x10e8: 0x10,
|
||||
// Block 0x44, offset 0x1100
|
||||
0x110f: 0x13,
|
||||
// Block 0x45, offset 0x1140
|
||||
0x114b: 0x2e,
|
||||
// Block 0x46, offset 0x1180
|
||||
0x118b: 0x23,
|
||||
0x119d: 0x0c,
|
||||
// Block 0x47, offset 0x11c0
|
||||
0x11c3: 0x12,
|
||||
0x11f9: 0x0f,
|
||||
// Block 0x48, offset 0x1200
|
||||
0x121e: 0x1b,
|
||||
// Block 0x49, offset 0x1240
|
||||
0x1270: 0x2f,
|
||||
// Block 0x4a, offset 0x1280
|
||||
0x128a: 0x1b,
|
||||
0x12a7: 0x02,
|
||||
// Block 0x4b, offset 0x12c0
|
||||
0x12fb: 0x14,
|
||||
// Block 0x4c, offset 0x1300
|
||||
0x1333: 0x30,
|
||||
// Block 0x4d, offset 0x1340
|
||||
0x134d: 0x31,
|
||||
// Block 0x4e, offset 0x1380
|
||||
0x138e: 0x15,
|
||||
// Block 0x4f, offset 0x13c0
|
||||
0x13f4: 0x32,
|
||||
// Block 0x50, offset 0x1400
|
||||
0x141b: 0x33,
|
||||
// Block 0x51, offset 0x1440
|
||||
0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41,
|
||||
0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45,
|
||||
0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a,
|
||||
0x1472: 0x4b, 0x1473: 0x4c,
|
||||
0x1479: 0x4d, 0x147b: 0x4e,
|
||||
// Block 0x52, offset 0x1480
|
||||
0x1480: 0x34,
|
||||
0x1499: 0x11,
|
||||
0x14b6: 0x2c,
|
||||
// Block 0x53, offset 0x14c0
|
||||
0x14e4: 0x0d,
|
||||
// Block 0x54, offset 0x1500
|
||||
0x1527: 0x08,
|
||||
// Block 0x55, offset 0x1540
|
||||
0x1555: 0x2b,
|
||||
// Block 0x56, offset 0x1580
|
||||
0x15b2: 0x35,
|
||||
// Block 0x57, offset 0x15c0
|
||||
0x15f2: 0x1c, 0x15f4: 0x29,
|
||||
// Block 0x58, offset 0x1600
|
||||
0x1600: 0x50, 0x1603: 0x51,
|
||||
0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55,
|
||||
}
|
||||
|
||||
// lookup returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func (t *multiTrie) lookup(s []byte) (v uint64, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return t.ascii[c0], 1
|
||||
case c0 < 0xC2:
|
||||
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.utf8Start[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c1), 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.utf8Start[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = multiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c2), 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.utf8Start[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = multiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = multiIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return 0, 3 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c3), 4
|
||||
}
|
||||
// Illegal rune
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
|
||||
// s must start with a full and valid UTF-8 encoded rune.
|
||||
func (t *multiTrie) lookupUnsafe(s []byte) uint64 {
|
||||
c0 := s[0]
|
||||
if c0 < 0x80 { // is ASCII
|
||||
return t.ascii[c0]
|
||||
}
|
||||
i := t.utf8Start[c0]
|
||||
if c0 < 0xE0 { // 2-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[1])
|
||||
}
|
||||
i = multiIndex[uint32(i)<<6+uint32(s[1])]
|
||||
if c0 < 0xF0 { // 3-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[2])
|
||||
}
|
||||
i = multiIndex[uint32(i)<<6+uint32(s[2])]
|
||||
if c0 < 0xF8 { // 4-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[3])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// lookupString returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func (t *multiTrie) lookupString(s string) (v uint64, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return t.ascii[c0], 1
|
||||
case c0 < 0xC2:
|
||||
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.utf8Start[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c1), 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.utf8Start[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = multiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c2), 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.utf8Start[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = multiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = multiIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return 0, 3 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c3), 4
|
||||
}
|
||||
// Illegal rune
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
|
||||
// s must start with a full and valid UTF-8 encoded rune.
|
||||
func (t *multiTrie) lookupStringUnsafe(s string) uint64 {
|
||||
c0 := s[0]
|
||||
if c0 < 0x80 { // is ASCII
|
||||
return t.ascii[c0]
|
||||
}
|
||||
i := t.utf8Start[c0]
|
||||
if c0 < 0xE0 { // 2-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[1])
|
||||
}
|
||||
i = multiIndex[uint32(i)<<6+uint32(s[1])]
|
||||
if c0 < 0xF0 { // 3-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[2])
|
||||
}
|
||||
i = multiIndex[uint32(i)<<6+uint32(s[2])]
|
||||
if c0 < 0xF8 { // 4-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[3])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e.
//
// A multiTrie is a view onto one trie stored in the shared multiValues and
// multiIndex tables: newMultiTrie sets ascii to a tail slice of multiValues
// and utf8Start to a tail slice of multiIndex.
type multiTrie struct {
	ascii     []uint64 // index for ASCII bytes
	utf8Start []uint8  // index for UTF-8 bytes >= 0xC0
}
|
||||
|
||||
func newMultiTrie(i int) *multiTrie {
|
||||
h := multiTrieHandles[i]
|
||||
return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]}
|
||||
}
|
||||
|
||||
// multiTrieHandle records where one trie starts inside the shared tables.
// newMultiTrie shifts each field left by 6 to obtain a starting offset:
// ascii is applied to multiValues and multi to multiIndex.
type multiTrieHandle struct {
	ascii, multi uint8
}
|
||||
|
||||
// multiTrieHandles: 5 handles, 10 bytes
|
||||
var multiTrieHandles = [5]multiTrieHandle{
|
||||
{0, 0}, // 8c1e77823143d35c: all
|
||||
{0, 23}, // 8fb58ff8243b45b0: ASCII only
|
||||
{0, 23}, // 8fb58ff8243b45b0: ASCII only 2
|
||||
{0, 24}, // 2ccc43994f11046f: BMP only
|
||||
{30, 25}, // ce448591bdcb4733: No BMP
|
||||
}
|
||||
|
||||
// lookupValue determines the type of block n and looks up the value for b.
|
||||
func (t *multiTrie) lookupValue(n uint32, b byte) uint64 {
|
||||
switch {
|
||||
default:
|
||||
return uint64(multiValues[n<<6+uint32(b)])
|
||||
}
|
||||
}
|
||||
|
||||
// multiValues: 32 blocks, 2048 entries, 16384 bytes
|
||||
// The third block is the zero block.
|
||||
var multiValues = [2048]uint64{
|
||||
// Block 0x0, offset 0x0
|
||||
0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10,
|
||||
0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
|
||||
0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884,
|
||||
0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
|
||||
0x3f: 0x4fd3bcfa72bce8b0,
|
||||
// Block 0x1, offset 0x40
|
||||
0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
|
||||
0x7f: 0x782caa2d25a418a9,
|
||||
// Block 0x2, offset 0x80
|
||||
// Block 0x3, offset 0xc0
|
||||
0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4,
|
||||
// Block 0x4, offset 0x100
|
||||
0x13f: 0x56f8c4c82f5962dc,
|
||||
// Block 0x5, offset 0x140
|
||||
0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d,
|
||||
// Block 0x6, offset 0x180
|
||||
0x1bf: 0x7bf4d0ebf302a088,
|
||||
// Block 0x7, offset 0x1c0
|
||||
0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7,
|
||||
// Block 0x8, offset 0x200
|
||||
0x23f: 0x5de81c1dff6bf29d,
|
||||
// Block 0x9, offset 0x240
|
||||
0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3,
|
||||
// Block 0xa, offset 0x280
|
||||
0x2bf: 0x6a28f01979cbf059,
|
||||
// Block 0xb, offset 0x2c0
|
||||
0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c,
|
||||
// Block 0xc, offset 0x300
|
||||
0x33f: 0x5a10ffa9e29184fb,
|
||||
// Block 0xd, offset 0x340
|
||||
0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79,
|
||||
// Block 0xe, offset 0x380
|
||||
0x3bf: 0x74071288fff39c76,
|
||||
// Block 0xf, offset 0x3c0
|
||||
0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849,
|
||||
// Block 0x10, offset 0x400
|
||||
0x43f: 0x5676a62fd49c6bec,
|
||||
// Block 0x11, offset 0x440
|
||||
0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f,
|
||||
// Block 0x12, offset 0x480
|
||||
0x4bf: 0x69d6f0fe711fafc9,
|
||||
// Block 0x13, offset 0x4c0
|
||||
0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02,
|
||||
// Block 0x14, offset 0x500
|
||||
0x53f: 0xe03b31814c95f8b,
|
||||
// Block 0x15, offset 0x540
|
||||
0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc,
|
||||
// Block 0x16, offset 0x580
|
||||
0x5bf: 0x3c02ea92fb168559,
|
||||
// Block 0x17, offset 0x5c0
|
||||
0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645,
|
||||
// Block 0x18, offset 0x600
|
||||
0x63f: 0x3bb2ed2a72748f4b,
|
||||
// Block 0x19, offset 0x640
|
||||
0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6,
|
||||
// Block 0x1a, offset 0x680
|
||||
0x6bf: 0x352711cfb7236418,
|
||||
// Block 0x1b, offset 0x6c0
|
||||
0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1,
|
||||
// Block 0x1c, offset 0x700
|
||||
0x73f: 0x7191a77b28d23110,
|
||||
// Block 0x1d, offset 0x740
|
||||
0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de,
|
||||
// Block 0x1e, offset 0x780
|
||||
// Block 0x1f, offset 0x7c0
|
||||
}
|
||||
|
||||
// multiIndex: 29 blocks, 1856 entries, 1856 bytes
|
||||
// Block 0 is the zero block.
|
||||
var multiIndex = [1856]uint8{
|
||||
// Block 0x0, offset 0x0
|
||||
// Block 0x1, offset 0x40
|
||||
// Block 0x2, offset 0x80
|
||||
// Block 0x3, offset 0xc0
|
||||
0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04,
|
||||
0xc8: 0x05, 0xcf: 0x06,
|
||||
0xd0: 0x07,
|
||||
0xdf: 0x08,
|
||||
0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
|
||||
0xe8: 0x08, 0xef: 0x09,
|
||||
0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
|
||||
// Block 0x4, offset 0x100
|
||||
0x120: 0x09,
|
||||
0x13f: 0x0a,
|
||||
// Block 0x5, offset 0x140
|
||||
0x140: 0x0b,
|
||||
0x17f: 0x0c,
|
||||
// Block 0x6, offset 0x180
|
||||
0x180: 0x0d,
|
||||
// Block 0x7, offset 0x1c0
|
||||
0x1ff: 0x0e,
|
||||
// Block 0x8, offset 0x200
|
||||
0x200: 0x0f,
|
||||
// Block 0x9, offset 0x240
|
||||
0x27f: 0x10,
|
||||
// Block 0xa, offset 0x280
|
||||
0x280: 0x11,
|
||||
// Block 0xb, offset 0x2c0
|
||||
0x2ff: 0x12,
|
||||
// Block 0xc, offset 0x300
|
||||
0x300: 0x13,
|
||||
// Block 0xd, offset 0x340
|
||||
0x37f: 0x14,
|
||||
// Block 0xe, offset 0x380
|
||||
0x380: 0x15,
|
||||
// Block 0xf, offset 0x3c0
|
||||
0x3ff: 0x16,
|
||||
// Block 0x10, offset 0x400
|
||||
0x410: 0x0a,
|
||||
0x41f: 0x0b,
|
||||
0x420: 0x0c,
|
||||
0x43f: 0x0d,
|
||||
// Block 0x11, offset 0x440
|
||||
0x440: 0x17,
|
||||
// Block 0x12, offset 0x480
|
||||
0x4bf: 0x18,
|
||||
// Block 0x13, offset 0x4c0
|
||||
0x4c0: 0x0f,
|
||||
0x4ff: 0x10,
|
||||
// Block 0x14, offset 0x500
|
||||
0x500: 0x19,
|
||||
// Block 0x15, offset 0x540
|
||||
0x540: 0x12,
|
||||
// Block 0x16, offset 0x580
|
||||
0x5bf: 0x1a,
|
||||
// Block 0x17, offset 0x5c0
|
||||
0x5ff: 0x14,
|
||||
// Block 0x18, offset 0x600
|
||||
0x600: 0x1b,
|
||||
// Block 0x19, offset 0x640
|
||||
0x640: 0x16,
|
||||
// Block 0x1a, offset 0x680
|
||||
// Block 0x1b, offset 0x6c0
|
||||
0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04,
|
||||
0x6c8: 0x05, 0x6cf: 0x06,
|
||||
0x6d0: 0x07,
|
||||
0x6df: 0x08,
|
||||
0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
|
||||
0x6e8: 0x08, 0x6ef: 0x09,
|
||||
// Block 0x1c, offset 0x700
|
||||
0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
|
||||
}
|
71
vendor/golang.org/x/text/internal/triegen/example_compact_test.go
generated
vendored
Normal file
71
vendor/golang.org/x/text/internal/triegen/example_compact_test.go
generated
vendored
Normal file
|
@ -0,0 +1,71 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package triegen_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
||||
"golang.org/x/text/internal/triegen"
|
||||
)
|
||||
|
||||
func ExampleCompacter() {
|
||||
t := triegen.NewTrie("root")
|
||||
for r := rune(0); r < 10000; r += 64 {
|
||||
t.Insert(r, 0x9015BADA55^uint64(r))
|
||||
}
|
||||
sz, _ := t.Gen(ioutil.Discard)
|
||||
|
||||
fmt.Printf("Size normal: %5d\n", sz)
|
||||
|
||||
var c myCompacter
|
||||
sz, _ = t.Gen(ioutil.Discard, triegen.Compact(&c))
|
||||
|
||||
fmt.Printf("Size compacted: %5d\n", sz)
|
||||
|
||||
// Output:
|
||||
// Size normal: 81344
|
||||
// Size compacted: 3224
|
||||
}
|
||||
|
||||
// A myCompacter accepts a block if only the first value is given. It keeps
// the first value of every block it has stored, in the order of storing.
type myCompacter []uint64

// Size reports ok only when every value after the first one is zero. An
// accepted block costs 8, the size of the single uint64 that is kept.
// values must not be empty.
func (c *myCompacter) Size(values []uint64) (sz int, ok bool) {
	for _, v := range values[1:] {
		if v != 0 {
			return 0, false
		}
	}
	return 8, true // the size of a uint64
}

// Store appends the first value of v to c and returns the index it was
// stored at, which serves as the handle for the block.
func (c *myCompacter) Store(v []uint64) uint32 {
	x := uint32(len(*c))
	*c = append(*c, v[0])
	return x
}

// Print writes the stored values to w as the Go declaration of a []uint64
// variable named firstValue. It stops at, and returns, the first write error.
func (c *myCompacter) Print(w io.Writer) error {
	if _, err := fmt.Fprintln(w, "var firstValue = []uint64{"); err != nil {
		return err
	}
	for _, v := range *c {
		if _, err := fmt.Fprintf(w, "\t%#x,\n", v); err != nil {
			return err
		}
	}
	_, err := fmt.Fprintln(w, "}")
	return err
}

// Handler returns the name of the lookup function for blocks stored by this
// Compacter. getFirstValue is to be included along with the generated code:
//
//	func getFirstValue(n uint32, b byte) uint64 {
//		if b == 0x80 { // the first continuation byte
//			return firstValue[n]
//		}
//		return 0
//	}
func (c *myCompacter) Handler() string {
	return "getFirstValue"
}
|
148
vendor/golang.org/x/text/internal/triegen/example_test.go
generated
vendored
Normal file
148
vendor/golang.org/x/text/internal/triegen/example_test.go
generated
vendored
Normal file
|
@ -0,0 +1,148 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package triegen_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/triegen"
|
||||
)
|
||||
|
||||
const seed = 0x12345
|
||||
|
||||
var genWriter = ioutil.Discard
|
||||
|
||||
func randomRunes() map[rune]uint8 {
|
||||
rnd := rand.New(rand.NewSource(seed))
|
||||
m := map[rune]uint8{}
|
||||
for len(m) < 100 {
|
||||
// Only set our random rune if it is a valid Unicode code point.
|
||||
if r := rune(rnd.Int31n(unicode.MaxRune + 1)); []rune(string(r))[0] == r {
|
||||
m[r] = 1
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// Example_build shows how to build a simple trie. It assigns the value 1 to
|
||||
// 100 random runes generated by randomRunes.
|
||||
func Example_build() {
|
||||
t := triegen.NewTrie("rand")
|
||||
|
||||
for r, _ := range randomRunes() {
|
||||
t.Insert(r, 1)
|
||||
}
|
||||
sz, err := t.Gen(genWriter)
|
||||
|
||||
fmt.Printf("Trie size: %d bytes\n", sz)
|
||||
fmt.Printf("Error: %v\n", err)
|
||||
|
||||
// Output:
|
||||
// Trie size: 9280 bytes
|
||||
// Error: <nil>
|
||||
}
|
||||
|
||||
// Example_lookup demonstrates how to use the trie generated by Example_build.
|
||||
func Example_lookup() {
|
||||
trie := newRandTrie(0)
|
||||
|
||||
// The same set of runes used by Example_build.
|
||||
runes := randomRunes()
|
||||
|
||||
// Verify the right value is returned for all runes.
|
||||
for r := rune(0); r <= unicode.MaxRune; r++ {
|
||||
// Note that the return type of lookup is uint8.
|
||||
if v, _ := trie.lookupString(string(r)); v != runes[r] {
|
||||
fmt.Println("FAILURE")
|
||||
return
|
||||
}
|
||||
}
|
||||
fmt.Println("SUCCESS")
|
||||
|
||||
// Output:
|
||||
// SUCCESS
|
||||
}
|
||||
|
||||
// runeValues generates some random values for a set of interesting runes.
|
||||
func runeValues() map[rune]uint64 {
|
||||
rnd := rand.New(rand.NewSource(seed))
|
||||
m := map[rune]uint64{}
|
||||
for p := 4; p <= unicode.MaxRune; p <<= 1 {
|
||||
for d := -1; d <= 1; d++ {
|
||||
m[rune(p+d)] = uint64(rnd.Int63())
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// ExampleGen_build demonstrates the creation of multiple tries sharing common
|
||||
// blocks. ExampleGen_lookup demonstrates how to use the generated tries.
|
||||
func ExampleGen_build() {
|
||||
var tries []*triegen.Trie
|
||||
|
||||
rv := runeValues()
|
||||
for _, c := range []struct {
|
||||
include func(rune) bool
|
||||
name string
|
||||
}{
|
||||
{func(r rune) bool { return true }, "all"},
|
||||
{func(r rune) bool { return r < 0x80 }, "ASCII only"},
|
||||
{func(r rune) bool { return r < 0x80 }, "ASCII only 2"},
|
||||
{func(r rune) bool { return r <= 0xFFFF }, "BMP only"},
|
||||
{func(r rune) bool { return r > 0xFFFF }, "No BMP"},
|
||||
} {
|
||||
t := triegen.NewTrie(c.name)
|
||||
tries = append(tries, t)
|
||||
|
||||
for r, v := range rv {
|
||||
if c.include(r) {
|
||||
t.Insert(r, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
sz, err := triegen.Gen(genWriter, "multi", tries)
|
||||
|
||||
fmt.Printf("Trie size: %d bytes\n", sz)
|
||||
fmt.Printf("Error: %v\n", err)
|
||||
|
||||
// Output:
|
||||
// Trie size: 18250 bytes
|
||||
// Error: <nil>
|
||||
}
|
||||
|
||||
// ExampleGen_lookup shows how to look up values in the trie generated by
|
||||
// ExampleGen_build.
|
||||
func ExampleGen_lookup() {
|
||||
rv := runeValues()
|
||||
for i, include := range []func(rune) bool{
|
||||
func(r rune) bool { return true }, // all
|
||||
func(r rune) bool { return r < 0x80 }, // ASCII only
|
||||
func(r rune) bool { return r < 0x80 }, // ASCII only 2
|
||||
func(r rune) bool { return r <= 0xFFFF }, // BMP only
|
||||
func(r rune) bool { return r > 0xFFFF }, // No BMP
|
||||
} {
|
||||
t := newMultiTrie(i)
|
||||
|
||||
for r := rune(0); r <= unicode.MaxRune; r++ {
|
||||
x := uint64(0)
|
||||
if include(r) {
|
||||
x = rv[r]
|
||||
}
|
||||
// As we convert from a valid rune, we know it is safe to use
|
||||
// lookupStringUnsafe.
|
||||
if v := t.lookupStringUnsafe(string(r)); x != v {
|
||||
fmt.Println("FAILURE")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("SUCCESS")
|
||||
|
||||
// Output:
|
||||
// SUCCESS
|
||||
}
|
68
vendor/golang.org/x/text/internal/triegen/gen_test.go
generated
vendored
Normal file
68
vendor/golang.org/x/text/internal/triegen/gen_test.go
generated
vendored
Normal file
|
@ -0,0 +1,68 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build generate
|
||||
|
||||
package triegen_test
|
||||
|
||||
// The code in this file generates, captures and writes the tries generated in
|
||||
// the examples to data_test.go. To invoke it, run:
|
||||
// go test -tags=generate
|
||||
//
|
||||
// Making the generation code a "test" allows us to link in the necessary test
|
||||
// code.
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
)
|
||||
|
||||
func init() {
|
||||
const tmpfile = "tmpout"
|
||||
const dstfile = "data_test.go"
|
||||
|
||||
f, err := os.Create(tmpfile)
|
||||
if err != nil {
|
||||
log.Fatalf("Could not create output file: %v", err)
|
||||
}
|
||||
defer os.Remove(tmpfile)
|
||||
defer f.Close()
|
||||
|
||||
// We exit before this function returns, regardless of success or failure,
|
||||
// so there's no need to save (and later restore) the existing genWriter
|
||||
// value.
|
||||
genWriter = f
|
||||
|
||||
f.Write([]byte(header))
|
||||
|
||||
Example_build()
|
||||
ExampleGen_build()
|
||||
|
||||
if err := exec.Command("gofmt", "-w", tmpfile).Run(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
os.Remove(dstfile)
|
||||
os.Rename(tmpfile, dstfile)
|
||||
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
const header = `// This file is generated with "go test -tags generate". DO NOT EDIT!
|
||||
// +build !generate
|
||||
|
||||
package triegen_test
|
||||
`
|
||||
|
||||
// Stubs for generated tries. These are needed as we exclude data_test.go if
|
||||
// the generate flag is set. This will clearly make the tests fail, but that
|
||||
// is okay. It allows us to bootstrap.
|
||||
|
||||
// trie is an empty stand-in for the generated trie types.
type trie struct{}

// lookupString always returns the value 0 and a width of 1.
func (t *trie) lookupString(string) (uint8, int) {
	const (
		value = 0
		width = 1
	)
	return value, width
}

// lookupStringUnsafe always returns 0.
func (t *trie) lookupStringUnsafe(string) uint64 {
	return 0
}

// newRandTrie returns an empty stub trie; i is ignored.
func newRandTrie(i int) *trie {
	return new(trie)
}

// newMultiTrie returns an empty stub trie; i is ignored.
func newMultiTrie(i int) *trie {
	return new(trie)
}
|
251
vendor/golang.org/x/text/internal/triegen/print.go
generated
vendored
Normal file
251
vendor/golang.org/x/text/internal/triegen/print.go
generated
vendored
Normal file
|
@ -0,0 +1,251 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package triegen
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"text/template"
|
||||
)
|
||||
|
||||
// print writes all the data structures as well as the code necessary to use the
|
||||
// trie to w.
|
||||
func (b *builder) print(w io.Writer) error {
|
||||
b.Stats.NValueEntries = len(b.ValueBlocks) * blockSize
|
||||
b.Stats.NValueBytes = len(b.ValueBlocks) * blockSize * b.ValueSize
|
||||
b.Stats.NIndexEntries = len(b.IndexBlocks) * blockSize
|
||||
b.Stats.NIndexBytes = len(b.IndexBlocks) * blockSize * b.IndexSize
|
||||
b.Stats.NHandleBytes = len(b.Trie) * 2 * b.IndexSize
|
||||
|
||||
// If we only have one root trie, all starter blocks are at position 0 and
|
||||
// we can access the arrays directly.
|
||||
if len(b.Trie) == 1 {
|
||||
// At this point we cannot refer to the generated tables directly.
|
||||
b.ASCIIBlock = b.Name + "Values"
|
||||
b.StarterBlock = b.Name + "Index"
|
||||
} else {
|
||||
// Otherwise we need to have explicit starter indexes in the trie
|
||||
// structure.
|
||||
b.ASCIIBlock = "t.ascii"
|
||||
b.StarterBlock = "t.utf8Start"
|
||||
}
|
||||
|
||||
b.SourceType = "[]byte"
|
||||
if err := lookupGen.Execute(w, b); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
b.SourceType = "string"
|
||||
if err := lookupGen.Execute(w, b); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := trieGen.Execute(w, b); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, c := range b.Compactions {
|
||||
if err := c.c.Print(w); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func printValues(n int, values []uint64) string {
|
||||
w := &bytes.Buffer{}
|
||||
boff := n * blockSize
|
||||
fmt.Fprintf(w, "\t// Block %#x, offset %#x", n, boff)
|
||||
var newline bool
|
||||
for i, v := range values {
|
||||
if i%6 == 0 {
|
||||
newline = true
|
||||
}
|
||||
if v != 0 {
|
||||
if newline {
|
||||
fmt.Fprintf(w, "\n")
|
||||
newline = false
|
||||
}
|
||||
fmt.Fprintf(w, "\t%#02x:%#04x, ", boff+i, v)
|
||||
}
|
||||
}
|
||||
return w.String()
|
||||
}
|
||||
|
||||
func printIndex(b *builder, nr int, n *node) string {
|
||||
w := &bytes.Buffer{}
|
||||
boff := nr * blockSize
|
||||
fmt.Fprintf(w, "\t// Block %#x, offset %#x", nr, boff)
|
||||
var newline bool
|
||||
for i, c := range n.children {
|
||||
if i%8 == 0 {
|
||||
newline = true
|
||||
}
|
||||
if c != nil {
|
||||
v := b.Compactions[c.index.compaction].Offset + uint32(c.index.index)
|
||||
if v != 0 {
|
||||
if newline {
|
||||
fmt.Fprintf(w, "\n")
|
||||
newline = false
|
||||
}
|
||||
fmt.Fprintf(w, "\t%#02x:%#02x, ", boff+i, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
return w.String()
|
||||
}
|
||||
|
||||
var (
|
||||
trieGen = template.Must(template.New("trie").Funcs(template.FuncMap{
|
||||
"printValues": printValues,
|
||||
"printIndex": printIndex,
|
||||
"title": strings.Title,
|
||||
"dec": func(x int) int { return x - 1 },
|
||||
"psize": func(n int) string {
|
||||
return fmt.Sprintf("%d bytes (%.2f KiB)", n, float64(n)/1024)
|
||||
},
|
||||
}).Parse(trieTemplate))
|
||||
lookupGen = template.Must(template.New("lookup").Parse(lookupTemplate))
|
||||
)
|
||||
|
||||
// TODO: consider the return type of lookup. It could be uint64, even if the
|
||||
// internal value type is smaller. We will have to verify this with the
|
||||
// performance of unicode/norm, which is very sensitive to such changes.
|
||||
const trieTemplate = `{{$b := .}}{{$multi := gt (len .Trie) 1}}
|
||||
// {{.Name}}Trie. Total size: {{psize .Size}}. Checksum: {{printf "%08x" .Checksum}}.
|
||||
type {{.Name}}Trie struct { {{if $multi}}
|
||||
ascii []{{.ValueType}} // index for ASCII bytes
|
||||
utf8Start []{{.IndexType}} // index for UTF-8 bytes >= 0xC0
|
||||
{{end}}}
|
||||
|
||||
func new{{title .Name}}Trie(i int) *{{.Name}}Trie { {{if $multi}}
|
||||
h := {{.Name}}TrieHandles[i]
|
||||
return &{{.Name}}Trie{ {{.Name}}Values[uint32(h.ascii)<<6:], {{.Name}}Index[uint32(h.multi)<<6:] }
|
||||
}
|
||||
|
||||
type {{.Name}}TrieHandle struct {
|
||||
ascii, multi {{.IndexType}}
|
||||
}
|
||||
|
||||
// {{.Name}}TrieHandles: {{len .Trie}} handles, {{.Stats.NHandleBytes}} bytes
|
||||
var {{.Name}}TrieHandles = [{{len .Trie}}]{{.Name}}TrieHandle{
|
||||
{{range .Trie}} { {{.ASCIIIndex}}, {{.StarterIndex}} }, // {{printf "%08x" .Checksum}}: {{.Name}}
|
||||
{{end}}}{{else}}
|
||||
return &{{.Name}}Trie{}
|
||||
}
|
||||
{{end}}
|
||||
// lookupValue determines the type of block n and looks up the value for b.
|
||||
func (t *{{.Name}}Trie) lookupValue(n uint32, b byte) {{.ValueType}}{{$last := dec (len .Compactions)}} {
|
||||
switch { {{range $i, $c := .Compactions}}
|
||||
{{if eq $i $last}}default{{else}}case n < {{$c.Cutoff}}{{end}}:{{if ne $i 0}}
|
||||
n -= {{$c.Offset}}{{end}}
|
||||
return {{print $b.ValueType}}({{$c.Handler}}){{end}}
|
||||
}
|
||||
}
|
||||
|
||||
// {{.Name}}Values: {{len .ValueBlocks}} blocks, {{.Stats.NValueEntries}} entries, {{.Stats.NValueBytes}} bytes
|
||||
// The third block is the zero block.
|
||||
var {{.Name}}Values = [{{.Stats.NValueEntries}}]{{.ValueType}} {
|
||||
{{range $i, $v := .ValueBlocks}}{{printValues $i $v}}
|
||||
{{end}}}
|
||||
|
||||
// {{.Name}}Index: {{len .IndexBlocks}} blocks, {{.Stats.NIndexEntries}} entries, {{.Stats.NIndexBytes}} bytes
|
||||
// Block 0 is the zero block.
|
||||
var {{.Name}}Index = [{{.Stats.NIndexEntries}}]{{.IndexType}} {
|
||||
{{range $i, $v := .IndexBlocks}}{{printIndex $b $i $v}}
|
||||
{{end}}}
|
||||
`
|
||||
|
||||
// TODO: consider allowing zero-length strings after evaluating performance with
|
||||
// unicode/norm.
|
||||
const lookupTemplate = `
|
||||
// lookup{{if eq .SourceType "string"}}String{{end}} returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}(s {{.SourceType}}) (v {{.ValueType}}, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return {{.ASCIIBlock}}[c0], 1
|
||||
case c0 < 0xC2:
|
||||
return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return 0, 0
|
||||
}
|
||||
i := {{.StarterBlock}}[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c1), 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return 0, 0
|
||||
}
|
||||
i := {{.StarterBlock}}[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = {{.Name}}Index[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c2), 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return 0, 0
|
||||
}
|
||||
i := {{.StarterBlock}}[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return 0, 1 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = {{.Name}}Index[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return 0, 2 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = {{.Name}}Index[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return 0, 3 // Illegal UTF-8: not a continuation byte.
|
||||
}
|
||||
return t.lookupValue(uint32(i), c3), 4
|
||||
}
|
||||
// Illegal rune
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// lookup{{if eq .SourceType "string"}}String{{end}}Unsafe returns the trie value for the first UTF-8 encoding in s.
|
||||
// s must start with a full and valid UTF-8 encoded rune.
|
||||
func (t *{{.Name}}Trie) lookup{{if eq .SourceType "string"}}String{{end}}Unsafe(s {{.SourceType}}) {{.ValueType}} {
|
||||
c0 := s[0]
|
||||
if c0 < 0x80 { // is ASCII
|
||||
return {{.ASCIIBlock}}[c0]
|
||||
}
|
||||
i := {{.StarterBlock}}[c0]
|
||||
if c0 < 0xE0 { // 2-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[1])
|
||||
}
|
||||
i = {{.Name}}Index[uint32(i)<<6+uint32(s[1])]
|
||||
if c0 < 0xF0 { // 3-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[2])
|
||||
}
|
||||
i = {{.Name}}Index[uint32(i)<<6+uint32(s[2])]
|
||||
if c0 < 0xF8 { // 4-byte UTF-8
|
||||
return t.lookupValue(uint32(i), s[3])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
`
|
494
vendor/golang.org/x/text/internal/triegen/triegen.go
generated
vendored
Normal file
494
vendor/golang.org/x/text/internal/triegen/triegen.go
generated
vendored
Normal file
|
@ -0,0 +1,494 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package triegen implements a code generator for a trie for associating
|
||||
// unsigned integer values with UTF-8 encoded runes.
|
||||
//
|
||||
// Many of the go.text packages use tries for storing per-rune information. A
|
||||
// trie is especially useful if many of the runes have the same value. If this
|
||||
// is the case, many blocks can be expected to be shared allowing for
|
||||
// information on many runes to be stored in little space.
|
||||
//
|
||||
// As most of the lookups are done directly on []byte slices, the tries use the
|
||||
// UTF-8 bytes directly for the lookup. This saves a conversion from UTF-8 to
|
||||
// runes and contributes a little bit to better performance. It also naturally
|
||||
// provides a fast path for ASCII.
|
||||
//
|
||||
// Space is also an issue. There are many code points defined in Unicode and as
|
||||
// a result tables can get quite large. So every byte counts. The triegen
|
||||
// package automatically chooses the smallest integer values to represent the
|
||||
// tables. Compacters allow further compression of the trie by allowing for
|
||||
// alternative representations of individual trie blocks.
|
||||
//
|
||||
// triegen allows generating multiple tries as a single structure. This is
|
||||
// useful when, for example, one wants to generate tries for several languages
|
||||
// that have a lot of values in common. Some existing libraries for
|
||||
// internationalization store all per-language data as a dynamically loadable
|
||||
// chunk. The go.text packages are designed with the assumption that the user
|
||||
// typically wants to compile in support for all supported languages, in line
|
||||
// with the approach common to Go to create a single standalone binary. The
|
||||
// multi-root trie approach can give significant storage savings in this
|
||||
// scenario.
|
||||
//
|
||||
// triegen generates both tables and code. The code is optimized to use the
|
||||
// automatically chosen data types. The following code is generated for a Trie
|
||||
// or multiple Tries named "foo":
|
||||
// - type fooTrie
|
||||
// The trie type.
|
||||
//
|
||||
// - func newFooTrie(x int) *fooTrie
|
||||
// Trie constructor, where x is the index of the trie passed to Gen.
|
||||
//
|
||||
// - func (t *fooTrie) lookup(s []byte) (v uintX, sz int)
|
||||
// The lookup method, where uintX is automatically chosen.
|
||||
//
|
||||
// - func lookupString, lookupUnsafe and lookupStringUnsafe
|
||||
// Variants of the above.
|
||||
//
|
||||
// - var fooValues and fooIndex and any tables generated by Compacters.
|
||||
// The core trie data.
|
||||
//
|
||||
// - var fooTrieHandles
|
||||
// Indexes of starter blocks in case of multiple trie roots.
|
||||
//
|
||||
// It is recommended that users test the generated trie by checking the returned
|
||||
// value for every rune. Such exhaustive tests are possible as the number of
|
||||
// runes in Unicode is limited.
|
||||
package triegen // import "golang.org/x/text/internal/triegen"
|
||||
|
||||
// TODO: Arguably, the internally optimized data types would not have to be
|
||||
// exposed in the generated API. We could also investigate not generating the
|
||||
// code, but using it through a package. We would have to investigate the impact
|
||||
// on performance of making such change, though. For packages like unicode/norm,
|
||||
// small changes like this could tank performance.
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"hash/crc64"
|
||||
"io"
|
||||
"log"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// builder builds a set of tries for associating values with runes. The set of
|
||||
// tries can share common index and value blocks.
|
||||
type builder struct {
|
||||
Name string
|
||||
|
||||
// ValueType is the type of the trie values looked up.
|
||||
ValueType string
|
||||
|
||||
// ValueSize is the byte size of the ValueType.
|
||||
ValueSize int
|
||||
|
||||
// IndexType is the type of trie index values used for all UTF-8 bytes of
|
||||
// a rune except the last one.
|
||||
IndexType string
|
||||
|
||||
// IndexSize is the byte size of the IndexType.
|
||||
IndexSize int
|
||||
|
||||
// SourceType is used when generating the lookup functions. If the user
|
||||
// requests StringSupport, all lookup functions will be generated for
|
||||
// string input as well.
|
||||
SourceType string
|
||||
|
||||
Trie []*Trie
|
||||
|
||||
IndexBlocks []*node
|
||||
ValueBlocks [][]uint64
|
||||
Compactions []compaction
|
||||
Checksum uint64
|
||||
|
||||
ASCIIBlock string
|
||||
StarterBlock string
|
||||
|
||||
indexBlockIdx map[uint64]int
|
||||
valueBlockIdx map[uint64]nodeIndex
|
||||
asciiBlockIdx map[uint64]int
|
||||
|
||||
// Stats are used to fill out the template.
|
||||
Stats struct {
|
||||
NValueEntries int
|
||||
NValueBytes int
|
||||
NIndexEntries int
|
||||
NIndexBytes int
|
||||
NHandleBytes int
|
||||
}
|
||||
|
||||
err error
|
||||
}
|
||||
|
||||
// A nodeIndex encodes the index of a node, which is defined by the compaction
|
||||
// which stores it and an index within the compaction. For internal nodes, the
|
||||
// compaction is always 0.
|
||||
type nodeIndex struct {
	compaction int // position of the storing compaction in builder.Compactions
	index      int // position of the node within that compaction
}
|
||||
|
||||
// compaction keeps track of stats used for the compaction.
|
||||
type compaction struct {
|
||||
c Compacter
|
||||
blocks []*node
|
||||
maxHandle uint32
|
||||
totalSize int
|
||||
|
||||
// Used by template-based generator and thus exported.
|
||||
Cutoff uint32
|
||||
Offset uint32
|
||||
Handler string
|
||||
}
|
||||
|
||||
func (b *builder) setError(err error) {
|
||||
if b.err == nil {
|
||||
b.err = err
|
||||
}
|
||||
}
|
||||
|
||||
// An Option can be passed to Gen.
|
||||
type Option func(b *builder) error
|
||||
|
||||
// Compact configures the trie generator to use the given Compacter.
|
||||
func Compact(c Compacter) Option {
|
||||
return func(b *builder) error {
|
||||
b.Compactions = append(b.Compactions, compaction{
|
||||
c: c,
|
||||
Handler: c.Handler() + "(n, b)"})
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Gen writes Go code for a shared trie lookup structure to w for the given
|
||||
// Tries. The generated trie type will be called nameTrie. newNameTrie(x) will
|
||||
// return the *nameTrie for tries[x]. A value can be looked up by using one of
|
||||
// the various lookup methods defined on nameTrie. It returns the table size of
|
||||
// the generated trie.
|
||||
func Gen(w io.Writer, name string, tries []*Trie, opts ...Option) (sz int, err error) {
|
||||
// The index contains two dummy blocks, followed by the zero block. The zero
|
||||
// block is at offset 0x80, so that the offset for the zero block for
|
||||
// continuation bytes is 0.
|
||||
b := &builder{
|
||||
Name: name,
|
||||
Trie: tries,
|
||||
IndexBlocks: []*node{{}, {}, {}},
|
||||
Compactions: []compaction{{
|
||||
Handler: name + "Values[n<<6+uint32(b)]",
|
||||
}},
|
||||
// The 0 key in indexBlockIdx and valueBlockIdx is the hash of the zero
|
||||
// block.
|
||||
indexBlockIdx: map[uint64]int{0: 0},
|
||||
valueBlockIdx: map[uint64]nodeIndex{0: {}},
|
||||
asciiBlockIdx: map[uint64]int{},
|
||||
}
|
||||
b.Compactions[0].c = (*simpleCompacter)(b)
|
||||
|
||||
for _, f := range opts {
|
||||
if err := f(b); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
b.build()
|
||||
if b.err != nil {
|
||||
return 0, b.err
|
||||
}
|
||||
if err = b.print(w); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return b.Size(), nil
|
||||
}
|
||||
|
||||
// A Trie represents a single root node of a trie. A builder may build several
|
||||
// overlapping tries at once.
|
||||
type Trie struct {
|
||||
root *node
|
||||
|
||||
hiddenTrie
|
||||
}
|
||||
|
||||
// hiddenTrie contains values we want to be visible to the template generator,
|
||||
// but hidden from the API documentation.
|
||||
type hiddenTrie struct {
|
||||
Name string
|
||||
Checksum uint64
|
||||
ASCIIIndex int
|
||||
StarterIndex int
|
||||
}
|
||||
|
||||
// NewTrie returns a new trie root.
|
||||
func NewTrie(name string) *Trie {
|
||||
return &Trie{
|
||||
&node{
|
||||
children: make([]*node, blockSize),
|
||||
values: make([]uint64, utf8.RuneSelf),
|
||||
},
|
||||
hiddenTrie{Name: name},
|
||||
}
|
||||
}
|
||||
|
||||
// Gen is a convenience wrapper around the Gen func passing t as the only trie
|
||||
// and uses the name passed to NewTrie. It returns the size of the generated
|
||||
// tables.
|
||||
func (t *Trie) Gen(w io.Writer, opts ...Option) (sz int, err error) {
|
||||
return Gen(w, t.Name, []*Trie{t}, opts...)
|
||||
}
|
||||
|
||||
// node is a node of the intermediate trie structure.
|
||||
type node struct {
|
||||
// children holds this node's children. It is always of length 64.
|
||||
// A child node may be nil.
|
||||
children []*node
|
||||
|
||||
// values contains the values of this node. If it is non-nil, this node is
|
||||
// either a root or leaf node:
|
||||
// For root nodes, len(values) == 128 and it maps the bytes in [0x00, 0x7F].
|
||||
// For leaf nodes, len(values) == 64 and it maps the bytes in [0x80, 0xBF].
|
||||
values []uint64
|
||||
|
||||
index nodeIndex
|
||||
}
|
||||
|
||||
// Insert associates value with the given rune. Insert will panic if a non-zero
|
||||
// value is passed for an invalid rune.
|
||||
func (t *Trie) Insert(r rune, value uint64) {
|
||||
if value == 0 {
|
||||
return
|
||||
}
|
||||
s := string(r)
|
||||
if []rune(s)[0] != r && value != 0 {
|
||||
// Note: The UCD tables will always assign what amounts to a zero value
|
||||
// to a surrogate. Allowing a zero value for an illegal rune allows
|
||||
// users to iterate over [0..MaxRune] without having to explicitly
|
||||
// exclude surrogates, which would be tedious.
|
||||
panic(fmt.Sprintf("triegen: non-zero value for invalid rune %U", r))
|
||||
}
|
||||
if len(s) == 1 {
|
||||
// It is a root node value (ASCII).
|
||||
t.root.values[s[0]] = value
|
||||
return
|
||||
}
|
||||
|
||||
n := t.root
|
||||
for ; len(s) > 1; s = s[1:] {
|
||||
if n.children == nil {
|
||||
n.children = make([]*node, blockSize)
|
||||
}
|
||||
p := s[0] % blockSize
|
||||
c := n.children[p]
|
||||
if c == nil {
|
||||
c = &node{}
|
||||
n.children[p] = c
|
||||
}
|
||||
if len(s) > 2 && c.values != nil {
|
||||
log.Fatalf("triegen: insert(%U): found internal node with values", r)
|
||||
}
|
||||
n = c
|
||||
}
|
||||
if n.values == nil {
|
||||
n.values = make([]uint64, blockSize)
|
||||
}
|
||||
if n.children != nil {
|
||||
log.Fatalf("triegen: insert(%U): found leaf node that also has child nodes", r)
|
||||
}
|
||||
n.values[s[0]-0x80] = value
|
||||
}
|
||||
|
||||
// Size returns the number of bytes the generated trie will take to store. It
|
||||
// needs to be exported as it is used in the templates.
|
||||
func (b *builder) Size() int {
|
||||
// Index blocks.
|
||||
sz := len(b.IndexBlocks) * blockSize * b.IndexSize
|
||||
|
||||
// Skip the first compaction, which represents the normal value blocks, as
|
||||
// its totalSize does not account for the ASCII blocks, which are managed
|
||||
// separately.
|
||||
sz += len(b.ValueBlocks) * blockSize * b.ValueSize
|
||||
for _, c := range b.Compactions[1:] {
|
||||
sz += c.totalSize
|
||||
}
|
||||
|
||||
// TODO: this computation does not account for the fixed overhead of a using
|
||||
// a compaction, either code or data. As for data, though, the typical
|
||||
// overhead of data is in the order of bytes (2 bytes for cases). Further,
|
||||
// the savings of using a compaction should anyway be substantial for it to
|
||||
// be worth it.
|
||||
|
||||
// For multi-root tries, we also need to account for the handles.
|
||||
if len(b.Trie) > 1 {
|
||||
sz += 2 * b.IndexSize * len(b.Trie)
|
||||
}
|
||||
return sz
|
||||
}
|
||||
|
||||
func (b *builder) build() {
|
||||
// Compute the sizes of the values.
|
||||
var vmax uint64
|
||||
for _, t := range b.Trie {
|
||||
vmax = maxValue(t.root, vmax)
|
||||
}
|
||||
b.ValueType, b.ValueSize = getIntType(vmax)
|
||||
|
||||
// Compute all block allocations.
|
||||
// TODO: first compute the ASCII blocks for all tries and then the other
|
||||
// nodes. ASCII blocks are more restricted in placement, as they require two
|
||||
// blocks to be placed consecutively. Processing them first may improve
|
||||
// sharing (at least one zero block can be expected to be saved.)
|
||||
for _, t := range b.Trie {
|
||||
b.Checksum += b.buildTrie(t)
|
||||
}
|
||||
|
||||
// Compute the offsets for all the Compacters.
|
||||
offset := uint32(0)
|
||||
for i := range b.Compactions {
|
||||
c := &b.Compactions[i]
|
||||
c.Offset = offset
|
||||
offset += c.maxHandle + 1
|
||||
c.Cutoff = offset
|
||||
}
|
||||
|
||||
// Compute the sizes of indexes.
|
||||
// TODO: different byte positions could have different sizes. So far we have
|
||||
// not found a case where this is beneficial.
|
||||
imax := uint64(b.Compactions[len(b.Compactions)-1].Cutoff)
|
||||
for _, ib := range b.IndexBlocks {
|
||||
if x := uint64(ib.index.index); x > imax {
|
||||
imax = x
|
||||
}
|
||||
}
|
||||
b.IndexType, b.IndexSize = getIntType(imax)
|
||||
}
|
||||
|
||||
func maxValue(n *node, max uint64) uint64 {
|
||||
if n == nil {
|
||||
return max
|
||||
}
|
||||
for _, c := range n.children {
|
||||
max = maxValue(c, max)
|
||||
}
|
||||
for _, v := range n.values {
|
||||
if max < v {
|
||||
max = v
|
||||
}
|
||||
}
|
||||
return max
|
||||
}
|
||||
|
||||
// getIntType returns the name and the byte size of the smallest unsigned
// integer type that can hold v.
func getIntType(v uint64) (string, int) {
	if v < 1<<8 {
		return "uint8", 1
	}
	if v < 1<<16 {
		return "uint16", 2
	}
	if v < 1<<32 {
		return "uint32", 4
	}
	return "uint64", 8
}
|
||||
|
||||
const (
|
||||
blockSize = 64
|
||||
|
||||
// Subtract two blocks to offset 0x80, the first continuation byte.
|
||||
blockOffset = 2
|
||||
|
||||
// Subtract three blocks to offset 0xC0, the first non-ASCII starter.
|
||||
rootBlockOffset = 3
|
||||
)
|
||||
|
||||
var crcTable = crc64.MakeTable(crc64.ISO)
|
||||
|
||||
func (b *builder) buildTrie(t *Trie) uint64 {
|
||||
n := t.root
|
||||
|
||||
// Get the ASCII offset. For the first trie, the ASCII block will be at
|
||||
// position 0.
|
||||
hasher := crc64.New(crcTable)
|
||||
binary.Write(hasher, binary.BigEndian, n.values)
|
||||
hash := hasher.Sum64()
|
||||
|
||||
v, ok := b.asciiBlockIdx[hash]
|
||||
if !ok {
|
||||
v = len(b.ValueBlocks)
|
||||
b.asciiBlockIdx[hash] = v
|
||||
|
||||
b.ValueBlocks = append(b.ValueBlocks, n.values[:blockSize], n.values[blockSize:])
|
||||
if v == 0 {
|
||||
// Add the zero block at position 2 so that it will be assigned a
|
||||
// zero reference in the lookup blocks.
|
||||
// TODO: always do this? This would allow us to remove a check from
|
||||
// the trie lookup, but at the expense of extra space. Analyze
|
||||
// performance for unicode/norm.
|
||||
b.ValueBlocks = append(b.ValueBlocks, make([]uint64, blockSize))
|
||||
}
|
||||
}
|
||||
t.ASCIIIndex = v
|
||||
|
||||
// Compute remaining offsets.
|
||||
t.Checksum = b.computeOffsets(n, true)
|
||||
// We already subtracted the normal blockOffset from the index. Subtract the
|
||||
// difference for starter bytes.
|
||||
t.StarterIndex = n.index.index - (rootBlockOffset - blockOffset)
|
||||
return t.Checksum
|
||||
}
|
||||
|
||||
func (b *builder) computeOffsets(n *node, root bool) uint64 {
|
||||
// For the first trie, the root lookup block will be at position 3, which is
|
||||
// the offset for UTF-8 non-ASCII starter bytes.
|
||||
first := len(b.IndexBlocks) == rootBlockOffset
|
||||
if first {
|
||||
b.IndexBlocks = append(b.IndexBlocks, n)
|
||||
}
|
||||
|
||||
// We special-case the cases where all values recursively are 0. This allows
|
||||
// for the use of a zero block to which all such values can be directed.
|
||||
hash := uint64(0)
|
||||
if n.children != nil || n.values != nil {
|
||||
hasher := crc64.New(crcTable)
|
||||
for _, c := range n.children {
|
||||
var v uint64
|
||||
if c != nil {
|
||||
v = b.computeOffsets(c, false)
|
||||
}
|
||||
binary.Write(hasher, binary.BigEndian, v)
|
||||
}
|
||||
binary.Write(hasher, binary.BigEndian, n.values)
|
||||
hash = hasher.Sum64()
|
||||
}
|
||||
|
||||
if first {
|
||||
b.indexBlockIdx[hash] = rootBlockOffset - blockOffset
|
||||
}
|
||||
|
||||
// Compacters don't apply to internal nodes.
|
||||
if n.children != nil {
|
||||
v, ok := b.indexBlockIdx[hash]
|
||||
if !ok {
|
||||
v = len(b.IndexBlocks) - blockOffset
|
||||
b.IndexBlocks = append(b.IndexBlocks, n)
|
||||
b.indexBlockIdx[hash] = v
|
||||
}
|
||||
n.index = nodeIndex{0, v}
|
||||
} else {
|
||||
h, ok := b.valueBlockIdx[hash]
|
||||
if !ok {
|
||||
bestI, bestSize := 0, blockSize*b.ValueSize
|
||||
for i, c := range b.Compactions[1:] {
|
||||
if sz, ok := c.c.Size(n.values); ok && bestSize > sz {
|
||||
bestI, bestSize = i+1, sz
|
||||
}
|
||||
}
|
||||
c := &b.Compactions[bestI]
|
||||
c.totalSize += bestSize
|
||||
v := c.c.Store(n.values)
|
||||
if c.maxHandle < v {
|
||||
c.maxHandle = v
|
||||
}
|
||||
h = nodeIndex{bestI, int(v)}
|
||||
b.valueBlockIdx[hash] = h
|
||||
}
|
||||
n.index = h
|
||||
}
|
||||
return hash
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue