Checking in vendor folder for ease of using go get.

This commit is contained in:
Renan DelValle 2018-10-23 23:32:59 -07:00
parent 7a1251853b
commit cdb4b5a1d0
No known key found for this signature in database
GPG key ID: C240AD6D6F443EC9
3554 changed files with 1270116 additions and 0 deletions

702
vendor/golang.org/x/text/collate/build/builder.go generated vendored Normal file
View file

@ -0,0 +1,702 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build // import "golang.org/x/text/collate/build"
import (
"fmt"
"io"
"log"
"sort"
"strings"
"unicode/utf8"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
"golang.org/x/text/unicode/norm"
)
// TODO: optimizations:
// - expandElem is currently 20K. By putting unique colElems in a separate
// table and having a byte array of indexes into this table, we can reduce
// the total size to about 7K. By also factoring out the length bytes, we
// can reduce this to about 6K.
// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
// and many consecutive values with the same stride. This can be further
// compacted.
// - Compress secondary weights into 8 bits.
// - Some LDML specs specify a context element. Currently we simply concatenate
// those. Context can be implemented using the contraction trie. If Builder
// could analyze and detect when using a context makes sense, there is no
// need to expose this construct in the API.
// A Builder builds a root collation table. The user must specify the
// collation elements for each entry. A common use will be to base the weights
// on those specified in the allkeys* file as provided by the UCA or CLDR.
type Builder struct {
index *trieBuilder
root ordering
locale []*Tailoring
t *table
err error
built bool
minNonVar int // lowest primary recorded for a variable
varTop int // highest primary recorded for a non-variable
// indexes used for reusing expansions and contractions
expIndex map[string]int // positions of expansions keyed by their string representation
ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes
ctElem map[string]int // contraction elements keyed by their string representation
}
// A Tailoring builds a collation table based on another collation table.
// The table is defined by specifying tailorings to the underlying table.
// See http://unicode.org/reports/tr35/ for an overview of tailoring
// collation tables. The CLDR contains pre-defined tailorings for a variety
// of languages (See http://www.unicode.org/Public/cldr/<version>/core.zip.)
type Tailoring struct {
id string
builder *Builder
index *ordering
anchor *entry
before bool
}
// NewBuilder returns a new Builder.
func NewBuilder() *Builder {
return &Builder{
index: newTrieBuilder(),
root: makeRootOrdering(),
expIndex: make(map[string]int),
ctHandle: make(map[string]ctHandle),
ctElem: make(map[string]int),
}
}
// Tailoring returns a Tailoring for the given locale. One should
// have completed all calls to Add before calling Tailoring.
func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
t := &Tailoring{
id: loc.String(),
builder: b,
index: b.root.clone(),
}
t.index.id = t.id
b.locale = append(b.locale, t)
return t
}
// Add adds an entry to the collation element table, mapping
// a slice of runes to a sequence of collation elements.
// A collation element is specified as list of weights: []int{primary, secondary, ...}.
// The entries are typically obtained from a collation element table
// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format.
// Note that the collation elements specified by colelems are only used
// as a guide. The actual weights generated by Builder may differ.
// The argument variables is a list of indices into colelems that should contain
// a value for each colelem that is a variable. (See the reference above.)
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
str := string(runes)
elems := make([]rawCE, len(colelems))
for i, ce := range colelems {
if len(ce) == 0 {
break
}
elems[i] = makeRawCE(ce, 0)
if len(ce) == 1 {
elems[i].w[1] = defaultSecondary
}
if len(ce) <= 2 {
elems[i].w[2] = defaultTertiary
}
if len(ce) <= 3 {
elems[i].w[3] = ce[0]
}
}
for i, ce := range elems {
p := ce.w[0]
isvar := false
for _, j := range variables {
if i == j {
isvar = true
}
}
if isvar {
if p >= b.minNonVar && b.minNonVar > 0 {
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
}
if p > b.varTop {
b.varTop = p
}
} else if p > 1 { // 1 is a special primary value reserved for FFFE
if p <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
}
if b.minNonVar == 0 || p < b.minNonVar {
b.minNonVar = p
}
}
}
elems, err := convertLargeWeights(elems)
if err != nil {
return err
}
cccs := []uint8{}
nfd := norm.NFD.String(str)
for i := range nfd {
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
}
if len(cccs) < len(elems) {
if len(cccs) > 2 {
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
}
p := len(elems) - 1
for ; p > 0 && elems[p].w[0] == 0; p-- {
elems[p].ccc = cccs[len(cccs)-1]
}
for ; p >= 0; p-- {
elems[p].ccc = cccs[0]
}
} else {
for i := range elems {
elems[i].ccc = cccs[i]
}
}
// doNorm in collate.go assumes that the following conditions hold.
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
}
b.root.newEntry(str, elems)
return nil
}
func (t *Tailoring) setAnchor(anchor string) error {
anchor = norm.NFC.String(anchor)
a := t.index.find(anchor)
if a == nil {
a = t.index.newEntry(anchor, nil)
a.implicit = true
a.modified = true
for _, r := range []rune(anchor) {
e := t.index.find(string(r))
e.lock = true
}
}
t.anchor = a
return nil
}
// SetAnchor sets the point after which elements passed in subsequent calls to
// Insert will be inserted. It is equivalent to the reset directive in an LDML
// specification. See Insert for an example.
// SetAnchor supports the following logical reset positions:
// <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>,
// and <last_non_ignorable/>.
func (t *Tailoring) SetAnchor(anchor string) error {
if err := t.setAnchor(anchor); err != nil {
return err
}
t.before = false
return nil
}
// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
// Insert will insert entries before the anchor.
func (t *Tailoring) SetAnchorBefore(anchor string) error {
if err := t.setAnchor(anchor); err != nil {
return err
}
t.before = true
return nil
}
// Insert sets the ordering of str relative to the entry set by the previous
// call to SetAnchor or Insert. The argument extend corresponds
// to the extend elements as defined in LDML. A non-empty value for extend
// will cause the collation elements corresponding to extend to be appended
// to the collation elements generated for the entry added by Insert.
// This has the same net effect as sorting str after the string anchor+extend.
// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details
// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements
// for full details on LDML.
//
// Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
// at the primary sorting level:
// t := b.Tailoring("se")
// t.SetAnchor("z")
// t.Insert(colltab.Primary, "ä", "")
// Order "ü" after "ue" at the secondary sorting level:
// t.SetAnchor("ue")
// t.Insert(colltab.Secondary, "ü","")
// or
// t.SetAnchor("u")
// t.Insert(colltab.Secondary, "ü", "e")
// Order "q" afer "ab" at the secondary level and "Q" after "q"
// at the tertiary level:
// t.SetAnchor("ab")
// t.Insert(colltab.Secondary, "q", "")
// t.Insert(colltab.Tertiary, "Q", "")
// Order "b" before "a":
// t.SetAnchorBefore("a")
// t.Insert(colltab.Primary, "b", "")
// Order "0" after the last primary ignorable:
// t.SetAnchor("<last_primary_ignorable/>")
// t.Insert(colltab.Primary, "0", "")
func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
if t.anchor == nil {
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
}
str = norm.NFC.String(str)
e := t.index.find(str)
if e == nil {
e = t.index.newEntry(str, nil)
} else if e.logical != noAnchor {
return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
}
if e.lock {
return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
}
a := t.anchor
// Find the first element after the anchor which differs at a level smaller or
// equal to the given level. Then insert at this position.
// See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
e.before = t.before
if t.before {
t.before = false
if a.prev == nil {
a.insertBefore(e)
} else {
for a = a.prev; a.level > level; a = a.prev {
}
a.insertAfter(e)
}
e.level = level
} else {
for ; a.level > level; a = a.next {
}
e.level = a.level
if a != e {
a.insertAfter(e)
a.level = level
} else {
// We don't set a to prev itself. This has the effect of the entry
// getting new collation elements that are an increment of itself.
// This is intentional.
a.prev.level = level
}
}
e.extend = norm.NFD.String(extend)
e.exclude = false
e.modified = true
e.elems = nil
t.anchor = e
return nil
}
func (o *ordering) getWeight(e *entry) []rawCE {
if len(e.elems) == 0 && e.logical == noAnchor {
if e.implicit {
for _, r := range e.runes {
e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
}
} else if e.before {
count := [colltab.Identity + 1]int{}
a := e
for ; a.elems == nil && !a.implicit; a = a.next {
count[a.level]++
}
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
for i := colltab.Primary; i < colltab.Quaternary; i++ {
if count[i] != 0 {
e.elems[0].w[i] -= count[i]
break
}
}
if e.prev != nil {
o.verifyWeights(e.prev, e, e.prev.level)
}
} else {
prev := e.prev
e.elems = nextWeight(prev.level, o.getWeight(prev))
o.verifyWeights(e, e.next, e.level)
}
}
return e.elems
}
func (o *ordering) addExtension(e *entry) {
if ex := o.find(e.extend); ex != nil {
e.elems = append(e.elems, ex.elems...)
} else {
for _, r := range []rune(e.extend) {
e.elems = append(e.elems, o.find(string(r)).elems...)
}
}
e.extend = ""
}
func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
return nil
}
for i := colltab.Primary; i < level; i++ {
if a.elems[0].w[i] < b.elems[0].w[i] {
return nil
}
}
if a.elems[0].w[level] >= b.elems[0].w[level] {
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
log.Println(err)
// TODO: return the error instead, or better, fix the conflicting entry by making room.
}
return nil
}
func (b *Builder) error(e error) {
if e != nil {
b.err = e
}
}
func (b *Builder) errorID(locale string, e error) {
if e != nil {
b.err = fmt.Errorf("%s:%v", locale, e)
}
}
// patchNorm ensures that NFC and NFD counterparts are consistent.
func (o *ordering) patchNorm() {
// Insert the NFD counterparts, if necessary.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if nfd != e.str {
if e0 := o.find(nfd); e0 != nil && !e0.modified {
e0.elems = e.elems
} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
e := o.newEntry(nfd, e.elems)
e.modified = true
}
}
}
// Update unchanged composed forms if one of their parts changed.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if e.modified || nfd == e.str {
continue
}
if e0 := o.find(nfd); e0 != nil {
e.elems = e0.elems
} else {
e.elems = o.genColElems(nfd)
if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
r := []rune(nfd)
head := string(r[0])
tail := ""
for i := 1; i < len(r); i++ {
s := norm.NFC.String(head + string(r[i]))
if e0 := o.find(s); e0 != nil && e0.modified {
head = s
} else {
tail += string(r[i])
}
}
e.elems = append(o.genColElems(head), o.genColElems(tail)...)
}
}
}
// Exclude entries for which the individual runes generate the same collation elements.
for _, e := range o.ordered {
if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
e.exclude = true
}
}
}
func (b *Builder) buildOrdering(o *ordering) {
for _, e := range o.ordered {
o.getWeight(e)
}
for _, e := range o.ordered {
o.addExtension(e)
}
o.patchNorm()
o.sort()
simplify(o)
b.processExpansions(o) // requires simplify
b.processContractions(o) // requires simplify
t := newNode()
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.skip() {
ce, err := e.encode()
b.errorID(o.id, err)
t.insert(e.runes[0], ce)
}
}
o.handle = b.index.addTrie(t)
}
func (b *Builder) build() (*table, error) {
if b.built {
return b.t, b.err
}
b.built = true
b.t = &table{
Table: colltab.Table{
MaxContractLen: utf8.UTFMax,
VariableTop: uint32(b.varTop),
},
}
b.buildOrdering(&b.root)
b.t.root = b.root.handle
for _, t := range b.locale {
b.buildOrdering(t.index)
if b.err != nil {
break
}
}
i, err := b.index.generate()
b.t.trie = *i
b.t.Index = colltab.Trie{
Index: i.index,
Values: i.values,
Index0: i.index[blockSize*b.t.root.lookupStart:],
Values0: i.values[blockSize*b.t.root.valueStart:],
}
b.error(err)
return b.t, b.err
}
// Build builds the root Collator.
func (b *Builder) Build() (colltab.Weighter, error) {
table, err := b.build()
if err != nil {
return nil, err
}
return table, nil
}
// Build builds a Collator for Tailoring t.
func (t *Tailoring) Build() (colltab.Weighter, error) {
// TODO: implement.
return nil, nil
}
// Print prints the tables for b and all its Tailorings as a Go file
// that can be included in the Collate package.
func (b *Builder) Print(w io.Writer) (n int, err error) {
p := func(nn int, e error) {
n += nn
if err == nil {
err = e
}
}
t, err := b.build()
if err != nil {
return 0, err
}
p(fmt.Fprintf(w, `var availableLocales = "und`))
for _, loc := range b.locale {
if loc.id != "und" {
p(fmt.Fprintf(w, ",%s", loc.id))
}
}
p(fmt.Fprint(w, "\"\n\n"))
p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
for _, loc := range b.locale {
if loc.id == "und" {
p(t.fprintIndex(w, loc.index.handle, loc.id))
}
}
for _, loc := range b.locale {
if loc.id != "und" {
p(t.fprintIndex(w, loc.index.handle, loc.id))
}
}
p(fmt.Fprint(w, "}\n\n"))
n, _, err = t.fprint(w, "main")
return
}
// reproducibleFromNFKD checks whether the given expansion could be generated
// from an NFKD expansion.
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
// Length must be equal.
if len(exp) != len(nfkd) {
return false
}
for i, ce := range exp {
// Primary and secondary values should be equal.
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
return false
}
// Tertiary values should be equal to maxTertiary for third element onwards.
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
// simply be dropped. Try this out by dropping the following code.
if i >= 2 && ce.w[2] != maxTertiary {
return false
}
if _, err := makeCE(ce); err != nil {
// Simply return false. The error will be caught elsewhere.
return false
}
}
return true
}
func simplify(o *ordering) {
// Runes that are a starter of a contraction should not be removed.
// (To date, there is only Kannada character 0CCA.)
keep := make(map[rune]bool)
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if len(e.runes) > 1 {
keep[e.runes[0]] = true
}
}
// Tag entries for which the runes NFKD decompose to identical values.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
s := e.str
nfkd := norm.NFKD.String(s)
nfd := norm.NFD.String(s)
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
continue
}
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
e.decompose = true
}
}
}
// appendExpansion converts the given collation sequence to
// collation elements and adds them to the expansion table.
// It returns an index to the expansion table.
func (b *Builder) appendExpansion(e *entry) int {
t := b.t
i := len(t.ExpandElem)
ce := uint32(len(e.elems))
t.ExpandElem = append(t.ExpandElem, ce)
for _, w := range e.elems {
ce, err := makeCE(w)
if err != nil {
b.error(err)
return -1
}
t.ExpandElem = append(t.ExpandElem, ce)
}
return i
}
// processExpansions extracts data necessary to generate
// the extraction tables.
func (b *Builder) processExpansions(o *ordering) {
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.expansion() {
continue
}
key := fmt.Sprintf("%v", e.elems)
i, ok := b.expIndex[key]
if !ok {
i = b.appendExpansion(e)
b.expIndex[key] = i
}
e.expansionIndex = i
}
}
func (b *Builder) processContractions(o *ordering) {
// Collate contractions per starter rune.
starters := []rune{}
cm := make(map[rune][]*entry)
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if e.contraction() {
if len(e.str) > b.t.MaxContractLen {
b.t.MaxContractLen = len(e.str)
}
r := e.runes[0]
if _, ok := cm[r]; !ok {
starters = append(starters, r)
}
cm[r] = append(cm[r], e)
}
}
// Add entries of single runes that are at a start of a contraction.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if !e.contraction() {
r := e.runes[0]
if _, ok := cm[r]; ok {
cm[r] = append(cm[r], e)
}
}
}
// Build the tries for the contractions.
t := b.t
for _, r := range starters {
l := cm[r]
// Compute suffix strings. There are 31 different contraction suffix
// sets for 715 contractions and 82 contraction starter runes as of
// version 6.0.0.
sufx := []string{}
hasSingle := false
for _, e := range l {
if len(e.runes) > 1 {
sufx = append(sufx, string(e.runes[1:]))
} else {
hasSingle = true
}
}
if !hasSingle {
b.error(fmt.Errorf("no single entry for starter rune %U found", r))
continue
}
// Unique the suffix set.
sort.Strings(sufx)
key := strings.Join(sufx, "\n")
handle, ok := b.ctHandle[key]
if !ok {
var err error
handle, err = appendTrie(&t.ContractTries, sufx)
if err != nil {
b.error(err)
}
b.ctHandle[key] = handle
}
// Bucket sort entries in index order.
es := make([]*entry, len(l))
for _, e := range l {
var p, sn int
if len(e.runes) > 1 {
str := []byte(string(e.runes[1:]))
p, sn = lookup(&t.ContractTries, handle, str)
if sn != len(str) {
log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
}
}
if es[p] != nil {
log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
}
es[p] = e
}
// Create collation elements for contractions.
elems := []uint32{}
for _, e := range es {
ce, err := e.encodeBase()
b.errorID(o.id, err)
elems = append(elems, ce)
}
key = fmt.Sprintf("%v", elems)
i, ok := b.ctElem[key]
if !ok {
i = len(t.ContractElem)
b.ctElem[key] = i
t.ContractElem = append(t.ContractElem, elems...)
}
// Store info in entry for starter rune.
es[0].contractionIndex = i
es[0].contractionHandle = handle
}
}

290
vendor/golang.org/x/text/collate/build/builder_test.go generated vendored Normal file
View file

@ -0,0 +1,290 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import "testing"
// cjk returns an implicit collation element for a CJK rune.
func cjk(r rune) []rawCE {
// A CJK character C is represented in the DUCET as
// [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
// Where AAAA is the most significant 15 bits plus a base value.
// Any base value will work for the test, so we pick the common value of FB40.
const base = 0xFB40
return []rawCE{
{w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
{w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
}
}
func pCE(p int) []rawCE {
return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
}
func pqCE(p, q int) []rawCE {
return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
}
func ptCE(p, t int) []rawCE {
return mkCE([]int{p, defaultSecondary, t, 0}, 0)
}
func ptcCE(p, t int, ccc uint8) []rawCE {
return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
}
func sCE(s int) []rawCE {
return mkCE([]int{0, s, defaultTertiary, 0}, 0)
}
func stCE(s, t int) []rawCE {
return mkCE([]int{0, s, t, 0}, 0)
}
func scCE(s int, ccc uint8) []rawCE {
return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
}
func mkCE(w []int, ccc uint8) []rawCE {
return []rawCE{rawCE{w, ccc}}
}
// ducetElem is used to define test data that is used to generate a table.
type ducetElem struct {
str string
ces []rawCE
}
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
b := NewBuilder()
for _, e := range ducet {
ces := [][]int{}
for _, ce := range e.ces {
ces = append(ces, ce.w)
}
if err := b.Add([]rune(e.str), ces, nil); err != nil {
t.Errorf(err.Error())
}
}
b.t = &table{}
b.root.sort()
return b
}
type convertTest struct {
in, out []rawCE
err bool
}
var convLargeTests = []convertTest{
{pCE(0xFB39), pCE(0xFB39), false},
{cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false},
{pCE(0xFB40), pCE(0), true},
{append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
{pCE(0xFFFE), pCE(illegalOffset), false},
{pCE(0xFFFF), pCE(illegalOffset + 1), false},
}
func TestConvertLarge(t *testing.T) {
for i, tt := range convLargeTests {
e := new(entry)
for _, ce := range tt.in {
e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
}
elems, err := convertLargeWeights(e.elems)
if tt.err {
if err == nil {
t.Errorf("%d: expected error; none found", i)
}
continue
} else if err != nil {
t.Errorf("%d: unexpected error: %v", i, err)
}
if !equalCEArrays(elems, tt.out) {
t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out)
}
}
}
// Collation element table for simplify tests.
var simplifyTest = []ducetElem{
{"\u0300", sCE(30)}, // grave
{"\u030C", sCE(40)}, // caron
{"A", ptCE(100, 8)},
{"D", ptCE(104, 8)},
{"E", ptCE(105, 8)},
{"I", ptCE(110, 8)},
{"z", ptCE(130, 8)},
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])},
{"\u05B7", sCE(80)},
{"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed
{"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave
{"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD
{"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying
{"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD
// no removal: tertiary value of third element is not maxTertiary
{"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
}
var genColTests = []ducetElem{
{"\uFA70", pqCE(0x1FA70, 0xFA70)},
{"A\u0300", append(ptCE(100, 8), sCE(30)...)},
{"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])},
{"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
}
func TestGenColElems(t *testing.T) {
b := newBuilder(t, simplifyTest[:5])
for i, tt := range genColTests {
res := b.root.genColElems(tt.str)
if !equalCEArrays(tt.ces, res) {
t.Errorf("%d: result %X; want %X", i, res, tt.ces)
}
}
}
type strArray []string
func (sa strArray) contains(s string) bool {
for _, e := range sa {
if e == s {
return true
}
}
return false
}
var simplifyRemoved = strArray{"\u00C0", "\uFB1F"}
var simplifyMarked = strArray{"\u01C5"}
func TestSimplify(t *testing.T) {
b := newBuilder(t, simplifyTest)
o := &b.root
simplify(o)
for i, tt := range simplifyTest {
if simplifyRemoved.contains(tt.str) {
continue
}
e := o.find(tt.str)
if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
break
}
}
var i, k int
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
gold := simplifyMarked.contains(e.str)
if gold {
k++
}
if gold != e.decompose {
t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
}
i++
}
if k != len(simplifyMarked) {
t.Errorf(" an entry that should be marked as decompose was deleted")
}
}
var expandTest = []ducetElem{
{"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
{"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
{"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
{"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
{"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
}
func TestExpand(t *testing.T) {
const (
totalExpansions = 5
totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
)
b := newBuilder(t, expandTest)
o := &b.root
b.processExpansions(o)
e := o.front()
for _, tt := range expandTest {
exp := b.t.ExpandElem[e.expansionIndex:]
if int(exp[0]) != len(tt.ces) {
t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
}
exp = exp[1:]
for j, w := range tt.ces {
if ce, _ := makeCE(w); exp[j] != ce {
t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
}
}
e, _ = e.nextIndexed()
}
// Verify uniquing.
if len(b.t.ExpandElem) != totalElements {
t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements)
}
}
var contractTest = []ducetElem{
{"abc", pCE(102)},
{"abd", pCE(103)},
{"a", pCE(100)},
{"ab", pCE(101)},
{"ac", pCE(104)},
{"bcd", pCE(202)},
{"b", pCE(200)},
{"bc", pCE(201)},
{"bd", pCE(203)},
// shares suffixes with a*
{"Ab", pCE(301)},
{"A", pCE(300)},
{"Ac", pCE(304)},
{"Abc", pCE(302)},
{"Abd", pCE(303)},
// starter to be ignored
{"z", pCE(1000)},
}
func TestContract(t *testing.T) {
const (
totalElements = 5 + 5 + 4
)
b := newBuilder(t, contractTest)
o := &b.root
b.processContractions(o)
indexMap := make(map[int]bool)
handleMap := make(map[rune]*entry)
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
if e.contractionHandle.n > 0 {
handleMap[e.runes[0]] = e
indexMap[e.contractionHandle.index] = true
}
}
// Verify uniquing.
if len(indexMap) != 2 {
t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
}
for _, tt := range contractTest {
e, ok := handleMap[[]rune(tt.str)[0]]
if !ok {
continue
}
str := tt.str[1:]
offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str))
if len(str) != n {
t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
}
ce := b.t.ContractElem[offset+e.contractionIndex]
if want, _ := makeCE(tt.ces[0]); want != ce {
t.Errorf("%s: element %X; want %X", tt.str, ce, want)
}
}
if len(b.t.ContractElem) != totalElements {
t.Errorf("len(expandElem)==%d; want %d", len(b.t.ContractElem), totalElements)
}
}

294
vendor/golang.org/x/text/collate/build/colelem.go generated vendored Normal file
View file

@ -0,0 +1,294 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"unicode"
"golang.org/x/text/internal/colltab"
)
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
maxTertiary = 0x1F
)
type rawCE struct {
w []int
ccc uint8
}
func makeRawCE(w []int, ccc uint8) rawCE {
ce := rawCE{w: make([]int, 4), ccc: ccc}
copy(ce.w, w)
return ce
}
// A collation element is represented as an uint32.
// In the typical case, a rune maps to a single collation element. If a rune
// can be the start of a contraction or expands into multiple collation elements,
// then the collation element that is associated with a rune will have a special
// form to represent such m to n mappings. Such special collation elements
// have a value >= 0x80000000.
const (
maxPrimaryBits = 21
maxSecondaryBits = 12
maxTertiaryBits = 8
)
func makeCE(ce rawCE) (uint32, error) {
v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc)
return uint32(v), e
}
// For contractions, collation elements are of the form
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
// - n* is the size of the first node in the contraction trie.
// - i* is the index of the first node in the contraction trie.
// - b* is the offset into the contraction collation element table.
// See contract.go for details on the contraction trie.
const (
contractID = 0xC0000000
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
func makeContractIndex(h ctHandle, offset int) (uint32, error) {
if h.n >= 1<<maxNBits {
return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits)
}
if h.index >= 1<<maxTrieIndexBits {
return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits)
}
if offset >= 1<<maxContractOffsetBits {
return 0, fmt.Errorf("contraction offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits)
}
ce := uint32(contractID)
ce += uint32(offset << (maxNBits + maxTrieIndexBits))
ce += uint32(h.index << maxNBits)
ce += uint32(h.n)
return ce, nil
}
// For expansions, collation elements are of the form
// 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const (
expandID = 0xE0000000
maxExpandIndexBits = 16
)
func makeExpandIndex(index int) (uint32, error) {
if index >= 1<<maxExpandIndexBits {
return 0, fmt.Errorf("expansion index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits)
}
return expandID + uint32(index), nil
}
// Each list of collation elements corresponding to an expansion starts with
// a header indicating the length of the sequence.
func makeExpansionHeader(n int) (uint32, error) {
return uint32(n), nil
}
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The collation element, in this case, is of the form
// 11110000 00000000 wwwwwwww vvvvvvvv, where
// - v* is the replacement tertiary weight for the first rune,
// - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
const (
decompID = 0xF0000000
)
func makeDecompose(t1, t2 int) (uint32, error) {
if t1 >= 256 || t1 < 0 {
return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1)
}
if t2 >= 256 || t2 < 0 {
return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2)
}
return uint32(t2<<8+t1) + decompID, nil
}
const (
// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
minUnified rune = 0x4E00
maxUnified = 0x9FFF
minCompatibility = 0xF900
maxCompatibility = 0xFAFF
minRare = 0x3400
maxRare = 0x4DBF
)
const (
commonUnifiedOffset = 0x10000
rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
otherOffset = 0x50000 // largest rune in rare is U+2FA1D
illegalOffset = otherOffset + int(unicode.MaxRune)
maxPrimary = illegalOffset + 1
)
// implicitPrimary returns the primary weight for the a rune
// for which there is no entry for the rune in the collation table.
// We take a different approach from the one specified in
// http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will typically not hit. The DUCET explicitly specifies mappings
// for all characters that do not decompose.
return int(r) + commonUnifiedOffset
}
return int(r) + rareUnifiedOffset
}
return int(r) + otherOffset
}
// convertLargeWeights converts collation elements with large
// primaries (either double primaries or for illegal runes)
// to our own representation.
// A CJK character C is represented in the DUCET as
// [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
// We will rewrite these characters to a single CE.
// We assume the CJK values start at 0x8000.
// See http://unicode.org/reports/tr10/#Implicit_Weights
func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
const (
cjkPrimaryStart = 0xFB40
rarePrimaryStart = 0xFB80
otherPrimaryStart = 0xFBC0
illegalPrimary = 0xFFFE
highBitsMask = 0x3F
lowBitsMask = 0x7FFF
lowBitsFlag = 0x8000
shiftBits = 15
)
for i := 0; i < len(elems); i++ {
ce := elems[i].w
p := ce[0]
if p < cjkPrimaryStart {
continue
}
if p > 0xFFFF {
return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
}
if p >= illegalPrimary {
ce[0] = illegalOffset + p - illegalPrimary
} else {
if i+1 >= len(elems) {
return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
}
if elems[i+1].w[0]&lowBitsFlag == 0 {
return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
}
np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
switch {
case p < rarePrimaryStart:
np += commonUnifiedOffset
case p < otherPrimaryStart:
np += rareUnifiedOffset
default:
p += otherOffset
}
ce[0] = np
for j := i + 1; j+1 < len(elems); j++ {
elems[j] = elems[j+1]
}
elems = elems[:len(elems)-1]
}
}
return elems, nil
}
// nextWeight computes the first possible collation weights following elems
// for the given level.
func nextWeight(level colltab.Level, elems []rawCE) []rawCE {
if level == colltab.Identity {
next := make([]rawCE, len(elems))
copy(next, elems)
return next
}
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
next[0].w[level]++
if level < colltab.Secondary {
next[0].w[colltab.Secondary] = defaultSecondary
}
if level < colltab.Tertiary {
next[0].w[colltab.Tertiary] = defaultTertiary
}
// Filter entries that cannot influence ordering.
for _, ce := range elems[1:] {
skip := true
for i := colltab.Primary; i < level; i++ {
skip = skip && ce.w[i] == 0
}
if !skip {
next = append(next, ce)
}
}
return next
}
func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) {
for ; i < len(elems) && elems[i].w[level] == 0; i++ {
}
if i < len(elems) {
return i, elems[i].w[level]
}
return i, 0
}
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
// It also returns the collation level at which the difference is found.
func compareWeights(a, b []rawCE) (result int, level colltab.Level) {
for level := colltab.Primary; level < colltab.Identity; level++ {
var va, vb int
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
ia, va = nextVal(a, ia, level)
ib, vb = nextVal(b, ib, level)
if va != vb {
if va < vb {
return -1, level
} else {
return 1, level
}
}
}
}
return 0, colltab.Identity
}
func equalCE(a, b rawCE) bool {
for i := 0; i < 3; i++ {
if b.w[i] != a.w[i] {
return false
}
}
return true
}
func equalCEArrays(a, b []rawCE) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if !equalCE(a[i], b[i]) {
return false
}
}
return true
}

215
vendor/golang.org/x/text/collate/build/colelem_test.go generated vendored Normal file
View file

@ -0,0 +1,215 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"testing"
"golang.org/x/text/internal/colltab"
)
type ceTest struct {
f func(in []int) (uint32, error)
arg []int
val uint32
}
func normalCE(in []int) (ce uint32, err error) {
return makeCE(rawCE{w: in[:3], ccc: uint8(in[3])})
}
func expandCE(in []int) (ce uint32, err error) {
return makeExpandIndex(in[0])
}
func contractCE(in []int) (ce uint32, err error) {
return makeContractIndex(ctHandle{in[0], in[1]}, in[2])
}
func decompCE(in []int) (ce uint32, err error) {
return makeDecompose(in[0], in[1])
}
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0, 0}, 0xA0000000},
{normalCE, []int{0, 0x28, 3, 0}, 0xA0002803},
{normalCE, []int{0, 0x28, 3, 0xFF}, 0xAFF02803},
{normalCE, []int{100, defaultSecondary, 3, 0}, 0x0000C883},
// non-ignorable primary with non-default secondary
{normalCE, []int{100, 0x28, defaultTertiary, 0}, 0x4000C828},
{normalCE, []int{100, defaultSecondary + 8, 3, 0}, 0x0000C983},
{normalCE, []int{100, 0, 3, 0}, 0xFFFF}, // non-ignorable primary with non-supported secondary
{normalCE, []int{100, 1, 3, 0}, 0xFFFF},
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0, 0}, 0xFFFF},
{normalCE, []int{0, 1 << maxSecondaryBits, 0, 0}, 0xFFFF},
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits, 0}, 0xFFFF},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}, 0x88FF0123},
{normalCE, []int{0x123, defaultSecondary + 1, 8, 0xFF}, 0xFFFF},
{contractCE, []int{0, 0, 0}, 0xC0000000},
{contractCE, []int{1, 1, 1}, 0xC0010011},
{contractCE, []int{1, (1 << maxNBits) - 1, 1}, 0xC001001F},
{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}, 0xC001FFF1},
{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}, 0xDFFF0011},
{contractCE, []int{1, (1 << maxNBits), 1}, 0xFFFF},
{contractCE, []int{(1 << maxTrieIndexBits), 1, 1}, 0xFFFF},
{contractCE, []int{1, (1 << maxContractOffsetBits), 1}, 0xFFFF},
{expandCE, []int{0}, 0xE0000000},
{expandCE, []int{5}, 0xE0000005},
{expandCE, []int{(1 << maxExpandIndexBits) - 1}, 0xE000FFFF},
{expandCE, []int{1 << maxExpandIndexBits}, 0xFFFF},
{decompCE, []int{0, 0}, 0xF0000000},
{decompCE, []int{1, 1}, 0xF0000101},
{decompCE, []int{0x1F, 0x1F}, 0xF0001F1F},
{decompCE, []int{256, 0x1F}, 0xFFFF},
{decompCE, []int{0x1F, 256}, 0xFFFF},
}
func TestColElem(t *testing.T) {
for i, tt := range ceTests {
in := make([]int, len(tt.arg))
copy(in, tt.arg)
ce, err := tt.f(in)
if tt.val == 0xFFFF {
if err == nil {
t.Errorf("%d: expected error for args %x", i, tt.arg)
}
continue
}
if err != nil {
t.Errorf("%d: unexpected error: %v", i, err.Error())
}
if ce != tt.val {
t.Errorf("%d: colElem=%X; want %X", i, ce, tt.val)
}
}
}
func mkRawCES(in [][]int) []rawCE {
out := []rawCE{}
for _, w := range in {
out = append(out, rawCE{w: w})
}
return out
}
type weightsTest struct {
a, b [][]int
level colltab.Level
result int
}
var nextWeightTests = []weightsTest{
{
a: [][]int{{100, 20, 5, 0}},
b: [][]int{{101, defaultSecondary, defaultTertiary, 0}},
level: colltab.Primary,
},
{
a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 21, defaultTertiary, 0}},
level: colltab.Secondary,
},
{
a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 20, 6, 0}},
level: colltab.Tertiary,
},
{
a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 20, 5, 0}},
level: colltab.Identity,
},
}
var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}}
func TestNextWeight(t *testing.T) {
for i, tt := range nextWeightTests {
test := func(l colltab.Level, tt weightsTest, a, gold [][]int) {
res := nextWeight(tt.level, mkRawCES(a))
if !equalCEArrays(mkRawCES(gold), res) {
t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res)
}
}
test(-1, tt, tt.a, tt.b)
for l := colltab.Primary; l <= colltab.Tertiary; l++ {
if tt.level <= l {
test(l, tt, append(tt.a, extra[l]), tt.b)
} else {
test(l, tt, append(tt.a, extra[l]), append(tt.b, extra[l]))
}
}
}
}
var compareTests = []weightsTest{
{
[][]int{{100, 20, 5, 0}},
[][]int{{100, 20, 5, 0}},
colltab.Identity,
0,
},
{
[][]int{{100, 20, 5, 0}, extra[0]},
[][]int{{100, 20, 5, 1}},
colltab.Primary,
1,
},
{
[][]int{{100, 20, 5, 0}},
[][]int{{101, 20, 5, 0}},
colltab.Primary,
-1,
},
{
[][]int{{101, 20, 5, 0}},
[][]int{{100, 20, 5, 0}},
colltab.Primary,
1,
},
{
[][]int{{100, 0, 0, 0}, {0, 20, 5, 0}},
[][]int{{0, 20, 5, 0}, {100, 0, 0, 0}},
colltab.Identity,
0,
},
{
[][]int{{100, 20, 5, 0}},
[][]int{{100, 21, 5, 0}},
colltab.Secondary,
-1,
},
{
[][]int{{100, 20, 5, 0}},
[][]int{{100, 20, 2, 0}},
colltab.Tertiary,
1,
},
{
[][]int{{100, 20, 5, 1}},
[][]int{{100, 20, 5, 2}},
colltab.Quaternary,
-1,
},
}
func TestCompareWeights(t *testing.T) {
for i, tt := range compareTests {
test := func(tt weightsTest, a, b [][]int) {
res, level := compareWeights(mkRawCES(a), mkRawCES(b))
if res != tt.result {
t.Errorf("%d: expected comparison result %d; found %d", i, tt.result, res)
}
if level != tt.level {
t.Errorf("%d: expected level %d; found %d", i, tt.level, level)
}
}
test(tt, tt.a, tt.b)
test(tt, append(tt.a, extra[0]), append(tt.b, extra[0]))
}
}

309
vendor/golang.org/x/text/collate/build/contract.go generated vendored Normal file
View file

@ -0,0 +1,309 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"io"
"reflect"
"sort"
"strings"
"golang.org/x/text/internal/colltab"
)
// This file contains code for detecting contractions and generating
// the necessary tables.
// Any Unicode Collation Algorithm (UCA) table entry that has more than
// one rune one the left-hand side is called a contraction.
// See http://www.unicode.org/reports/tr10/#Contractions for more details.
//
// We define the following terms:
// initial: a rune that appears as the first rune in a contraction.
// suffix: a sequence of runes succeeding the initial rune
// in a given contraction.
// non-initial: a rune that appears in a suffix.
//
// A rune may be both an initial and a non-initial and may be so in
// many contractions. An initial may typically also appear by itself.
// In case of ambiguities, the UCA requires we match the longest
// contraction.
//
// Many contraction rules share the same set of possible suffixes.
// We store sets of suffixes in a trie that associates an index with
// each suffix in the set. This index can be used to look up a
// collation element associated with the (starter rune, suffix) pair.
//
// The trie is defined on a UTF-8 byte sequence.
// The overall trie is represented as an array of ctEntries. Each node of the trie
// is represented as a subsequence of ctEntries, where each entry corresponds to
// a possible match of a next character in the search string. An entry
// also includes the length and offset to the next sequence of entries
// to check in case of a match.
const (
final = 0
noIndex = 0xFF
)
// ctEntry associates to a matching byte an offset and/or next sequence of
// bytes to check. A ctEntry c is called final if a match means that the
// longest suffix has been found. An entry c is final if c.N == 0.
// A single final entry can match a range of characters to an offset.
// A non-final entry always matches a single byte. Note that a non-final
// entry might still resemble a completed suffix.
// Examples:
// The suffix strings "ab" and "ac" can be represented as:
// []ctEntry{
// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF.
// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2
// }
//
// The suffix strings "ab", "abc", "abd", and "abcd" can be represented as:
// []ctEntry{
// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'.
// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'.
// {'d', 'd', final, 3}, // "abd" -> 3
// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'.
// {'d', 'd', final, 4}, // "abcd" -> 4
// }
// See genStateTests in contract_test.go for more examples.
type ctEntry struct {
L uint8 // non-final: byte value to match; final: lowest match in range.
H uint8 // non-final: relative index to next block; final: highest match in range.
N uint8 // non-final: length of next block; final: final
I uint8 // result offset. Will be noIndex if more bytes are needed to complete.
}
// contractTrieSet holds a set of contraction tries. The tries are stored
// consecutively in the entry field.
type contractTrieSet []struct{ l, h, n, i uint8 }
// ctHandle is used to identify a trie in the trie set, consisting in an offset
// in the array and the size of the first node.
type ctHandle struct {
index, n int
}
// appendTrie adds a new trie for the given suffixes to the trie set and returns
// a handle to it. The handle will be invalid on error.
func appendTrie(ct *colltab.ContractTrieSet, suffixes []string) (ctHandle, error) {
es := make([]stridx, len(suffixes))
for i, s := range suffixes {
es[i].str = s
}
sort.Sort(offsetSort(es))
for i := range es {
es[i].index = i + 1
}
sort.Sort(genidxSort(es))
i := len(*ct)
n, err := genStates(ct, es)
if err != nil {
*ct = (*ct)[:i]
return ctHandle{}, err
}
return ctHandle{i, n}, nil
}
// genStates generates ctEntries for a given suffix set and returns
// the number of entries for the first node.
func genStates(ct *colltab.ContractTrieSet, sis []stridx) (int, error) {
if len(sis) == 0 {
return 0, fmt.Errorf("genStates: list of suffices must be non-empty")
}
start := len(*ct)
// create entries for differing first bytes.
for _, si := range sis {
s := si.str
if len(s) == 0 {
continue
}
added := false
c := s[0]
if len(s) > 1 {
for j := len(*ct) - 1; j >= start; j-- {
if (*ct)[j].L == c {
added = true
break
}
}
if !added {
*ct = append(*ct, ctEntry{L: c, I: noIndex})
}
} else {
for j := len(*ct) - 1; j >= start; j-- {
// Update the offset for longer suffixes with the same byte.
if (*ct)[j].L == c {
(*ct)[j].I = uint8(si.index)
added = true
}
// Extend range of final ctEntry, if possible.
if (*ct)[j].H+1 == c {
(*ct)[j].H = c
added = true
}
}
if !added {
*ct = append(*ct, ctEntry{L: c, H: c, N: final, I: uint8(si.index)})
}
}
}
n := len(*ct) - start
// Append nodes for the remainder of the suffixes for each ctEntry.
sp := 0
for i, end := start, len(*ct); i < end; i++ {
fe := (*ct)[i]
if fe.H == 0 { // uninitialized non-final
ln := len(*ct) - start - n
if ln > 0xFF {
return 0, fmt.Errorf("genStates: relative block offset too large: %d > 255", ln)
}
fe.H = uint8(ln)
// Find first non-final strings with same byte as current entry.
for ; sis[sp].str[0] != fe.L; sp++ {
}
se := sp + 1
for ; se < len(sis) && len(sis[se].str) > 1 && sis[se].str[0] == fe.L; se++ {
}
sl := sis[sp:se]
sp = se
for i, si := range sl {
sl[i].str = si.str[1:]
}
nn, err := genStates(ct, sl)
if err != nil {
return 0, err
}
fe.N = uint8(nn)
(*ct)[i] = fe
}
}
sort.Sort(entrySort((*ct)[start : start+n]))
return n, nil
}
// There may be both a final and non-final entry for a byte if the byte
// is implied in a range of matches in the final entry.
// We need to ensure that the non-final entry comes first in that case.
type entrySort colltab.ContractTrieSet
func (fe entrySort) Len() int { return len(fe) }
func (fe entrySort) Swap(i, j int) { fe[i], fe[j] = fe[j], fe[i] }
func (fe entrySort) Less(i, j int) bool {
return fe[i].L > fe[j].L
}
// stridx is used for sorting suffixes and their associated offsets.
type stridx struct {
str string
index int
}
// For computing the offsets, we first sort by size, and then by string.
// This ensures that strings that only differ in the last byte by 1
// are sorted consecutively in increasing order such that they can
// be packed as a range in a final ctEntry.
type offsetSort []stridx
func (si offsetSort) Len() int { return len(si) }
func (si offsetSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
func (si offsetSort) Less(i, j int) bool {
if len(si[i].str) != len(si[j].str) {
return len(si[i].str) > len(si[j].str)
}
return si[i].str < si[j].str
}
// For indexing, we want to ensure that strings are sorted in string order, where
// for strings with the same prefix, we put longer strings before shorter ones.
type genidxSort []stridx
func (si genidxSort) Len() int { return len(si) }
func (si genidxSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
func (si genidxSort) Less(i, j int) bool {
if strings.HasPrefix(si[j].str, si[i].str) {
return false
}
if strings.HasPrefix(si[i].str, si[j].str) {
return true
}
return si[i].str < si[j].str
}
// lookup matches the longest suffix in str and returns the associated offset
// and the number of bytes consumed.
func lookup(ct *colltab.ContractTrieSet, h ctHandle, str []byte) (index, ns int) {
states := (*ct)[h.index:]
p := 0
n := h.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
if c >= e.L {
if e.L == c {
p++
if e.I != noIndex {
index, ns = int(e.I), p
}
if e.N != final {
// set to new state
i, states, n = 0, states[int(e.H)+n:], int(e.N)
} else {
return
}
continue
} else if e.N == final && c <= e.H {
p++
return int(c-e.L) + int(e.I), p
}
}
i++
}
return
}
// print writes the contractTrieSet t as compilable Go code to w. It returns
// the total number of bytes written and the size of the resulting data structure in bytes.
func print(t *colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
update3 := func(nn, sz int, e error) {
n += nn
if err == nil {
err = e
}
size += sz
}
update2 := func(nn int, e error) { update3(nn, 0, e) }
update3(printArray(*t, w, name))
update2(fmt.Fprintf(w, "var %sContractTrieSet = ", name))
update3(printStruct(*t, w, name))
update2(fmt.Fprintln(w))
return
}
func printArray(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
size = len(ct) * 4
p("// %sCTEntries: %d entries, %d bytes\n", name, len(ct), size)
p("var %sCTEntries = [%d]struct{L,H,N,I uint8}{\n", name, len(ct))
for _, fe := range ct {
p("\t{0x%X, 0x%X, %d, %d},\n", fe.L, fe.H, fe.N, fe.I)
}
p("}\n")
return
}
func printStruct(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
n, err = fmt.Fprintf(w, "colltab.ContractTrieSet( %sCTEntries[:] )", name)
size = int(reflect.TypeOf(ct).Size())
return
}

266
vendor/golang.org/x/text/collate/build/contract_test.go generated vendored Normal file
View file

@ -0,0 +1,266 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"bytes"
"sort"
"testing"
"golang.org/x/text/internal/colltab"
)
var largetosmall = []stridx{
{"a", 5},
{"ab", 4},
{"abc", 3},
{"abcd", 2},
{"abcde", 1},
{"abcdef", 0},
}
var offsetSortTests = [][]stridx{
{
{"bcde", 1},
{"bc", 5},
{"ab", 4},
{"bcd", 3},
{"abcd", 0},
{"abc", 2},
},
largetosmall,
}
func TestOffsetSort(t *testing.T) {
for i, st := range offsetSortTests {
sort.Sort(offsetSort(st))
for j, si := range st {
if j != si.index {
t.Errorf("%d: failed: %v", i, st)
}
}
}
for i, tt := range genStateTests {
// ensure input is well-formed
sort.Sort(offsetSort(tt.in))
for j, si := range tt.in {
if si.index != j+1 {
t.Errorf("%dth sort failed: %v", i, tt.in)
}
}
}
}
var genidxtest1 = []stridx{
{"bcde", 3},
{"bc", 6},
{"ab", 2},
{"bcd", 5},
{"abcd", 0},
{"abc", 1},
{"bcdf", 4},
}
var genidxSortTests = [][]stridx{
genidxtest1,
largetosmall,
}
func TestGenIdxSort(t *testing.T) {
for i, st := range genidxSortTests {
sort.Sort(genidxSort(st))
for j, si := range st {
if j != si.index {
t.Errorf("%dth sort failed %v", i, st)
break
}
}
}
}
var entrySortTests = []colltab.ContractTrieSet{
{
{10, 0, 1, 3},
{99, 0, 1, 0},
{20, 50, 0, 2},
{30, 0, 1, 1},
},
}
func TestEntrySort(t *testing.T) {
for i, et := range entrySortTests {
sort.Sort(entrySort(et))
for j, fe := range et {
if j != int(fe.I) {
t.Errorf("%dth sort failed %v", i, et)
break
}
}
}
}
type GenStateTest struct {
in []stridx
firstBlockLen int
out colltab.ContractTrieSet
}
var genStateTests = []GenStateTest{
{[]stridx{
{"abc", 1},
},
1,
colltab.ContractTrieSet{
{'a', 0, 1, noIndex},
{'b', 0, 1, noIndex},
{'c', 'c', final, 1},
},
},
{[]stridx{
{"abc", 1},
{"abd", 2},
{"abe", 3},
},
1,
colltab.ContractTrieSet{
{'a', 0, 1, noIndex},
{'b', 0, 1, noIndex},
{'c', 'e', final, 1},
},
},
{[]stridx{
{"abc", 1},
{"ab", 2},
{"a", 3},
},
1,
colltab.ContractTrieSet{
{'a', 0, 1, 3},
{'b', 0, 1, 2},
{'c', 'c', final, 1},
},
},
{[]stridx{
{"abc", 1},
{"abd", 2},
{"ab", 3},
{"ac", 4},
{"a", 5},
{"b", 6},
},
2,
colltab.ContractTrieSet{
{'b', 'b', final, 6},
{'a', 0, 2, 5},
{'c', 'c', final, 4},
{'b', 0, 1, 3},
{'c', 'd', final, 1},
},
},
{[]stridx{
{"bcde", 2},
{"bc", 7},
{"ab", 6},
{"bcd", 5},
{"abcd", 1},
{"abc", 4},
{"bcdf", 3},
},
2,
colltab.ContractTrieSet{
{'b', 3, 1, noIndex},
{'a', 0, 1, noIndex},
{'b', 0, 1, 6},
{'c', 0, 1, 4},
{'d', 'd', final, 1},
{'c', 0, 1, 7},
{'d', 0, 1, 5},
{'e', 'f', final, 2},
},
},
}
func TestGenStates(t *testing.T) {
for i, tt := range genStateTests {
si := []stridx{}
for _, e := range tt.in {
si = append(si, e)
}
// ensure input is well-formed
sort.Sort(genidxSort(si))
ct := colltab.ContractTrieSet{}
n, _ := genStates(&ct, si)
if nn := tt.firstBlockLen; nn != n {
t.Errorf("%d: block len %v; want %v", i, n, nn)
}
if lv, lw := len(ct), len(tt.out); lv != lw {
t.Errorf("%d: len %v; want %v", i, lv, lw)
continue
}
for j, fe := range tt.out {
const msg = "%d:%d: value %s=%v; want %v"
if fe.L != ct[j].L {
t.Errorf(msg, i, j, "l", ct[j].L, fe.L)
}
if fe.H != ct[j].H {
t.Errorf(msg, i, j, "h", ct[j].H, fe.H)
}
if fe.N != ct[j].N {
t.Errorf(msg, i, j, "n", ct[j].N, fe.N)
}
if fe.I != ct[j].I {
t.Errorf(msg, i, j, "i", ct[j].I, fe.I)
}
}
}
}
func TestLookupContraction(t *testing.T) {
for i, tt := range genStateTests {
input := []string{}
for _, e := range tt.in {
input = append(input, e.str)
}
cts := colltab.ContractTrieSet{}
h, _ := appendTrie(&cts, input)
for j, si := range tt.in {
str := si.str
for _, s := range []string{str, str + "X"} {
msg := "%d:%d: %s(%s) %v; want %v"
idx, sn := lookup(&cts, h, []byte(s))
if idx != si.index {
t.Errorf(msg, i, j, "index", s, idx, si.index)
}
if sn != len(str) {
t.Errorf(msg, i, j, "sn", s, sn, len(str))
}
}
}
}
}
func TestPrintContractionTrieSet(t *testing.T) {
testdata := colltab.ContractTrieSet(genStateTests[4].out)
buf := &bytes.Buffer{}
print(&testdata, buf, "test")
if contractTrieOutput != buf.String() {
t.Errorf("output differs; found\n%s", buf.String())
println(string(buf.Bytes()))
}
}
const contractTrieOutput = `// testCTEntries: 8 entries, 32 bytes
var testCTEntries = [8]struct{L,H,N,I uint8}{
{0x62, 0x3, 1, 255},
{0x61, 0x0, 1, 255},
{0x62, 0x0, 1, 6},
{0x63, 0x0, 1, 4},
{0x64, 0x64, 0, 1},
{0x63, 0x0, 1, 7},
{0x64, 0x0, 1, 5},
{0x65, 0x66, 0, 2},
}
var testContractTrieSet = colltab.ContractTrieSet( testCTEntries[:] )
`

393
vendor/golang.org/x/text/collate/build/order.go generated vendored Normal file
View file

@ -0,0 +1,393 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"log"
"sort"
"strings"
"unicode"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/unicode/norm"
)
type logicalAnchor int
const (
firstAnchor logicalAnchor = -1
noAnchor = 0
lastAnchor = 1
)
// entry is used to keep track of a single entry in the collation element table
// during building. Examples of entries can be found in the Default Unicode
// Collation Element Table.
// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
type entry struct {
str string // same as string(runes)
runes []rune
elems []rawCE // the collation elements
extend string // weights of extend to be appended to elems
before bool // weights relative to next instead of previous.
lock bool // entry is used in extension and can no longer be moved.
// prev, next, and level are used to keep track of tailorings.
prev, next *entry
level colltab.Level // next differs at this level
skipRemove bool // do not unlink when removed
decompose bool // can use NFKD decomposition to generate elems
exclude bool // do not include in table
implicit bool // derived, is not included in the list
modified bool // entry was modified in tailoring
logical logicalAnchor
expansionIndex int // used to store index into expansion table
contractionHandle ctHandle
contractionIndex int // index into contraction elements
}
func (e *entry) String() string {
return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
}
func (e *entry) skip() bool {
return e.contraction()
}
func (e *entry) expansion() bool {
return !e.decompose && len(e.elems) > 1
}
func (e *entry) contraction() bool {
return len(e.runes) > 1
}
func (e *entry) contractionStarter() bool {
return e.contractionHandle.n != 0
}
// nextIndexed gets the next entry that needs to be stored in the table.
// It returns the entry and the collation level at which the next entry differs
// from the current entry.
// Entries that can be explicitly derived and logical reset positions are
// examples of entries that will not be indexed.
func (e *entry) nextIndexed() (*entry, colltab.Level) {
level := e.level
for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
if e.level < level {
level = e.level
}
}
return e, level
}
// remove unlinks entry e from the sorted chain and clears the collation
// elements. e may not be at the front or end of the list. This should always
// be the case, as the front and end of the list are always logical anchors,
// which may not be removed.
func (e *entry) remove() {
if e.logical != noAnchor {
log.Fatalf("may not remove anchor %q", e.str)
}
// TODO: need to set e.prev.level to e.level if e.level is smaller?
e.elems = nil
if !e.skipRemove {
if e.prev != nil {
e.prev.next = e.next
}
if e.next != nil {
e.next.prev = e.prev
}
}
e.skipRemove = false
}
// insertAfter inserts n after e.
func (e *entry) insertAfter(n *entry) {
if e == n {
panic("e == anchor")
}
if e == nil {
panic("unexpected nil anchor")
}
n.remove()
n.decompose = false // redo decomposition test
n.next = e.next
n.prev = e
if e.next != nil {
e.next.prev = n
}
e.next = n
}
// insertBefore inserts n before e.
func (e *entry) insertBefore(n *entry) {
if e == n {
panic("e == anchor")
}
if e == nil {
panic("unexpected nil anchor")
}
n.remove()
n.decompose = false // redo decomposition test
n.prev = e.prev
n.next = e
if e.prev != nil {
e.prev.next = n
}
e.prev = n
}
func (e *entry) encodeBase() (ce uint32, err error) {
switch {
case e.expansion():
ce, err = makeExpandIndex(e.expansionIndex)
default:
if e.decompose {
log.Fatal("decompose should be handled elsewhere")
}
ce, err = makeCE(e.elems[0])
}
return
}
func (e *entry) encode() (ce uint32, err error) {
if e.skip() {
log.Fatal("cannot build colElem for entry that should be skipped")
}
switch {
case e.decompose:
t1 := e.elems[0].w[2]
t2 := 0
if len(e.elems) > 1 {
t2 = e.elems[1].w[2]
}
ce, err = makeDecompose(t1, t2)
case e.contractionStarter():
ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
default:
if len(e.runes) > 1 {
log.Fatal("colElem: contractions are handled in contraction trie")
}
ce, err = e.encodeBase()
}
return
}
// entryLess returns true if a sorts before b and false otherwise.
func entryLess(a, b *entry) bool {
if res, _ := compareWeights(a.elems, b.elems); res != 0 {
return res == -1
}
if a.logical != noAnchor {
return a.logical == firstAnchor
}
if b.logical != noAnchor {
return b.logical == lastAnchor
}
return a.str < b.str
}
type sortedEntries []*entry
func (s sortedEntries) Len() int {
return len(s)
}
func (s sortedEntries) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s sortedEntries) Less(i, j int) bool {
return entryLess(s[i], s[j])
}
type ordering struct {
id string
entryMap map[string]*entry
ordered []*entry
handle *trieHandle
}
// insert inserts e into both entryMap and ordered.
// Note that insert simply appends e to ordered. To reattain a sorted
// order, o.sort() should be called.
func (o *ordering) insert(e *entry) {
if e.logical == noAnchor {
o.entryMap[e.str] = e
} else {
// Use key format as used in UCA rules.
o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
// Also add index entry for XML format.
o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
}
o.ordered = append(o.ordered, e)
}
// newEntry creates a new entry for the given info and inserts it into
// the index.
func (o *ordering) newEntry(s string, ces []rawCE) *entry {
e := &entry{
runes: []rune(s),
elems: ces,
str: s,
}
o.insert(e)
return e
}
// find looks up and returns the entry for the given string.
// It returns nil if str is not in the index and if an implicit value
// cannot be derived, that is, if str represents more than one rune.
func (o *ordering) find(str string) *entry {
e := o.entryMap[str]
if e == nil {
r := []rune(str)
if len(r) == 1 {
const (
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r[0] >= firstHangul && r[0] <= lastHangul {
ce := []rawCE{}
nfd := norm.NFD.String(str)
for _, r := range nfd {
ce = append(ce, o.find(string(r)).elems...)
}
e = o.newEntry(nfd, ce)
} else {
e = o.newEntry(string(r[0]), []rawCE{
{w: []int{
implicitPrimary(r[0]),
defaultSecondary,
defaultTertiary,
int(r[0]),
},
},
})
e.modified = true
}
e.exclude = true // do not index implicits
}
}
return e
}
// makeRootOrdering returns a newly initialized ordering value and populates
// it with a set of logical reset points that can be used as anchors.
// The anchors first_tertiary_ignorable and __END__ will always sort at
// the beginning and end, respectively. This means that prev and next are non-nil
// for any indexed entry.
func makeRootOrdering() ordering {
const max = unicode.MaxRune
o := ordering{
entryMap: make(map[string]*entry),
}
insert := func(typ logicalAnchor, s string, ce []int) {
e := &entry{
elems: []rawCE{{w: ce}},
str: s,
exclude: true,
logical: typ,
}
o.insert(e)
}
insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
return o
}
// patchForInsert eleminates entries from the list with more than one collation element.
// The next and prev fields of the eliminated entries still point to appropriate
// values in the newly created list.
// It requires that sort has been called.
func (o *ordering) patchForInsert() {
for i := 0; i < len(o.ordered)-1; {
e := o.ordered[i]
lev := e.level
n := e.next
for ; n != nil && len(n.elems) > 1; n = n.next {
if n.level < lev {
lev = n.level
}
n.skipRemove = true
}
for ; o.ordered[i] != n; i++ {
o.ordered[i].level = lev
o.ordered[i].next = n
o.ordered[i+1].prev = e
}
}
}
// clone copies all ordering of es into a new ordering value.
func (o *ordering) clone() *ordering {
o.sort()
oo := ordering{
entryMap: make(map[string]*entry),
}
for _, e := range o.ordered {
ne := &entry{
runes: e.runes,
elems: e.elems,
str: e.str,
decompose: e.decompose,
exclude: e.exclude,
logical: e.logical,
}
oo.insert(ne)
}
oo.sort() // link all ordering.
oo.patchForInsert()
return &oo
}
// front returns the first entry to be indexed.
// It assumes that sort() has been called.
func (o *ordering) front() *entry {
e := o.ordered[0]
if e.prev != nil {
log.Panicf("unexpected first entry: %v", e)
}
// The first entry is always a logical position, which should not be indexed.
e, _ = e.nextIndexed()
return e
}
// sort sorts all ordering based on their collation elements and initializes
// the prev, next, and level fields accordingly.
func (o *ordering) sort() {
sort.Sort(sortedEntries(o.ordered))
l := o.ordered
for i := 1; i < len(l); i++ {
k := i - 1
l[k].next = l[i]
_, l[k].level = compareWeights(l[k].elems, l[i].elems)
l[i].prev = l[k]
}
}
// genColElems generates a collation element array from the runes in str. This
// assumes that all collation elements have already been added to the Builder.
func (o *ordering) genColElems(str string) []rawCE {
elems := []rawCE{}
for _, r := range []rune(str) {
for _, ce := range o.find(string(r)).elems {
if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
elems = append(elems, ce)
}
}
}
return elems
}

229
vendor/golang.org/x/text/collate/build/order_test.go generated vendored Normal file
View file

@ -0,0 +1,229 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"strconv"
"testing"
"golang.org/x/text/internal/colltab"
)
type entryTest struct {
f func(in []int) (uint32, error)
arg []int
val uint32
}
// makeList returns a list of entries of length n+2, with n normal
// entries plus a leading and trailing anchor.
func makeList(n int) []*entry {
es := make([]*entry, n+2)
weights := []rawCE{{w: []int{100, 20, 5, 0}}}
for i := range es {
runes := []rune{rune(i)}
es[i] = &entry{
runes: runes,
elems: weights,
}
weights = nextWeight(colltab.Primary, weights)
}
for i := 1; i < len(es); i++ {
es[i-1].next = es[i]
es[i].prev = es[i-1]
_, es[i-1].level = compareWeights(es[i-1].elems, es[i].elems)
}
es[0].exclude = true
es[0].logical = firstAnchor
es[len(es)-1].exclude = true
es[len(es)-1].logical = lastAnchor
return es
}
func TestNextIndexed(t *testing.T) {
const n = 5
es := makeList(n)
for i := int64(0); i < 1<<n; i++ {
mask := strconv.FormatInt(i+(1<<n), 2)
for i, c := range mask {
es[i].exclude = c == '1'
}
e := es[0]
for i, c := range mask {
if c == '0' {
e, _ = e.nextIndexed()
if e != es[i] {
t.Errorf("%d: expected entry %d; found %d", i, es[i].elems, e.elems)
}
}
}
if e, _ = e.nextIndexed(); e != nil {
t.Errorf("%d: expected nil entry; found %d", i, e.elems)
}
}
}
func TestRemove(t *testing.T) {
const n = 5
for i := int64(0); i < 1<<n; i++ {
es := makeList(n)
mask := strconv.FormatInt(i+(1<<n), 2)
for i, c := range mask {
if c == '0' {
es[i].remove()
}
}
e := es[0]
for i, c := range mask {
if c == '1' {
if e != es[i] {
t.Errorf("%d: expected entry %d; found %d", i, es[i].elems, e.elems)
}
e, _ = e.nextIndexed()
}
}
if e != nil {
t.Errorf("%d: expected nil entry; found %d", i, e.elems)
}
}
}
// nextPerm generates the next permutation of the array. The starting
// permutation is assumed to be a list of integers sorted in increasing order.
// It returns false if there are no more permuations left.
func nextPerm(a []int) bool {
i := len(a) - 2
for ; i >= 0; i-- {
if a[i] < a[i+1] {
break
}
}
if i < 0 {
return false
}
for j := len(a) - 1; j >= i; j-- {
if a[j] > a[i] {
a[i], a[j] = a[j], a[i]
break
}
}
for j := i + 1; j < (len(a)+i+1)/2; j++ {
a[j], a[len(a)+i-j] = a[len(a)+i-j], a[j]
}
return true
}
func TestInsertAfter(t *testing.T) {
const n = 5
orig := makeList(n)
perm := make([]int, n)
for i := range perm {
perm[i] = i + 1
}
for ok := true; ok; ok = nextPerm(perm) {
es := makeList(n)
last := es[0]
for _, i := range perm {
last.insertAfter(es[i])
last = es[i]
}
for _, e := range es {
e.elems = es[0].elems
}
e := es[0]
for _, i := range perm {
e, _ = e.nextIndexed()
if e.runes[0] != orig[i].runes[0] {
t.Errorf("%d:%d: expected entry %X; found %X", perm, i, orig[i].runes, e.runes)
break
}
}
}
}
func TestInsertBefore(t *testing.T) {
const n = 5
orig := makeList(n)
perm := make([]int, n)
for i := range perm {
perm[i] = i + 1
}
for ok := true; ok; ok = nextPerm(perm) {
es := makeList(n)
last := es[len(es)-1]
for _, i := range perm {
last.insertBefore(es[i])
last = es[i]
}
for _, e := range es {
e.elems = es[0].elems
}
e := es[0]
for i := n - 1; i >= 0; i-- {
e, _ = e.nextIndexed()
if e.runes[0] != rune(perm[i]) {
t.Errorf("%d:%d: expected entry %X; found %X", perm, i, orig[i].runes, e.runes)
break
}
}
}
}
type entryLessTest struct {
a, b *entry
res bool
}
var (
w1 = []rawCE{{w: []int{100, 20, 5, 5}}}
w2 = []rawCE{{w: []int{101, 20, 5, 5}}}
)
var entryLessTests = []entryLessTest{
{&entry{str: "a", elems: w1},
&entry{str: "a", elems: w1},
false,
},
{&entry{str: "a", elems: w1},
&entry{str: "a", elems: w2},
true,
},
{&entry{str: "a", elems: w1},
&entry{str: "b", elems: w1},
true,
},
{&entry{str: "a", elems: w2},
&entry{str: "a", elems: w1},
false,
},
{&entry{str: "c", elems: w1},
&entry{str: "b", elems: w1},
false,
},
{&entry{str: "a", elems: w1, logical: firstAnchor},
&entry{str: "a", elems: w1},
true,
},
{&entry{str: "a", elems: w1},
&entry{str: "b", elems: w1, logical: firstAnchor},
false,
},
{&entry{str: "b", elems: w1},
&entry{str: "a", elems: w1, logical: lastAnchor},
true,
},
{&entry{str: "a", elems: w1, logical: lastAnchor},
&entry{str: "c", elems: w1},
false,
},
}
func TestEntryLess(t *testing.T) {
for i, tt := range entryLessTests {
if res := entryLess(tt.a, tt.b); res != tt.res {
t.Errorf("%d: was %v; want %v", i, res, tt.res)
}
}
}

81
vendor/golang.org/x/text/collate/build/table.go generated vendored Normal file
View file

@ -0,0 +1,81 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"fmt"
"io"
"reflect"
"golang.org/x/text/internal/colltab"
)
// table is an intermediate structure that roughly resembles the table in collate.
type table struct {
colltab.Table
trie trie
root *trieHandle
}
// print writes the table as Go compilable code to w. It prefixes the
// variable names with name. It returns the number of bytes written
// and the size of the resulting table.
func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
update := func(nn, sz int, e error) {
n += nn
if err == nil {
err = e
}
size += sz
}
// Write arrays needed for the structure.
update(printColElems(w, t.ExpandElem, name+"ExpandElem"))
update(printColElems(w, t.ContractElem, name+"ContractElem"))
update(t.trie.printArrays(w, name))
update(printArray(t.ContractTries, w, name))
nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
update(nn, 0, e)
return
}
func (t *table) fprintIndex(w io.Writer, h *trieHandle, id string) (n int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
p("\t{ // %s\n", id)
p("\t\tlookupOffset: 0x%x,\n", h.lookupStart)
p("\t\tvaluesOffset: 0x%x,\n", h.valueStart)
p("\t},\n")
return
}
func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
sz = len(a) * int(reflect.TypeOf(uint32(0)).Size())
p("// %s: %d entries, %d bytes\n", name, len(a), sz)
p("var %s = [%d]uint32 {", name, len(a))
for i, c := range a {
switch {
case i%64 == 0:
p("\n\t// Block %d, offset 0x%x\n", i/64, i)
case (i%64)%6 == 0:
p("\n\t")
}
p("0x%.8X, ", c)
}
p("\n}\n\n")
return
}

290
vendor/golang.org/x/text/collate/build/trie.go generated vendored Normal file
View file

@ -0,0 +1,290 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The trie in this file is used to associate the first full character
// in a UTF-8 string to a collation element.
// All but the last byte in a UTF-8 byte sequence are
// used to look up offsets in the index table to be used for the next byte.
// The last byte is used to index into a table of collation elements.
// This file contains the code for the generation of the trie.
package build
import (
"fmt"
"hash/fnv"
"io"
"reflect"
)
const (
blockSize = 64
blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
)
type trieHandle struct {
lookupStart uint16 // offset in table for first byte
valueStart uint16 // offset in table for first byte
}
type trie struct {
index []uint16
values []uint32
}
// trieNode is the intermediate trie structure used for generating a trie.
type trieNode struct {
index []*trieNode
value []uint32
b byte
refValue uint16
refIndex uint16
}
func newNode() *trieNode {
return &trieNode{
index: make([]*trieNode, 64),
value: make([]uint32, 128), // root node size is 128 instead of 64
}
}
func (n *trieNode) isInternal() bool {
return n.value != nil
}
func (n *trieNode) insert(r rune, value uint32) {
const maskx = 0x3F // mask out two most-significant bits
str := string(r)
if len(str) == 1 {
n.value[str[0]] = value
return
}
for i := 0; i < len(str)-1; i++ {
b := str[i] & maskx
if n.index == nil {
n.index = make([]*trieNode, blockSize)
}
nn := n.index[b]
if nn == nil {
nn = &trieNode{}
nn.b = b
n.index[b] = nn
}
n = nn
}
if n.value == nil {
n.value = make([]uint32, blockSize)
}
b := str[len(str)-1] & maskx
n.value[b] = value
}
type trieBuilder struct {
t *trie
roots []*trieHandle
lookupBlocks []*trieNode
valueBlocks []*trieNode
lookupBlockIdx map[uint32]*trieNode
valueBlockIdx map[uint32]*trieNode
}
func newTrieBuilder() *trieBuilder {
index := &trieBuilder{}
index.lookupBlocks = make([]*trieNode, 0)
index.valueBlocks = make([]*trieNode, 0)
index.lookupBlockIdx = make(map[uint32]*trieNode)
index.valueBlockIdx = make(map[uint32]*trieNode)
// The third nil is the default null block. The other two blocks
// are used to guarantee an offset of at least 3 for each block.
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
index.t = &trie{}
return index
}
func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
hasher := fnv.New32()
if n.index != nil {
for i, nn := range n.index {
var vi, vv uint16
if nn != nil {
nn = b.computeOffsets(nn)
n.index[i] = nn
vi = nn.refIndex
vv = nn.refValue
}
hasher.Write([]byte{byte(vi >> 8), byte(vi)})
hasher.Write([]byte{byte(vv >> 8), byte(vv)})
}
h := hasher.Sum32()
nn, ok := b.lookupBlockIdx[h]
if !ok {
n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset
b.lookupBlocks = append(b.lookupBlocks, n)
b.lookupBlockIdx[h] = n
} else {
n = nn
}
} else {
for _, v := range n.value {
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
h := hasher.Sum32()
nn, ok := b.valueBlockIdx[h]
if !ok {
n.refValue = uint16(len(b.valueBlocks)) - blockOffset
n.refIndex = n.refValue
b.valueBlocks = append(b.valueBlocks, n)
b.valueBlockIdx[h] = n
} else {
n = nn
}
}
return n
}
func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
hasher := fnv.New32()
for _, v := range n.value[:2*blockSize] {
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
}
h := hasher.Sum32()
nn, ok := b.valueBlockIdx[h]
if !ok {
n.refValue = uint16(len(b.valueBlocks))
n.refIndex = n.refValue
b.valueBlocks = append(b.valueBlocks, n)
// Add a dummy block to accommodate the double block size.
b.valueBlocks = append(b.valueBlocks, nil)
b.valueBlockIdx[h] = n
} else {
n = nn
}
return n.refValue
}
func genValueBlock(t *trie, n *trieNode) {
if n != nil {
for _, v := range n.value {
t.values = append(t.values, v)
}
}
}
func genLookupBlock(t *trie, n *trieNode) {
for _, nn := range n.index {
v := uint16(0)
if nn != nil {
if n.index != nil {
v = nn.refIndex
} else {
v = nn.refValue
}
}
t.index = append(t.index, v)
}
}
func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
h := &trieHandle{}
b.roots = append(b.roots, h)
h.valueStart = b.addStartValueBlock(n)
if len(b.roots) == 1 {
// We insert a null block after the first start value block.
// This ensures that continuation bytes UTF-8 sequences of length
// greater than 2 will automatically hit a null block if there
// was an undefined entry.
b.valueBlocks = append(b.valueBlocks, nil)
}
n = b.computeOffsets(n)
// Offset by one extra block as the first byte starts at 0xC0 instead of 0x80.
h.lookupStart = n.refIndex - 1
return h
}
// generate generates and returns the trie for n.
func (b *trieBuilder) generate() (t *trie, err error) {
t = b.t
if len(b.valueBlocks) >= 1<<16 {
return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
}
if len(b.lookupBlocks) >= 1<<16 {
return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
}
genValueBlock(t, b.valueBlocks[0])
genValueBlock(t, &trieNode{value: make([]uint32, 64)})
for i := 2; i < len(b.valueBlocks); i++ {
genValueBlock(t, b.valueBlocks[i])
}
n := &trieNode{index: make([]*trieNode, 64)}
genLookupBlock(t, n)
genLookupBlock(t, n)
genLookupBlock(t, n)
for i := 3; i < len(b.lookupBlocks); i++ {
genLookupBlock(t, b.lookupBlocks[i])
}
return b.t, nil
}
func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
n += nn
if err == nil {
err = e
}
}
nv := len(t.values)
p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4)
p("// Block 2 is the null block.\n")
p("var %sValues = [%d]uint32 {", name, nv)
var printnewline bool
for i, v := range t.values {
if i%blockSize == 0 {
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
}
if i%4 == 0 {
printnewline = true
}
if v != 0 {
if printnewline {
p("\n\t")
printnewline = false
}
p("%#04x:%#08x, ", i, v)
}
}
p("\n}\n\n")
ni := len(t.index)
p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2)
p("// Block 0 is the null block.\n")
p("var %sLookup = [%d]uint16 {", name, ni)
printnewline = false
for i, v := range t.index {
if i%blockSize == 0 {
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
}
if i%8 == 0 {
printnewline = true
}
if v != 0 {
if printnewline {
p("\n\t")
printnewline = false
}
p("%#03x:%#02x, ", i, v)
}
}
p("\n}\n\n")
return n, nv*4 + ni*2, err
}
func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
sz += int(reflect.TypeOf(trie{}).Size())
return
}

107
vendor/golang.org/x/text/collate/build/trie_test.go generated vendored Normal file
View file

@ -0,0 +1,107 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"bytes"
"fmt"
"testing"
)
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
}
func makeTestTrie(t *testing.T) trie {
n := newNode()
for i, r := range testRunes {
n.insert(r, uint32(i))
}
idx := newTrieBuilder()
idx.addTrie(n)
tr, err := idx.generate()
if err != nil {
t.Errorf(err.Error())
}
return *tr
}
func TestGenerateTrie(t *testing.T) {
testdata := makeTestTrie(t)
buf := &bytes.Buffer{}
testdata.printArrays(buf, "test")
fmt.Fprintf(buf, "var testTrie = ")
testdata.printStruct(buf, &trieHandle{19, 0}, "test")
if output != buf.String() {
t.Error("output differs")
}
}
var output = `// testValues: 832 entries, 3328 bytes
// Block 2 is the null block.
var testValues = [832]uint32 {
// Block 0x0, offset 0x0
0x000c:0x00000001,
// Block 0x1, offset 0x40
0x007f:0x00000002,
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x00c0:0x00000003,
// Block 0x4, offset 0x100
0x0100:0x00000004,
// Block 0x5, offset 0x140
0x0140:0x0000000c, 0x0141:0x0000000d, 0x0142:0x0000000e,
0x0150:0x0000000f,
0x0155:0x00000010,
// Block 0x6, offset 0x180
0x01bf:0x00000005,
// Block 0x7, offset 0x1c0
0x01c0:0x00000006,
// Block 0x8, offset 0x200
0x0219:0x00000007,
// Block 0x9, offset 0x240
0x027f:0x00000008,
// Block 0xa, offset 0x280
0x0280:0x00000009,
// Block 0xb, offset 0x2c0
0x02c1:0x0000000a,
// Block 0xc, offset 0x300
0x033f:0x0000000b,
}
// testLookup: 640 entries, 1280 bytes
// Block 0 is the null block.
var testLookup = [640]uint16 {
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0e0:0x05, 0x0e6:0x06,
// Block 0x4, offset 0x100
0x13f:0x07,
// Block 0x5, offset 0x140
0x140:0x08, 0x144:0x09,
// Block 0x6, offset 0x180
0x190:0x03,
// Block 0x7, offset 0x1c0
0x1ff:0x0a,
// Block 0x8, offset 0x200
0x20f:0x05,
// Block 0x9, offset 0x240
0x242:0x01, 0x244:0x02,
0x248:0x03,
0x25f:0x04,
0x260:0x01,
0x26f:0x02,
0x270:0x04, 0x274:0x06,
}
var testTrie = trie{ testLookup[1216:], testValues[0:], testLookup[:], testValues[:]}`