Checking in vendor folder for ease of using go get.
This commit is contained in:
parent
7a1251853b
commit
cdb4b5a1d0
3554 changed files with 1270116 additions and 0 deletions
702
vendor/golang.org/x/text/collate/build/builder.go
generated
vendored
Normal file
702
vendor/golang.org/x/text/collate/build/builder.go
generated
vendored
Normal file
|
@ -0,0 +1,702 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build // import "golang.org/x/text/collate/build"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// TODO: optimizations:
|
||||
// - expandElem is currently 20K. By putting unique colElems in a separate
|
||||
// table and having a byte array of indexes into this table, we can reduce
|
||||
// the total size to about 7K. By also factoring out the length bytes, we
|
||||
// can reduce this to about 6K.
|
||||
// - trie valueBlocks are currently 100K. There are a lot of sparse blocks
|
||||
// and many consecutive values with the same stride. This can be further
|
||||
// compacted.
|
||||
// - Compress secondary weights into 8 bits.
|
||||
// - Some LDML specs specify a context element. Currently we simply concatenate
|
||||
// those. Context can be implemented using the contraction trie. If Builder
|
||||
// could analyze and detect when using a context makes sense, there is no
|
||||
// need to expose this construct in the API.
|
||||
|
||||
// A Builder builds a root collation table. The user must specify the
|
||||
// collation elements for each entry. A common use will be to base the weights
|
||||
// on those specified in the allkeys* file as provided by the UCA or CLDR.
|
||||
type Builder struct {
|
||||
index *trieBuilder
|
||||
root ordering
|
||||
locale []*Tailoring
|
||||
t *table
|
||||
err error
|
||||
built bool
|
||||
|
||||
minNonVar int // lowest primary recorded for a variable
|
||||
varTop int // highest primary recorded for a non-variable
|
||||
|
||||
// indexes used for reusing expansions and contractions
|
||||
expIndex map[string]int // positions of expansions keyed by their string representation
|
||||
ctHandle map[string]ctHandle // contraction handles keyed by a concatenation of the suffixes
|
||||
ctElem map[string]int // contraction elements keyed by their string representation
|
||||
}
|
||||
|
||||
// A Tailoring builds a collation table based on another collation table.
|
||||
// The table is defined by specifying tailorings to the underlying table.
|
||||
// See http://unicode.org/reports/tr35/ for an overview of tailoring
|
||||
// collation tables. The CLDR contains pre-defined tailorings for a variety
|
||||
// of languages (See http://www.unicode.org/Public/cldr/<version>/core.zip.)
|
||||
type Tailoring struct {
|
||||
id string
|
||||
builder *Builder
|
||||
index *ordering
|
||||
|
||||
anchor *entry
|
||||
before bool
|
||||
}
|
||||
|
||||
// NewBuilder returns a new Builder.
|
||||
func NewBuilder() *Builder {
|
||||
return &Builder{
|
||||
index: newTrieBuilder(),
|
||||
root: makeRootOrdering(),
|
||||
expIndex: make(map[string]int),
|
||||
ctHandle: make(map[string]ctHandle),
|
||||
ctElem: make(map[string]int),
|
||||
}
|
||||
}
|
||||
|
||||
// Tailoring returns a Tailoring for the given locale. One should
|
||||
// have completed all calls to Add before calling Tailoring.
|
||||
func (b *Builder) Tailoring(loc language.Tag) *Tailoring {
|
||||
t := &Tailoring{
|
||||
id: loc.String(),
|
||||
builder: b,
|
||||
index: b.root.clone(),
|
||||
}
|
||||
t.index.id = t.id
|
||||
b.locale = append(b.locale, t)
|
||||
return t
|
||||
}
|
||||
|
||||
// Add adds an entry to the collation element table, mapping
|
||||
// a slice of runes to a sequence of collation elements.
|
||||
// A collation element is specified as list of weights: []int{primary, secondary, ...}.
|
||||
// The entries are typically obtained from a collation element table
|
||||
// as defined in http://www.unicode.org/reports/tr10/#Data_Table_Format.
|
||||
// Note that the collation elements specified by colelems are only used
|
||||
// as a guide. The actual weights generated by Builder may differ.
|
||||
// The argument variables is a list of indices into colelems that should contain
|
||||
// a value for each colelem that is a variable. (See the reference above.)
|
||||
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
||||
str := string(runes)
|
||||
elems := make([]rawCE, len(colelems))
|
||||
for i, ce := range colelems {
|
||||
if len(ce) == 0 {
|
||||
break
|
||||
}
|
||||
elems[i] = makeRawCE(ce, 0)
|
||||
if len(ce) == 1 {
|
||||
elems[i].w[1] = defaultSecondary
|
||||
}
|
||||
if len(ce) <= 2 {
|
||||
elems[i].w[2] = defaultTertiary
|
||||
}
|
||||
if len(ce) <= 3 {
|
||||
elems[i].w[3] = ce[0]
|
||||
}
|
||||
}
|
||||
for i, ce := range elems {
|
||||
p := ce.w[0]
|
||||
isvar := false
|
||||
for _, j := range variables {
|
||||
if i == j {
|
||||
isvar = true
|
||||
}
|
||||
}
|
||||
if isvar {
|
||||
if p >= b.minNonVar && b.minNonVar > 0 {
|
||||
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
|
||||
}
|
||||
if p > b.varTop {
|
||||
b.varTop = p
|
||||
}
|
||||
} else if p > 1 { // 1 is a special primary value reserved for FFFE
|
||||
if p <= b.varTop {
|
||||
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
|
||||
}
|
||||
if b.minNonVar == 0 || p < b.minNonVar {
|
||||
b.minNonVar = p
|
||||
}
|
||||
}
|
||||
}
|
||||
elems, err := convertLargeWeights(elems)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cccs := []uint8{}
|
||||
nfd := norm.NFD.String(str)
|
||||
for i := range nfd {
|
||||
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
|
||||
}
|
||||
if len(cccs) < len(elems) {
|
||||
if len(cccs) > 2 {
|
||||
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
|
||||
}
|
||||
p := len(elems) - 1
|
||||
for ; p > 0 && elems[p].w[0] == 0; p-- {
|
||||
elems[p].ccc = cccs[len(cccs)-1]
|
||||
}
|
||||
for ; p >= 0; p-- {
|
||||
elems[p].ccc = cccs[0]
|
||||
}
|
||||
} else {
|
||||
for i := range elems {
|
||||
elems[i].ccc = cccs[i]
|
||||
}
|
||||
}
|
||||
// doNorm in collate.go assumes that the following conditions hold.
|
||||
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
|
||||
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
|
||||
}
|
||||
b.root.newEntry(str, elems)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Tailoring) setAnchor(anchor string) error {
|
||||
anchor = norm.NFC.String(anchor)
|
||||
a := t.index.find(anchor)
|
||||
if a == nil {
|
||||
a = t.index.newEntry(anchor, nil)
|
||||
a.implicit = true
|
||||
a.modified = true
|
||||
for _, r := range []rune(anchor) {
|
||||
e := t.index.find(string(r))
|
||||
e.lock = true
|
||||
}
|
||||
}
|
||||
t.anchor = a
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetAnchor sets the point after which elements passed in subsequent calls to
|
||||
// Insert will be inserted. It is equivalent to the reset directive in an LDML
|
||||
// specification. See Insert for an example.
|
||||
// SetAnchor supports the following logical reset positions:
|
||||
// <first_tertiary_ignorable/>, <last_teriary_ignorable/>, <first_primary_ignorable/>,
|
||||
// and <last_non_ignorable/>.
|
||||
func (t *Tailoring) SetAnchor(anchor string) error {
|
||||
if err := t.setAnchor(anchor); err != nil {
|
||||
return err
|
||||
}
|
||||
t.before = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetAnchorBefore is similar to SetAnchor, except that subsequent calls to
|
||||
// Insert will insert entries before the anchor.
|
||||
func (t *Tailoring) SetAnchorBefore(anchor string) error {
|
||||
if err := t.setAnchor(anchor); err != nil {
|
||||
return err
|
||||
}
|
||||
t.before = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// Insert sets the ordering of str relative to the entry set by the previous
|
||||
// call to SetAnchor or Insert. The argument extend corresponds
|
||||
// to the extend elements as defined in LDML. A non-empty value for extend
|
||||
// will cause the collation elements corresponding to extend to be appended
|
||||
// to the collation elements generated for the entry added by Insert.
|
||||
// This has the same net effect as sorting str after the string anchor+extend.
|
||||
// See http://www.unicode.org/reports/tr10/#Tailoring_Example for details
|
||||
// on parametric tailoring and http://unicode.org/reports/tr35/#Collation_Elements
|
||||
// for full details on LDML.
|
||||
//
|
||||
// Examples: create a tailoring for Swedish, where "ä" is ordered after "z"
|
||||
// at the primary sorting level:
|
||||
// t := b.Tailoring("se")
|
||||
// t.SetAnchor("z")
|
||||
// t.Insert(colltab.Primary, "ä", "")
|
||||
// Order "ü" after "ue" at the secondary sorting level:
|
||||
// t.SetAnchor("ue")
|
||||
// t.Insert(colltab.Secondary, "ü","")
|
||||
// or
|
||||
// t.SetAnchor("u")
|
||||
// t.Insert(colltab.Secondary, "ü", "e")
|
||||
// Order "q" afer "ab" at the secondary level and "Q" after "q"
|
||||
// at the tertiary level:
|
||||
// t.SetAnchor("ab")
|
||||
// t.Insert(colltab.Secondary, "q", "")
|
||||
// t.Insert(colltab.Tertiary, "Q", "")
|
||||
// Order "b" before "a":
|
||||
// t.SetAnchorBefore("a")
|
||||
// t.Insert(colltab.Primary, "b", "")
|
||||
// Order "0" after the last primary ignorable:
|
||||
// t.SetAnchor("<last_primary_ignorable/>")
|
||||
// t.Insert(colltab.Primary, "0", "")
|
||||
func (t *Tailoring) Insert(level colltab.Level, str, extend string) error {
|
||||
if t.anchor == nil {
|
||||
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
|
||||
}
|
||||
str = norm.NFC.String(str)
|
||||
e := t.index.find(str)
|
||||
if e == nil {
|
||||
e = t.index.newEntry(str, nil)
|
||||
} else if e.logical != noAnchor {
|
||||
return fmt.Errorf("%s:Insert: cannot reinsert logical reset position %q", t.id, e.str)
|
||||
}
|
||||
if e.lock {
|
||||
return fmt.Errorf("%s:Insert: cannot reinsert element %q", t.id, e.str)
|
||||
}
|
||||
a := t.anchor
|
||||
// Find the first element after the anchor which differs at a level smaller or
|
||||
// equal to the given level. Then insert at this position.
|
||||
// See http://unicode.org/reports/tr35/#Collation_Elements, Section 5.14.5 for details.
|
||||
e.before = t.before
|
||||
if t.before {
|
||||
t.before = false
|
||||
if a.prev == nil {
|
||||
a.insertBefore(e)
|
||||
} else {
|
||||
for a = a.prev; a.level > level; a = a.prev {
|
||||
}
|
||||
a.insertAfter(e)
|
||||
}
|
||||
e.level = level
|
||||
} else {
|
||||
for ; a.level > level; a = a.next {
|
||||
}
|
||||
e.level = a.level
|
||||
if a != e {
|
||||
a.insertAfter(e)
|
||||
a.level = level
|
||||
} else {
|
||||
// We don't set a to prev itself. This has the effect of the entry
|
||||
// getting new collation elements that are an increment of itself.
|
||||
// This is intentional.
|
||||
a.prev.level = level
|
||||
}
|
||||
}
|
||||
e.extend = norm.NFD.String(extend)
|
||||
e.exclude = false
|
||||
e.modified = true
|
||||
e.elems = nil
|
||||
t.anchor = e
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *ordering) getWeight(e *entry) []rawCE {
|
||||
if len(e.elems) == 0 && e.logical == noAnchor {
|
||||
if e.implicit {
|
||||
for _, r := range e.runes {
|
||||
e.elems = append(e.elems, o.getWeight(o.find(string(r)))...)
|
||||
}
|
||||
} else if e.before {
|
||||
count := [colltab.Identity + 1]int{}
|
||||
a := e
|
||||
for ; a.elems == nil && !a.implicit; a = a.next {
|
||||
count[a.level]++
|
||||
}
|
||||
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
|
||||
for i := colltab.Primary; i < colltab.Quaternary; i++ {
|
||||
if count[i] != 0 {
|
||||
e.elems[0].w[i] -= count[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
if e.prev != nil {
|
||||
o.verifyWeights(e.prev, e, e.prev.level)
|
||||
}
|
||||
} else {
|
||||
prev := e.prev
|
||||
e.elems = nextWeight(prev.level, o.getWeight(prev))
|
||||
o.verifyWeights(e, e.next, e.level)
|
||||
}
|
||||
}
|
||||
return e.elems
|
||||
}
|
||||
|
||||
func (o *ordering) addExtension(e *entry) {
|
||||
if ex := o.find(e.extend); ex != nil {
|
||||
e.elems = append(e.elems, ex.elems...)
|
||||
} else {
|
||||
for _, r := range []rune(e.extend) {
|
||||
e.elems = append(e.elems, o.find(string(r)).elems...)
|
||||
}
|
||||
}
|
||||
e.extend = ""
|
||||
}
|
||||
|
||||
func (o *ordering) verifyWeights(a, b *entry, level colltab.Level) error {
|
||||
if level == colltab.Identity || b == nil || b.elems == nil || a.elems == nil {
|
||||
return nil
|
||||
}
|
||||
for i := colltab.Primary; i < level; i++ {
|
||||
if a.elems[0].w[i] < b.elems[0].w[i] {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if a.elems[0].w[level] >= b.elems[0].w[level] {
|
||||
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
|
||||
log.Println(err)
|
||||
// TODO: return the error instead, or better, fix the conflicting entry by making room.
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Builder) error(e error) {
|
||||
if e != nil {
|
||||
b.err = e
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) errorID(locale string, e error) {
|
||||
if e != nil {
|
||||
b.err = fmt.Errorf("%s:%v", locale, e)
|
||||
}
|
||||
}
|
||||
|
||||
// patchNorm ensures that NFC and NFD counterparts are consistent.
|
||||
func (o *ordering) patchNorm() {
|
||||
// Insert the NFD counterparts, if necessary.
|
||||
for _, e := range o.ordered {
|
||||
nfd := norm.NFD.String(e.str)
|
||||
if nfd != e.str {
|
||||
if e0 := o.find(nfd); e0 != nil && !e0.modified {
|
||||
e0.elems = e.elems
|
||||
} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
|
||||
e := o.newEntry(nfd, e.elems)
|
||||
e.modified = true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Update unchanged composed forms if one of their parts changed.
|
||||
for _, e := range o.ordered {
|
||||
nfd := norm.NFD.String(e.str)
|
||||
if e.modified || nfd == e.str {
|
||||
continue
|
||||
}
|
||||
if e0 := o.find(nfd); e0 != nil {
|
||||
e.elems = e0.elems
|
||||
} else {
|
||||
e.elems = o.genColElems(nfd)
|
||||
if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
|
||||
r := []rune(nfd)
|
||||
head := string(r[0])
|
||||
tail := ""
|
||||
for i := 1; i < len(r); i++ {
|
||||
s := norm.NFC.String(head + string(r[i]))
|
||||
if e0 := o.find(s); e0 != nil && e0.modified {
|
||||
head = s
|
||||
} else {
|
||||
tail += string(r[i])
|
||||
}
|
||||
}
|
||||
e.elems = append(o.genColElems(head), o.genColElems(tail)...)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Exclude entries for which the individual runes generate the same collation elements.
|
||||
for _, e := range o.ordered {
|
||||
if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
|
||||
e.exclude = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) buildOrdering(o *ordering) {
|
||||
for _, e := range o.ordered {
|
||||
o.getWeight(e)
|
||||
}
|
||||
for _, e := range o.ordered {
|
||||
o.addExtension(e)
|
||||
}
|
||||
o.patchNorm()
|
||||
o.sort()
|
||||
simplify(o)
|
||||
b.processExpansions(o) // requires simplify
|
||||
b.processContractions(o) // requires simplify
|
||||
|
||||
t := newNode()
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.skip() {
|
||||
ce, err := e.encode()
|
||||
b.errorID(o.id, err)
|
||||
t.insert(e.runes[0], ce)
|
||||
}
|
||||
}
|
||||
o.handle = b.index.addTrie(t)
|
||||
}
|
||||
|
||||
func (b *Builder) build() (*table, error) {
|
||||
if b.built {
|
||||
return b.t, b.err
|
||||
}
|
||||
b.built = true
|
||||
b.t = &table{
|
||||
Table: colltab.Table{
|
||||
MaxContractLen: utf8.UTFMax,
|
||||
VariableTop: uint32(b.varTop),
|
||||
},
|
||||
}
|
||||
|
||||
b.buildOrdering(&b.root)
|
||||
b.t.root = b.root.handle
|
||||
for _, t := range b.locale {
|
||||
b.buildOrdering(t.index)
|
||||
if b.err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
i, err := b.index.generate()
|
||||
b.t.trie = *i
|
||||
b.t.Index = colltab.Trie{
|
||||
Index: i.index,
|
||||
Values: i.values,
|
||||
Index0: i.index[blockSize*b.t.root.lookupStart:],
|
||||
Values0: i.values[blockSize*b.t.root.valueStart:],
|
||||
}
|
||||
b.error(err)
|
||||
return b.t, b.err
|
||||
}
|
||||
|
||||
// Build builds the root Collator.
|
||||
func (b *Builder) Build() (colltab.Weighter, error) {
|
||||
table, err := b.build()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return table, nil
|
||||
}
|
||||
|
||||
// Build builds a Collator for Tailoring t.
|
||||
func (t *Tailoring) Build() (colltab.Weighter, error) {
|
||||
// TODO: implement.
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Print prints the tables for b and all its Tailorings as a Go file
|
||||
// that can be included in the Collate package.
|
||||
func (b *Builder) Print(w io.Writer) (n int, err error) {
|
||||
p := func(nn int, e error) {
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
t, err := b.build()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
p(fmt.Fprintf(w, `var availableLocales = "und`))
|
||||
for _, loc := range b.locale {
|
||||
if loc.id != "und" {
|
||||
p(fmt.Fprintf(w, ",%s", loc.id))
|
||||
}
|
||||
}
|
||||
p(fmt.Fprint(w, "\"\n\n"))
|
||||
p(fmt.Fprintf(w, "const varTop = 0x%x\n\n", b.varTop))
|
||||
p(fmt.Fprintln(w, "var locales = [...]tableIndex{"))
|
||||
for _, loc := range b.locale {
|
||||
if loc.id == "und" {
|
||||
p(t.fprintIndex(w, loc.index.handle, loc.id))
|
||||
}
|
||||
}
|
||||
for _, loc := range b.locale {
|
||||
if loc.id != "und" {
|
||||
p(t.fprintIndex(w, loc.index.handle, loc.id))
|
||||
}
|
||||
}
|
||||
p(fmt.Fprint(w, "}\n\n"))
|
||||
n, _, err = t.fprint(w, "main")
|
||||
return
|
||||
}
|
||||
|
||||
// reproducibleFromNFKD checks whether the given expansion could be generated
|
||||
// from an NFKD expansion.
|
||||
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
|
||||
// Length must be equal.
|
||||
if len(exp) != len(nfkd) {
|
||||
return false
|
||||
}
|
||||
for i, ce := range exp {
|
||||
// Primary and secondary values should be equal.
|
||||
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
|
||||
return false
|
||||
}
|
||||
// Tertiary values should be equal to maxTertiary for third element onwards.
|
||||
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
|
||||
// simply be dropped. Try this out by dropping the following code.
|
||||
if i >= 2 && ce.w[2] != maxTertiary {
|
||||
return false
|
||||
}
|
||||
if _, err := makeCE(ce); err != nil {
|
||||
// Simply return false. The error will be caught elsewhere.
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func simplify(o *ordering) {
|
||||
// Runes that are a starter of a contraction should not be removed.
|
||||
// (To date, there is only Kannada character 0CCA.)
|
||||
keep := make(map[rune]bool)
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if len(e.runes) > 1 {
|
||||
keep[e.runes[0]] = true
|
||||
}
|
||||
}
|
||||
// Tag entries for which the runes NFKD decompose to identical values.
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
s := e.str
|
||||
nfkd := norm.NFKD.String(s)
|
||||
nfd := norm.NFD.String(s)
|
||||
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
|
||||
continue
|
||||
}
|
||||
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
|
||||
e.decompose = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// appendExpansion converts the given collation sequence to
|
||||
// collation elements and adds them to the expansion table.
|
||||
// It returns an index to the expansion table.
|
||||
func (b *Builder) appendExpansion(e *entry) int {
|
||||
t := b.t
|
||||
i := len(t.ExpandElem)
|
||||
ce := uint32(len(e.elems))
|
||||
t.ExpandElem = append(t.ExpandElem, ce)
|
||||
for _, w := range e.elems {
|
||||
ce, err := makeCE(w)
|
||||
if err != nil {
|
||||
b.error(err)
|
||||
return -1
|
||||
}
|
||||
t.ExpandElem = append(t.ExpandElem, ce)
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// processExpansions extracts data necessary to generate
|
||||
// the extraction tables.
|
||||
func (b *Builder) processExpansions(o *ordering) {
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.expansion() {
|
||||
continue
|
||||
}
|
||||
key := fmt.Sprintf("%v", e.elems)
|
||||
i, ok := b.expIndex[key]
|
||||
if !ok {
|
||||
i = b.appendExpansion(e)
|
||||
b.expIndex[key] = i
|
||||
}
|
||||
e.expansionIndex = i
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) processContractions(o *ordering) {
|
||||
// Collate contractions per starter rune.
|
||||
starters := []rune{}
|
||||
cm := make(map[rune][]*entry)
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if e.contraction() {
|
||||
if len(e.str) > b.t.MaxContractLen {
|
||||
b.t.MaxContractLen = len(e.str)
|
||||
}
|
||||
r := e.runes[0]
|
||||
if _, ok := cm[r]; !ok {
|
||||
starters = append(starters, r)
|
||||
}
|
||||
cm[r] = append(cm[r], e)
|
||||
}
|
||||
}
|
||||
// Add entries of single runes that are at a start of a contraction.
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if !e.contraction() {
|
||||
r := e.runes[0]
|
||||
if _, ok := cm[r]; ok {
|
||||
cm[r] = append(cm[r], e)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Build the tries for the contractions.
|
||||
t := b.t
|
||||
for _, r := range starters {
|
||||
l := cm[r]
|
||||
// Compute suffix strings. There are 31 different contraction suffix
|
||||
// sets for 715 contractions and 82 contraction starter runes as of
|
||||
// version 6.0.0.
|
||||
sufx := []string{}
|
||||
hasSingle := false
|
||||
for _, e := range l {
|
||||
if len(e.runes) > 1 {
|
||||
sufx = append(sufx, string(e.runes[1:]))
|
||||
} else {
|
||||
hasSingle = true
|
||||
}
|
||||
}
|
||||
if !hasSingle {
|
||||
b.error(fmt.Errorf("no single entry for starter rune %U found", r))
|
||||
continue
|
||||
}
|
||||
// Unique the suffix set.
|
||||
sort.Strings(sufx)
|
||||
key := strings.Join(sufx, "\n")
|
||||
handle, ok := b.ctHandle[key]
|
||||
if !ok {
|
||||
var err error
|
||||
handle, err = appendTrie(&t.ContractTries, sufx)
|
||||
if err != nil {
|
||||
b.error(err)
|
||||
}
|
||||
b.ctHandle[key] = handle
|
||||
}
|
||||
// Bucket sort entries in index order.
|
||||
es := make([]*entry, len(l))
|
||||
for _, e := range l {
|
||||
var p, sn int
|
||||
if len(e.runes) > 1 {
|
||||
str := []byte(string(e.runes[1:]))
|
||||
p, sn = lookup(&t.ContractTries, handle, str)
|
||||
if sn != len(str) {
|
||||
log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
|
||||
}
|
||||
}
|
||||
if es[p] != nil {
|
||||
log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
|
||||
}
|
||||
es[p] = e
|
||||
}
|
||||
// Create collation elements for contractions.
|
||||
elems := []uint32{}
|
||||
for _, e := range es {
|
||||
ce, err := e.encodeBase()
|
||||
b.errorID(o.id, err)
|
||||
elems = append(elems, ce)
|
||||
}
|
||||
key = fmt.Sprintf("%v", elems)
|
||||
i, ok := b.ctElem[key]
|
||||
if !ok {
|
||||
i = len(t.ContractElem)
|
||||
b.ctElem[key] = i
|
||||
t.ContractElem = append(t.ContractElem, elems...)
|
||||
}
|
||||
// Store info in entry for starter rune.
|
||||
es[0].contractionIndex = i
|
||||
es[0].contractionHandle = handle
|
||||
}
|
||||
}
|
290
vendor/golang.org/x/text/collate/build/builder_test.go
generated
vendored
Normal file
290
vendor/golang.org/x/text/collate/build/builder_test.go
generated
vendored
Normal file
|
@ -0,0 +1,290 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import "testing"
|
||||
|
||||
// cjk returns an implicit collation element for a CJK rune.
|
||||
func cjk(r rune) []rawCE {
|
||||
// A CJK character C is represented in the DUCET as
|
||||
// [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
|
||||
// Where AAAA is the most significant 15 bits plus a base value.
|
||||
// Any base value will work for the test, so we pick the common value of FB40.
|
||||
const base = 0xFB40
|
||||
return []rawCE{
|
||||
{w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
|
||||
{w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
|
||||
}
|
||||
}
|
||||
|
||||
func pCE(p int) []rawCE {
|
||||
return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
|
||||
}
|
||||
|
||||
func pqCE(p, q int) []rawCE {
|
||||
return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
|
||||
}
|
||||
|
||||
func ptCE(p, t int) []rawCE {
|
||||
return mkCE([]int{p, defaultSecondary, t, 0}, 0)
|
||||
}
|
||||
|
||||
func ptcCE(p, t int, ccc uint8) []rawCE {
|
||||
return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
|
||||
}
|
||||
|
||||
func sCE(s int) []rawCE {
|
||||
return mkCE([]int{0, s, defaultTertiary, 0}, 0)
|
||||
}
|
||||
|
||||
func stCE(s, t int) []rawCE {
|
||||
return mkCE([]int{0, s, t, 0}, 0)
|
||||
}
|
||||
|
||||
func scCE(s int, ccc uint8) []rawCE {
|
||||
return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
|
||||
}
|
||||
|
||||
func mkCE(w []int, ccc uint8) []rawCE {
|
||||
return []rawCE{rawCE{w, ccc}}
|
||||
}
|
||||
|
||||
// ducetElem is used to define test data that is used to generate a table.
|
||||
type ducetElem struct {
|
||||
str string
|
||||
ces []rawCE
|
||||
}
|
||||
|
||||
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
|
||||
b := NewBuilder()
|
||||
for _, e := range ducet {
|
||||
ces := [][]int{}
|
||||
for _, ce := range e.ces {
|
||||
ces = append(ces, ce.w)
|
||||
}
|
||||
if err := b.Add([]rune(e.str), ces, nil); err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
}
|
||||
b.t = &table{}
|
||||
b.root.sort()
|
||||
return b
|
||||
}
|
||||
|
||||
type convertTest struct {
|
||||
in, out []rawCE
|
||||
err bool
|
||||
}
|
||||
|
||||
var convLargeTests = []convertTest{
|
||||
{pCE(0xFB39), pCE(0xFB39), false},
|
||||
{cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false},
|
||||
{pCE(0xFB40), pCE(0), true},
|
||||
{append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
|
||||
{pCE(0xFFFE), pCE(illegalOffset), false},
|
||||
{pCE(0xFFFF), pCE(illegalOffset + 1), false},
|
||||
}
|
||||
|
||||
func TestConvertLarge(t *testing.T) {
|
||||
for i, tt := range convLargeTests {
|
||||
e := new(entry)
|
||||
for _, ce := range tt.in {
|
||||
e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
|
||||
}
|
||||
elems, err := convertLargeWeights(e.elems)
|
||||
if tt.err {
|
||||
if err == nil {
|
||||
t.Errorf("%d: expected error; none found", i)
|
||||
}
|
||||
continue
|
||||
} else if err != nil {
|
||||
t.Errorf("%d: unexpected error: %v", i, err)
|
||||
}
|
||||
if !equalCEArrays(elems, tt.out) {
|
||||
t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Collation element table for simplify tests.
|
||||
var simplifyTest = []ducetElem{
|
||||
{"\u0300", sCE(30)}, // grave
|
||||
{"\u030C", sCE(40)}, // caron
|
||||
{"A", ptCE(100, 8)},
|
||||
{"D", ptCE(104, 8)},
|
||||
{"E", ptCE(105, 8)},
|
||||
{"I", ptCE(110, 8)},
|
||||
{"z", ptCE(130, 8)},
|
||||
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])},
|
||||
{"\u05B7", sCE(80)},
|
||||
{"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed
|
||||
{"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave
|
||||
{"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD
|
||||
{"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying
|
||||
{"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD
|
||||
// no removal: tertiary value of third element is not maxTertiary
|
||||
{"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
|
||||
}
|
||||
|
||||
var genColTests = []ducetElem{
|
||||
{"\uFA70", pqCE(0x1FA70, 0xFA70)},
|
||||
{"A\u0300", append(ptCE(100, 8), sCE(30)...)},
|
||||
{"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])},
|
||||
{"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
|
||||
}
|
||||
|
||||
func TestGenColElems(t *testing.T) {
|
||||
b := newBuilder(t, simplifyTest[:5])
|
||||
|
||||
for i, tt := range genColTests {
|
||||
res := b.root.genColElems(tt.str)
|
||||
if !equalCEArrays(tt.ces, res) {
|
||||
t.Errorf("%d: result %X; want %X", i, res, tt.ces)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type strArray []string
|
||||
|
||||
func (sa strArray) contains(s string) bool {
|
||||
for _, e := range sa {
|
||||
if e == s {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
var simplifyRemoved = strArray{"\u00C0", "\uFB1F"}
|
||||
var simplifyMarked = strArray{"\u01C5"}
|
||||
|
||||
func TestSimplify(t *testing.T) {
|
||||
b := newBuilder(t, simplifyTest)
|
||||
o := &b.root
|
||||
simplify(o)
|
||||
|
||||
for i, tt := range simplifyTest {
|
||||
if simplifyRemoved.contains(tt.str) {
|
||||
continue
|
||||
}
|
||||
e := o.find(tt.str)
|
||||
if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
|
||||
t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
|
||||
break
|
||||
}
|
||||
}
|
||||
var i, k int
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
gold := simplifyMarked.contains(e.str)
|
||||
if gold {
|
||||
k++
|
||||
}
|
||||
if gold != e.decompose {
|
||||
t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
|
||||
}
|
||||
i++
|
||||
}
|
||||
if k != len(simplifyMarked) {
|
||||
t.Errorf(" an entry that should be marked as decompose was deleted")
|
||||
}
|
||||
}
|
||||
|
||||
var expandTest = []ducetElem{
|
||||
{"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
|
||||
{"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
|
||||
{"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
|
||||
{"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
|
||||
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
|
||||
{"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
|
||||
}
|
||||
|
||||
func TestExpand(t *testing.T) {
|
||||
const (
|
||||
totalExpansions = 5
|
||||
totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
|
||||
)
|
||||
b := newBuilder(t, expandTest)
|
||||
o := &b.root
|
||||
b.processExpansions(o)
|
||||
|
||||
e := o.front()
|
||||
for _, tt := range expandTest {
|
||||
exp := b.t.ExpandElem[e.expansionIndex:]
|
||||
if int(exp[0]) != len(tt.ces) {
|
||||
t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
|
||||
}
|
||||
exp = exp[1:]
|
||||
for j, w := range tt.ces {
|
||||
if ce, _ := makeCE(w); exp[j] != ce {
|
||||
t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
|
||||
}
|
||||
}
|
||||
e, _ = e.nextIndexed()
|
||||
}
|
||||
// Verify uniquing.
|
||||
if len(b.t.ExpandElem) != totalElements {
|
||||
t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements)
|
||||
}
|
||||
}
|
||||
|
||||
var contractTest = []ducetElem{
|
||||
{"abc", pCE(102)},
|
||||
{"abd", pCE(103)},
|
||||
{"a", pCE(100)},
|
||||
{"ab", pCE(101)},
|
||||
{"ac", pCE(104)},
|
||||
{"bcd", pCE(202)},
|
||||
{"b", pCE(200)},
|
||||
{"bc", pCE(201)},
|
||||
{"bd", pCE(203)},
|
||||
// shares suffixes with a*
|
||||
{"Ab", pCE(301)},
|
||||
{"A", pCE(300)},
|
||||
{"Ac", pCE(304)},
|
||||
{"Abc", pCE(302)},
|
||||
{"Abd", pCE(303)},
|
||||
// starter to be ignored
|
||||
{"z", pCE(1000)},
|
||||
}
|
||||
|
||||
func TestContract(t *testing.T) {
|
||||
const (
|
||||
totalElements = 5 + 5 + 4
|
||||
)
|
||||
b := newBuilder(t, contractTest)
|
||||
o := &b.root
|
||||
b.processContractions(o)
|
||||
|
||||
indexMap := make(map[int]bool)
|
||||
handleMap := make(map[rune]*entry)
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
if e.contractionHandle.n > 0 {
|
||||
handleMap[e.runes[0]] = e
|
||||
indexMap[e.contractionHandle.index] = true
|
||||
}
|
||||
}
|
||||
// Verify uniquing.
|
||||
if len(indexMap) != 2 {
|
||||
t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
|
||||
}
|
||||
for _, tt := range contractTest {
|
||||
e, ok := handleMap[[]rune(tt.str)[0]]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
str := tt.str[1:]
|
||||
offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str))
|
||||
if len(str) != n {
|
||||
t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
|
||||
}
|
||||
ce := b.t.ContractElem[offset+e.contractionIndex]
|
||||
if want, _ := makeCE(tt.ces[0]); want != ce {
|
||||
t.Errorf("%s: element %X; want %X", tt.str, ce, want)
|
||||
}
|
||||
}
|
||||
if len(b.t.ContractElem) != totalElements {
|
||||
t.Errorf("len(expandElem)==%d; want %d", len(b.t.ContractElem), totalElements)
|
||||
}
|
||||
}
|
294
vendor/golang.org/x/text/collate/build/colelem.go
generated
vendored
Normal file
294
vendor/golang.org/x/text/collate/build/colelem.go
generated
vendored
Normal file
|
@ -0,0 +1,294 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultSecondary = 0x20
|
||||
defaultTertiary = 0x2
|
||||
maxTertiary = 0x1F
|
||||
)
|
||||
|
||||
type rawCE struct {
|
||||
w []int
|
||||
ccc uint8
|
||||
}
|
||||
|
||||
func makeRawCE(w []int, ccc uint8) rawCE {
|
||||
ce := rawCE{w: make([]int, 4), ccc: ccc}
|
||||
copy(ce.w, w)
|
||||
return ce
|
||||
}
|
||||
|
||||
// A collation element is represented as an uint32.
|
||||
// In the typical case, a rune maps to a single collation element. If a rune
|
||||
// can be the start of a contraction or expands into multiple collation elements,
|
||||
// then the collation element that is associated with a rune will have a special
|
||||
// form to represent such m to n mappings. Such special collation elements
|
||||
// have a value >= 0x80000000.
|
||||
|
||||
const (
|
||||
maxPrimaryBits = 21
|
||||
maxSecondaryBits = 12
|
||||
maxTertiaryBits = 8
|
||||
)
|
||||
|
||||
func makeCE(ce rawCE) (uint32, error) {
|
||||
v, e := colltab.MakeElem(ce.w[0], ce.w[1], ce.w[2], ce.ccc)
|
||||
return uint32(v), e
|
||||
}
|
||||
|
||||
// For contractions, collation elements are of the form
|
||||
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
|
||||
// - n* is the size of the first node in the contraction trie.
|
||||
// - i* is the index of the first node in the contraction trie.
|
||||
// - b* is the offset into the contraction collation element table.
|
||||
// See contract.go for details on the contraction trie.
|
||||
const (
|
||||
contractID = 0xC0000000
|
||||
maxNBits = 4
|
||||
maxTrieIndexBits = 12
|
||||
maxContractOffsetBits = 13
|
||||
)
|
||||
|
||||
func makeContractIndex(h ctHandle, offset int) (uint32, error) {
|
||||
if h.n >= 1<<maxNBits {
|
||||
return 0, fmt.Errorf("size of contraction trie node too large: %d >= %d", h.n, 1<<maxNBits)
|
||||
}
|
||||
if h.index >= 1<<maxTrieIndexBits {
|
||||
return 0, fmt.Errorf("size of contraction trie offset too large: %d >= %d", h.index, 1<<maxTrieIndexBits)
|
||||
}
|
||||
if offset >= 1<<maxContractOffsetBits {
|
||||
return 0, fmt.Errorf("contraction offset out of bounds: %x >= %x", offset, 1<<maxContractOffsetBits)
|
||||
}
|
||||
ce := uint32(contractID)
|
||||
ce += uint32(offset << (maxNBits + maxTrieIndexBits))
|
||||
ce += uint32(h.index << maxNBits)
|
||||
ce += uint32(h.n)
|
||||
return ce, nil
|
||||
}
|
||||
|
||||
// For expansions, collation elements are of the form
|
||||
// 11100000 00000000 bbbbbbbb bbbbbbbb,
|
||||
// where b* is the index into the expansion sequence table.
|
||||
const (
|
||||
expandID = 0xE0000000
|
||||
maxExpandIndexBits = 16
|
||||
)
|
||||
|
||||
func makeExpandIndex(index int) (uint32, error) {
|
||||
if index >= 1<<maxExpandIndexBits {
|
||||
return 0, fmt.Errorf("expansion index out of bounds: %x >= %x", index, 1<<maxExpandIndexBits)
|
||||
}
|
||||
return expandID + uint32(index), nil
|
||||
}
|
||||
|
||||
// Each list of collation elements corresponding to an expansion starts with
|
||||
// a header indicating the length of the sequence.
|
||||
func makeExpansionHeader(n int) (uint32, error) {
|
||||
return uint32(n), nil
|
||||
}
|
||||
|
||||
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
|
||||
// sequence of collation elements, we decompose the rune and lookup the collation
|
||||
// elements for each rune in the decomposition and modify the tertiary weights.
|
||||
// The collation element, in this case, is of the form
|
||||
// 11110000 00000000 wwwwwwww vvvvvvvv, where
|
||||
// - v* is the replacement tertiary weight for the first rune,
|
||||
// - w* is the replacement tertiary weight for the second rune,
|
||||
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
|
||||
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
|
||||
const (
|
||||
decompID = 0xF0000000
|
||||
)
|
||||
|
||||
func makeDecompose(t1, t2 int) (uint32, error) {
|
||||
if t1 >= 256 || t1 < 0 {
|
||||
return 0, fmt.Errorf("first tertiary weight out of bounds: %d >= 256", t1)
|
||||
}
|
||||
if t2 >= 256 || t2 < 0 {
|
||||
return 0, fmt.Errorf("second tertiary weight out of bounds: %d >= 256", t2)
|
||||
}
|
||||
return uint32(t2<<8+t1) + decompID, nil
|
||||
}
|
||||
|
||||
const (
|
||||
// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
|
||||
minUnified rune = 0x4E00
|
||||
maxUnified = 0x9FFF
|
||||
minCompatibility = 0xF900
|
||||
maxCompatibility = 0xFAFF
|
||||
minRare = 0x3400
|
||||
maxRare = 0x4DBF
|
||||
)
|
||||
const (
|
||||
commonUnifiedOffset = 0x10000
|
||||
rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
|
||||
otherOffset = 0x50000 // largest rune in rare is U+2FA1D
|
||||
illegalOffset = otherOffset + int(unicode.MaxRune)
|
||||
maxPrimary = illegalOffset + 1
|
||||
)
|
||||
|
||||
// implicitPrimary returns the primary weight for the a rune
|
||||
// for which there is no entry for the rune in the collation table.
|
||||
// We take a different approach from the one specified in
|
||||
// http://unicode.org/reports/tr10/#Implicit_Weights,
|
||||
// but preserve the resulting relative ordering of the runes.
|
||||
func implicitPrimary(r rune) int {
|
||||
if unicode.Is(unicode.Ideographic, r) {
|
||||
if r >= minUnified && r <= maxUnified {
|
||||
// The most common case for CJK.
|
||||
return int(r) + commonUnifiedOffset
|
||||
}
|
||||
if r >= minCompatibility && r <= maxCompatibility {
|
||||
// This will typically not hit. The DUCET explicitly specifies mappings
|
||||
// for all characters that do not decompose.
|
||||
return int(r) + commonUnifiedOffset
|
||||
}
|
||||
return int(r) + rareUnifiedOffset
|
||||
}
|
||||
return int(r) + otherOffset
|
||||
}
|
||||
|
||||
// convertLargeWeights converts collation elements with large
|
||||
// primaries (either double primaries or for illegal runes)
|
||||
// to our own representation.
|
||||
// A CJK character C is represented in the DUCET as
|
||||
// [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
|
||||
// We will rewrite these characters to a single CE.
|
||||
// We assume the CJK values start at 0x8000.
|
||||
// See http://unicode.org/reports/tr10/#Implicit_Weights
|
||||
func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
|
||||
const (
|
||||
cjkPrimaryStart = 0xFB40
|
||||
rarePrimaryStart = 0xFB80
|
||||
otherPrimaryStart = 0xFBC0
|
||||
illegalPrimary = 0xFFFE
|
||||
highBitsMask = 0x3F
|
||||
lowBitsMask = 0x7FFF
|
||||
lowBitsFlag = 0x8000
|
||||
shiftBits = 15
|
||||
)
|
||||
for i := 0; i < len(elems); i++ {
|
||||
ce := elems[i].w
|
||||
p := ce[0]
|
||||
if p < cjkPrimaryStart {
|
||||
continue
|
||||
}
|
||||
if p > 0xFFFF {
|
||||
return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
|
||||
}
|
||||
if p >= illegalPrimary {
|
||||
ce[0] = illegalOffset + p - illegalPrimary
|
||||
} else {
|
||||
if i+1 >= len(elems) {
|
||||
return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
|
||||
}
|
||||
if elems[i+1].w[0]&lowBitsFlag == 0 {
|
||||
return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
|
||||
}
|
||||
np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
|
||||
switch {
|
||||
case p < rarePrimaryStart:
|
||||
np += commonUnifiedOffset
|
||||
case p < otherPrimaryStart:
|
||||
np += rareUnifiedOffset
|
||||
default:
|
||||
p += otherOffset
|
||||
}
|
||||
ce[0] = np
|
||||
for j := i + 1; j+1 < len(elems); j++ {
|
||||
elems[j] = elems[j+1]
|
||||
}
|
||||
elems = elems[:len(elems)-1]
|
||||
}
|
||||
}
|
||||
return elems, nil
|
||||
}
|
||||
|
||||
// nextWeight computes the first possible collation weights following elems
|
||||
// for the given level.
|
||||
func nextWeight(level colltab.Level, elems []rawCE) []rawCE {
|
||||
if level == colltab.Identity {
|
||||
next := make([]rawCE, len(elems))
|
||||
copy(next, elems)
|
||||
return next
|
||||
}
|
||||
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
|
||||
next[0].w[level]++
|
||||
if level < colltab.Secondary {
|
||||
next[0].w[colltab.Secondary] = defaultSecondary
|
||||
}
|
||||
if level < colltab.Tertiary {
|
||||
next[0].w[colltab.Tertiary] = defaultTertiary
|
||||
}
|
||||
// Filter entries that cannot influence ordering.
|
||||
for _, ce := range elems[1:] {
|
||||
skip := true
|
||||
for i := colltab.Primary; i < level; i++ {
|
||||
skip = skip && ce.w[i] == 0
|
||||
}
|
||||
if !skip {
|
||||
next = append(next, ce)
|
||||
}
|
||||
}
|
||||
return next
|
||||
}
|
||||
|
||||
func nextVal(elems []rawCE, i int, level colltab.Level) (index, value int) {
|
||||
for ; i < len(elems) && elems[i].w[level] == 0; i++ {
|
||||
}
|
||||
if i < len(elems) {
|
||||
return i, elems[i].w[level]
|
||||
}
|
||||
return i, 0
|
||||
}
|
||||
|
||||
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
|
||||
// It also returns the collation level at which the difference is found.
|
||||
func compareWeights(a, b []rawCE) (result int, level colltab.Level) {
|
||||
for level := colltab.Primary; level < colltab.Identity; level++ {
|
||||
var va, vb int
|
||||
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
|
||||
ia, va = nextVal(a, ia, level)
|
||||
ib, vb = nextVal(b, ib, level)
|
||||
if va != vb {
|
||||
if va < vb {
|
||||
return -1, level
|
||||
} else {
|
||||
return 1, level
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0, colltab.Identity
|
||||
}
|
||||
|
||||
func equalCE(a, b rawCE) bool {
|
||||
for i := 0; i < 3; i++ {
|
||||
if b.w[i] != a.w[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func equalCEArrays(a, b []rawCE) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i := range a {
|
||||
if !equalCE(a[i], b[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
215
vendor/golang.org/x/text/collate/build/colelem_test.go
generated
vendored
Normal file
215
vendor/golang.org/x/text/collate/build/colelem_test.go
generated
vendored
Normal file
|
@ -0,0 +1,215 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
type ceTest struct {
|
||||
f func(in []int) (uint32, error)
|
||||
arg []int
|
||||
val uint32
|
||||
}
|
||||
|
||||
func normalCE(in []int) (ce uint32, err error) {
|
||||
return makeCE(rawCE{w: in[:3], ccc: uint8(in[3])})
|
||||
}
|
||||
|
||||
func expandCE(in []int) (ce uint32, err error) {
|
||||
return makeExpandIndex(in[0])
|
||||
}
|
||||
|
||||
func contractCE(in []int) (ce uint32, err error) {
|
||||
return makeContractIndex(ctHandle{in[0], in[1]}, in[2])
|
||||
}
|
||||
|
||||
func decompCE(in []int) (ce uint32, err error) {
|
||||
return makeDecompose(in[0], in[1])
|
||||
}
|
||||
|
||||
var ceTests = []ceTest{
|
||||
{normalCE, []int{0, 0, 0, 0}, 0xA0000000},
|
||||
{normalCE, []int{0, 0x28, 3, 0}, 0xA0002803},
|
||||
{normalCE, []int{0, 0x28, 3, 0xFF}, 0xAFF02803},
|
||||
{normalCE, []int{100, defaultSecondary, 3, 0}, 0x0000C883},
|
||||
// non-ignorable primary with non-default secondary
|
||||
{normalCE, []int{100, 0x28, defaultTertiary, 0}, 0x4000C828},
|
||||
{normalCE, []int{100, defaultSecondary + 8, 3, 0}, 0x0000C983},
|
||||
{normalCE, []int{100, 0, 3, 0}, 0xFFFF}, // non-ignorable primary with non-supported secondary
|
||||
{normalCE, []int{100, 1, 3, 0}, 0xFFFF},
|
||||
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0, 0}, 0xFFFF},
|
||||
{normalCE, []int{0, 1 << maxSecondaryBits, 0, 0}, 0xFFFF},
|
||||
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits, 0}, 0xFFFF},
|
||||
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}, 0x88FF0123},
|
||||
{normalCE, []int{0x123, defaultSecondary + 1, 8, 0xFF}, 0xFFFF},
|
||||
|
||||
{contractCE, []int{0, 0, 0}, 0xC0000000},
|
||||
{contractCE, []int{1, 1, 1}, 0xC0010011},
|
||||
{contractCE, []int{1, (1 << maxNBits) - 1, 1}, 0xC001001F},
|
||||
{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}, 0xC001FFF1},
|
||||
{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}, 0xDFFF0011},
|
||||
{contractCE, []int{1, (1 << maxNBits), 1}, 0xFFFF},
|
||||
{contractCE, []int{(1 << maxTrieIndexBits), 1, 1}, 0xFFFF},
|
||||
{contractCE, []int{1, (1 << maxContractOffsetBits), 1}, 0xFFFF},
|
||||
|
||||
{expandCE, []int{0}, 0xE0000000},
|
||||
{expandCE, []int{5}, 0xE0000005},
|
||||
{expandCE, []int{(1 << maxExpandIndexBits) - 1}, 0xE000FFFF},
|
||||
{expandCE, []int{1 << maxExpandIndexBits}, 0xFFFF},
|
||||
|
||||
{decompCE, []int{0, 0}, 0xF0000000},
|
||||
{decompCE, []int{1, 1}, 0xF0000101},
|
||||
{decompCE, []int{0x1F, 0x1F}, 0xF0001F1F},
|
||||
{decompCE, []int{256, 0x1F}, 0xFFFF},
|
||||
{decompCE, []int{0x1F, 256}, 0xFFFF},
|
||||
}
|
||||
|
||||
func TestColElem(t *testing.T) {
|
||||
for i, tt := range ceTests {
|
||||
in := make([]int, len(tt.arg))
|
||||
copy(in, tt.arg)
|
||||
ce, err := tt.f(in)
|
||||
if tt.val == 0xFFFF {
|
||||
if err == nil {
|
||||
t.Errorf("%d: expected error for args %x", i, tt.arg)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("%d: unexpected error: %v", i, err.Error())
|
||||
}
|
||||
if ce != tt.val {
|
||||
t.Errorf("%d: colElem=%X; want %X", i, ce, tt.val)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mkRawCES(in [][]int) []rawCE {
|
||||
out := []rawCE{}
|
||||
for _, w := range in {
|
||||
out = append(out, rawCE{w: w})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
type weightsTest struct {
|
||||
a, b [][]int
|
||||
level colltab.Level
|
||||
result int
|
||||
}
|
||||
|
||||
var nextWeightTests = []weightsTest{
|
||||
{
|
||||
a: [][]int{{100, 20, 5, 0}},
|
||||
b: [][]int{{101, defaultSecondary, defaultTertiary, 0}},
|
||||
level: colltab.Primary,
|
||||
},
|
||||
{
|
||||
a: [][]int{{100, 20, 5, 0}},
|
||||
b: [][]int{{100, 21, defaultTertiary, 0}},
|
||||
level: colltab.Secondary,
|
||||
},
|
||||
{
|
||||
a: [][]int{{100, 20, 5, 0}},
|
||||
b: [][]int{{100, 20, 6, 0}},
|
||||
level: colltab.Tertiary,
|
||||
},
|
||||
{
|
||||
a: [][]int{{100, 20, 5, 0}},
|
||||
b: [][]int{{100, 20, 5, 0}},
|
||||
level: colltab.Identity,
|
||||
},
|
||||
}
|
||||
|
||||
var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}}
|
||||
|
||||
func TestNextWeight(t *testing.T) {
|
||||
for i, tt := range nextWeightTests {
|
||||
test := func(l colltab.Level, tt weightsTest, a, gold [][]int) {
|
||||
res := nextWeight(tt.level, mkRawCES(a))
|
||||
if !equalCEArrays(mkRawCES(gold), res) {
|
||||
t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res)
|
||||
}
|
||||
}
|
||||
test(-1, tt, tt.a, tt.b)
|
||||
for l := colltab.Primary; l <= colltab.Tertiary; l++ {
|
||||
if tt.level <= l {
|
||||
test(l, tt, append(tt.a, extra[l]), tt.b)
|
||||
} else {
|
||||
test(l, tt, append(tt.a, extra[l]), append(tt.b, extra[l]))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var compareTests = []weightsTest{
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
colltab.Identity,
|
||||
0,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}, extra[0]},
|
||||
[][]int{{100, 20, 5, 1}},
|
||||
colltab.Primary,
|
||||
1,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
[][]int{{101, 20, 5, 0}},
|
||||
colltab.Primary,
|
||||
-1,
|
||||
},
|
||||
{
|
||||
[][]int{{101, 20, 5, 0}},
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
colltab.Primary,
|
||||
1,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 0, 0, 0}, {0, 20, 5, 0}},
|
||||
[][]int{{0, 20, 5, 0}, {100, 0, 0, 0}},
|
||||
colltab.Identity,
|
||||
0,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
[][]int{{100, 21, 5, 0}},
|
||||
colltab.Secondary,
|
||||
-1,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 0}},
|
||||
[][]int{{100, 20, 2, 0}},
|
||||
colltab.Tertiary,
|
||||
1,
|
||||
},
|
||||
{
|
||||
[][]int{{100, 20, 5, 1}},
|
||||
[][]int{{100, 20, 5, 2}},
|
||||
colltab.Quaternary,
|
||||
-1,
|
||||
},
|
||||
}
|
||||
|
||||
func TestCompareWeights(t *testing.T) {
|
||||
for i, tt := range compareTests {
|
||||
test := func(tt weightsTest, a, b [][]int) {
|
||||
res, level := compareWeights(mkRawCES(a), mkRawCES(b))
|
||||
if res != tt.result {
|
||||
t.Errorf("%d: expected comparison result %d; found %d", i, tt.result, res)
|
||||
}
|
||||
if level != tt.level {
|
||||
t.Errorf("%d: expected level %d; found %d", i, tt.level, level)
|
||||
}
|
||||
}
|
||||
test(tt, tt.a, tt.b)
|
||||
test(tt, append(tt.a, extra[0]), append(tt.b, extra[0]))
|
||||
}
|
||||
}
|
309
vendor/golang.org/x/text/collate/build/contract.go
generated
vendored
Normal file
309
vendor/golang.org/x/text/collate/build/contract.go
generated
vendored
Normal file
|
@ -0,0 +1,309 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
// This file contains code for detecting contractions and generating
|
||||
// the necessary tables.
|
||||
// Any Unicode Collation Algorithm (UCA) table entry that has more than
|
||||
// one rune one the left-hand side is called a contraction.
|
||||
// See http://www.unicode.org/reports/tr10/#Contractions for more details.
|
||||
//
|
||||
// We define the following terms:
|
||||
// initial: a rune that appears as the first rune in a contraction.
|
||||
// suffix: a sequence of runes succeeding the initial rune
|
||||
// in a given contraction.
|
||||
// non-initial: a rune that appears in a suffix.
|
||||
//
|
||||
// A rune may be both an initial and a non-initial and may be so in
|
||||
// many contractions. An initial may typically also appear by itself.
|
||||
// In case of ambiguities, the UCA requires we match the longest
|
||||
// contraction.
|
||||
//
|
||||
// Many contraction rules share the same set of possible suffixes.
|
||||
// We store sets of suffixes in a trie that associates an index with
|
||||
// each suffix in the set. This index can be used to look up a
|
||||
// collation element associated with the (starter rune, suffix) pair.
|
||||
//
|
||||
// The trie is defined on a UTF-8 byte sequence.
|
||||
// The overall trie is represented as an array of ctEntries. Each node of the trie
|
||||
// is represented as a subsequence of ctEntries, where each entry corresponds to
|
||||
// a possible match of a next character in the search string. An entry
|
||||
// also includes the length and offset to the next sequence of entries
|
||||
// to check in case of a match.
|
||||
|
||||
const (
|
||||
final = 0
|
||||
noIndex = 0xFF
|
||||
)
|
||||
|
||||
// ctEntry associates to a matching byte an offset and/or next sequence of
|
||||
// bytes to check. A ctEntry c is called final if a match means that the
|
||||
// longest suffix has been found. An entry c is final if c.N == 0.
|
||||
// A single final entry can match a range of characters to an offset.
|
||||
// A non-final entry always matches a single byte. Note that a non-final
|
||||
// entry might still resemble a completed suffix.
|
||||
// Examples:
|
||||
// The suffix strings "ab" and "ac" can be represented as:
|
||||
// []ctEntry{
|
||||
// {'a', 1, 1, noIndex}, // 'a' by itself does not match, so i is 0xFF.
|
||||
// {'b', 'c', 0, 1}, // "ab" -> 1, "ac" -> 2
|
||||
// }
|
||||
//
|
||||
// The suffix strings "ab", "abc", "abd", and "abcd" can be represented as:
|
||||
// []ctEntry{
|
||||
// {'a', 1, 1, noIndex}, // 'a' must be followed by 'b'.
|
||||
// {'b', 1, 2, 1}, // "ab" -> 1, may be followed by 'c' or 'd'.
|
||||
// {'d', 'd', final, 3}, // "abd" -> 3
|
||||
// {'c', 4, 1, 2}, // "abc" -> 2, may be followed by 'd'.
|
||||
// {'d', 'd', final, 4}, // "abcd" -> 4
|
||||
// }
|
||||
// See genStateTests in contract_test.go for more examples.
|
||||
type ctEntry struct {
|
||||
L uint8 // non-final: byte value to match; final: lowest match in range.
|
||||
H uint8 // non-final: relative index to next block; final: highest match in range.
|
||||
N uint8 // non-final: length of next block; final: final
|
||||
I uint8 // result offset. Will be noIndex if more bytes are needed to complete.
|
||||
}
|
||||
|
||||
// contractTrieSet holds a set of contraction tries. The tries are stored
|
||||
// consecutively in the entry field.
|
||||
type contractTrieSet []struct{ l, h, n, i uint8 }
|
||||
|
||||
// ctHandle is used to identify a trie in the trie set, consisting in an offset
|
||||
// in the array and the size of the first node.
|
||||
type ctHandle struct {
|
||||
index, n int
|
||||
}
|
||||
|
||||
// appendTrie adds a new trie for the given suffixes to the trie set and returns
|
||||
// a handle to it. The handle will be invalid on error.
|
||||
func appendTrie(ct *colltab.ContractTrieSet, suffixes []string) (ctHandle, error) {
|
||||
es := make([]stridx, len(suffixes))
|
||||
for i, s := range suffixes {
|
||||
es[i].str = s
|
||||
}
|
||||
sort.Sort(offsetSort(es))
|
||||
for i := range es {
|
||||
es[i].index = i + 1
|
||||
}
|
||||
sort.Sort(genidxSort(es))
|
||||
i := len(*ct)
|
||||
n, err := genStates(ct, es)
|
||||
if err != nil {
|
||||
*ct = (*ct)[:i]
|
||||
return ctHandle{}, err
|
||||
}
|
||||
return ctHandle{i, n}, nil
|
||||
}
|
||||
|
||||
// genStates generates ctEntries for a given suffix set and returns
|
||||
// the number of entries for the first node.
|
||||
func genStates(ct *colltab.ContractTrieSet, sis []stridx) (int, error) {
|
||||
if len(sis) == 0 {
|
||||
return 0, fmt.Errorf("genStates: list of suffices must be non-empty")
|
||||
}
|
||||
start := len(*ct)
|
||||
// create entries for differing first bytes.
|
||||
for _, si := range sis {
|
||||
s := si.str
|
||||
if len(s) == 0 {
|
||||
continue
|
||||
}
|
||||
added := false
|
||||
c := s[0]
|
||||
if len(s) > 1 {
|
||||
for j := len(*ct) - 1; j >= start; j-- {
|
||||
if (*ct)[j].L == c {
|
||||
added = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !added {
|
||||
*ct = append(*ct, ctEntry{L: c, I: noIndex})
|
||||
}
|
||||
} else {
|
||||
for j := len(*ct) - 1; j >= start; j-- {
|
||||
// Update the offset for longer suffixes with the same byte.
|
||||
if (*ct)[j].L == c {
|
||||
(*ct)[j].I = uint8(si.index)
|
||||
added = true
|
||||
}
|
||||
// Extend range of final ctEntry, if possible.
|
||||
if (*ct)[j].H+1 == c {
|
||||
(*ct)[j].H = c
|
||||
added = true
|
||||
}
|
||||
}
|
||||
if !added {
|
||||
*ct = append(*ct, ctEntry{L: c, H: c, N: final, I: uint8(si.index)})
|
||||
}
|
||||
}
|
||||
}
|
||||
n := len(*ct) - start
|
||||
// Append nodes for the remainder of the suffixes for each ctEntry.
|
||||
sp := 0
|
||||
for i, end := start, len(*ct); i < end; i++ {
|
||||
fe := (*ct)[i]
|
||||
if fe.H == 0 { // uninitialized non-final
|
||||
ln := len(*ct) - start - n
|
||||
if ln > 0xFF {
|
||||
return 0, fmt.Errorf("genStates: relative block offset too large: %d > 255", ln)
|
||||
}
|
||||
fe.H = uint8(ln)
|
||||
// Find first non-final strings with same byte as current entry.
|
||||
for ; sis[sp].str[0] != fe.L; sp++ {
|
||||
}
|
||||
se := sp + 1
|
||||
for ; se < len(sis) && len(sis[se].str) > 1 && sis[se].str[0] == fe.L; se++ {
|
||||
}
|
||||
sl := sis[sp:se]
|
||||
sp = se
|
||||
for i, si := range sl {
|
||||
sl[i].str = si.str[1:]
|
||||
}
|
||||
nn, err := genStates(ct, sl)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
fe.N = uint8(nn)
|
||||
(*ct)[i] = fe
|
||||
}
|
||||
}
|
||||
sort.Sort(entrySort((*ct)[start : start+n]))
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// There may be both a final and non-final entry for a byte if the byte
|
||||
// is implied in a range of matches in the final entry.
|
||||
// We need to ensure that the non-final entry comes first in that case.
|
||||
type entrySort colltab.ContractTrieSet
|
||||
|
||||
func (fe entrySort) Len() int { return len(fe) }
|
||||
func (fe entrySort) Swap(i, j int) { fe[i], fe[j] = fe[j], fe[i] }
|
||||
func (fe entrySort) Less(i, j int) bool {
|
||||
return fe[i].L > fe[j].L
|
||||
}
|
||||
|
||||
// stridx is used for sorting suffixes and their associated offsets.
|
||||
type stridx struct {
|
||||
str string
|
||||
index int
|
||||
}
|
||||
|
||||
// For computing the offsets, we first sort by size, and then by string.
|
||||
// This ensures that strings that only differ in the last byte by 1
|
||||
// are sorted consecutively in increasing order such that they can
|
||||
// be packed as a range in a final ctEntry.
|
||||
type offsetSort []stridx
|
||||
|
||||
func (si offsetSort) Len() int { return len(si) }
|
||||
func (si offsetSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
|
||||
func (si offsetSort) Less(i, j int) bool {
|
||||
if len(si[i].str) != len(si[j].str) {
|
||||
return len(si[i].str) > len(si[j].str)
|
||||
}
|
||||
return si[i].str < si[j].str
|
||||
}
|
||||
|
||||
// For indexing, we want to ensure that strings are sorted in string order, where
|
||||
// for strings with the same prefix, we put longer strings before shorter ones.
|
||||
type genidxSort []stridx
|
||||
|
||||
func (si genidxSort) Len() int { return len(si) }
|
||||
func (si genidxSort) Swap(i, j int) { si[i], si[j] = si[j], si[i] }
|
||||
func (si genidxSort) Less(i, j int) bool {
|
||||
if strings.HasPrefix(si[j].str, si[i].str) {
|
||||
return false
|
||||
}
|
||||
if strings.HasPrefix(si[i].str, si[j].str) {
|
||||
return true
|
||||
}
|
||||
return si[i].str < si[j].str
|
||||
}
|
||||
|
||||
// lookup matches the longest suffix in str and returns the associated offset
|
||||
// and the number of bytes consumed.
|
||||
func lookup(ct *colltab.ContractTrieSet, h ctHandle, str []byte) (index, ns int) {
|
||||
states := (*ct)[h.index:]
|
||||
p := 0
|
||||
n := h.n
|
||||
for i := 0; i < n && p < len(str); {
|
||||
e := states[i]
|
||||
c := str[p]
|
||||
if c >= e.L {
|
||||
if e.L == c {
|
||||
p++
|
||||
if e.I != noIndex {
|
||||
index, ns = int(e.I), p
|
||||
}
|
||||
if e.N != final {
|
||||
// set to new state
|
||||
i, states, n = 0, states[int(e.H)+n:], int(e.N)
|
||||
} else {
|
||||
return
|
||||
}
|
||||
continue
|
||||
} else if e.N == final && c <= e.H {
|
||||
p++
|
||||
return int(c-e.L) + int(e.I), p
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// print writes the contractTrieSet t as compilable Go code to w. It returns
|
||||
// the total number of bytes written and the size of the resulting data structure in bytes.
|
||||
func print(t *colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
|
||||
update3 := func(nn, sz int, e error) {
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
size += sz
|
||||
}
|
||||
update2 := func(nn int, e error) { update3(nn, 0, e) }
|
||||
|
||||
update3(printArray(*t, w, name))
|
||||
update2(fmt.Fprintf(w, "var %sContractTrieSet = ", name))
|
||||
update3(printStruct(*t, w, name))
|
||||
update2(fmt.Fprintln(w))
|
||||
return
|
||||
}
|
||||
|
||||
func printArray(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
size = len(ct) * 4
|
||||
p("// %sCTEntries: %d entries, %d bytes\n", name, len(ct), size)
|
||||
p("var %sCTEntries = [%d]struct{L,H,N,I uint8}{\n", name, len(ct))
|
||||
for _, fe := range ct {
|
||||
p("\t{0x%X, 0x%X, %d, %d},\n", fe.L, fe.H, fe.N, fe.I)
|
||||
}
|
||||
p("}\n")
|
||||
return
|
||||
}
|
||||
|
||||
func printStruct(ct colltab.ContractTrieSet, w io.Writer, name string) (n, size int, err error) {
|
||||
n, err = fmt.Fprintf(w, "colltab.ContractTrieSet( %sCTEntries[:] )", name)
|
||||
size = int(reflect.TypeOf(ct).Size())
|
||||
return
|
||||
}
|
266
vendor/golang.org/x/text/collate/build/contract_test.go
generated
vendored
Normal file
266
vendor/golang.org/x/text/collate/build/contract_test.go
generated
vendored
Normal file
|
@ -0,0 +1,266 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
var largetosmall = []stridx{
|
||||
{"a", 5},
|
||||
{"ab", 4},
|
||||
{"abc", 3},
|
||||
{"abcd", 2},
|
||||
{"abcde", 1},
|
||||
{"abcdef", 0},
|
||||
}
|
||||
|
||||
var offsetSortTests = [][]stridx{
|
||||
{
|
||||
{"bcde", 1},
|
||||
{"bc", 5},
|
||||
{"ab", 4},
|
||||
{"bcd", 3},
|
||||
{"abcd", 0},
|
||||
{"abc", 2},
|
||||
},
|
||||
largetosmall,
|
||||
}
|
||||
|
||||
func TestOffsetSort(t *testing.T) {
|
||||
for i, st := range offsetSortTests {
|
||||
sort.Sort(offsetSort(st))
|
||||
for j, si := range st {
|
||||
if j != si.index {
|
||||
t.Errorf("%d: failed: %v", i, st)
|
||||
}
|
||||
}
|
||||
}
|
||||
for i, tt := range genStateTests {
|
||||
// ensure input is well-formed
|
||||
sort.Sort(offsetSort(tt.in))
|
||||
for j, si := range tt.in {
|
||||
if si.index != j+1 {
|
||||
t.Errorf("%dth sort failed: %v", i, tt.in)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var genidxtest1 = []stridx{
|
||||
{"bcde", 3},
|
||||
{"bc", 6},
|
||||
{"ab", 2},
|
||||
{"bcd", 5},
|
||||
{"abcd", 0},
|
||||
{"abc", 1},
|
||||
{"bcdf", 4},
|
||||
}
|
||||
|
||||
var genidxSortTests = [][]stridx{
|
||||
genidxtest1,
|
||||
largetosmall,
|
||||
}
|
||||
|
||||
func TestGenIdxSort(t *testing.T) {
|
||||
for i, st := range genidxSortTests {
|
||||
sort.Sort(genidxSort(st))
|
||||
for j, si := range st {
|
||||
if j != si.index {
|
||||
t.Errorf("%dth sort failed %v", i, st)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var entrySortTests = []colltab.ContractTrieSet{
|
||||
{
|
||||
{10, 0, 1, 3},
|
||||
{99, 0, 1, 0},
|
||||
{20, 50, 0, 2},
|
||||
{30, 0, 1, 1},
|
||||
},
|
||||
}
|
||||
|
||||
func TestEntrySort(t *testing.T) {
|
||||
for i, et := range entrySortTests {
|
||||
sort.Sort(entrySort(et))
|
||||
for j, fe := range et {
|
||||
if j != int(fe.I) {
|
||||
t.Errorf("%dth sort failed %v", i, et)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type GenStateTest struct {
|
||||
in []stridx
|
||||
firstBlockLen int
|
||||
out colltab.ContractTrieSet
|
||||
}
|
||||
|
||||
var genStateTests = []GenStateTest{
|
||||
{[]stridx{
|
||||
{"abc", 1},
|
||||
},
|
||||
1,
|
||||
colltab.ContractTrieSet{
|
||||
{'a', 0, 1, noIndex},
|
||||
{'b', 0, 1, noIndex},
|
||||
{'c', 'c', final, 1},
|
||||
},
|
||||
},
|
||||
{[]stridx{
|
||||
{"abc", 1},
|
||||
{"abd", 2},
|
||||
{"abe", 3},
|
||||
},
|
||||
1,
|
||||
colltab.ContractTrieSet{
|
||||
{'a', 0, 1, noIndex},
|
||||
{'b', 0, 1, noIndex},
|
||||
{'c', 'e', final, 1},
|
||||
},
|
||||
},
|
||||
{[]stridx{
|
||||
{"abc", 1},
|
||||
{"ab", 2},
|
||||
{"a", 3},
|
||||
},
|
||||
1,
|
||||
colltab.ContractTrieSet{
|
||||
{'a', 0, 1, 3},
|
||||
{'b', 0, 1, 2},
|
||||
{'c', 'c', final, 1},
|
||||
},
|
||||
},
|
||||
{[]stridx{
|
||||
{"abc", 1},
|
||||
{"abd", 2},
|
||||
{"ab", 3},
|
||||
{"ac", 4},
|
||||
{"a", 5},
|
||||
{"b", 6},
|
||||
},
|
||||
2,
|
||||
colltab.ContractTrieSet{
|
||||
{'b', 'b', final, 6},
|
||||
{'a', 0, 2, 5},
|
||||
{'c', 'c', final, 4},
|
||||
{'b', 0, 1, 3},
|
||||
{'c', 'd', final, 1},
|
||||
},
|
||||
},
|
||||
{[]stridx{
|
||||
{"bcde", 2},
|
||||
{"bc", 7},
|
||||
{"ab", 6},
|
||||
{"bcd", 5},
|
||||
{"abcd", 1},
|
||||
{"abc", 4},
|
||||
{"bcdf", 3},
|
||||
},
|
||||
2,
|
||||
colltab.ContractTrieSet{
|
||||
{'b', 3, 1, noIndex},
|
||||
{'a', 0, 1, noIndex},
|
||||
{'b', 0, 1, 6},
|
||||
{'c', 0, 1, 4},
|
||||
{'d', 'd', final, 1},
|
||||
{'c', 0, 1, 7},
|
||||
{'d', 0, 1, 5},
|
||||
{'e', 'f', final, 2},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func TestGenStates(t *testing.T) {
|
||||
for i, tt := range genStateTests {
|
||||
si := []stridx{}
|
||||
for _, e := range tt.in {
|
||||
si = append(si, e)
|
||||
}
|
||||
// ensure input is well-formed
|
||||
sort.Sort(genidxSort(si))
|
||||
ct := colltab.ContractTrieSet{}
|
||||
n, _ := genStates(&ct, si)
|
||||
if nn := tt.firstBlockLen; nn != n {
|
||||
t.Errorf("%d: block len %v; want %v", i, n, nn)
|
||||
}
|
||||
if lv, lw := len(ct), len(tt.out); lv != lw {
|
||||
t.Errorf("%d: len %v; want %v", i, lv, lw)
|
||||
continue
|
||||
}
|
||||
for j, fe := range tt.out {
|
||||
const msg = "%d:%d: value %s=%v; want %v"
|
||||
if fe.L != ct[j].L {
|
||||
t.Errorf(msg, i, j, "l", ct[j].L, fe.L)
|
||||
}
|
||||
if fe.H != ct[j].H {
|
||||
t.Errorf(msg, i, j, "h", ct[j].H, fe.H)
|
||||
}
|
||||
if fe.N != ct[j].N {
|
||||
t.Errorf(msg, i, j, "n", ct[j].N, fe.N)
|
||||
}
|
||||
if fe.I != ct[j].I {
|
||||
t.Errorf(msg, i, j, "i", ct[j].I, fe.I)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLookupContraction(t *testing.T) {
|
||||
for i, tt := range genStateTests {
|
||||
input := []string{}
|
||||
for _, e := range tt.in {
|
||||
input = append(input, e.str)
|
||||
}
|
||||
cts := colltab.ContractTrieSet{}
|
||||
h, _ := appendTrie(&cts, input)
|
||||
for j, si := range tt.in {
|
||||
str := si.str
|
||||
for _, s := range []string{str, str + "X"} {
|
||||
msg := "%d:%d: %s(%s) %v; want %v"
|
||||
idx, sn := lookup(&cts, h, []byte(s))
|
||||
if idx != si.index {
|
||||
t.Errorf(msg, i, j, "index", s, idx, si.index)
|
||||
}
|
||||
if sn != len(str) {
|
||||
t.Errorf(msg, i, j, "sn", s, sn, len(str))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrintContractionTrieSet(t *testing.T) {
|
||||
testdata := colltab.ContractTrieSet(genStateTests[4].out)
|
||||
buf := &bytes.Buffer{}
|
||||
print(&testdata, buf, "test")
|
||||
if contractTrieOutput != buf.String() {
|
||||
t.Errorf("output differs; found\n%s", buf.String())
|
||||
println(string(buf.Bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
const contractTrieOutput = `// testCTEntries: 8 entries, 32 bytes
|
||||
var testCTEntries = [8]struct{L,H,N,I uint8}{
|
||||
{0x62, 0x3, 1, 255},
|
||||
{0x61, 0x0, 1, 255},
|
||||
{0x62, 0x0, 1, 6},
|
||||
{0x63, 0x0, 1, 4},
|
||||
{0x64, 0x64, 0, 1},
|
||||
{0x63, 0x0, 1, 7},
|
||||
{0x64, 0x0, 1, 5},
|
||||
{0x65, 0x66, 0, 2},
|
||||
}
|
||||
var testContractTrieSet = colltab.ContractTrieSet( testCTEntries[:] )
|
||||
`
|
393
vendor/golang.org/x/text/collate/build/order.go
generated
vendored
Normal file
393
vendor/golang.org/x/text/collate/build/order.go
generated
vendored
Normal file
|
@ -0,0 +1,393 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
type logicalAnchor int
|
||||
|
||||
const (
|
||||
firstAnchor logicalAnchor = -1
|
||||
noAnchor = 0
|
||||
lastAnchor = 1
|
||||
)
|
||||
|
||||
// entry is used to keep track of a single entry in the collation element table
|
||||
// during building. Examples of entries can be found in the Default Unicode
|
||||
// Collation Element Table.
|
||||
// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
|
||||
type entry struct {
|
||||
str string // same as string(runes)
|
||||
runes []rune
|
||||
elems []rawCE // the collation elements
|
||||
extend string // weights of extend to be appended to elems
|
||||
before bool // weights relative to next instead of previous.
|
||||
lock bool // entry is used in extension and can no longer be moved.
|
||||
|
||||
// prev, next, and level are used to keep track of tailorings.
|
||||
prev, next *entry
|
||||
level colltab.Level // next differs at this level
|
||||
skipRemove bool // do not unlink when removed
|
||||
|
||||
decompose bool // can use NFKD decomposition to generate elems
|
||||
exclude bool // do not include in table
|
||||
implicit bool // derived, is not included in the list
|
||||
modified bool // entry was modified in tailoring
|
||||
logical logicalAnchor
|
||||
|
||||
expansionIndex int // used to store index into expansion table
|
||||
contractionHandle ctHandle
|
||||
contractionIndex int // index into contraction elements
|
||||
}
|
||||
|
||||
func (e *entry) String() string {
|
||||
return fmt.Sprintf("%X (%q) -> %X (ch:%x; ci:%d, ei:%d)",
|
||||
e.runes, e.str, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
|
||||
}
|
||||
|
||||
func (e *entry) skip() bool {
|
||||
return e.contraction()
|
||||
}
|
||||
|
||||
func (e *entry) expansion() bool {
|
||||
return !e.decompose && len(e.elems) > 1
|
||||
}
|
||||
|
||||
func (e *entry) contraction() bool {
|
||||
return len(e.runes) > 1
|
||||
}
|
||||
|
||||
func (e *entry) contractionStarter() bool {
|
||||
return e.contractionHandle.n != 0
|
||||
}
|
||||
|
||||
// nextIndexed gets the next entry that needs to be stored in the table.
|
||||
// It returns the entry and the collation level at which the next entry differs
|
||||
// from the current entry.
|
||||
// Entries that can be explicitly derived and logical reset positions are
|
||||
// examples of entries that will not be indexed.
|
||||
func (e *entry) nextIndexed() (*entry, colltab.Level) {
|
||||
level := e.level
|
||||
for e = e.next; e != nil && (e.exclude || len(e.elems) == 0); e = e.next {
|
||||
if e.level < level {
|
||||
level = e.level
|
||||
}
|
||||
}
|
||||
return e, level
|
||||
}
|
||||
|
||||
// remove unlinks entry e from the sorted chain and clears the collation
|
||||
// elements. e may not be at the front or end of the list. This should always
|
||||
// be the case, as the front and end of the list are always logical anchors,
|
||||
// which may not be removed.
|
||||
func (e *entry) remove() {
|
||||
if e.logical != noAnchor {
|
||||
log.Fatalf("may not remove anchor %q", e.str)
|
||||
}
|
||||
// TODO: need to set e.prev.level to e.level if e.level is smaller?
|
||||
e.elems = nil
|
||||
if !e.skipRemove {
|
||||
if e.prev != nil {
|
||||
e.prev.next = e.next
|
||||
}
|
||||
if e.next != nil {
|
||||
e.next.prev = e.prev
|
||||
}
|
||||
}
|
||||
e.skipRemove = false
|
||||
}
|
||||
|
||||
// insertAfter inserts n after e.
|
||||
func (e *entry) insertAfter(n *entry) {
|
||||
if e == n {
|
||||
panic("e == anchor")
|
||||
}
|
||||
if e == nil {
|
||||
panic("unexpected nil anchor")
|
||||
}
|
||||
n.remove()
|
||||
n.decompose = false // redo decomposition test
|
||||
|
||||
n.next = e.next
|
||||
n.prev = e
|
||||
if e.next != nil {
|
||||
e.next.prev = n
|
||||
}
|
||||
e.next = n
|
||||
}
|
||||
|
||||
// insertBefore inserts n before e.
|
||||
func (e *entry) insertBefore(n *entry) {
|
||||
if e == n {
|
||||
panic("e == anchor")
|
||||
}
|
||||
if e == nil {
|
||||
panic("unexpected nil anchor")
|
||||
}
|
||||
n.remove()
|
||||
n.decompose = false // redo decomposition test
|
||||
|
||||
n.prev = e.prev
|
||||
n.next = e
|
||||
if e.prev != nil {
|
||||
e.prev.next = n
|
||||
}
|
||||
e.prev = n
|
||||
}
|
||||
|
||||
func (e *entry) encodeBase() (ce uint32, err error) {
|
||||
switch {
|
||||
case e.expansion():
|
||||
ce, err = makeExpandIndex(e.expansionIndex)
|
||||
default:
|
||||
if e.decompose {
|
||||
log.Fatal("decompose should be handled elsewhere")
|
||||
}
|
||||
ce, err = makeCE(e.elems[0])
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (e *entry) encode() (ce uint32, err error) {
|
||||
if e.skip() {
|
||||
log.Fatal("cannot build colElem for entry that should be skipped")
|
||||
}
|
||||
switch {
|
||||
case e.decompose:
|
||||
t1 := e.elems[0].w[2]
|
||||
t2 := 0
|
||||
if len(e.elems) > 1 {
|
||||
t2 = e.elems[1].w[2]
|
||||
}
|
||||
ce, err = makeDecompose(t1, t2)
|
||||
case e.contractionStarter():
|
||||
ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
|
||||
default:
|
||||
if len(e.runes) > 1 {
|
||||
log.Fatal("colElem: contractions are handled in contraction trie")
|
||||
}
|
||||
ce, err = e.encodeBase()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// entryLess returns true if a sorts before b and false otherwise.
|
||||
func entryLess(a, b *entry) bool {
|
||||
if res, _ := compareWeights(a.elems, b.elems); res != 0 {
|
||||
return res == -1
|
||||
}
|
||||
if a.logical != noAnchor {
|
||||
return a.logical == firstAnchor
|
||||
}
|
||||
if b.logical != noAnchor {
|
||||
return b.logical == lastAnchor
|
||||
}
|
||||
return a.str < b.str
|
||||
}
|
||||
|
||||
type sortedEntries []*entry
|
||||
|
||||
func (s sortedEntries) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sortedEntries) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
|
||||
func (s sortedEntries) Less(i, j int) bool {
|
||||
return entryLess(s[i], s[j])
|
||||
}
|
||||
|
||||
type ordering struct {
|
||||
id string
|
||||
entryMap map[string]*entry
|
||||
ordered []*entry
|
||||
handle *trieHandle
|
||||
}
|
||||
|
||||
// insert inserts e into both entryMap and ordered.
|
||||
// Note that insert simply appends e to ordered. To reattain a sorted
|
||||
// order, o.sort() should be called.
|
||||
func (o *ordering) insert(e *entry) {
|
||||
if e.logical == noAnchor {
|
||||
o.entryMap[e.str] = e
|
||||
} else {
|
||||
// Use key format as used in UCA rules.
|
||||
o.entryMap[fmt.Sprintf("[%s]", e.str)] = e
|
||||
// Also add index entry for XML format.
|
||||
o.entryMap[fmt.Sprintf("<%s/>", strings.Replace(e.str, " ", "_", -1))] = e
|
||||
}
|
||||
o.ordered = append(o.ordered, e)
|
||||
}
|
||||
|
||||
// newEntry creates a new entry for the given info and inserts it into
|
||||
// the index.
|
||||
func (o *ordering) newEntry(s string, ces []rawCE) *entry {
|
||||
e := &entry{
|
||||
runes: []rune(s),
|
||||
elems: ces,
|
||||
str: s,
|
||||
}
|
||||
o.insert(e)
|
||||
return e
|
||||
}
|
||||
|
||||
// find looks up and returns the entry for the given string.
|
||||
// It returns nil if str is not in the index and if an implicit value
|
||||
// cannot be derived, that is, if str represents more than one rune.
|
||||
func (o *ordering) find(str string) *entry {
|
||||
e := o.entryMap[str]
|
||||
if e == nil {
|
||||
r := []rune(str)
|
||||
if len(r) == 1 {
|
||||
const (
|
||||
firstHangul = 0xAC00
|
||||
lastHangul = 0xD7A3
|
||||
)
|
||||
if r[0] >= firstHangul && r[0] <= lastHangul {
|
||||
ce := []rawCE{}
|
||||
nfd := norm.NFD.String(str)
|
||||
for _, r := range nfd {
|
||||
ce = append(ce, o.find(string(r)).elems...)
|
||||
}
|
||||
e = o.newEntry(nfd, ce)
|
||||
} else {
|
||||
e = o.newEntry(string(r[0]), []rawCE{
|
||||
{w: []int{
|
||||
implicitPrimary(r[0]),
|
||||
defaultSecondary,
|
||||
defaultTertiary,
|
||||
int(r[0]),
|
||||
},
|
||||
},
|
||||
})
|
||||
e.modified = true
|
||||
}
|
||||
e.exclude = true // do not index implicits
|
||||
}
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// makeRootOrdering returns a newly initialized ordering value and populates
|
||||
// it with a set of logical reset points that can be used as anchors.
|
||||
// The anchors first_tertiary_ignorable and __END__ will always sort at
|
||||
// the beginning and end, respectively. This means that prev and next are non-nil
|
||||
// for any indexed entry.
|
||||
func makeRootOrdering() ordering {
|
||||
const max = unicode.MaxRune
|
||||
o := ordering{
|
||||
entryMap: make(map[string]*entry),
|
||||
}
|
||||
insert := func(typ logicalAnchor, s string, ce []int) {
|
||||
e := &entry{
|
||||
elems: []rawCE{{w: ce}},
|
||||
str: s,
|
||||
exclude: true,
|
||||
logical: typ,
|
||||
}
|
||||
o.insert(e)
|
||||
}
|
||||
insert(firstAnchor, "first tertiary ignorable", []int{0, 0, 0, 0})
|
||||
insert(lastAnchor, "last tertiary ignorable", []int{0, 0, 0, max})
|
||||
insert(lastAnchor, "last primary ignorable", []int{0, defaultSecondary, defaultTertiary, max})
|
||||
insert(lastAnchor, "last non ignorable", []int{maxPrimary, defaultSecondary, defaultTertiary, max})
|
||||
insert(lastAnchor, "__END__", []int{1 << maxPrimaryBits, defaultSecondary, defaultTertiary, max})
|
||||
return o
|
||||
}
|
||||
|
||||
// patchForInsert eleminates entries from the list with more than one collation element.
|
||||
// The next and prev fields of the eliminated entries still point to appropriate
|
||||
// values in the newly created list.
|
||||
// It requires that sort has been called.
|
||||
func (o *ordering) patchForInsert() {
|
||||
for i := 0; i < len(o.ordered)-1; {
|
||||
e := o.ordered[i]
|
||||
lev := e.level
|
||||
n := e.next
|
||||
for ; n != nil && len(n.elems) > 1; n = n.next {
|
||||
if n.level < lev {
|
||||
lev = n.level
|
||||
}
|
||||
n.skipRemove = true
|
||||
}
|
||||
for ; o.ordered[i] != n; i++ {
|
||||
o.ordered[i].level = lev
|
||||
o.ordered[i].next = n
|
||||
o.ordered[i+1].prev = e
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// clone copies all ordering of es into a new ordering value.
|
||||
func (o *ordering) clone() *ordering {
|
||||
o.sort()
|
||||
oo := ordering{
|
||||
entryMap: make(map[string]*entry),
|
||||
}
|
||||
for _, e := range o.ordered {
|
||||
ne := &entry{
|
||||
runes: e.runes,
|
||||
elems: e.elems,
|
||||
str: e.str,
|
||||
decompose: e.decompose,
|
||||
exclude: e.exclude,
|
||||
logical: e.logical,
|
||||
}
|
||||
oo.insert(ne)
|
||||
}
|
||||
oo.sort() // link all ordering.
|
||||
oo.patchForInsert()
|
||||
return &oo
|
||||
}
|
||||
|
||||
// front returns the first entry to be indexed.
|
||||
// It assumes that sort() has been called.
|
||||
func (o *ordering) front() *entry {
|
||||
e := o.ordered[0]
|
||||
if e.prev != nil {
|
||||
log.Panicf("unexpected first entry: %v", e)
|
||||
}
|
||||
// The first entry is always a logical position, which should not be indexed.
|
||||
e, _ = e.nextIndexed()
|
||||
return e
|
||||
}
|
||||
|
||||
// sort sorts all ordering based on their collation elements and initializes
|
||||
// the prev, next, and level fields accordingly.
|
||||
func (o *ordering) sort() {
|
||||
sort.Sort(sortedEntries(o.ordered))
|
||||
l := o.ordered
|
||||
for i := 1; i < len(l); i++ {
|
||||
k := i - 1
|
||||
l[k].next = l[i]
|
||||
_, l[k].level = compareWeights(l[k].elems, l[i].elems)
|
||||
l[i].prev = l[k]
|
||||
}
|
||||
}
|
||||
|
||||
// genColElems generates a collation element array from the runes in str. This
|
||||
// assumes that all collation elements have already been added to the Builder.
|
||||
func (o *ordering) genColElems(str string) []rawCE {
|
||||
elems := []rawCE{}
|
||||
for _, r := range []rune(str) {
|
||||
for _, ce := range o.find(string(r)).elems {
|
||||
if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
|
||||
elems = append(elems, ce)
|
||||
}
|
||||
}
|
||||
}
|
||||
return elems
|
||||
}
|
229
vendor/golang.org/x/text/collate/build/order_test.go
generated
vendored
Normal file
229
vendor/golang.org/x/text/collate/build/order_test.go
generated
vendored
Normal file
|
@ -0,0 +1,229 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
type entryTest struct {
|
||||
f func(in []int) (uint32, error)
|
||||
arg []int
|
||||
val uint32
|
||||
}
|
||||
|
||||
// makeList returns a list of entries of length n+2, with n normal
|
||||
// entries plus a leading and trailing anchor.
|
||||
func makeList(n int) []*entry {
|
||||
es := make([]*entry, n+2)
|
||||
weights := []rawCE{{w: []int{100, 20, 5, 0}}}
|
||||
for i := range es {
|
||||
runes := []rune{rune(i)}
|
||||
es[i] = &entry{
|
||||
runes: runes,
|
||||
elems: weights,
|
||||
}
|
||||
weights = nextWeight(colltab.Primary, weights)
|
||||
}
|
||||
for i := 1; i < len(es); i++ {
|
||||
es[i-1].next = es[i]
|
||||
es[i].prev = es[i-1]
|
||||
_, es[i-1].level = compareWeights(es[i-1].elems, es[i].elems)
|
||||
}
|
||||
es[0].exclude = true
|
||||
es[0].logical = firstAnchor
|
||||
es[len(es)-1].exclude = true
|
||||
es[len(es)-1].logical = lastAnchor
|
||||
return es
|
||||
}
|
||||
|
||||
func TestNextIndexed(t *testing.T) {
|
||||
const n = 5
|
||||
es := makeList(n)
|
||||
for i := int64(0); i < 1<<n; i++ {
|
||||
mask := strconv.FormatInt(i+(1<<n), 2)
|
||||
for i, c := range mask {
|
||||
es[i].exclude = c == '1'
|
||||
}
|
||||
e := es[0]
|
||||
for i, c := range mask {
|
||||
if c == '0' {
|
||||
e, _ = e.nextIndexed()
|
||||
if e != es[i] {
|
||||
t.Errorf("%d: expected entry %d; found %d", i, es[i].elems, e.elems)
|
||||
}
|
||||
}
|
||||
}
|
||||
if e, _ = e.nextIndexed(); e != nil {
|
||||
t.Errorf("%d: expected nil entry; found %d", i, e.elems)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemove(t *testing.T) {
|
||||
const n = 5
|
||||
for i := int64(0); i < 1<<n; i++ {
|
||||
es := makeList(n)
|
||||
mask := strconv.FormatInt(i+(1<<n), 2)
|
||||
for i, c := range mask {
|
||||
if c == '0' {
|
||||
es[i].remove()
|
||||
}
|
||||
}
|
||||
e := es[0]
|
||||
for i, c := range mask {
|
||||
if c == '1' {
|
||||
if e != es[i] {
|
||||
t.Errorf("%d: expected entry %d; found %d", i, es[i].elems, e.elems)
|
||||
}
|
||||
e, _ = e.nextIndexed()
|
||||
}
|
||||
}
|
||||
if e != nil {
|
||||
t.Errorf("%d: expected nil entry; found %d", i, e.elems)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// nextPerm generates the next permutation of the array. The starting
|
||||
// permutation is assumed to be a list of integers sorted in increasing order.
|
||||
// It returns false if there are no more permuations left.
|
||||
func nextPerm(a []int) bool {
|
||||
i := len(a) - 2
|
||||
for ; i >= 0; i-- {
|
||||
if a[i] < a[i+1] {
|
||||
break
|
||||
}
|
||||
}
|
||||
if i < 0 {
|
||||
return false
|
||||
}
|
||||
for j := len(a) - 1; j >= i; j-- {
|
||||
if a[j] > a[i] {
|
||||
a[i], a[j] = a[j], a[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
for j := i + 1; j < (len(a)+i+1)/2; j++ {
|
||||
a[j], a[len(a)+i-j] = a[len(a)+i-j], a[j]
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func TestInsertAfter(t *testing.T) {
|
||||
const n = 5
|
||||
orig := makeList(n)
|
||||
perm := make([]int, n)
|
||||
for i := range perm {
|
||||
perm[i] = i + 1
|
||||
}
|
||||
for ok := true; ok; ok = nextPerm(perm) {
|
||||
es := makeList(n)
|
||||
last := es[0]
|
||||
for _, i := range perm {
|
||||
last.insertAfter(es[i])
|
||||
last = es[i]
|
||||
}
|
||||
for _, e := range es {
|
||||
e.elems = es[0].elems
|
||||
}
|
||||
e := es[0]
|
||||
for _, i := range perm {
|
||||
e, _ = e.nextIndexed()
|
||||
if e.runes[0] != orig[i].runes[0] {
|
||||
t.Errorf("%d:%d: expected entry %X; found %X", perm, i, orig[i].runes, e.runes)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestInsertBefore(t *testing.T) {
|
||||
const n = 5
|
||||
orig := makeList(n)
|
||||
perm := make([]int, n)
|
||||
for i := range perm {
|
||||
perm[i] = i + 1
|
||||
}
|
||||
for ok := true; ok; ok = nextPerm(perm) {
|
||||
es := makeList(n)
|
||||
last := es[len(es)-1]
|
||||
for _, i := range perm {
|
||||
last.insertBefore(es[i])
|
||||
last = es[i]
|
||||
}
|
||||
for _, e := range es {
|
||||
e.elems = es[0].elems
|
||||
}
|
||||
e := es[0]
|
||||
for i := n - 1; i >= 0; i-- {
|
||||
e, _ = e.nextIndexed()
|
||||
if e.runes[0] != rune(perm[i]) {
|
||||
t.Errorf("%d:%d: expected entry %X; found %X", perm, i, orig[i].runes, e.runes)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type entryLessTest struct {
|
||||
a, b *entry
|
||||
res bool
|
||||
}
|
||||
|
||||
var (
|
||||
w1 = []rawCE{{w: []int{100, 20, 5, 5}}}
|
||||
w2 = []rawCE{{w: []int{101, 20, 5, 5}}}
|
||||
)
|
||||
|
||||
var entryLessTests = []entryLessTest{
|
||||
{&entry{str: "a", elems: w1},
|
||||
&entry{str: "a", elems: w1},
|
||||
false,
|
||||
},
|
||||
{&entry{str: "a", elems: w1},
|
||||
&entry{str: "a", elems: w2},
|
||||
true,
|
||||
},
|
||||
{&entry{str: "a", elems: w1},
|
||||
&entry{str: "b", elems: w1},
|
||||
true,
|
||||
},
|
||||
{&entry{str: "a", elems: w2},
|
||||
&entry{str: "a", elems: w1},
|
||||
false,
|
||||
},
|
||||
{&entry{str: "c", elems: w1},
|
||||
&entry{str: "b", elems: w1},
|
||||
false,
|
||||
},
|
||||
{&entry{str: "a", elems: w1, logical: firstAnchor},
|
||||
&entry{str: "a", elems: w1},
|
||||
true,
|
||||
},
|
||||
{&entry{str: "a", elems: w1},
|
||||
&entry{str: "b", elems: w1, logical: firstAnchor},
|
||||
false,
|
||||
},
|
||||
{&entry{str: "b", elems: w1},
|
||||
&entry{str: "a", elems: w1, logical: lastAnchor},
|
||||
true,
|
||||
},
|
||||
{&entry{str: "a", elems: w1, logical: lastAnchor},
|
||||
&entry{str: "c", elems: w1},
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
func TestEntryLess(t *testing.T) {
|
||||
for i, tt := range entryLessTests {
|
||||
if res := entryLess(tt.a, tt.b); res != tt.res {
|
||||
t.Errorf("%d: was %v; want %v", i, res, tt.res)
|
||||
}
|
||||
}
|
||||
}
|
81
vendor/golang.org/x/text/collate/build/table.go
generated
vendored
Normal file
81
vendor/golang.org/x/text/collate/build/table.go
generated
vendored
Normal file
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"reflect"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
// table is an intermediate structure that roughly resembles the table in collate.
|
||||
type table struct {
|
||||
colltab.Table
|
||||
trie trie
|
||||
root *trieHandle
|
||||
}
|
||||
|
||||
// print writes the table as Go compilable code to w. It prefixes the
|
||||
// variable names with name. It returns the number of bytes written
|
||||
// and the size of the resulting table.
|
||||
func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
|
||||
update := func(nn, sz int, e error) {
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
size += sz
|
||||
}
|
||||
// Write arrays needed for the structure.
|
||||
update(printColElems(w, t.ExpandElem, name+"ExpandElem"))
|
||||
update(printColElems(w, t.ContractElem, name+"ContractElem"))
|
||||
update(t.trie.printArrays(w, name))
|
||||
update(printArray(t.ContractTries, w, name))
|
||||
|
||||
nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
|
||||
update(nn, 0, e)
|
||||
return
|
||||
}
|
||||
|
||||
func (t *table) fprintIndex(w io.Writer, h *trieHandle, id string) (n int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
p("\t{ // %s\n", id)
|
||||
p("\t\tlookupOffset: 0x%x,\n", h.lookupStart)
|
||||
p("\t\tvaluesOffset: 0x%x,\n", h.valueStart)
|
||||
p("\t},\n")
|
||||
return
|
||||
}
|
||||
|
||||
func printColElems(w io.Writer, a []uint32, name string) (n, sz int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
sz = len(a) * int(reflect.TypeOf(uint32(0)).Size())
|
||||
p("// %s: %d entries, %d bytes\n", name, len(a), sz)
|
||||
p("var %s = [%d]uint32 {", name, len(a))
|
||||
for i, c := range a {
|
||||
switch {
|
||||
case i%64 == 0:
|
||||
p("\n\t// Block %d, offset 0x%x\n", i/64, i)
|
||||
case (i%64)%6 == 0:
|
||||
p("\n\t")
|
||||
}
|
||||
p("0x%.8X, ", c)
|
||||
}
|
||||
p("\n}\n\n")
|
||||
return
|
||||
}
|
290
vendor/golang.org/x/text/collate/build/trie.go
generated
vendored
Normal file
290
vendor/golang.org/x/text/collate/build/trie.go
generated
vendored
Normal file
|
@ -0,0 +1,290 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// The trie in this file is used to associate the first full character
|
||||
// in a UTF-8 string to a collation element.
|
||||
// All but the last byte in a UTF-8 byte sequence are
|
||||
// used to look up offsets in the index table to be used for the next byte.
|
||||
// The last byte is used to index into a table of collation elements.
|
||||
// This file contains the code for the generation of the trie.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
const (
|
||||
blockSize = 64
|
||||
blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
|
||||
)
|
||||
|
||||
type trieHandle struct {
|
||||
lookupStart uint16 // offset in table for first byte
|
||||
valueStart uint16 // offset in table for first byte
|
||||
}
|
||||
|
||||
type trie struct {
|
||||
index []uint16
|
||||
values []uint32
|
||||
}
|
||||
|
||||
// trieNode is the intermediate trie structure used for generating a trie.
|
||||
type trieNode struct {
|
||||
index []*trieNode
|
||||
value []uint32
|
||||
b byte
|
||||
refValue uint16
|
||||
refIndex uint16
|
||||
}
|
||||
|
||||
func newNode() *trieNode {
|
||||
return &trieNode{
|
||||
index: make([]*trieNode, 64),
|
||||
value: make([]uint32, 128), // root node size is 128 instead of 64
|
||||
}
|
||||
}
|
||||
|
||||
func (n *trieNode) isInternal() bool {
|
||||
return n.value != nil
|
||||
}
|
||||
|
||||
func (n *trieNode) insert(r rune, value uint32) {
|
||||
const maskx = 0x3F // mask out two most-significant bits
|
||||
str := string(r)
|
||||
if len(str) == 1 {
|
||||
n.value[str[0]] = value
|
||||
return
|
||||
}
|
||||
for i := 0; i < len(str)-1; i++ {
|
||||
b := str[i] & maskx
|
||||
if n.index == nil {
|
||||
n.index = make([]*trieNode, blockSize)
|
||||
}
|
||||
nn := n.index[b]
|
||||
if nn == nil {
|
||||
nn = &trieNode{}
|
||||
nn.b = b
|
||||
n.index[b] = nn
|
||||
}
|
||||
n = nn
|
||||
}
|
||||
if n.value == nil {
|
||||
n.value = make([]uint32, blockSize)
|
||||
}
|
||||
b := str[len(str)-1] & maskx
|
||||
n.value[b] = value
|
||||
}
|
||||
|
||||
type trieBuilder struct {
|
||||
t *trie
|
||||
|
||||
roots []*trieHandle
|
||||
|
||||
lookupBlocks []*trieNode
|
||||
valueBlocks []*trieNode
|
||||
|
||||
lookupBlockIdx map[uint32]*trieNode
|
||||
valueBlockIdx map[uint32]*trieNode
|
||||
}
|
||||
|
||||
func newTrieBuilder() *trieBuilder {
|
||||
index := &trieBuilder{}
|
||||
index.lookupBlocks = make([]*trieNode, 0)
|
||||
index.valueBlocks = make([]*trieNode, 0)
|
||||
index.lookupBlockIdx = make(map[uint32]*trieNode)
|
||||
index.valueBlockIdx = make(map[uint32]*trieNode)
|
||||
// The third nil is the default null block. The other two blocks
|
||||
// are used to guarantee an offset of at least 3 for each block.
|
||||
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
|
||||
index.t = &trie{}
|
||||
return index
|
||||
}
|
||||
|
||||
func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
|
||||
hasher := fnv.New32()
|
||||
if n.index != nil {
|
||||
for i, nn := range n.index {
|
||||
var vi, vv uint16
|
||||
if nn != nil {
|
||||
nn = b.computeOffsets(nn)
|
||||
n.index[i] = nn
|
||||
vi = nn.refIndex
|
||||
vv = nn.refValue
|
||||
}
|
||||
hasher.Write([]byte{byte(vi >> 8), byte(vi)})
|
||||
hasher.Write([]byte{byte(vv >> 8), byte(vv)})
|
||||
}
|
||||
h := hasher.Sum32()
|
||||
nn, ok := b.lookupBlockIdx[h]
|
||||
if !ok {
|
||||
n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset
|
||||
b.lookupBlocks = append(b.lookupBlocks, n)
|
||||
b.lookupBlockIdx[h] = n
|
||||
} else {
|
||||
n = nn
|
||||
}
|
||||
} else {
|
||||
for _, v := range n.value {
|
||||
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
|
||||
}
|
||||
h := hasher.Sum32()
|
||||
nn, ok := b.valueBlockIdx[h]
|
||||
if !ok {
|
||||
n.refValue = uint16(len(b.valueBlocks)) - blockOffset
|
||||
n.refIndex = n.refValue
|
||||
b.valueBlocks = append(b.valueBlocks, n)
|
||||
b.valueBlockIdx[h] = n
|
||||
} else {
|
||||
n = nn
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
|
||||
hasher := fnv.New32()
|
||||
for _, v := range n.value[:2*blockSize] {
|
||||
hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
|
||||
}
|
||||
h := hasher.Sum32()
|
||||
nn, ok := b.valueBlockIdx[h]
|
||||
if !ok {
|
||||
n.refValue = uint16(len(b.valueBlocks))
|
||||
n.refIndex = n.refValue
|
||||
b.valueBlocks = append(b.valueBlocks, n)
|
||||
// Add a dummy block to accommodate the double block size.
|
||||
b.valueBlocks = append(b.valueBlocks, nil)
|
||||
b.valueBlockIdx[h] = n
|
||||
} else {
|
||||
n = nn
|
||||
}
|
||||
return n.refValue
|
||||
}
|
||||
|
||||
func genValueBlock(t *trie, n *trieNode) {
|
||||
if n != nil {
|
||||
for _, v := range n.value {
|
||||
t.values = append(t.values, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func genLookupBlock(t *trie, n *trieNode) {
|
||||
for _, nn := range n.index {
|
||||
v := uint16(0)
|
||||
if nn != nil {
|
||||
if n.index != nil {
|
||||
v = nn.refIndex
|
||||
} else {
|
||||
v = nn.refValue
|
||||
}
|
||||
}
|
||||
t.index = append(t.index, v)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
|
||||
h := &trieHandle{}
|
||||
b.roots = append(b.roots, h)
|
||||
h.valueStart = b.addStartValueBlock(n)
|
||||
if len(b.roots) == 1 {
|
||||
// We insert a null block after the first start value block.
|
||||
// This ensures that continuation bytes UTF-8 sequences of length
|
||||
// greater than 2 will automatically hit a null block if there
|
||||
// was an undefined entry.
|
||||
b.valueBlocks = append(b.valueBlocks, nil)
|
||||
}
|
||||
n = b.computeOffsets(n)
|
||||
// Offset by one extra block as the first byte starts at 0xC0 instead of 0x80.
|
||||
h.lookupStart = n.refIndex - 1
|
||||
return h
|
||||
}
|
||||
|
||||
// generate generates and returns the trie for n.
|
||||
func (b *trieBuilder) generate() (t *trie, err error) {
|
||||
t = b.t
|
||||
if len(b.valueBlocks) >= 1<<16 {
|
||||
return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
|
||||
}
|
||||
if len(b.lookupBlocks) >= 1<<16 {
|
||||
return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
|
||||
}
|
||||
genValueBlock(t, b.valueBlocks[0])
|
||||
genValueBlock(t, &trieNode{value: make([]uint32, 64)})
|
||||
for i := 2; i < len(b.valueBlocks); i++ {
|
||||
genValueBlock(t, b.valueBlocks[i])
|
||||
}
|
||||
n := &trieNode{index: make([]*trieNode, 64)}
|
||||
genLookupBlock(t, n)
|
||||
genLookupBlock(t, n)
|
||||
genLookupBlock(t, n)
|
||||
for i := 3; i < len(b.lookupBlocks); i++ {
|
||||
genLookupBlock(t, b.lookupBlocks[i])
|
||||
}
|
||||
return b.t, nil
|
||||
}
|
||||
|
||||
func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
n += nn
|
||||
if err == nil {
|
||||
err = e
|
||||
}
|
||||
}
|
||||
nv := len(t.values)
|
||||
p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4)
|
||||
p("// Block 2 is the null block.\n")
|
||||
p("var %sValues = [%d]uint32 {", name, nv)
|
||||
var printnewline bool
|
||||
for i, v := range t.values {
|
||||
if i%blockSize == 0 {
|
||||
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
|
||||
}
|
||||
if i%4 == 0 {
|
||||
printnewline = true
|
||||
}
|
||||
if v != 0 {
|
||||
if printnewline {
|
||||
p("\n\t")
|
||||
printnewline = false
|
||||
}
|
||||
p("%#04x:%#08x, ", i, v)
|
||||
}
|
||||
}
|
||||
p("\n}\n\n")
|
||||
ni := len(t.index)
|
||||
p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2)
|
||||
p("// Block 0 is the null block.\n")
|
||||
p("var %sLookup = [%d]uint16 {", name, ni)
|
||||
printnewline = false
|
||||
for i, v := range t.index {
|
||||
if i%blockSize == 0 {
|
||||
p("\n\t// Block %#x, offset %#x", i/blockSize, i)
|
||||
}
|
||||
if i%8 == 0 {
|
||||
printnewline = true
|
||||
}
|
||||
if v != 0 {
|
||||
if printnewline {
|
||||
p("\n\t")
|
||||
printnewline = false
|
||||
}
|
||||
p("%#03x:%#02x, ", i, v)
|
||||
}
|
||||
}
|
||||
p("\n}\n\n")
|
||||
return n, nv*4 + ni*2, err
|
||||
}
|
||||
|
||||
func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
|
||||
const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
|
||||
n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
|
||||
sz += int(reflect.TypeOf(trie{}).Size())
|
||||
return
|
||||
}
|
107
vendor/golang.org/x/text/collate/build/trie_test.go
generated
vendored
Normal file
107
vendor/golang.org/x/text/collate/build/trie_test.go
generated
vendored
Normal file
|
@ -0,0 +1,107 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package build
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// We take the smallest, largest and an arbitrary value for each
|
||||
// of the UTF-8 sequence lengths.
|
||||
var testRunes = []rune{
|
||||
0x01, 0x0C, 0x7F, // 1-byte sequences
|
||||
0x80, 0x100, 0x7FF, // 2-byte sequences
|
||||
0x800, 0x999, 0xFFFF, // 3-byte sequences
|
||||
0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
|
||||
0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
|
||||
}
|
||||
|
||||
func makeTestTrie(t *testing.T) trie {
|
||||
n := newNode()
|
||||
for i, r := range testRunes {
|
||||
n.insert(r, uint32(i))
|
||||
}
|
||||
idx := newTrieBuilder()
|
||||
idx.addTrie(n)
|
||||
tr, err := idx.generate()
|
||||
if err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
return *tr
|
||||
}
|
||||
|
||||
func TestGenerateTrie(t *testing.T) {
|
||||
testdata := makeTestTrie(t)
|
||||
buf := &bytes.Buffer{}
|
||||
testdata.printArrays(buf, "test")
|
||||
fmt.Fprintf(buf, "var testTrie = ")
|
||||
testdata.printStruct(buf, &trieHandle{19, 0}, "test")
|
||||
if output != buf.String() {
|
||||
t.Error("output differs")
|
||||
}
|
||||
}
|
||||
|
||||
var output = `// testValues: 832 entries, 3328 bytes
|
||||
// Block 2 is the null block.
|
||||
var testValues = [832]uint32 {
|
||||
// Block 0x0, offset 0x0
|
||||
0x000c:0x00000001,
|
||||
// Block 0x1, offset 0x40
|
||||
0x007f:0x00000002,
|
||||
// Block 0x2, offset 0x80
|
||||
// Block 0x3, offset 0xc0
|
||||
0x00c0:0x00000003,
|
||||
// Block 0x4, offset 0x100
|
||||
0x0100:0x00000004,
|
||||
// Block 0x5, offset 0x140
|
||||
0x0140:0x0000000c, 0x0141:0x0000000d, 0x0142:0x0000000e,
|
||||
0x0150:0x0000000f,
|
||||
0x0155:0x00000010,
|
||||
// Block 0x6, offset 0x180
|
||||
0x01bf:0x00000005,
|
||||
// Block 0x7, offset 0x1c0
|
||||
0x01c0:0x00000006,
|
||||
// Block 0x8, offset 0x200
|
||||
0x0219:0x00000007,
|
||||
// Block 0x9, offset 0x240
|
||||
0x027f:0x00000008,
|
||||
// Block 0xa, offset 0x280
|
||||
0x0280:0x00000009,
|
||||
// Block 0xb, offset 0x2c0
|
||||
0x02c1:0x0000000a,
|
||||
// Block 0xc, offset 0x300
|
||||
0x033f:0x0000000b,
|
||||
}
|
||||
|
||||
// testLookup: 640 entries, 1280 bytes
|
||||
// Block 0 is the null block.
|
||||
var testLookup = [640]uint16 {
|
||||
// Block 0x0, offset 0x0
|
||||
// Block 0x1, offset 0x40
|
||||
// Block 0x2, offset 0x80
|
||||
// Block 0x3, offset 0xc0
|
||||
0x0e0:0x05, 0x0e6:0x06,
|
||||
// Block 0x4, offset 0x100
|
||||
0x13f:0x07,
|
||||
// Block 0x5, offset 0x140
|
||||
0x140:0x08, 0x144:0x09,
|
||||
// Block 0x6, offset 0x180
|
||||
0x190:0x03,
|
||||
// Block 0x7, offset 0x1c0
|
||||
0x1ff:0x0a,
|
||||
// Block 0x8, offset 0x200
|
||||
0x20f:0x05,
|
||||
// Block 0x9, offset 0x240
|
||||
0x242:0x01, 0x244:0x02,
|
||||
0x248:0x03,
|
||||
0x25f:0x04,
|
||||
0x260:0x01,
|
||||
0x26f:0x02,
|
||||
0x270:0x04, 0x274:0x06,
|
||||
}
|
||||
|
||||
var testTrie = trie{ testLookup[1216:], testValues[0:], testLookup[:], testValues[:]}`
|
403
vendor/golang.org/x/text/collate/collate.go
generated
vendored
Normal file
403
vendor/golang.org/x/text/collate/collate.go
generated
vendored
Normal file
|
@ -0,0 +1,403 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// TODO: remove hard-coded versions when we have implemented fractional weights.
|
||||
// The current implementation is incompatible with later CLDR versions.
|
||||
//go:generate go run maketables.go -cldr=23 -unicode=6.2.0
|
||||
|
||||
// Package collate contains types for comparing and sorting Unicode strings
|
||||
// according to a given collation order.
|
||||
package collate // import "golang.org/x/text/collate"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// Collator provides functionality for comparing strings for a given
|
||||
// collation order.
|
||||
type Collator struct {
|
||||
options
|
||||
|
||||
sorter sorter
|
||||
|
||||
_iter [2]iter
|
||||
}
|
||||
|
||||
func (c *Collator) iter(i int) *iter {
|
||||
// TODO: evaluate performance for making the second iterator optional.
|
||||
return &c._iter[i]
|
||||
}
|
||||
|
||||
// Supported returns the list of languages for which collating differs from its parent.
|
||||
func Supported() []language.Tag {
|
||||
// TODO: use language.Coverage instead.
|
||||
|
||||
t := make([]language.Tag, len(tags))
|
||||
copy(t, tags)
|
||||
return t
|
||||
}
|
||||
|
||||
func init() {
|
||||
ids := strings.Split(availableLocales, ",")
|
||||
tags = make([]language.Tag, len(ids))
|
||||
for i, s := range ids {
|
||||
tags[i] = language.Raw.MustParse(s)
|
||||
}
|
||||
}
|
||||
|
||||
var tags []language.Tag
|
||||
|
||||
// New returns a new Collator initialized for the given locale.
|
||||
func New(t language.Tag, o ...Option) *Collator {
|
||||
index := colltab.MatchLang(t, tags)
|
||||
c := newCollator(getTable(locales[index]))
|
||||
|
||||
// Set options from the user-supplied tag.
|
||||
c.setFromTag(t)
|
||||
|
||||
// Set the user-supplied options.
|
||||
c.setOptions(o)
|
||||
|
||||
c.init()
|
||||
return c
|
||||
}
|
||||
|
||||
// NewFromTable returns a new Collator for the given Weighter.
|
||||
func NewFromTable(w colltab.Weighter, o ...Option) *Collator {
|
||||
c := newCollator(w)
|
||||
c.setOptions(o)
|
||||
c.init()
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *Collator) init() {
|
||||
if c.numeric {
|
||||
c.t = colltab.NewNumericWeighter(c.t)
|
||||
}
|
||||
c._iter[0].init(c)
|
||||
c._iter[1].init(c)
|
||||
}
|
||||
|
||||
// Buffer holds keys generated by Key and KeyString.
|
||||
type Buffer struct {
|
||||
buf [4096]byte
|
||||
key []byte
|
||||
}
|
||||
|
||||
func (b *Buffer) init() {
|
||||
if b.key == nil {
|
||||
b.key = b.buf[:0]
|
||||
}
|
||||
}
|
||||
|
||||
// Reset clears the buffer from previous results generated by Key and KeyString.
|
||||
func (b *Buffer) Reset() {
|
||||
b.key = b.key[:0]
|
||||
}
|
||||
|
||||
// Compare returns an integer comparing the two byte slices.
|
||||
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
|
||||
func (c *Collator) Compare(a, b []byte) int {
|
||||
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
|
||||
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
|
||||
c.iter(0).SetInput(a)
|
||||
c.iter(1).SetInput(b)
|
||||
if res := c.compare(); res != 0 {
|
||||
return res
|
||||
}
|
||||
if !c.ignore[colltab.Identity] {
|
||||
return bytes.Compare(a, b)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// CompareString returns an integer comparing the two strings.
|
||||
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
|
||||
func (c *Collator) CompareString(a, b string) int {
|
||||
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
|
||||
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
|
||||
c.iter(0).SetInputString(a)
|
||||
c.iter(1).SetInputString(b)
|
||||
if res := c.compare(); res != 0 {
|
||||
return res
|
||||
}
|
||||
if !c.ignore[colltab.Identity] {
|
||||
if a < b {
|
||||
return -1
|
||||
} else if a > b {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func compareLevel(f func(i *iter) int, a, b *iter) int {
|
||||
a.pce = 0
|
||||
b.pce = 0
|
||||
for {
|
||||
va := f(a)
|
||||
vb := f(b)
|
||||
if va != vb {
|
||||
if va < vb {
|
||||
return -1
|
||||
}
|
||||
return 1
|
||||
} else if va == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (c *Collator) compare() int {
|
||||
ia, ib := c.iter(0), c.iter(1)
|
||||
// Process primary level
|
||||
if c.alternate != altShifted {
|
||||
// TODO: implement script reordering
|
||||
if res := compareLevel((*iter).nextPrimary, ia, ib); res != 0 {
|
||||
return res
|
||||
}
|
||||
} else {
|
||||
// TODO: handle shifted
|
||||
}
|
||||
if !c.ignore[colltab.Secondary] {
|
||||
f := (*iter).nextSecondary
|
||||
if c.backwards {
|
||||
f = (*iter).prevSecondary
|
||||
}
|
||||
if res := compareLevel(f, ia, ib); res != 0 {
|
||||
return res
|
||||
}
|
||||
}
|
||||
// TODO: special case handling (Danish?)
|
||||
if !c.ignore[colltab.Tertiary] || c.caseLevel {
|
||||
if res := compareLevel((*iter).nextTertiary, ia, ib); res != 0 {
|
||||
return res
|
||||
}
|
||||
if !c.ignore[colltab.Quaternary] {
|
||||
if res := compareLevel((*iter).nextQuaternary, ia, ib); res != 0 {
|
||||
return res
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Key returns the collation key for str.
|
||||
// Passing the buffer buf may avoid memory allocations.
|
||||
// The returned slice will point to an allocation in Buffer and will remain
|
||||
// valid until the next call to buf.Reset().
|
||||
func (c *Collator) Key(buf *Buffer, str []byte) []byte {
|
||||
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
|
||||
buf.init()
|
||||
return c.key(buf, c.getColElems(str))
|
||||
}
|
||||
|
||||
// KeyFromString returns the collation key for str.
|
||||
// Passing the buffer buf may avoid memory allocations.
|
||||
// The returned slice will point to an allocation in Buffer and will retain
|
||||
// valid until the next call to buf.ResetKeys().
|
||||
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
|
||||
// See http://www.unicode.org/reports/tr10/#Main_Algorithm for more details.
|
||||
buf.init()
|
||||
return c.key(buf, c.getColElemsString(str))
|
||||
}
|
||||
|
||||
func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
|
||||
processWeights(c.alternate, c.t.Top(), w)
|
||||
kn := len(buf.key)
|
||||
c.keyFromElems(buf, w)
|
||||
return buf.key[kn:]
|
||||
}
|
||||
|
||||
func (c *Collator) getColElems(str []byte) []colltab.Elem {
|
||||
i := c.iter(0)
|
||||
i.SetInput(str)
|
||||
for i.Next() {
|
||||
}
|
||||
return i.Elems
|
||||
}
|
||||
|
||||
func (c *Collator) getColElemsString(str string) []colltab.Elem {
|
||||
i := c.iter(0)
|
||||
i.SetInputString(str)
|
||||
for i.Next() {
|
||||
}
|
||||
return i.Elems
|
||||
}
|
||||
|
||||
type iter struct {
|
||||
wa [512]colltab.Elem
|
||||
|
||||
colltab.Iter
|
||||
pce int
|
||||
}
|
||||
|
||||
func (i *iter) init(c *Collator) {
|
||||
i.Weighter = c.t
|
||||
i.Elems = i.wa[:0]
|
||||
}
|
||||
|
||||
func (i *iter) nextPrimary() int {
|
||||
for {
|
||||
for ; i.pce < i.N; i.pce++ {
|
||||
if v := i.Elems[i.pce].Primary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
if !i.Next() {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
panic("should not reach here")
|
||||
}
|
||||
|
||||
func (i *iter) nextSecondary() int {
|
||||
for ; i.pce < len(i.Elems); i.pce++ {
|
||||
if v := i.Elems[i.pce].Secondary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (i *iter) prevSecondary() int {
|
||||
for ; i.pce < len(i.Elems); i.pce++ {
|
||||
if v := i.Elems[len(i.Elems)-i.pce-1].Secondary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (i *iter) nextTertiary() int {
|
||||
for ; i.pce < len(i.Elems); i.pce++ {
|
||||
if v := i.Elems[i.pce].Tertiary(); v != 0 {
|
||||
i.pce++
|
||||
return int(v)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (i *iter) nextQuaternary() int {
|
||||
for ; i.pce < len(i.Elems); i.pce++ {
|
||||
if v := i.Elems[i.pce].Quaternary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func appendPrimary(key []byte, p int) []byte {
|
||||
// Convert to variable length encoding; supports up to 23 bits.
|
||||
if p <= 0x7FFF {
|
||||
key = append(key, uint8(p>>8), uint8(p))
|
||||
} else {
|
||||
key = append(key, uint8(p>>16)|0x80, uint8(p>>8), uint8(p))
|
||||
}
|
||||
return key
|
||||
}
|
||||
|
||||
// keyFromElems converts the weights ws to a compact sequence of bytes.
|
||||
// The result will be appended to the byte buffer in buf.
|
||||
func (c *Collator) keyFromElems(buf *Buffer, ws []colltab.Elem) {
|
||||
for _, v := range ws {
|
||||
if w := v.Primary(); w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
}
|
||||
}
|
||||
if !c.ignore[colltab.Secondary] {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
|
||||
if !c.backwards {
|
||||
for _, v := range ws {
|
||||
if w := v.Secondary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w>>8), uint8(w))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := len(ws) - 1; i >= 0; i-- {
|
||||
if w := ws[i].Secondary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w>>8), uint8(w))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if c.caseLevel {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
}
|
||||
if !c.ignore[colltab.Tertiary] || c.caseLevel {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.Tertiary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w))
|
||||
}
|
||||
}
|
||||
// Derive the quaternary weights from the options and other levels.
|
||||
// Note that we represent MaxQuaternary as 0xFF. The first byte of the
|
||||
// representation of a primary weight is always smaller than 0xFF,
|
||||
// so using this single byte value will compare correctly.
|
||||
if !c.ignore[colltab.Quaternary] && c.alternate >= altShifted {
|
||||
if c.alternate == altShiftTrimmed {
|
||||
lastNonFFFF := len(buf.key)
|
||||
buf.key = append(buf.key, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.Quaternary(); w == colltab.MaxQuaternary {
|
||||
buf.key = append(buf.key, 0xFF)
|
||||
} else if w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
lastNonFFFF = len(buf.key)
|
||||
}
|
||||
}
|
||||
buf.key = buf.key[:lastNonFFFF]
|
||||
} else {
|
||||
buf.key = append(buf.key, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.Quaternary(); w == colltab.MaxQuaternary {
|
||||
buf.key = append(buf.key, 0xFF)
|
||||
} else if w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func processWeights(vw alternateHandling, top uint32, wa []colltab.Elem) {
|
||||
ignore := false
|
||||
vtop := int(top)
|
||||
switch vw {
|
||||
case altShifted, altShiftTrimmed:
|
||||
for i := range wa {
|
||||
if p := wa[i].Primary(); p <= vtop && p != 0 {
|
||||
wa[i] = colltab.MakeQuaternary(p)
|
||||
ignore = true
|
||||
} else if p == 0 {
|
||||
if ignore {
|
||||
wa[i] = colltab.Ignore
|
||||
}
|
||||
} else {
|
||||
ignore = false
|
||||
}
|
||||
}
|
||||
case altBlanked:
|
||||
for i := range wa {
|
||||
if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
|
||||
wa[i] = colltab.Ignore
|
||||
ignore = true
|
||||
} else {
|
||||
ignore = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
482
vendor/golang.org/x/text/collate/collate_test.go
generated
vendored
Normal file
482
vendor/golang.org/x/text/collate/collate_test.go
generated
vendored
Normal file
|
@ -0,0 +1,482 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
type weightsTest struct {
|
||||
opt opts
|
||||
in, out ColElems
|
||||
}
|
||||
|
||||
type opts struct {
|
||||
lev int
|
||||
alt alternateHandling
|
||||
top int
|
||||
|
||||
backwards bool
|
||||
caseLevel bool
|
||||
}
|
||||
|
||||
// ignore returns an initialized boolean array based on the given Level.
|
||||
// A negative value means using the default setting of quaternary.
|
||||
func ignore(level colltab.Level) (ignore [colltab.NumLevels]bool) {
|
||||
if level < 0 {
|
||||
level = colltab.Quaternary
|
||||
}
|
||||
for i := range ignore {
|
||||
ignore[i] = level < colltab.Level(i)
|
||||
}
|
||||
return ignore
|
||||
}
|
||||
|
||||
func makeCE(w []int) colltab.Elem {
|
||||
ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3]))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return ce
|
||||
}
|
||||
|
||||
func (o opts) collator() *Collator {
|
||||
c := &Collator{
|
||||
options: options{
|
||||
ignore: ignore(colltab.Level(o.lev - 1)),
|
||||
alternate: o.alt,
|
||||
backwards: o.backwards,
|
||||
caseLevel: o.caseLevel,
|
||||
variableTop: uint32(o.top),
|
||||
},
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
const (
|
||||
maxQ = 0x1FFFFF
|
||||
)
|
||||
|
||||
func wpq(p, q int) Weights {
|
||||
return W(p, defaults.Secondary, defaults.Tertiary, q)
|
||||
}
|
||||
|
||||
func wsq(s, q int) Weights {
|
||||
return W(0, s, defaults.Tertiary, q)
|
||||
}
|
||||
|
||||
func wq(q int) Weights {
|
||||
return W(0, 0, 0, q)
|
||||
}
|
||||
|
||||
var zero = W(0, 0, 0, 0)
|
||||
|
||||
var processTests = []weightsTest{
|
||||
// Shifted
|
||||
{ // simple sequence of non-variables
|
||||
opt: opts{alt: altShifted, top: 100},
|
||||
in: ColElems{W(200), W(300), W(400)},
|
||||
out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // first is a variable
|
||||
opt: opts{alt: altShifted, top: 250},
|
||||
in: ColElems{W(200), W(300), W(400)},
|
||||
out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // all but first are variable
|
||||
opt: opts{alt: altShifted, top: 999},
|
||||
in: ColElems{W(1000), W(200), W(300), W(400)},
|
||||
out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
|
||||
},
|
||||
{ // first is a modifier
|
||||
opt: opts{alt: altShifted, top: 999},
|
||||
in: ColElems{W(0, 10), W(1000)},
|
||||
out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
|
||||
},
|
||||
{ // primary ignorables
|
||||
opt: opts{alt: altShifted, top: 250},
|
||||
in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // secondary ignorables
|
||||
opt: opts{alt: altShifted, top: 250},
|
||||
in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // tertiary ignorables, no change
|
||||
opt: opts{alt: altShifted, top: 250},
|
||||
in: ColElems{W(200), zero, W(300), zero, W(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
|
||||
},
|
||||
|
||||
// ShiftTrimmed (same as Shifted)
|
||||
{ // simple sequence of non-variables
|
||||
opt: opts{alt: altShiftTrimmed, top: 100},
|
||||
in: ColElems{W(200), W(300), W(400)},
|
||||
out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // first is a variable
|
||||
opt: opts{alt: altShiftTrimmed, top: 250},
|
||||
in: ColElems{W(200), W(300), W(400)},
|
||||
out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // all but first are variable
|
||||
opt: opts{alt: altShiftTrimmed, top: 999},
|
||||
in: ColElems{W(1000), W(200), W(300), W(400)},
|
||||
out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
|
||||
},
|
||||
{ // first is a modifier
|
||||
opt: opts{alt: altShiftTrimmed, top: 999},
|
||||
in: ColElems{W(0, 10), W(1000)},
|
||||
out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
|
||||
},
|
||||
{ // primary ignorables
|
||||
opt: opts{alt: altShiftTrimmed, top: 250},
|
||||
in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // secondary ignorables
|
||||
opt: opts{alt: altShiftTrimmed, top: 250},
|
||||
in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
|
||||
},
|
||||
{ // tertiary ignorables, no change
|
||||
opt: opts{alt: altShiftTrimmed, top: 250},
|
||||
in: ColElems{W(200), zero, W(300), zero, W(400)},
|
||||
out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
|
||||
},
|
||||
|
||||
// Blanked
|
||||
{ // simple sequence of non-variables
|
||||
opt: opts{alt: altBlanked, top: 100},
|
||||
in: ColElems{W(200), W(300), W(400)},
|
||||
out: ColElems{W(200), W(300), W(400)},
|
||||
},
|
||||
{ // first is a variable
|
||||
opt: opts{alt: altBlanked, top: 250},
|
||||
in: ColElems{W(200), W(300), W(400)},
|
||||
out: ColElems{zero, W(300), W(400)},
|
||||
},
|
||||
{ // all but first are variable
|
||||
opt: opts{alt: altBlanked, top: 999},
|
||||
in: ColElems{W(1000), W(200), W(300), W(400)},
|
||||
out: ColElems{W(1000), zero, zero, zero},
|
||||
},
|
||||
{ // first is a modifier
|
||||
opt: opts{alt: altBlanked, top: 999},
|
||||
in: ColElems{W(0, 10), W(1000)},
|
||||
out: ColElems{W(0, 10), W(1000)},
|
||||
},
|
||||
{ // primary ignorables
|
||||
opt: opts{alt: altBlanked, top: 250},
|
||||
in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
|
||||
out: ColElems{zero, zero, W(300), W(0, 15), W(400)},
|
||||
},
|
||||
{ // secondary ignorables
|
||||
opt: opts{alt: altBlanked, top: 250},
|
||||
in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
|
||||
out: ColElems{zero, zero, W(300), W(0, 0, 15), W(400)},
|
||||
},
|
||||
{ // tertiary ignorables, no change
|
||||
opt: opts{alt: altBlanked, top: 250},
|
||||
in: ColElems{W(200), zero, W(300), zero, W(400)},
|
||||
out: ColElems{zero, zero, W(300), zero, W(400)},
|
||||
},
|
||||
|
||||
// Non-ignorable: input is always equal to output.
|
||||
{ // all but first are variable
|
||||
opt: opts{alt: altNonIgnorable, top: 999},
|
||||
in: ColElems{W(1000), W(200), W(300), W(400)},
|
||||
out: ColElems{W(1000), W(200), W(300), W(400)},
|
||||
},
|
||||
{ // primary ignorables
|
||||
opt: opts{alt: altNonIgnorable, top: 250},
|
||||
in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
|
||||
out: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
|
||||
},
|
||||
{ // secondary ignorables
|
||||
opt: opts{alt: altNonIgnorable, top: 250},
|
||||
in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
|
||||
out: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
|
||||
},
|
||||
{ // tertiary ignorables, no change
|
||||
opt: opts{alt: altNonIgnorable, top: 250},
|
||||
in: ColElems{W(200), zero, W(300), zero, W(400)},
|
||||
out: ColElems{W(200), zero, W(300), zero, W(400)},
|
||||
},
|
||||
}
|
||||
|
||||
func TestProcessWeights(t *testing.T) {
|
||||
for i, tt := range processTests {
|
||||
in := convertFromWeights(tt.in)
|
||||
out := convertFromWeights(tt.out)
|
||||
processWeights(tt.opt.alt, uint32(tt.opt.top), in)
|
||||
for j, w := range in {
|
||||
if w != out[j] {
|
||||
t.Errorf("%d: Weights %d was %v; want %v", i, j, w, out[j])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type keyFromElemTest struct {
|
||||
opt opts
|
||||
in ColElems
|
||||
out []byte
|
||||
}
|
||||
|
||||
var defS = byte(defaults.Secondary)
|
||||
var defT = byte(defaults.Tertiary)
|
||||
|
||||
const sep = 0 // separator byte
|
||||
|
||||
var keyFromElemTests = []keyFromElemTest{
|
||||
{ // simple primary and secondary weights.
|
||||
opts{alt: altShifted},
|
||||
ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
|
||||
},
|
||||
},
|
||||
{ // same as first, but with zero element that need to be removed
|
||||
opts{alt: altShifted},
|
||||
ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
|
||||
},
|
||||
},
|
||||
{ // same as first, with large primary values
|
||||
opts{alt: altShifted},
|
||||
ColElems{W(0x200), W(0x8000), W(0, 0x30), W(0x12345)},
|
||||
[]byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
|
||||
},
|
||||
},
|
||||
{ // same as first, but with the secondary level backwards
|
||||
opts{alt: altShifted, backwards: true},
|
||||
ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
|
||||
},
|
||||
},
|
||||
{ // same as first, ignoring quaternary level
|
||||
opts{alt: altShifted, lev: 3},
|
||||
ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
},
|
||||
},
|
||||
{ // same as first, ignoring tertiary level
|
||||
opts{alt: altShifted, lev: 2},
|
||||
ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
},
|
||||
},
|
||||
{ // same as first, ignoring secondary level
|
||||
opts{alt: altShifted, lev: 1},
|
||||
ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00},
|
||||
},
|
||||
{ // simple primary and secondary weights.
|
||||
opts{alt: altShiftTrimmed, top: 0x250},
|
||||
ColElems{W(0x300), W(0x200), W(0x7FFF), W(0, 0x30), W(0x800)},
|
||||
[]byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary
|
||||
sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
sep, 0xFF, 0x2, 0, // quaternary
|
||||
},
|
||||
},
|
||||
{ // as first, primary with case level enabled
|
||||
opts{alt: altShifted, lev: 1, caseLevel: true},
|
||||
ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
|
||||
[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
|
||||
sep, sep, // secondary
|
||||
sep, sep, defT, defT, defT, defT, // tertiary
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func TestKeyFromElems(t *testing.T) {
|
||||
buf := Buffer{}
|
||||
for i, tt := range keyFromElemTests {
|
||||
buf.Reset()
|
||||
in := convertFromWeights(tt.in)
|
||||
processWeights(tt.opt.alt, uint32(tt.opt.top), in)
|
||||
tt.opt.collator().keyFromElems(&buf, in)
|
||||
res := buf.key
|
||||
if len(res) != len(tt.out) {
|
||||
t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(res), len(tt.out), res, tt.out)
|
||||
}
|
||||
n := len(res)
|
||||
if len(tt.out) < n {
|
||||
n = len(tt.out)
|
||||
}
|
||||
for j, c := range res[:n] {
|
||||
if c != tt.out[j] {
|
||||
t.Errorf("%d: byte %d was %X; want %X", i, j, c, tt.out[j])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetColElems(t *testing.T) {
|
||||
for i, tt := range appendNextTests {
|
||||
c, err := makeTable(tt.in)
|
||||
if err != nil {
|
||||
// error is reported in TestAppendNext
|
||||
continue
|
||||
}
|
||||
// Create one large test per table
|
||||
str := make([]byte, 0, 4000)
|
||||
out := ColElems{}
|
||||
for len(str) < 3000 {
|
||||
for _, chk := range tt.chk {
|
||||
str = append(str, chk.in[:chk.n]...)
|
||||
out = append(out, chk.out...)
|
||||
}
|
||||
}
|
||||
for j, chk := range append(tt.chk, check{string(str), len(str), out}) {
|
||||
out := convertFromWeights(chk.out)
|
||||
ce := c.getColElems([]byte(chk.in)[:chk.n])
|
||||
if len(ce) != len(out) {
|
||||
t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ce), len(out))
|
||||
continue
|
||||
}
|
||||
cnt := 0
|
||||
for k, w := range ce {
|
||||
w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
|
||||
if w != out[k] {
|
||||
t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
|
||||
cnt++
|
||||
}
|
||||
if cnt > 10 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type keyTest struct {
|
||||
in string
|
||||
out []byte
|
||||
}
|
||||
|
||||
var keyTests = []keyTest{
|
||||
{"abc",
|
||||
[]byte{0, 100, 0, 200, 1, 44, 0, 0, 0, 32, 0, 32, 0, 32, 0, 0, 2, 2, 2, 0, 255, 255, 255},
|
||||
},
|
||||
{"a\u0301",
|
||||
[]byte{0, 102, 0, 0, 0, 32, 0, 0, 2, 0, 255},
|
||||
},
|
||||
{"aaaaa",
|
||||
[]byte{0, 100, 0, 100, 0, 100, 0, 100, 0, 100, 0, 0,
|
||||
0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 0,
|
||||
2, 2, 2, 2, 2, 0,
|
||||
255, 255, 255, 255, 255,
|
||||
},
|
||||
},
|
||||
// Issue 16391: incomplete rune at end of UTF-8 sequence.
|
||||
{"\xc2", []byte{133, 255, 253, 0, 0, 0, 32, 0, 0, 2, 0, 255}},
|
||||
{"\xc2a", []byte{133, 255, 253, 0, 100, 0, 0, 0, 32, 0, 32, 0, 0, 2, 2, 0, 255, 255}},
|
||||
}
|
||||
|
||||
func TestKey(t *testing.T) {
|
||||
c, _ := makeTable(appendNextTests[4].in)
|
||||
c.alternate = altShifted
|
||||
c.ignore = ignore(colltab.Quaternary)
|
||||
buf := Buffer{}
|
||||
keys1 := [][]byte{}
|
||||
keys2 := [][]byte{}
|
||||
for _, tt := range keyTests {
|
||||
keys1 = append(keys1, c.Key(&buf, []byte(tt.in)))
|
||||
keys2 = append(keys2, c.KeyFromString(&buf, tt.in))
|
||||
}
|
||||
// Separate generation from testing to ensure buffers are not overwritten.
|
||||
for i, tt := range keyTests {
|
||||
if !bytes.Equal(keys1[i], tt.out) {
|
||||
t.Errorf("%d: Key(%q) = %d; want %d", i, tt.in, keys1[i], tt.out)
|
||||
}
|
||||
if !bytes.Equal(keys2[i], tt.out) {
|
||||
t.Errorf("%d: KeyFromString(%q) = %d; want %d", i, tt.in, keys2[i], tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type compareTest struct {
|
||||
a, b string
|
||||
res int // comparison result
|
||||
}
|
||||
|
||||
var compareTests = []compareTest{
|
||||
{"a\u0301", "a", 1},
|
||||
{"a\u0301b", "ab", 1},
|
||||
{"a", "a\u0301", -1},
|
||||
{"ab", "a\u0301b", -1},
|
||||
{"bc", "a\u0301c", 1},
|
||||
{"ab", "aB", -1},
|
||||
{"a\u0301", "a\u0301", 0},
|
||||
{"a", "a", 0},
|
||||
// Only clip prefixes of whole runes.
|
||||
{"\u302E", "\u302F", 1},
|
||||
// Don't clip prefixes when last rune of prefix may be part of contraction.
|
||||
{"a\u035E", "a\u0301\u035F", -1},
|
||||
{"a\u0301\u035Fb", "a\u0301\u035F", -1},
|
||||
}
|
||||
|
||||
func TestCompare(t *testing.T) {
|
||||
c, _ := makeTable(appendNextTests[4].in)
|
||||
for i, tt := range compareTests {
|
||||
if res := c.Compare([]byte(tt.a), []byte(tt.b)); res != tt.res {
|
||||
t.Errorf("%d: Compare(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
|
||||
}
|
||||
if res := c.CompareString(tt.a, tt.b); res != tt.res {
|
||||
t.Errorf("%d: CompareString(%q, %q) == %d; want %d", i, tt.a, tt.b, res, tt.res)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNumeric(t *testing.T) {
|
||||
c := New(language.English, Loose, Numeric)
|
||||
|
||||
for i, tt := range []struct {
|
||||
a, b string
|
||||
want int
|
||||
}{
|
||||
{"1", "2", -1},
|
||||
{"2", "12", -1},
|
||||
{"2", "12", -1}, // Fullwidth is sorted as usual.
|
||||
{"₂", "₁₂", 1}, // Subscript is not sorted as numbers.
|
||||
{"②", "①②", 1}, // Circled is not sorted as numbers.
|
||||
{ // Imperial Aramaic, is not sorted as number.
|
||||
"\U00010859",
|
||||
"\U00010858\U00010859",
|
||||
1,
|
||||
},
|
||||
{"12", "2", 1},
|
||||
{"A-1", "A-2", -1},
|
||||
{"A-2", "A-12", -1},
|
||||
{"A-12", "A-2", 1},
|
||||
{"A-0001", "A-1", 0},
|
||||
} {
|
||||
if got := c.CompareString(tt.a, tt.b); got != tt.want {
|
||||
t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
51
vendor/golang.org/x/text/collate/export_test.go
generated
vendored
Normal file
51
vendor/golang.org/x/text/collate/export_test.go
generated
vendored
Normal file
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
// Export for testing.
|
||||
// TODO: no longer necessary. Remove at some point.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultSecondary = 0x20
|
||||
defaultTertiary = 0x2
|
||||
)
|
||||
|
||||
type Weights struct {
|
||||
Primary, Secondary, Tertiary, Quaternary int
|
||||
}
|
||||
|
||||
func W(ce ...int) Weights {
|
||||
w := Weights{ce[0], defaultSecondary, defaultTertiary, 0}
|
||||
if len(ce) > 1 {
|
||||
w.Secondary = ce[1]
|
||||
}
|
||||
if len(ce) > 2 {
|
||||
w.Tertiary = ce[2]
|
||||
}
|
||||
if len(ce) > 3 {
|
||||
w.Quaternary = ce[3]
|
||||
}
|
||||
return w
|
||||
}
|
||||
func (w Weights) String() string {
|
||||
return fmt.Sprintf("[%X.%X.%X.%X]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
|
||||
}
|
||||
|
||||
func convertFromWeights(ws []Weights) []colltab.Elem {
|
||||
out := make([]colltab.Elem, len(ws))
|
||||
for i, w := range ws {
|
||||
out[i], _ = colltab.MakeElem(w.Primary, w.Secondary, w.Tertiary, 0)
|
||||
if out[i] == colltab.Ignore && w.Quaternary > 0 {
|
||||
out[i] = colltab.MakeQuaternary(w.Quaternary)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
32
vendor/golang.org/x/text/collate/index.go
generated
vendored
Normal file
32
vendor/golang.org/x/text/collate/index.go
generated
vendored
Normal file
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import "golang.org/x/text/internal/colltab"
|
||||
|
||||
const blockSize = 64
|
||||
|
||||
func getTable(t tableIndex) *colltab.Table {
|
||||
return &colltab.Table{
|
||||
Index: colltab.Trie{
|
||||
Index0: mainLookup[:][blockSize*t.lookupOffset:],
|
||||
Values0: mainValues[:][blockSize*t.valuesOffset:],
|
||||
Index: mainLookup[:],
|
||||
Values: mainValues[:],
|
||||
},
|
||||
ExpandElem: mainExpandElem[:],
|
||||
ContractTries: colltab.ContractTrieSet(mainCTEntries[:]),
|
||||
ContractElem: mainContractElem[:],
|
||||
MaxContractLen: 18,
|
||||
VariableTop: varTop,
|
||||
}
|
||||
}
|
||||
|
||||
// tableIndex holds information for constructing a table
|
||||
// for a certain locale based on the main table.
|
||||
type tableIndex struct {
|
||||
lookupOffset uint32
|
||||
valuesOffset uint32
|
||||
}
|
553
vendor/golang.org/x/text/collate/maketables.go
generated
vendored
Normal file
553
vendor/golang.org/x/text/collate/maketables.go
generated
vendored
Normal file
|
@ -0,0 +1,553 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Collation table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/collate"
|
||||
"golang.org/x/text/collate/build"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test", false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
short = flag.Bool("short", false, `Use "short" alternatives, when available.`)
|
||||
draft = flag.Bool("draft", false, `Use draft versions, when available.`)
|
||||
tags = flag.String("tags", "", "build tags to be included after +build directive")
|
||||
pkg = flag.String("package", "collate",
|
||||
"the name of the package in which the generated file is to be included")
|
||||
|
||||
tables = flagStringSetAllowAll("tables", "collate", "collate,chars",
|
||||
"comma-spearated list of tables to generate.")
|
||||
exclude = flagStringSet("exclude", "zh2", "",
|
||||
"comma-separated list of languages to exclude.")
|
||||
include = flagStringSet("include", "", "",
|
||||
"comma-separated list of languages to include. Include trumps exclude.")
|
||||
// TODO: Not included: unihan gb2312han zhuyin big5han (for size reasons)
|
||||
// TODO: Not included: traditional (buggy for Bengali)
|
||||
types = flagStringSetAllowAll("types", "standard,phonebook,phonetic,reformed,pinyin,stroke", "",
|
||||
"comma-separated list of types that should be included.")
|
||||
)
|
||||
|
||||
// stringSet implements an ordered set based on a list. It implements flag.Value
|
||||
// to allow a set to be specified as a comma-separated list.
|
||||
type stringSet struct {
|
||||
s []string
|
||||
allowed *stringSet
|
||||
dirty bool // needs compaction if true
|
||||
all bool
|
||||
allowAll bool
|
||||
}
|
||||
|
||||
func flagStringSet(name, def, allowed, usage string) *stringSet {
|
||||
ss := &stringSet{}
|
||||
if allowed != "" {
|
||||
usage += fmt.Sprintf(" (allowed values: any of %s)", allowed)
|
||||
ss.allowed = &stringSet{}
|
||||
failOnError(ss.allowed.Set(allowed))
|
||||
}
|
||||
ss.Set(def)
|
||||
flag.Var(ss, name, usage)
|
||||
return ss
|
||||
}
|
||||
|
||||
func flagStringSetAllowAll(name, def, allowed, usage string) *stringSet {
|
||||
ss := &stringSet{allowAll: true}
|
||||
if allowed == "" {
|
||||
flag.Var(ss, name, usage+fmt.Sprintf(` Use "all" to select all.`))
|
||||
} else {
|
||||
ss.allowed = &stringSet{}
|
||||
failOnError(ss.allowed.Set(allowed))
|
||||
flag.Var(ss, name, usage+fmt.Sprintf(` (allowed values: "all" or any of %s)`, allowed))
|
||||
}
|
||||
ss.Set(def)
|
||||
return ss
|
||||
}
|
||||
|
||||
func (ss stringSet) Len() int {
|
||||
return len(ss.s)
|
||||
}
|
||||
|
||||
func (ss stringSet) String() string {
|
||||
return strings.Join(ss.s, ",")
|
||||
}
|
||||
|
||||
func (ss *stringSet) Set(s string) error {
|
||||
if ss.allowAll && s == "all" {
|
||||
ss.s = nil
|
||||
ss.all = true
|
||||
return nil
|
||||
}
|
||||
ss.s = ss.s[:0]
|
||||
for _, s := range strings.Split(s, ",") {
|
||||
if s := strings.TrimSpace(s); s != "" {
|
||||
if ss.allowed != nil && !ss.allowed.contains(s) {
|
||||
return fmt.Errorf("unsupported value %q; must be one of %s", s, ss.allowed)
|
||||
}
|
||||
ss.add(s)
|
||||
}
|
||||
}
|
||||
ss.compact()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ss *stringSet) add(s string) {
|
||||
ss.s = append(ss.s, s)
|
||||
ss.dirty = true
|
||||
}
|
||||
|
||||
func (ss *stringSet) values() []string {
|
||||
ss.compact()
|
||||
return ss.s
|
||||
}
|
||||
|
||||
func (ss *stringSet) contains(s string) bool {
|
||||
if ss.all {
|
||||
return true
|
||||
}
|
||||
for _, v := range ss.s {
|
||||
if v == s {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ss *stringSet) compact() {
|
||||
if !ss.dirty {
|
||||
return
|
||||
}
|
||||
a := ss.s
|
||||
sort.Strings(a)
|
||||
k := 0
|
||||
for i := 1; i < len(a); i++ {
|
||||
if a[k] != a[i] {
|
||||
a[k+1] = a[i]
|
||||
k++
|
||||
}
|
||||
}
|
||||
ss.s = a[:k+1]
|
||||
ss.dirty = false
|
||||
}
|
||||
|
||||
func skipLang(l string) bool {
|
||||
if include.Len() > 0 {
|
||||
return !include.contains(l)
|
||||
}
|
||||
return exclude.contains(l)
|
||||
}
|
||||
|
||||
// altInclude returns a list of alternatives (for the LDML alt attribute)
|
||||
// in order of preference. An empty string in this list indicates the
|
||||
// default entry.
|
||||
func altInclude() []string {
|
||||
l := []string{}
|
||||
if *short {
|
||||
l = append(l, "short")
|
||||
}
|
||||
l = append(l, "")
|
||||
// TODO: handle draft using cldr.SetDraftLevel
|
||||
if *draft {
|
||||
l = append(l, "proposed")
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
func failOnError(e error) {
|
||||
if e != nil {
|
||||
log.Panic(e)
|
||||
}
|
||||
}
|
||||
|
||||
func openArchive() *zip.Reader {
|
||||
f := gen.OpenCLDRCoreZip()
|
||||
buffer, err := ioutil.ReadAll(f)
|
||||
f.Close()
|
||||
failOnError(err)
|
||||
archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
|
||||
failOnError(err)
|
||||
return archive
|
||||
}
|
||||
|
||||
// parseUCA parses a Default Unicode Collation Element Table of the format
|
||||
// specified in http://www.unicode.org/reports/tr10/#File_Format.
|
||||
// It returns the variable top.
|
||||
func parseUCA(builder *build.Builder) {
|
||||
var r io.ReadCloser
|
||||
var err error
|
||||
for _, f := range openArchive().File {
|
||||
if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") {
|
||||
r, err = f.Open()
|
||||
}
|
||||
}
|
||||
if r == nil {
|
||||
log.Fatal("File allkeys_CLDR.txt not found in archive.")
|
||||
}
|
||||
failOnError(err)
|
||||
defer r.Close()
|
||||
scanner := bufio.NewScanner(r)
|
||||
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
|
||||
for i := 1; scanner.Scan(); i++ {
|
||||
line := scanner.Text()
|
||||
if len(line) == 0 || line[0] == '#' {
|
||||
continue
|
||||
}
|
||||
if line[0] == '@' {
|
||||
// parse properties
|
||||
switch {
|
||||
case strings.HasPrefix(line[1:], "version "):
|
||||
a := strings.Split(line[1:], " ")
|
||||
if a[1] != gen.UnicodeVersion() {
|
||||
log.Fatalf("incompatible version %s; want %s", a[1], gen.UnicodeVersion())
|
||||
}
|
||||
case strings.HasPrefix(line[1:], "backwards "):
|
||||
log.Fatalf("%d: unsupported option backwards", i)
|
||||
default:
|
||||
log.Printf("%d: unknown option %s", i, line[1:])
|
||||
}
|
||||
} else {
|
||||
// parse entries
|
||||
part := strings.Split(line, " ; ")
|
||||
if len(part) != 2 {
|
||||
log.Fatalf("%d: production rule without ';': %v", i, line)
|
||||
}
|
||||
lhs := []rune{}
|
||||
for _, v := range strings.Split(part[0], " ") {
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
lhs = append(lhs, rune(convHex(i, v)))
|
||||
}
|
||||
var n int
|
||||
var vars []int
|
||||
rhs := [][]int{}
|
||||
for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
|
||||
n += len(m[0])
|
||||
elem := []int{}
|
||||
for _, h := range strings.Split(m[2], ".") {
|
||||
elem = append(elem, convHex(i, h))
|
||||
}
|
||||
if m[1] == "*" {
|
||||
vars = append(vars, i)
|
||||
}
|
||||
rhs = append(rhs, elem)
|
||||
}
|
||||
if len(part[1]) < n+3 || part[1][n+1] != '#' {
|
||||
log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
|
||||
}
|
||||
if *test {
|
||||
testInput.add(string(lhs))
|
||||
}
|
||||
failOnError(builder.Add(lhs, rhs, vars))
|
||||
}
|
||||
}
|
||||
if scanner.Err() != nil {
|
||||
log.Fatal(scanner.Err())
|
||||
}
|
||||
}
|
||||
|
||||
func convHex(line int, s string) int {
|
||||
r, e := strconv.ParseInt(s, 16, 32)
|
||||
if e != nil {
|
||||
log.Fatalf("%d: %v", line, e)
|
||||
}
|
||||
return int(r)
|
||||
}
|
||||
|
||||
var testInput = stringSet{}
|
||||
|
||||
var charRe = regexp.MustCompile(`&#x([0-9A-F]*);`)
|
||||
var tagRe = regexp.MustCompile(`<([a-z_]*) */>`)
|
||||
|
||||
var mainLocales = []string{}
|
||||
|
||||
// charsets holds a list of exemplar characters per category.
|
||||
type charSets map[string][]string
|
||||
|
||||
func (p charSets) fprint(w io.Writer) {
|
||||
fmt.Fprintln(w, "[exN]string{")
|
||||
for i, k := range []string{"", "contractions", "punctuation", "auxiliary", "currencySymbol", "index"} {
|
||||
if set := p[k]; len(set) != 0 {
|
||||
fmt.Fprintf(w, "\t\t%d: %q,\n", i, strings.Join(set, " "))
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "\t},")
|
||||
}
|
||||
|
||||
var localeChars = make(map[string]charSets)
|
||||
|
||||
const exemplarHeader = `
|
||||
type exemplarType int
|
||||
const (
|
||||
exCharacters exemplarType = iota
|
||||
exContractions
|
||||
exPunctuation
|
||||
exAuxiliary
|
||||
exCurrency
|
||||
exIndex
|
||||
exN
|
||||
)
|
||||
`
|
||||
|
||||
func printExemplarCharacters(w io.Writer) {
|
||||
fmt.Fprintln(w, exemplarHeader)
|
||||
fmt.Fprintln(w, "var exemplarCharacters = map[string][exN]string{")
|
||||
for _, loc := range mainLocales {
|
||||
fmt.Fprintf(w, "\t%q: ", loc)
|
||||
localeChars[loc].fprint(w)
|
||||
}
|
||||
fmt.Fprintln(w, "}")
|
||||
}
|
||||
|
||||
func decodeCLDR(d *cldr.Decoder) *cldr.CLDR {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
data, err := d.DecodeZip(r)
|
||||
failOnError(err)
|
||||
return data
|
||||
}
|
||||
|
||||
// parseMain parses XML files in the main directory of the CLDR core.zip file.
|
||||
func parseMain() {
|
||||
d := &cldr.Decoder{}
|
||||
d.SetDirFilter("main")
|
||||
d.SetSectionFilter("characters")
|
||||
data := decodeCLDR(d)
|
||||
for _, loc := range data.Locales() {
|
||||
x := data.RawLDML(loc)
|
||||
if skipLang(x.Identity.Language.Type) {
|
||||
continue
|
||||
}
|
||||
if x.Characters != nil {
|
||||
x, _ = data.LDML(loc)
|
||||
loc = language.Make(loc).String()
|
||||
for _, ec := range x.Characters.ExemplarCharacters {
|
||||
if ec.Draft != "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := localeChars[loc]; !ok {
|
||||
mainLocales = append(mainLocales, loc)
|
||||
localeChars[loc] = make(charSets)
|
||||
}
|
||||
localeChars[loc][ec.Type] = parseCharacters(ec.Data())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseCharacters(chars string) []string {
|
||||
parseSingle := func(s string) (r rune, tail string, escaped bool) {
|
||||
if s[0] == '\\' {
|
||||
return rune(s[1]), s[2:], true
|
||||
}
|
||||
r, sz := utf8.DecodeRuneInString(s)
|
||||
return r, s[sz:], false
|
||||
}
|
||||
chars = strings.TrimSpace(chars)
|
||||
if n := len(chars) - 1; chars[n] == ']' && chars[0] == '[' {
|
||||
chars = chars[1:n]
|
||||
}
|
||||
list := []string{}
|
||||
var r, last, end rune
|
||||
for len(chars) > 0 {
|
||||
if chars[0] == '{' { // character sequence
|
||||
buf := []rune{}
|
||||
for chars = chars[1:]; len(chars) > 0; {
|
||||
r, chars, _ = parseSingle(chars)
|
||||
if r == '}' {
|
||||
break
|
||||
}
|
||||
if r == ' ' {
|
||||
log.Fatalf("space not supported in sequence %q", chars)
|
||||
}
|
||||
buf = append(buf, r)
|
||||
}
|
||||
list = append(list, string(buf))
|
||||
last = 0
|
||||
} else { // single character
|
||||
escaped := false
|
||||
r, chars, escaped = parseSingle(chars)
|
||||
if r != ' ' {
|
||||
if r == '-' && !escaped {
|
||||
if last == 0 {
|
||||
log.Fatal("'-' should be preceded by a character")
|
||||
}
|
||||
end, chars, _ = parseSingle(chars)
|
||||
for ; last <= end; last++ {
|
||||
list = append(list, string(last))
|
||||
}
|
||||
last = 0
|
||||
} else {
|
||||
list = append(list, string(r))
|
||||
last = r
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
var fileRe = regexp.MustCompile(`.*/collation/(.*)\.xml`)
|
||||
|
||||
// typeMap translates legacy type keys to their BCP47 equivalent.
|
||||
var typeMap = map[string]string{
|
||||
"phonebook": "phonebk",
|
||||
"traditional": "trad",
|
||||
}
|
||||
|
||||
// parseCollation parses XML files in the collation directory of the CLDR core.zip file.
|
||||
func parseCollation(b *build.Builder) {
|
||||
d := &cldr.Decoder{}
|
||||
d.SetDirFilter("collation")
|
||||
data := decodeCLDR(d)
|
||||
for _, loc := range data.Locales() {
|
||||
x, err := data.LDML(loc)
|
||||
failOnError(err)
|
||||
if skipLang(x.Identity.Language.Type) {
|
||||
continue
|
||||
}
|
||||
cs := x.Collations.Collation
|
||||
sl := cldr.MakeSlice(&cs)
|
||||
if len(types.s) == 0 {
|
||||
sl.SelectAnyOf("type", x.Collations.Default())
|
||||
} else if !types.all {
|
||||
sl.SelectAnyOf("type", types.s...)
|
||||
}
|
||||
sl.SelectOnePerGroup("alt", altInclude())
|
||||
|
||||
for _, c := range cs {
|
||||
id, err := language.Parse(loc)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "invalid locale: %q", err)
|
||||
continue
|
||||
}
|
||||
// Support both old- and new-style defaults.
|
||||
d := c.Type
|
||||
if x.Collations.DefaultCollation == nil {
|
||||
d = x.Collations.Default()
|
||||
} else {
|
||||
d = x.Collations.DefaultCollation.Data()
|
||||
}
|
||||
// We assume tables are being built either for search or collation,
|
||||
// but not both. For search the default is always "search".
|
||||
if d != c.Type && c.Type != "search" {
|
||||
typ := c.Type
|
||||
if len(c.Type) > 8 {
|
||||
typ = typeMap[c.Type]
|
||||
}
|
||||
id, err = id.SetTypeForKey("co", typ)
|
||||
failOnError(err)
|
||||
}
|
||||
t := b.Tailoring(id)
|
||||
c.Process(processor{t})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type processor struct {
|
||||
t *build.Tailoring
|
||||
}
|
||||
|
||||
func (p processor) Reset(anchor string, before int) (err error) {
|
||||
if before != 0 {
|
||||
err = p.t.SetAnchorBefore(anchor)
|
||||
} else {
|
||||
err = p.t.SetAnchor(anchor)
|
||||
}
|
||||
failOnError(err)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p processor) Insert(level int, str, context, extend string) error {
|
||||
str = context + str
|
||||
if *test {
|
||||
testInput.add(str)
|
||||
}
|
||||
// TODO: mimic bug in old maketables: remove.
|
||||
err := p.t.Insert(colltab.Level(level-1), str, context+extend)
|
||||
failOnError(err)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p processor) Index(id string) {
|
||||
}
|
||||
|
||||
func testCollator(c *collate.Collator) {
|
||||
c0 := collate.New(language.Und)
|
||||
|
||||
// iterator over all characters for all locales and check
|
||||
// whether Key is equal.
|
||||
buf := collate.Buffer{}
|
||||
|
||||
// Add all common and not too uncommon runes to the test set.
|
||||
for i := rune(0); i < 0x30000; i++ {
|
||||
testInput.add(string(i))
|
||||
}
|
||||
for i := rune(0xE0000); i < 0xF0000; i++ {
|
||||
testInput.add(string(i))
|
||||
}
|
||||
for _, str := range testInput.values() {
|
||||
k0 := c0.KeyFromString(&buf, str)
|
||||
k := c.KeyFromString(&buf, str)
|
||||
if !bytes.Equal(k0, k) {
|
||||
failOnError(fmt.Errorf("test:%U: keys differ (%x vs %x)", []rune(str), k0, k))
|
||||
}
|
||||
buf.Reset()
|
||||
}
|
||||
fmt.Println("PASS")
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
b := build.NewBuilder()
|
||||
parseUCA(b)
|
||||
if tables.contains("chars") {
|
||||
parseMain()
|
||||
}
|
||||
parseCollation(b)
|
||||
|
||||
c, err := b.Build()
|
||||
failOnError(err)
|
||||
|
||||
if *test {
|
||||
testCollator(collate.NewFromTable(c))
|
||||
} else {
|
||||
w := &bytes.Buffer{}
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
if tables.contains("collate") {
|
||||
_, err = b.Print(w)
|
||||
failOnError(err)
|
||||
}
|
||||
if tables.contains("chars") {
|
||||
printExemplarCharacters(w)
|
||||
}
|
||||
gen.WriteGoFile("tables.go", *pkg, w.Bytes())
|
||||
}
|
||||
}
|
239
vendor/golang.org/x/text/collate/option.go
generated
vendored
Normal file
239
vendor/golang.org/x/text/collate/option.go
generated
vendored
Normal file
|
@ -0,0 +1,239 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// newCollator creates a new collator with default options configured.
|
||||
func newCollator(t colltab.Weighter) *Collator {
|
||||
// Initialize a collator with default options.
|
||||
c := &Collator{
|
||||
options: options{
|
||||
ignore: [colltab.NumLevels]bool{
|
||||
colltab.Quaternary: true,
|
||||
colltab.Identity: true,
|
||||
},
|
||||
f: norm.NFD,
|
||||
t: t,
|
||||
},
|
||||
}
|
||||
|
||||
// TODO: store vt in tags or remove.
|
||||
c.variableTop = t.Top()
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// An Option is used to change the behavior of a Collator. Options override the
|
||||
// settings passed through the locale identifier.
|
||||
type Option struct {
|
||||
priority int
|
||||
f func(o *options)
|
||||
}
|
||||
|
||||
type prioritizedOptions []Option
|
||||
|
||||
func (p prioritizedOptions) Len() int {
|
||||
return len(p)
|
||||
}
|
||||
|
||||
func (p prioritizedOptions) Swap(i, j int) {
|
||||
p[i], p[j] = p[j], p[i]
|
||||
}
|
||||
|
||||
func (p prioritizedOptions) Less(i, j int) bool {
|
||||
return p[i].priority < p[j].priority
|
||||
}
|
||||
|
||||
type options struct {
|
||||
// ignore specifies which levels to ignore.
|
||||
ignore [colltab.NumLevels]bool
|
||||
|
||||
// caseLevel is true if there is an additional level of case matching
|
||||
// between the secondary and tertiary levels.
|
||||
caseLevel bool
|
||||
|
||||
// backwards specifies the order of sorting at the secondary level.
|
||||
// This option exists predominantly to support reverse sorting of accents in French.
|
||||
backwards bool
|
||||
|
||||
// numeric specifies whether any sequence of decimal digits (category is Nd)
|
||||
// is sorted at a primary level with its numeric value.
|
||||
// For example, "A-21" < "A-123".
|
||||
// This option is set by wrapping the main Weighter with NewNumericWeighter.
|
||||
numeric bool
|
||||
|
||||
// alternate specifies an alternative handling of variables.
|
||||
alternate alternateHandling
|
||||
|
||||
// variableTop is the largest primary value that is considered to be
|
||||
// variable.
|
||||
variableTop uint32
|
||||
|
||||
t colltab.Weighter
|
||||
|
||||
f norm.Form
|
||||
}
|
||||
|
||||
func (o *options) setOptions(opts []Option) {
|
||||
sort.Sort(prioritizedOptions(opts))
|
||||
for _, x := range opts {
|
||||
x.f(o)
|
||||
}
|
||||
}
|
||||
|
||||
// OptionsFromTag extracts the BCP47 collation options from the tag and
|
||||
// configures a collator accordingly. These options are set before any other
|
||||
// option.
|
||||
func OptionsFromTag(t language.Tag) Option {
|
||||
return Option{0, func(o *options) {
|
||||
o.setFromTag(t)
|
||||
}}
|
||||
}
|
||||
|
||||
func (o *options) setFromTag(t language.Tag) {
|
||||
o.caseLevel = ldmlBool(t, o.caseLevel, "kc")
|
||||
o.backwards = ldmlBool(t, o.backwards, "kb")
|
||||
o.numeric = ldmlBool(t, o.numeric, "kn")
|
||||
|
||||
// Extract settings from the BCP47 u extension.
|
||||
switch t.TypeForKey("ks") { // strength
|
||||
case "level1":
|
||||
o.ignore[colltab.Secondary] = true
|
||||
o.ignore[colltab.Tertiary] = true
|
||||
case "level2":
|
||||
o.ignore[colltab.Tertiary] = true
|
||||
case "level3", "":
|
||||
// The default.
|
||||
case "level4":
|
||||
o.ignore[colltab.Quaternary] = false
|
||||
case "identic":
|
||||
o.ignore[colltab.Quaternary] = false
|
||||
o.ignore[colltab.Identity] = false
|
||||
}
|
||||
|
||||
switch t.TypeForKey("ka") {
|
||||
case "shifted":
|
||||
o.alternate = altShifted
|
||||
// The following two types are not official BCP47, but we support them to
|
||||
// give access to this otherwise hidden functionality. The name blanked is
|
||||
// derived from the LDML name blanked and posix reflects the main use of
|
||||
// the shift-trimmed option.
|
||||
case "blanked":
|
||||
o.alternate = altBlanked
|
||||
case "posix":
|
||||
o.alternate = altShiftTrimmed
|
||||
}
|
||||
|
||||
// TODO: caseFirst ("kf"), reorder ("kr"), and maybe variableTop ("vt").
|
||||
|
||||
// Not used:
|
||||
// - normalization ("kk", not necessary for this implementation)
|
||||
// - hiraganaQuatenary ("kh", obsolete)
|
||||
}
|
||||
|
||||
func ldmlBool(t language.Tag, old bool, key string) bool {
|
||||
switch t.TypeForKey(key) {
|
||||
case "true":
|
||||
return true
|
||||
case "false":
|
||||
return false
|
||||
default:
|
||||
return old
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// IgnoreCase sets case-insensitive comparison.
|
||||
IgnoreCase Option = ignoreCase
|
||||
ignoreCase = Option{3, ignoreCaseF}
|
||||
|
||||
// IgnoreDiacritics causes diacritical marks to be ignored. ("o" == "ö").
|
||||
IgnoreDiacritics Option = ignoreDiacritics
|
||||
ignoreDiacritics = Option{3, ignoreDiacriticsF}
|
||||
|
||||
// IgnoreWidth causes full-width characters to match their half-width
|
||||
// equivalents.
|
||||
IgnoreWidth Option = ignoreWidth
|
||||
ignoreWidth = Option{2, ignoreWidthF}
|
||||
|
||||
// Loose sets the collator to ignore diacritics, case and weight.
|
||||
Loose Option = loose
|
||||
loose = Option{4, looseF}
|
||||
|
||||
// Force ordering if strings are equivalent but not equal.
|
||||
Force Option = force
|
||||
force = Option{5, forceF}
|
||||
|
||||
// Numeric specifies that numbers should sort numerically ("2" < "12").
|
||||
Numeric Option = numeric
|
||||
numeric = Option{5, numericF}
|
||||
)
|
||||
|
||||
func ignoreWidthF(o *options) {
|
||||
o.ignore[colltab.Tertiary] = true
|
||||
o.caseLevel = true
|
||||
}
|
||||
|
||||
func ignoreDiacriticsF(o *options) {
|
||||
o.ignore[colltab.Secondary] = true
|
||||
}
|
||||
|
||||
func ignoreCaseF(o *options) {
|
||||
o.ignore[colltab.Tertiary] = true
|
||||
o.caseLevel = false
|
||||
}
|
||||
|
||||
func looseF(o *options) {
|
||||
ignoreWidthF(o)
|
||||
ignoreDiacriticsF(o)
|
||||
ignoreCaseF(o)
|
||||
}
|
||||
|
||||
func forceF(o *options) {
|
||||
o.ignore[colltab.Identity] = false
|
||||
}
|
||||
|
||||
func numericF(o *options) { o.numeric = true }
|
||||
|
||||
// Reorder overrides the pre-defined ordering of scripts and character sets.
|
||||
func Reorder(s ...string) Option {
|
||||
// TODO: need fractional weights to implement this.
|
||||
panic("TODO: implement")
|
||||
}
|
||||
|
||||
// TODO: consider making these public again. These options cannot be fully
|
||||
// specified in BCP47, so an API interface seems warranted. Still a higher-level
|
||||
// interface would be nice (e.g. a POSIX option for enabling altShiftTrimmed)
|
||||
|
||||
// alternateHandling identifies the various ways in which variables are handled.
|
||||
// A rune with a primary weight lower than the variable top is considered a
|
||||
// variable.
|
||||
// See http://www.unicode.org/reports/tr10/#Variable_Weighting for details.
|
||||
type alternateHandling int
|
||||
|
||||
const (
|
||||
// altNonIgnorable turns off special handling of variables.
|
||||
altNonIgnorable alternateHandling = iota
|
||||
|
||||
// altBlanked sets variables and all subsequent primary ignorables to be
|
||||
// ignorable at all levels. This is identical to removing all variables
|
||||
// and subsequent primary ignorables from the input.
|
||||
altBlanked
|
||||
|
||||
// altShifted sets variables to be ignorable for levels one through three and
|
||||
// adds a fourth level based on the values of the ignored levels.
|
||||
altShifted
|
||||
|
||||
// altShiftTrimmed is a slight variant of altShifted that is used to
|
||||
// emulate POSIX.
|
||||
altShiftTrimmed
|
||||
)
|
209
vendor/golang.org/x/text/collate/option_test.go
generated
vendored
Normal file
209
vendor/golang.org/x/text/collate/option_test.go
generated
vendored
Normal file
|
@ -0,0 +1,209 @@
|
|||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package collate
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
var (
|
||||
defaultIgnore = ignore(colltab.Tertiary)
|
||||
defaultTable = getTable(locales[0])
|
||||
)
|
||||
|
||||
func TestOptions(t *testing.T) {
|
||||
for i, tt := range []struct {
|
||||
in []Option
|
||||
out options
|
||||
}{
|
||||
0: {
|
||||
out: options{
|
||||
ignore: defaultIgnore,
|
||||
},
|
||||
},
|
||||
1: {
|
||||
in: []Option{IgnoreDiacritics},
|
||||
out: options{
|
||||
ignore: [colltab.NumLevels]bool{false, true, false, true, true},
|
||||
},
|
||||
},
|
||||
2: {
|
||||
in: []Option{IgnoreCase, IgnoreDiacritics},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Primary),
|
||||
},
|
||||
},
|
||||
3: {
|
||||
in: []Option{ignoreDiacritics, IgnoreWidth},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Primary),
|
||||
caseLevel: true,
|
||||
},
|
||||
},
|
||||
4: {
|
||||
in: []Option{IgnoreWidth, ignoreDiacritics},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Primary),
|
||||
caseLevel: true,
|
||||
},
|
||||
},
|
||||
5: {
|
||||
in: []Option{IgnoreCase, IgnoreWidth},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Secondary),
|
||||
},
|
||||
},
|
||||
6: {
|
||||
in: []Option{IgnoreCase, IgnoreWidth, Loose},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Primary),
|
||||
},
|
||||
},
|
||||
7: {
|
||||
in: []Option{Force, IgnoreCase, IgnoreWidth, Loose},
|
||||
out: options{
|
||||
ignore: [colltab.NumLevels]bool{false, true, true, true, false},
|
||||
},
|
||||
},
|
||||
8: {
|
||||
in: []Option{IgnoreDiacritics, IgnoreCase},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Primary),
|
||||
},
|
||||
},
|
||||
9: {
|
||||
in: []Option{Numeric},
|
||||
out: options{
|
||||
ignore: defaultIgnore,
|
||||
numeric: true,
|
||||
},
|
||||
},
|
||||
10: {
|
||||
in: []Option{OptionsFromTag(language.MustParse("und-u-ks-level1"))},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Primary),
|
||||
},
|
||||
},
|
||||
11: {
|
||||
in: []Option{OptionsFromTag(language.MustParse("und-u-ks-level4"))},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Quaternary),
|
||||
},
|
||||
},
|
||||
12: {
|
||||
in: []Option{OptionsFromTag(language.MustParse("und-u-ks-identic"))},
|
||||
out: options{},
|
||||
},
|
||||
13: {
|
||||
in: []Option{
|
||||
OptionsFromTag(language.MustParse("und-u-kn-true-kb-true-kc-true")),
|
||||
},
|
||||
out: options{
|
||||
ignore: defaultIgnore,
|
||||
caseLevel: true,
|
||||
backwards: true,
|
||||
numeric: true,
|
||||
},
|
||||
},
|
||||
14: {
|
||||
in: []Option{
|
||||
OptionsFromTag(language.MustParse("und-u-kn-true-kb-true-kc-true")),
|
||||
OptionsFromTag(language.MustParse("und-u-kn-false-kb-false-kc-false")),
|
||||
},
|
||||
out: options{
|
||||
ignore: defaultIgnore,
|
||||
},
|
||||
},
|
||||
15: {
|
||||
in: []Option{
|
||||
OptionsFromTag(language.MustParse("und-u-kn-true-kb-true-kc-true")),
|
||||
OptionsFromTag(language.MustParse("und-u-kn-foo-kb-foo-kc-foo")),
|
||||
},
|
||||
out: options{
|
||||
ignore: defaultIgnore,
|
||||
caseLevel: true,
|
||||
backwards: true,
|
||||
numeric: true,
|
||||
},
|
||||
},
|
||||
16: { // Normal options take precedence over tag options.
|
||||
in: []Option{
|
||||
Numeric, IgnoreCase,
|
||||
OptionsFromTag(language.MustParse("und-u-kn-false-kc-true")),
|
||||
},
|
||||
out: options{
|
||||
ignore: ignore(colltab.Secondary),
|
||||
caseLevel: false,
|
||||
numeric: true,
|
||||
},
|
||||
},
|
||||
17: {
|
||||
in: []Option{
|
||||
OptionsFromTag(language.MustParse("und-u-ka-shifted")),
|
||||
},
|
||||
out: options{
|
||||
ignore: defaultIgnore,
|
||||
alternate: altShifted,
|
||||
},
|
||||
},
|
||||
18: {
|
||||
in: []Option{
|
||||
OptionsFromTag(language.MustParse("und-u-ka-blanked")),
|
||||
},
|
||||
out: options{
|
||||
ignore: defaultIgnore,
|
||||
alternate: altBlanked,
|
||||
},
|
||||
},
|
||||
19: {
|
||||
in: []Option{
|
||||
OptionsFromTag(language.MustParse("und-u-ka-posix")),
|
||||
},
|
||||
out: options{
|
||||
ignore: defaultIgnore,
|
||||
alternate: altShiftTrimmed,
|
||||
},
|
||||
},
|
||||
} {
|
||||
c := newCollator(defaultTable)
|
||||
c.t = nil
|
||||
c.variableTop = 0
|
||||
c.f = 0
|
||||
|
||||
c.setOptions(tt.in)
|
||||
if !reflect.DeepEqual(c.options, tt.out) {
|
||||
t.Errorf("%d: got %v; want %v", i, c.options, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlternateSortTypes(t *testing.T) {
|
||||
testCases := []struct {
|
||||
lang string
|
||||
in []string
|
||||
want []string
|
||||
}{{
|
||||
lang: "zh,cmn,zh-Hant-u-co-pinyin,zh-HK-u-co-pinyin,zh-pinyin",
|
||||
in: []string{"爸爸", "妈妈", "儿子", "女儿"},
|
||||
want: []string{"爸爸", "儿子", "妈妈", "女儿"},
|
||||
}, {
|
||||
lang: "zh-Hant,zh-u-co-stroke,zh-Hant-u-co-stroke",
|
||||
in: []string{"爸爸", "妈妈", "儿子", "女儿"},
|
||||
want: []string{"儿子", "女儿", "妈妈", "爸爸"},
|
||||
}}
|
||||
for _, tc := range testCases {
|
||||
for _, tag := range strings.Split(tc.lang, ",") {
|
||||
got := append([]string{}, tc.in...)
|
||||
New(language.MustParse(tag)).SortStrings(got)
|
||||
if !reflect.DeepEqual(got, tc.want) {
|
||||
t.Errorf("New(%s).SortStrings(%v) = %v; want %v", tag, tc.in, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
230
vendor/golang.org/x/text/collate/reg_test.go
generated
vendored
Normal file
230
vendor/golang.org/x/text/collate/reg_test.go
generated
vendored
Normal file
|
@ -0,0 +1,230 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"path"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/collate/build"
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
var long = flag.Bool("long", false,
|
||||
"run time-consuming tests, such as tests that fetch data online")
|
||||
|
||||
// This regression test runs tests for the test files in CollationTest.zip
|
||||
// (taken from http://www.unicode.org/Public/UCA/<gen.UnicodeVersion()>/).
|
||||
//
|
||||
// The test files have the following form:
|
||||
// # header
|
||||
// 0009 0021; # ('\u0009') <CHARACTER TABULATION> [| | | 0201 025E]
|
||||
// 0009 003F; # ('\u0009') <CHARACTER TABULATION> [| | | 0201 0263]
|
||||
// 000A 0021; # ('\u000A') <LINE FEED (LF)> [| | | 0202 025E]
|
||||
// 000A 003F; # ('\u000A') <LINE FEED (LF)> [| | | 0202 0263]
|
||||
//
|
||||
// The part before the semicolon is the hex representation of a sequence
|
||||
// of runes. After the hash mark is a comment. The strings
|
||||
// represented by rune sequence are in the file in sorted order, as
|
||||
// defined by the DUCET.
|
||||
|
||||
type Test struct {
|
||||
name string
|
||||
str [][]byte
|
||||
comment []string
|
||||
}
|
||||
|
||||
var versionRe = regexp.MustCompile(`# UCA Version: (.*)\n?$`)
|
||||
var testRe = regexp.MustCompile(`^([\dA-F ]+);.*# (.*)\n?$`)
|
||||
|
||||
func TestCollation(t *testing.T) {
|
||||
if !gen.IsLocal() && !*long {
|
||||
t.Skip("skipping test to prevent downloading; to run use -long or use -local to specify a local source")
|
||||
}
|
||||
t.Skip("must first update to new file format to support test")
|
||||
for _, test := range loadTestData() {
|
||||
doTest(t, test)
|
||||
}
|
||||
}
|
||||
|
||||
func Error(e error) {
|
||||
if e != nil {
|
||||
log.Fatal(e)
|
||||
}
|
||||
}
|
||||
|
||||
// parseUCA parses a Default Unicode Collation Element Table of the format
|
||||
// specified in http://www.unicode.org/reports/tr10/#File_Format.
|
||||
// It returns the variable top.
|
||||
func parseUCA(builder *build.Builder) {
|
||||
r := gen.OpenUnicodeFile("UCA", "", "allkeys.txt")
|
||||
defer r.Close()
|
||||
input := bufio.NewReader(r)
|
||||
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
|
||||
for i := 1; true; i++ {
|
||||
l, prefix, err := input.ReadLine()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
Error(err)
|
||||
line := string(l)
|
||||
if prefix {
|
||||
log.Fatalf("%d: buffer overflow", i)
|
||||
}
|
||||
if len(line) == 0 || line[0] == '#' {
|
||||
continue
|
||||
}
|
||||
if line[0] == '@' {
|
||||
if strings.HasPrefix(line[1:], "version ") {
|
||||
if v := strings.Split(line[1:], " ")[1]; v != gen.UnicodeVersion() {
|
||||
log.Fatalf("incompatible version %s; want %s", v, gen.UnicodeVersion())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// parse entries
|
||||
part := strings.Split(line, " ; ")
|
||||
if len(part) != 2 {
|
||||
log.Fatalf("%d: production rule without ';': %v", i, line)
|
||||
}
|
||||
lhs := []rune{}
|
||||
for _, v := range strings.Split(part[0], " ") {
|
||||
if v != "" {
|
||||
lhs = append(lhs, rune(convHex(i, v)))
|
||||
}
|
||||
}
|
||||
vars := []int{}
|
||||
rhs := [][]int{}
|
||||
for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
|
||||
if m[1] == "*" {
|
||||
vars = append(vars, i)
|
||||
}
|
||||
elem := []int{}
|
||||
for _, h := range strings.Split(m[2], ".") {
|
||||
elem = append(elem, convHex(i, h))
|
||||
}
|
||||
rhs = append(rhs, elem)
|
||||
}
|
||||
builder.Add(lhs, rhs, vars)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func convHex(line int, s string) int {
|
||||
r, e := strconv.ParseInt(s, 16, 32)
|
||||
if e != nil {
|
||||
log.Fatalf("%d: %v", line, e)
|
||||
}
|
||||
return int(r)
|
||||
}
|
||||
|
||||
func loadTestData() []Test {
|
||||
f := gen.OpenUnicodeFile("UCA", "", "CollationTest.zip")
|
||||
buffer, err := ioutil.ReadAll(f)
|
||||
f.Close()
|
||||
Error(err)
|
||||
archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
|
||||
Error(err)
|
||||
tests := []Test{}
|
||||
for _, f := range archive.File {
|
||||
// Skip the short versions, which are simply duplicates of the long versions.
|
||||
if strings.Contains(f.Name, "SHORT") || f.FileInfo().IsDir() {
|
||||
continue
|
||||
}
|
||||
ff, err := f.Open()
|
||||
Error(err)
|
||||
defer ff.Close()
|
||||
scanner := bufio.NewScanner(ff)
|
||||
test := Test{name: path.Base(f.Name)}
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if len(line) <= 1 || line[0] == '#' {
|
||||
if m := versionRe.FindStringSubmatch(line); m != nil {
|
||||
if m[1] != gen.UnicodeVersion() {
|
||||
log.Printf("warning:%s: version is %s; want %s", f.Name, m[1], gen.UnicodeVersion())
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
m := testRe.FindStringSubmatch(line)
|
||||
if m == nil || len(m) < 3 {
|
||||
log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
|
||||
}
|
||||
str := []byte{}
|
||||
// In the regression test data (unpaired) surrogates are assigned a weight
|
||||
// corresponding to their code point value. However, utf8.DecodeRune,
|
||||
// which is used to compute the implicit weight, assigns FFFD to surrogates.
|
||||
// We therefore skip tests with surrogates. This skips about 35 entries
|
||||
// per test.
|
||||
valid := true
|
||||
for _, split := range strings.Split(m[1], " ") {
|
||||
r, err := strconv.ParseUint(split, 16, 64)
|
||||
Error(err)
|
||||
valid = valid && utf8.ValidRune(rune(r))
|
||||
str = append(str, string(rune(r))...)
|
||||
}
|
||||
if valid {
|
||||
test.str = append(test.str, str)
|
||||
test.comment = append(test.comment, m[2])
|
||||
}
|
||||
}
|
||||
if scanner.Err() != nil {
|
||||
log.Fatal(scanner.Err())
|
||||
}
|
||||
tests = append(tests, test)
|
||||
}
|
||||
return tests
|
||||
}
|
||||
|
||||
var errorCount int
|
||||
|
||||
func runes(b []byte) []rune {
|
||||
return []rune(string(b))
|
||||
}
|
||||
|
||||
var shifted = language.MustParse("und-u-ka-shifted-ks-level4")
|
||||
|
||||
func doTest(t *testing.T, tc Test) {
|
||||
bld := build.NewBuilder()
|
||||
parseUCA(bld)
|
||||
w, err := bld.Build()
|
||||
Error(err)
|
||||
var tag language.Tag
|
||||
if !strings.Contains(tc.name, "NON_IGNOR") {
|
||||
tag = shifted
|
||||
}
|
||||
c := NewFromTable(w, OptionsFromTag(tag))
|
||||
b := &Buffer{}
|
||||
prev := tc.str[0]
|
||||
for i := 1; i < len(tc.str); i++ {
|
||||
b.Reset()
|
||||
s := tc.str[i]
|
||||
ka := c.Key(b, prev)
|
||||
kb := c.Key(b, s)
|
||||
if r := bytes.Compare(ka, kb); r == 1 {
|
||||
t.Errorf("%s:%d: Key(%.4X) < Key(%.4X) (%X < %X) == %d; want -1 or 0", tc.name, i, []rune(string(prev)), []rune(string(s)), ka, kb, r)
|
||||
prev = s
|
||||
continue
|
||||
}
|
||||
if r := c.Compare(prev, s); r == 1 {
|
||||
t.Errorf("%s:%d: Compare(%.4X, %.4X) == %d; want -1 or 0", tc.name, i, runes(prev), runes(s), r)
|
||||
}
|
||||
if r := c.Compare(s, prev); r == -1 {
|
||||
t.Errorf("%s:%d: Compare(%.4X, %.4X) == %d; want 1 or 0", tc.name, i, runes(s), runes(prev), r)
|
||||
}
|
||||
prev = s
|
||||
}
|
||||
}
|
81
vendor/golang.org/x/text/collate/sort.go
generated
vendored
Normal file
81
vendor/golang.org/x/text/collate/sort.go
generated
vendored
Normal file
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sort"
|
||||
)
|
||||
|
||||
const (
|
||||
maxSortBuffer = 40960
|
||||
maxSortEntries = 4096
|
||||
)
|
||||
|
||||
type swapper interface {
|
||||
Swap(i, j int)
|
||||
}
|
||||
|
||||
type sorter struct {
|
||||
buf *Buffer
|
||||
keys [][]byte
|
||||
src swapper
|
||||
}
|
||||
|
||||
func (s *sorter) init(n int) {
|
||||
if s.buf == nil {
|
||||
s.buf = &Buffer{}
|
||||
s.buf.init()
|
||||
}
|
||||
if cap(s.keys) < n {
|
||||
s.keys = make([][]byte, n)
|
||||
}
|
||||
s.keys = s.keys[0:n]
|
||||
}
|
||||
|
||||
func (s *sorter) sort(src swapper) {
|
||||
s.src = src
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
func (s sorter) Len() int {
|
||||
return len(s.keys)
|
||||
}
|
||||
|
||||
func (s sorter) Less(i, j int) bool {
|
||||
return bytes.Compare(s.keys[i], s.keys[j]) == -1
|
||||
}
|
||||
|
||||
func (s sorter) Swap(i, j int) {
|
||||
s.keys[i], s.keys[j] = s.keys[j], s.keys[i]
|
||||
s.src.Swap(i, j)
|
||||
}
|
||||
|
||||
// A Lister can be sorted by Collator's Sort method.
|
||||
type Lister interface {
|
||||
Len() int
|
||||
Swap(i, j int)
|
||||
// Bytes returns the bytes of the text at index i.
|
||||
Bytes(i int) []byte
|
||||
}
|
||||
|
||||
// Sort uses sort.Sort to sort the strings represented by x using the rules of c.
|
||||
func (c *Collator) Sort(x Lister) {
|
||||
n := x.Len()
|
||||
c.sorter.init(n)
|
||||
for i := 0; i < n; i++ {
|
||||
c.sorter.keys[i] = c.Key(c.sorter.buf, x.Bytes(i))
|
||||
}
|
||||
c.sorter.sort(x)
|
||||
}
|
||||
|
||||
// SortStrings uses sort.Sort to sort the strings in x using the rules of c.
|
||||
func (c *Collator) SortStrings(x []string) {
|
||||
c.sorter.init(len(x))
|
||||
for i, s := range x {
|
||||
c.sorter.keys[i] = c.KeyFromString(c.sorter.buf, s)
|
||||
}
|
||||
c.sorter.sort(sort.StringSlice(x))
|
||||
}
|
55
vendor/golang.org/x/text/collate/sort_test.go
generated
vendored
Normal file
55
vendor/golang.org/x/text/collate/sort_test.go
generated
vendored
Normal file
|
@ -0,0 +1,55 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/collate"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
func ExampleCollator_Strings() {
|
||||
c := collate.New(language.Und)
|
||||
strings := []string{
|
||||
"ad",
|
||||
"ab",
|
||||
"äb",
|
||||
"ac",
|
||||
}
|
||||
c.SortStrings(strings)
|
||||
fmt.Println(strings)
|
||||
// Output: [ab äb ac ad]
|
||||
}
|
||||
|
||||
type sorter []string
|
||||
|
||||
func (s sorter) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sorter) Swap(i, j int) {
|
||||
s[j], s[i] = s[i], s[j]
|
||||
}
|
||||
|
||||
func (s sorter) Bytes(i int) []byte {
|
||||
return []byte(s[i])
|
||||
}
|
||||
|
||||
func TestSort(t *testing.T) {
|
||||
c := collate.New(language.English)
|
||||
strings := []string{
|
||||
"bcd",
|
||||
"abc",
|
||||
"ddd",
|
||||
}
|
||||
c.Sort(sorter(strings))
|
||||
res := fmt.Sprint(strings)
|
||||
want := "[abc bcd ddd]"
|
||||
if res != want {
|
||||
t.Errorf("found %s; want %s", res, want)
|
||||
}
|
||||
}
|
291
vendor/golang.org/x/text/collate/table_test.go
generated
vendored
Normal file
291
vendor/golang.org/x/text/collate/table_test.go
generated
vendored
Normal file
|
@ -0,0 +1,291 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/collate/build"
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
type ColElems []Weights
|
||||
|
||||
type input struct {
|
||||
str string
|
||||
ces [][]int
|
||||
}
|
||||
|
||||
type check struct {
|
||||
in string
|
||||
n int
|
||||
out ColElems
|
||||
}
|
||||
|
||||
type tableTest struct {
|
||||
in []input
|
||||
chk []check
|
||||
}
|
||||
|
||||
func w(ce ...int) Weights {
|
||||
return W(ce...)
|
||||
}
|
||||
|
||||
var defaults = w(0)
|
||||
|
||||
func pt(p, t int) []int {
|
||||
return []int{p, defaults.Secondary, t}
|
||||
}
|
||||
|
||||
func makeTable(in []input) (*Collator, error) {
|
||||
b := build.NewBuilder()
|
||||
for _, r := range in {
|
||||
if e := b.Add([]rune(r.str), r.ces, nil); e != nil {
|
||||
panic(e)
|
||||
}
|
||||
}
|
||||
t, err := b.Build()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewFromTable(t), nil
|
||||
}
|
||||
|
||||
// modSeq holds a seqeunce of modifiers in increasing order of CCC long enough
|
||||
// to cause a segment overflow if not handled correctly. The last rune in this
|
||||
// list has a CCC of 214.
|
||||
var modSeq = []rune{
|
||||
0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BB,
|
||||
0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0xFB1E, 0x064B, 0x064C, 0x064D, 0x064E,
|
||||
0x064F, 0x0650, 0x0651, 0x0652, 0x0670, 0x0711, 0x0C55, 0x0C56, 0x0E38, 0x0E48,
|
||||
0x0EB8, 0x0EC8, 0x0F71, 0x0F72, 0x0F74, 0x0321, 0x1DCE,
|
||||
}
|
||||
|
||||
var mods []input
|
||||
var modW = func() ColElems {
|
||||
ws := ColElems{}
|
||||
for _, r := range modSeq {
|
||||
rune := norm.NFC.PropertiesString(string(r))
|
||||
ws = append(ws, w(0, int(rune.CCC())))
|
||||
mods = append(mods, input{string(r), [][]int{{0, int(rune.CCC())}}})
|
||||
}
|
||||
return ws
|
||||
}()
|
||||
|
||||
var appendNextTests = []tableTest{
|
||||
{ // test getWeights
|
||||
[]input{
|
||||
{"a", [][]int{{100}}},
|
||||
{"b", [][]int{{105}}},
|
||||
{"c", [][]int{{110}}},
|
||||
{"ß", [][]int{{120}}},
|
||||
},
|
||||
[]check{
|
||||
{"a", 1, ColElems{w(100)}},
|
||||
{"b", 1, ColElems{w(105)}},
|
||||
{"c", 1, ColElems{w(110)}},
|
||||
{"d", 1, ColElems{w(0x50064)}},
|
||||
{"ab", 1, ColElems{w(100)}},
|
||||
{"bc", 1, ColElems{w(105)}},
|
||||
{"dd", 1, ColElems{w(0x50064)}},
|
||||
{"ß", 2, ColElems{w(120)}},
|
||||
},
|
||||
},
|
||||
{ // test expansion
|
||||
[]input{
|
||||
{"u", [][]int{{100}}},
|
||||
{"U", [][]int{{100}, {0, 25}}},
|
||||
{"w", [][]int{{100}, {100}}},
|
||||
{"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}},
|
||||
},
|
||||
[]check{
|
||||
{"u", 1, ColElems{w(100)}},
|
||||
{"U", 1, ColElems{w(100), w(0, 25)}},
|
||||
{"w", 1, ColElems{w(100), w(100)}},
|
||||
{"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}},
|
||||
},
|
||||
},
|
||||
{ // test decompose
|
||||
[]input{
|
||||
{"D", [][]int{pt(104, 8)}},
|
||||
{"z", [][]int{pt(130, 8)}},
|
||||
{"\u030C", [][]int{{0, 40}}}, // Caron
|
||||
{"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron
|
||||
},
|
||||
[]check{
|
||||
{"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
|
||||
},
|
||||
},
|
||||
{ // test basic contraction
|
||||
[]input{
|
||||
{"a", [][]int{{100}}},
|
||||
{"ab", [][]int{{101}}},
|
||||
{"aab", [][]int{{101}, {101}}},
|
||||
{"abc", [][]int{{102}}},
|
||||
{"b", [][]int{{200}}},
|
||||
{"c", [][]int{{300}}},
|
||||
{"d", [][]int{{400}}},
|
||||
},
|
||||
[]check{
|
||||
{"a", 1, ColElems{w(100)}},
|
||||
{"aa", 1, ColElems{w(100)}},
|
||||
{"aac", 1, ColElems{w(100)}},
|
||||
{"d", 1, ColElems{w(400)}},
|
||||
{"ab", 2, ColElems{w(101)}},
|
||||
{"abb", 2, ColElems{w(101)}},
|
||||
{"aab", 3, ColElems{w(101), w(101)}},
|
||||
{"aaba", 3, ColElems{w(101), w(101)}},
|
||||
{"abc", 3, ColElems{w(102)}},
|
||||
{"abcd", 3, ColElems{w(102)}},
|
||||
},
|
||||
},
|
||||
{ // test discontinuous contraction
|
||||
append(mods, []input{
|
||||
// modifiers; secondary weight equals ccc
|
||||
{"\u0316", [][]int{{0, 220}}},
|
||||
{"\u0317", [][]int{{0, 220}, {0, 220}}},
|
||||
{"\u302D", [][]int{{0, 222}}},
|
||||
{"\u302E", [][]int{{0, 225}}}, // used as starter
|
||||
{"\u302F", [][]int{{0, 224}}}, // used as starter
|
||||
{"\u18A9", [][]int{{0, 228}}},
|
||||
{"\u0300", [][]int{{0, 230}}},
|
||||
{"\u0301", [][]int{{0, 230}}},
|
||||
{"\u0315", [][]int{{0, 232}}},
|
||||
{"\u031A", [][]int{{0, 232}}},
|
||||
{"\u035C", [][]int{{0, 233}}},
|
||||
{"\u035F", [][]int{{0, 233}}},
|
||||
{"\u035D", [][]int{{0, 234}}},
|
||||
{"\u035E", [][]int{{0, 234}}},
|
||||
{"\u0345", [][]int{{0, 240}}},
|
||||
|
||||
// starters
|
||||
{"a", [][]int{{100}}},
|
||||
{"b", [][]int{{200}}},
|
||||
{"c", [][]int{{300}}},
|
||||
{"\u03B1", [][]int{{900}}},
|
||||
{"\x01", [][]int{{0, 0, 0, 0}}},
|
||||
|
||||
// contractions
|
||||
{"a\u0300", [][]int{{101}}},
|
||||
{"a\u0301", [][]int{{102}}},
|
||||
{"a\u035E", [][]int{{110}}},
|
||||
{"a\u035Eb\u035E", [][]int{{115}}},
|
||||
{"ac\u035Eaca\u035E", [][]int{{116}}},
|
||||
{"a\u035Db\u035D", [][]int{{117}}},
|
||||
{"a\u0301\u035Db", [][]int{{120}}},
|
||||
{"a\u0301\u035F", [][]int{{121}}},
|
||||
{"a\u0301\u035Fb", [][]int{{119}}},
|
||||
{"\u03B1\u0345", [][]int{{901}, {902}}},
|
||||
{"\u302E\u302F", [][]int{{0, 131}, {0, 131}}},
|
||||
{"\u302F\u18A9", [][]int{{0, 130}}},
|
||||
}...),
|
||||
[]check{
|
||||
{"a\x01\u0300", 1, ColElems{w(100)}},
|
||||
{"ab", 1, ColElems{w(100)}}, // closing segment
|
||||
{"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}}, // closing segment
|
||||
{"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}}, // no closing segment
|
||||
{"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
|
||||
{"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
|
||||
|
||||
{"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}}, // closing segment
|
||||
{"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}}, // no closing segment
|
||||
{"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
|
||||
{"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
|
||||
|
||||
// match blocked by modifier with same ccc
|
||||
{"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}},
|
||||
|
||||
// multiple gaps
|
||||
{"a\u0301\u035Db", 6, ColElems{w(120)}},
|
||||
{"a\u0301\u035F", 5, ColElems{w(121)}},
|
||||
{"a\u0301\u035Fb", 6, ColElems{w(119)}},
|
||||
{"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}},
|
||||
{"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}},
|
||||
{"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
||||
{"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
|
||||
|
||||
// handling of segment overflow
|
||||
{ // just fits within segment
|
||||
"a" + string(modSeq[:30]) + "\u0301",
|
||||
3 + len(string(modSeq[:30])),
|
||||
append(ColElems{w(102)}, modW[:30]...),
|
||||
},
|
||||
{"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow
|
||||
{"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}},
|
||||
{ // just fits within segment with two interstitial runes
|
||||
"a" + string(modSeq[:28]) + "\u0301\u0315\u035F",
|
||||
7 + len(string(modSeq[:28])),
|
||||
append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)),
|
||||
},
|
||||
{ // second half does not fit within segment
|
||||
"a" + string(modSeq[:29]) + "\u0301\u0315\u035F",
|
||||
3 + len(string(modSeq[:29])),
|
||||
append(ColElems{w(102)}, modW[:29]...),
|
||||
},
|
||||
|
||||
// discontinuity can only occur in last normalization segment
|
||||
{"a\u035Eb\u035E", 6, ColElems{w(115)}},
|
||||
{"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}},
|
||||
{"a\u035Db\u035D", 6, ColElems{w(117)}},
|
||||
{"a\u0316\u035Db\u035D", 1, ColElems{w(100)}},
|
||||
{"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}},
|
||||
{"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}},
|
||||
{"ac\u035Eaca\u035E", 9, ColElems{w(116)}},
|
||||
{"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}},
|
||||
{"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}},
|
||||
|
||||
// expanding contraction
|
||||
{"\u03B1\u0345", 4, ColElems{w(901), w(902)}},
|
||||
|
||||
// Theoretical possibilities
|
||||
// contraction within a gap
|
||||
{"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
|
||||
// expansion within a gap
|
||||
{"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
|
||||
// repeating CCC blocks last modifier
|
||||
{"a\u302E\u302F\u0301", 1, ColElems{w(100)}},
|
||||
// The trailing combining characters (with lower CCC) should block the first one.
|
||||
// TODO: make the following pass.
|
||||
// {"a\u035E\u0316\u0316", 1, ColElems{w(100)}},
|
||||
{"a\u035F\u035Eb", 5, ColElems{w(110), w(0, 233)}},
|
||||
// Last combiner should match after normalization.
|
||||
// TODO: make the following pass.
|
||||
// {"a\u035D\u0301", 3, ColElems{w(102), w(0, 234)}},
|
||||
// The first combiner is blocking the second one as they have the same CCC.
|
||||
{"a\u035D\u035Eb", 1, ColElems{w(100)}},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func TestAppendNext(t *testing.T) {
|
||||
for i, tt := range appendNextTests {
|
||||
c, err := makeTable(tt.in)
|
||||
if err != nil {
|
||||
t.Errorf("%d: error creating table: %v", i, err)
|
||||
continue
|
||||
}
|
||||
for j, chk := range tt.chk {
|
||||
ws, n := c.t.AppendNext(nil, []byte(chk.in))
|
||||
if n != chk.n {
|
||||
t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n)
|
||||
}
|
||||
out := convertFromWeights(chk.out)
|
||||
if len(ws) != len(out) {
|
||||
t.Errorf("%d:%d: len(ws) was %d; want %d (%X vs %X)\n%X", i, j, len(ws), len(out), ws, out, chk.in)
|
||||
continue
|
||||
}
|
||||
for k, w := range ws {
|
||||
w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
|
||||
if w != out[k] {
|
||||
t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
73789
vendor/golang.org/x/text/collate/tables.go
generated
vendored
Normal file
73789
vendor/golang.org/x/text/collate/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
7
vendor/golang.org/x/text/collate/tools/colcmp/Makefile
generated
vendored
Normal file
7
vendor/golang.org/x/text/collate/tools/colcmp/Makefile
generated
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Copyright 2012 The Go Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
chars:
|
||||
go run ../../maketables.go -tables=chars -package=main > chars.go
|
||||
gofmt -w -s chars.go
|
1156
vendor/golang.org/x/text/collate/tools/colcmp/chars.go
generated
vendored
Normal file
1156
vendor/golang.org/x/text/collate/tools/colcmp/chars.go
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
97
vendor/golang.org/x/text/collate/tools/colcmp/col.go
generated
vendored
Normal file
97
vendor/golang.org/x/text/collate/tools/colcmp/col.go
generated
vendored
Normal file
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"unicode/utf16"
|
||||
|
||||
"golang.org/x/text/collate"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// Input holds an input string in both UTF-8 and UTF-16 format.
|
||||
type Input struct {
|
||||
index int // used for restoring to original random order
|
||||
UTF8 []byte
|
||||
UTF16 []uint16
|
||||
key []byte // used for sorting
|
||||
}
|
||||
|
||||
func (i Input) String() string {
|
||||
return string(i.UTF8)
|
||||
}
|
||||
|
||||
func makeInput(s8 []byte, s16 []uint16) Input {
|
||||
return Input{UTF8: s8, UTF16: s16}
|
||||
}
|
||||
|
||||
func makeInputString(s string) Input {
|
||||
return Input{
|
||||
UTF8: []byte(s),
|
||||
UTF16: utf16.Encode([]rune(s)),
|
||||
}
|
||||
}
|
||||
|
||||
// Collator is an interface for architecture-specific implementations of collation.
|
||||
type Collator interface {
|
||||
// Key generates a sort key for the given input. Implemenations
|
||||
// may return nil if a collator does not support sort keys.
|
||||
Key(s Input) []byte
|
||||
|
||||
// Compare returns -1 if a < b, 1 if a > b and 0 if a == b.
|
||||
Compare(a, b Input) int
|
||||
}
|
||||
|
||||
// CollatorFactory creates a Collator for a given language tag.
|
||||
type CollatorFactory struct {
|
||||
name string
|
||||
makeFn func(tag string) (Collator, error)
|
||||
description string
|
||||
}
|
||||
|
||||
var collators = []CollatorFactory{}
|
||||
|
||||
// AddFactory registers f as a factory for an implementation of Collator.
|
||||
func AddFactory(f CollatorFactory) {
|
||||
collators = append(collators, f)
|
||||
}
|
||||
|
||||
func getCollator(name, locale string) Collator {
|
||||
for _, f := range collators {
|
||||
if f.name == name {
|
||||
col, err := f.makeFn(locale)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
return col
|
||||
}
|
||||
}
|
||||
log.Fatalf("collator of type %q not found", name)
|
||||
return nil
|
||||
}
|
||||
|
||||
// goCollator is an implemention of Collator using go's own collator.
|
||||
type goCollator struct {
|
||||
c *collate.Collator
|
||||
buf collate.Buffer
|
||||
}
|
||||
|
||||
func init() {
|
||||
AddFactory(CollatorFactory{"go", newGoCollator, "Go's native collator implementation."})
|
||||
}
|
||||
|
||||
func newGoCollator(loc string) (Collator, error) {
|
||||
c := &goCollator{c: collate.New(language.Make(loc))}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *goCollator) Key(b Input) []byte {
|
||||
return c.c.Key(&c.buf, b.UTF8)
|
||||
}
|
||||
|
||||
func (c *goCollator) Compare(a, b Input) int {
|
||||
return c.c.Compare(a.UTF8, b.UTF8)
|
||||
}
|
529
vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go
generated
vendored
Normal file
529
vendor/golang.org/x/text/collate/tools/colcmp/colcmp.go
generated
vendored
Normal file
|
@ -0,0 +1,529 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main // import "golang.org/x/text/collate/tools/colcmp"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"runtime/pprof"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
var (
|
||||
doNorm = flag.Bool("norm", false, "normalize input strings")
|
||||
cases = flag.Bool("case", false, "generate case variants")
|
||||
verbose = flag.Bool("verbose", false, "print results")
|
||||
debug = flag.Bool("debug", false, "output debug information")
|
||||
locales = flag.String("locale", "en_US", "the locale to use. May be a comma-separated list for some commands.")
|
||||
col = flag.String("col", "go", "collator to test")
|
||||
gold = flag.String("gold", "go", "collator used as the gold standard")
|
||||
usecmp = flag.Bool("usecmp", false,
|
||||
`use comparison instead of sort keys when sorting. Must be "test", "gold" or "both"`)
|
||||
cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
|
||||
exclude = flag.String("exclude", "", "exclude errors that contain any of the characters")
|
||||
limit = flag.Int("limit", 5000000, "maximum number of samples to generate for one run")
|
||||
)
|
||||
|
||||
func failOnError(err error) {
|
||||
if err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Test holds test data for testing a locale-collator pair.
|
||||
// Test also provides functionality that is commonly used by the various commands.
|
||||
type Test struct {
|
||||
ctxt *Context
|
||||
Name string
|
||||
Locale string
|
||||
ColName string
|
||||
|
||||
Col Collator
|
||||
UseCompare bool
|
||||
|
||||
Input []Input
|
||||
Duration time.Duration
|
||||
|
||||
start time.Time
|
||||
msg string
|
||||
count int
|
||||
}
|
||||
|
||||
func (t *Test) clear() {
|
||||
t.Col = nil
|
||||
t.Input = nil
|
||||
}
|
||||
|
||||
const (
|
||||
msgGeneratingInput = "generating input"
|
||||
msgGeneratingKeys = "generating keys"
|
||||
msgSorting = "sorting"
|
||||
)
|
||||
|
||||
var lastLen = 0
|
||||
|
||||
func (t *Test) SetStatus(msg string) {
|
||||
if *debug || *verbose {
|
||||
fmt.Printf("%s: %s...\n", t.Name, msg)
|
||||
} else if t.ctxt.out != nil {
|
||||
fmt.Fprint(t.ctxt.out, strings.Repeat(" ", lastLen))
|
||||
fmt.Fprint(t.ctxt.out, strings.Repeat("\b", lastLen))
|
||||
fmt.Fprint(t.ctxt.out, msg, "...")
|
||||
lastLen = len(msg) + 3
|
||||
fmt.Fprint(t.ctxt.out, strings.Repeat("\b", lastLen))
|
||||
}
|
||||
}
|
||||
|
||||
// Start is used by commands to signal the start of an operation.
|
||||
func (t *Test) Start(msg string) {
|
||||
t.SetStatus(msg)
|
||||
t.count = 0
|
||||
t.msg = msg
|
||||
t.start = time.Now()
|
||||
}
|
||||
|
||||
// Stop is used by commands to signal the end of an operation.
|
||||
func (t *Test) Stop() (time.Duration, int) {
|
||||
d := time.Now().Sub(t.start)
|
||||
t.Duration += d
|
||||
if *debug || *verbose {
|
||||
fmt.Printf("%s: %s done. (%.3fs /%dK ops)\n", t.Name, t.msg, d.Seconds(), t.count/1000)
|
||||
}
|
||||
return d, t.count
|
||||
}
|
||||
|
||||
// generateKeys generates sort keys for all the inputs.
|
||||
func (t *Test) generateKeys() {
|
||||
for i, s := range t.Input {
|
||||
b := t.Col.Key(s)
|
||||
t.Input[i].key = b
|
||||
if *debug {
|
||||
fmt.Printf("%s (%X): %X\n", string(s.UTF8), s.UTF16, b)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort sorts the inputs. It generates sort keys if this is required by the
|
||||
// chosen sort method.
|
||||
func (t *Test) Sort() (tkey, tsort time.Duration, nkey, nsort int) {
|
||||
if *cpuprofile != "" {
|
||||
f, err := os.Create(*cpuprofile)
|
||||
failOnError(err)
|
||||
pprof.StartCPUProfile(f)
|
||||
defer pprof.StopCPUProfile()
|
||||
}
|
||||
if t.UseCompare || t.Col.Key(t.Input[0]) == nil {
|
||||
t.Start(msgSorting)
|
||||
sort.Sort(&testCompare{*t})
|
||||
tsort, nsort = t.Stop()
|
||||
} else {
|
||||
t.Start(msgGeneratingKeys)
|
||||
t.generateKeys()
|
||||
t.count = len(t.Input)
|
||||
tkey, nkey = t.Stop()
|
||||
t.Start(msgSorting)
|
||||
sort.Sort(t)
|
||||
tsort, nsort = t.Stop()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (t *Test) Swap(a, b int) {
|
||||
t.Input[a], t.Input[b] = t.Input[b], t.Input[a]
|
||||
}
|
||||
|
||||
func (t *Test) Less(a, b int) bool {
|
||||
t.count++
|
||||
return bytes.Compare(t.Input[a].key, t.Input[b].key) == -1
|
||||
}
|
||||
|
||||
func (t Test) Len() int {
|
||||
return len(t.Input)
|
||||
}
|
||||
|
||||
type testCompare struct {
|
||||
Test
|
||||
}
|
||||
|
||||
func (t *testCompare) Less(a, b int) bool {
|
||||
t.count++
|
||||
return t.Col.Compare(t.Input[a], t.Input[b]) == -1
|
||||
}
|
||||
|
||||
type testRestore struct {
|
||||
Test
|
||||
}
|
||||
|
||||
func (t *testRestore) Less(a, b int) bool {
|
||||
return t.Input[a].index < t.Input[b].index
|
||||
}
|
||||
|
||||
// GenerateInput generates input phrases for the locale tested by t.
|
||||
func (t *Test) GenerateInput() {
|
||||
t.Input = nil
|
||||
if t.ctxt.lastLocale != t.Locale {
|
||||
gen := phraseGenerator{}
|
||||
gen.init(t.Locale)
|
||||
t.SetStatus(msgGeneratingInput)
|
||||
t.ctxt.lastInput = nil // allow the previous value to be garbage collected.
|
||||
t.Input = gen.generate(*doNorm)
|
||||
t.ctxt.lastInput = t.Input
|
||||
t.ctxt.lastLocale = t.Locale
|
||||
} else {
|
||||
t.Input = t.ctxt.lastInput
|
||||
for i := range t.Input {
|
||||
t.Input[i].key = nil
|
||||
}
|
||||
sort.Sort(&testRestore{*t})
|
||||
}
|
||||
}
|
||||
|
||||
// Context holds all tests and settings translated from command line options.
|
||||
type Context struct {
|
||||
test []*Test
|
||||
last *Test
|
||||
|
||||
lastLocale string
|
||||
lastInput []Input
|
||||
|
||||
out io.Writer
|
||||
}
|
||||
|
||||
func (ts *Context) Printf(format string, a ...interface{}) {
|
||||
ts.assertBuf()
|
||||
fmt.Fprintf(ts.out, format, a...)
|
||||
}
|
||||
|
||||
func (ts *Context) Print(a ...interface{}) {
|
||||
ts.assertBuf()
|
||||
fmt.Fprint(ts.out, a...)
|
||||
}
|
||||
|
||||
// assertBuf sets up an io.Writer for output, if it doesn't already exist.
|
||||
// In debug and verbose mode, output is buffered so that the regular output
|
||||
// will not interfere with the additional output. Otherwise, output is
|
||||
// written directly to stdout for a more responsive feel.
|
||||
func (ts *Context) assertBuf() {
|
||||
if ts.out != nil {
|
||||
return
|
||||
}
|
||||
if *debug || *verbose {
|
||||
ts.out = &bytes.Buffer{}
|
||||
} else {
|
||||
ts.out = os.Stdout
|
||||
}
|
||||
}
|
||||
|
||||
// flush flushes the contents of ts.out to stdout, if it is not stdout already.
|
||||
func (ts *Context) flush() {
|
||||
if ts.out != nil {
|
||||
if _, ok := ts.out.(io.ReadCloser); !ok {
|
||||
io.Copy(os.Stdout, ts.out.(io.Reader))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseTests creates all tests from command lines and returns
|
||||
// a Context to hold them.
|
||||
func parseTests() *Context {
|
||||
ctxt := &Context{}
|
||||
colls := strings.Split(*col, ",")
|
||||
for _, loc := range strings.Split(*locales, ",") {
|
||||
loc = strings.TrimSpace(loc)
|
||||
for _, name := range colls {
|
||||
name = strings.TrimSpace(name)
|
||||
col := getCollator(name, loc)
|
||||
ctxt.test = append(ctxt.test, &Test{
|
||||
ctxt: ctxt,
|
||||
Locale: loc,
|
||||
ColName: name,
|
||||
UseCompare: *usecmp,
|
||||
Col: col,
|
||||
})
|
||||
}
|
||||
}
|
||||
return ctxt
|
||||
}
|
||||
|
||||
func (c *Context) Len() int {
|
||||
return len(c.test)
|
||||
}
|
||||
|
||||
func (c *Context) Test(i int) *Test {
|
||||
if c.last != nil {
|
||||
c.last.clear()
|
||||
}
|
||||
c.last = c.test[i]
|
||||
return c.last
|
||||
}
|
||||
|
||||
func parseInput(args []string) []Input {
|
||||
input := []Input{}
|
||||
for _, s := range args {
|
||||
rs := []rune{}
|
||||
for len(s) > 0 {
|
||||
var r rune
|
||||
r, _, s, _ = strconv.UnquoteChar(s, '\'')
|
||||
rs = append(rs, r)
|
||||
}
|
||||
s = string(rs)
|
||||
if *doNorm {
|
||||
s = norm.NFD.String(s)
|
||||
}
|
||||
input = append(input, makeInputString(s))
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
// A Command is an implementation of a colcmp command.
|
||||
type Command struct {
|
||||
Run func(cmd *Context, args []string)
|
||||
Usage string
|
||||
Short string
|
||||
Long string
|
||||
}
|
||||
|
||||
func (cmd Command) Name() string {
|
||||
return strings.SplitN(cmd.Usage, " ", 2)[0]
|
||||
}
|
||||
|
||||
var commands = []*Command{
|
||||
cmdSort,
|
||||
cmdBench,
|
||||
cmdRegress,
|
||||
}
|
||||
|
||||
const sortHelp = `
|
||||
Sort sorts a given list of strings. Strings are separated by whitespace.
|
||||
`
|
||||
|
||||
var cmdSort = &Command{
|
||||
Run: runSort,
|
||||
Usage: "sort <string>*",
|
||||
Short: "sort a given list of strings",
|
||||
Long: sortHelp,
|
||||
}
|
||||
|
||||
func runSort(ctxt *Context, args []string) {
|
||||
input := parseInput(args)
|
||||
if len(input) == 0 {
|
||||
log.Fatalf("Nothing to sort.")
|
||||
}
|
||||
if ctxt.Len() > 1 {
|
||||
ctxt.Print("COLL LOCALE RESULT\n")
|
||||
}
|
||||
for i := 0; i < ctxt.Len(); i++ {
|
||||
t := ctxt.Test(i)
|
||||
t.Input = append(t.Input, input...)
|
||||
t.Sort()
|
||||
if ctxt.Len() > 1 {
|
||||
ctxt.Printf("%-5s %-5s ", t.ColName, t.Locale)
|
||||
}
|
||||
for _, s := range t.Input {
|
||||
ctxt.Print(string(s.UTF8), " ")
|
||||
}
|
||||
ctxt.Print("\n")
|
||||
}
|
||||
}
|
||||
|
||||
const benchHelp = `
|
||||
Bench runs a benchmark for the given list of collator implementations.
|
||||
If no collator implementations are given, the go collator will be used.
|
||||
`
|
||||
|
||||
var cmdBench = &Command{
|
||||
Run: runBench,
|
||||
Usage: "bench",
|
||||
Short: "benchmark a given list of collator implementations",
|
||||
Long: benchHelp,
|
||||
}
|
||||
|
||||
func runBench(ctxt *Context, args []string) {
|
||||
ctxt.Printf("%-7s %-5s %-6s %-24s %-24s %-5s %s\n", "LOCALE", "COLL", "N", "KEYS", "SORT", "AVGLN", "TOTAL")
|
||||
for i := 0; i < ctxt.Len(); i++ {
|
||||
t := ctxt.Test(i)
|
||||
ctxt.Printf("%-7s %-5s ", t.Locale, t.ColName)
|
||||
t.GenerateInput()
|
||||
ctxt.Printf("%-6s ", fmt.Sprintf("%dK", t.Len()/1000))
|
||||
tkey, tsort, nkey, nsort := t.Sort()
|
||||
p := func(dur time.Duration, n int) {
|
||||
s := ""
|
||||
if dur > 0 {
|
||||
s = fmt.Sprintf("%6.3fs ", dur.Seconds())
|
||||
if n > 0 {
|
||||
s += fmt.Sprintf("%15s", fmt.Sprintf("(%4.2f ns/op)", float64(dur)/float64(n)))
|
||||
}
|
||||
}
|
||||
ctxt.Printf("%-24s ", s)
|
||||
}
|
||||
p(tkey, nkey)
|
||||
p(tsort, nsort)
|
||||
|
||||
total := 0
|
||||
for _, s := range t.Input {
|
||||
total += len(s.key)
|
||||
}
|
||||
ctxt.Printf("%-5d ", total/t.Len())
|
||||
ctxt.Printf("%6.3fs\n", t.Duration.Seconds())
|
||||
if *debug {
|
||||
for _, s := range t.Input {
|
||||
fmt.Print(string(s.UTF8), " ")
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const regressHelp = `
|
||||
Regress runs a monkey test by comparing the results of randomly generated tests
|
||||
between two implementations of a collator. The user may optionally pass a list
|
||||
of strings to regress against instead of the default test set.
|
||||
`
|
||||
|
||||
var cmdRegress = &Command{
|
||||
Run: runRegress,
|
||||
Usage: "regress -gold=<col> -test=<col> [string]*",
|
||||
Short: "run a monkey test between two collators",
|
||||
Long: regressHelp,
|
||||
}
|
||||
|
||||
const failedKeyCompare = `
|
||||
%s:%d: incorrect comparison result for input:
|
||||
a: %q (%.4X)
|
||||
key: %s
|
||||
b: %q (%.4X)
|
||||
key: %s
|
||||
Compare(a, b) = %d; want %d.
|
||||
|
||||
gold keys:
|
||||
a: %s
|
||||
b: %s
|
||||
`
|
||||
|
||||
const failedCompare = `
|
||||
%s:%d: incorrect comparison result for input:
|
||||
a: %q (%.4X)
|
||||
b: %q (%.4X)
|
||||
Compare(a, b) = %d; want %d.
|
||||
`
|
||||
|
||||
func keyStr(b []byte) string {
|
||||
buf := &bytes.Buffer{}
|
||||
for _, v := range b {
|
||||
fmt.Fprintf(buf, "%.2X ", v)
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
func runRegress(ctxt *Context, args []string) {
|
||||
input := parseInput(args)
|
||||
for i := 0; i < ctxt.Len(); i++ {
|
||||
t := ctxt.Test(i)
|
||||
if len(input) > 0 {
|
||||
t.Input = append(t.Input, input...)
|
||||
} else {
|
||||
t.GenerateInput()
|
||||
}
|
||||
t.Sort()
|
||||
count := 0
|
||||
gold := getCollator(*gold, t.Locale)
|
||||
for i := 1; i < len(t.Input); i++ {
|
||||
ia := t.Input[i-1]
|
||||
ib := t.Input[i]
|
||||
if bytes.IndexAny(ib.UTF8, *exclude) != -1 {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if bytes.IndexAny(ia.UTF8, *exclude) != -1 {
|
||||
continue
|
||||
}
|
||||
goldCmp := gold.Compare(ia, ib)
|
||||
if cmp := bytes.Compare(ia.key, ib.key); cmp != goldCmp {
|
||||
count++
|
||||
a := string(ia.UTF8)
|
||||
b := string(ib.UTF8)
|
||||
fmt.Printf(failedKeyCompare, t.Locale, i-1, a, []rune(a), keyStr(ia.key), b, []rune(b), keyStr(ib.key), cmp, goldCmp, keyStr(gold.Key(ia)), keyStr(gold.Key(ib)))
|
||||
} else if cmp := t.Col.Compare(ia, ib); cmp != goldCmp {
|
||||
count++
|
||||
a := string(ia.UTF8)
|
||||
b := string(ib.UTF8)
|
||||
fmt.Printf(failedCompare, t.Locale, i-1, a, []rune(a), b, []rune(b), cmp, goldCmp)
|
||||
}
|
||||
}
|
||||
if count > 0 {
|
||||
ctxt.Printf("Found %d inconsistencies in %d entries.\n", count, t.Len()-1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const helpTemplate = `
|
||||
colcmp is a tool for testing and benchmarking collation
|
||||
|
||||
Usage: colcmp command [arguments]
|
||||
|
||||
The commands are:
|
||||
{{range .}}
|
||||
{{.Name | printf "%-11s"}} {{.Short}}{{end}}
|
||||
|
||||
Use "col help [topic]" for more information about that topic.
|
||||
`
|
||||
|
||||
const detailedHelpTemplate = `
|
||||
Usage: colcmp {{.Usage}}
|
||||
|
||||
{{.Long | trim}}
|
||||
`
|
||||
|
||||
func runHelp(args []string) {
|
||||
t := template.New("help")
|
||||
t.Funcs(template.FuncMap{"trim": strings.TrimSpace})
|
||||
if len(args) < 1 {
|
||||
template.Must(t.Parse(helpTemplate))
|
||||
failOnError(t.Execute(os.Stderr, &commands))
|
||||
} else {
|
||||
for _, cmd := range commands {
|
||||
if cmd.Name() == args[0] {
|
||||
template.Must(t.Parse(detailedHelpTemplate))
|
||||
failOnError(t.Execute(os.Stderr, cmd))
|
||||
os.Exit(0)
|
||||
}
|
||||
}
|
||||
log.Fatalf("Unknown command %q. Run 'colcmp help'.", args[0])
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
log.SetFlags(0)
|
||||
|
||||
ctxt := parseTests()
|
||||
|
||||
if flag.NArg() < 1 {
|
||||
runHelp(nil)
|
||||
}
|
||||
args := flag.Args()[1:]
|
||||
if flag.Arg(0) == "help" {
|
||||
runHelp(args)
|
||||
}
|
||||
for _, cmd := range commands {
|
||||
if cmd.Name() == flag.Arg(0) {
|
||||
cmd.Run(ctxt, args)
|
||||
ctxt.flush()
|
||||
return
|
||||
}
|
||||
}
|
||||
runHelp(flag.Args())
|
||||
}
|
111
vendor/golang.org/x/text/collate/tools/colcmp/darwin.go
generated
vendored
Normal file
111
vendor/golang.org/x/text/collate/tools/colcmp/darwin.go
generated
vendored
Normal file
|
@ -0,0 +1,111 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build darwin
|
||||
|
||||
package main
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -framework CoreFoundation
|
||||
#include <CoreFoundation/CFBase.h>
|
||||
#include <CoreFoundation/CoreFoundation.h>
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func init() {
|
||||
AddFactory(CollatorFactory{"osx", newOSX16Collator,
|
||||
"OS X/Darwin collator, using native strings."})
|
||||
AddFactory(CollatorFactory{"osx8", newOSX8Collator,
|
||||
"OS X/Darwin collator for UTF-8."})
|
||||
}
|
||||
|
||||
func osxUInt8P(s []byte) *C.UInt8 {
|
||||
return (*C.UInt8)(unsafe.Pointer(&s[0]))
|
||||
}
|
||||
|
||||
func osxCharP(s []uint16) *C.UniChar {
|
||||
return (*C.UniChar)(unsafe.Pointer(&s[0]))
|
||||
}
|
||||
|
||||
// osxCollator implements an Collator based on OS X's CoreFoundation.
|
||||
type osxCollator struct {
|
||||
loc C.CFLocaleRef
|
||||
opt C.CFStringCompareFlags
|
||||
}
|
||||
|
||||
func (c *osxCollator) init(locale string) {
|
||||
l := C.CFStringCreateWithBytes(
|
||||
C.kCFAllocatorDefault,
|
||||
osxUInt8P([]byte(locale)),
|
||||
C.CFIndex(len(locale)),
|
||||
C.kCFStringEncodingUTF8,
|
||||
C.Boolean(0),
|
||||
)
|
||||
c.loc = C.CFLocaleCreate(C.kCFAllocatorDefault, l)
|
||||
}
|
||||
|
||||
func newOSX8Collator(locale string) (Collator, error) {
|
||||
c := &osx8Collator{}
|
||||
c.init(locale)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func newOSX16Collator(locale string) (Collator, error) {
|
||||
c := &osx16Collator{}
|
||||
c.init(locale)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c osxCollator) Key(s Input) []byte {
|
||||
return nil // sort keys not supported by OS X CoreFoundation
|
||||
}
|
||||
|
||||
type osx8Collator struct {
|
||||
osxCollator
|
||||
}
|
||||
|
||||
type osx16Collator struct {
|
||||
osxCollator
|
||||
}
|
||||
|
||||
func (c osx16Collator) Compare(a, b Input) int {
|
||||
sa := C.CFStringCreateWithCharactersNoCopy(
|
||||
C.kCFAllocatorDefault,
|
||||
osxCharP(a.UTF16),
|
||||
C.CFIndex(len(a.UTF16)),
|
||||
C.kCFAllocatorDefault,
|
||||
)
|
||||
sb := C.CFStringCreateWithCharactersNoCopy(
|
||||
C.kCFAllocatorDefault,
|
||||
osxCharP(b.UTF16),
|
||||
C.CFIndex(len(b.UTF16)),
|
||||
C.kCFAllocatorDefault,
|
||||
)
|
||||
_range := C.CFRangeMake(0, C.CFStringGetLength(sa))
|
||||
return int(C.CFStringCompareWithOptionsAndLocale(sa, sb, _range, c.opt, c.loc))
|
||||
}
|
||||
|
||||
func (c osx8Collator) Compare(a, b Input) int {
|
||||
sa := C.CFStringCreateWithBytesNoCopy(
|
||||
C.kCFAllocatorDefault,
|
||||
osxUInt8P(a.UTF8),
|
||||
C.CFIndex(len(a.UTF8)),
|
||||
C.kCFStringEncodingUTF8,
|
||||
C.Boolean(0),
|
||||
C.kCFAllocatorDefault,
|
||||
)
|
||||
sb := C.CFStringCreateWithBytesNoCopy(
|
||||
C.kCFAllocatorDefault,
|
||||
osxUInt8P(b.UTF8),
|
||||
C.CFIndex(len(b.UTF8)),
|
||||
C.kCFStringEncodingUTF8,
|
||||
C.Boolean(0),
|
||||
C.kCFAllocatorDefault,
|
||||
)
|
||||
_range := C.CFRangeMake(0, C.CFStringGetLength(sa))
|
||||
return int(C.CFStringCompareWithOptionsAndLocale(sa, sb, _range, c.opt, c.loc))
|
||||
}
|
183
vendor/golang.org/x/text/collate/tools/colcmp/gen.go
generated
vendored
Normal file
183
vendor/golang.org/x/text/collate/tools/colcmp/gen.go
generated
vendored
Normal file
|
@ -0,0 +1,183 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// TODO: replace with functionality in language package.
|
||||
// parent computes the parent language for the given language.
|
||||
// It returns false if the parent is already root.
|
||||
func parent(locale string) (parent string, ok bool) {
|
||||
if locale == "und" {
|
||||
return "", false
|
||||
}
|
||||
if i := strings.LastIndex(locale, "-"); i != -1 {
|
||||
return locale[:i], true
|
||||
}
|
||||
return "und", true
|
||||
}
|
||||
|
||||
// rewriter is used to both unique strings and create variants of strings
|
||||
// to add to the test set.
|
||||
type rewriter struct {
|
||||
seen map[string]bool
|
||||
addCases bool
|
||||
}
|
||||
|
||||
func newRewriter() *rewriter {
|
||||
return &rewriter{
|
||||
seen: make(map[string]bool),
|
||||
}
|
||||
}
|
||||
|
||||
func (r *rewriter) insert(a []string, s string) []string {
|
||||
if !r.seen[s] {
|
||||
r.seen[s] = true
|
||||
a = append(a, s)
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
// rewrite takes a sequence of strings in, adds variants of the these strings
|
||||
// based on options and removes duplicates.
|
||||
func (r *rewriter) rewrite(ss []string) []string {
|
||||
ns := []string{}
|
||||
for _, s := range ss {
|
||||
ns = r.insert(ns, s)
|
||||
if r.addCases {
|
||||
rs := []rune(s)
|
||||
rn := rs[0]
|
||||
for c := unicode.SimpleFold(rn); c != rn; c = unicode.SimpleFold(c) {
|
||||
rs[0] = c
|
||||
ns = r.insert(ns, string(rs))
|
||||
}
|
||||
}
|
||||
}
|
||||
return ns
|
||||
}
|
||||
|
||||
// exemplarySet holds a parsed set of characters from the exemplarCharacters table.
|
||||
type exemplarySet struct {
|
||||
typ exemplarType
|
||||
set []string
|
||||
charIndex int // cumulative total of phrases, including this set
|
||||
}
|
||||
|
||||
type phraseGenerator struct {
|
||||
sets [exN]exemplarySet
|
||||
n int
|
||||
}
|
||||
|
||||
func (g *phraseGenerator) init(id string) {
|
||||
ec := exemplarCharacters
|
||||
loc := language.Make(id).String()
|
||||
// get sets for locale or parent locale if the set is not defined.
|
||||
for i := range g.sets {
|
||||
for p, ok := loc, true; ok; p, ok = parent(p) {
|
||||
if set, ok := ec[p]; ok && set[i] != "" {
|
||||
g.sets[i].set = strings.Split(set[i], " ")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
r := newRewriter()
|
||||
r.addCases = *cases
|
||||
for i := range g.sets {
|
||||
g.sets[i].set = r.rewrite(g.sets[i].set)
|
||||
}
|
||||
// compute indexes
|
||||
for i, set := range g.sets {
|
||||
g.n += len(set.set)
|
||||
g.sets[i].charIndex = g.n
|
||||
}
|
||||
}
|
||||
|
||||
// phrase returns the ith phrase, where i < g.n.
|
||||
func (g *phraseGenerator) phrase(i int) string {
|
||||
for _, set := range g.sets {
|
||||
if i < set.charIndex {
|
||||
return set.set[i-(set.charIndex-len(set.set))]
|
||||
}
|
||||
}
|
||||
panic("index out of range")
|
||||
}
|
||||
|
||||
// generate generates inputs by combining all pairs of examplar strings.
|
||||
// If doNorm is true, all input strings are normalized to NFC.
|
||||
// TODO: allow other variations, statistical models, and random
|
||||
// trailing sequences.
|
||||
func (g *phraseGenerator) generate(doNorm bool) []Input {
|
||||
const (
|
||||
M = 1024 * 1024
|
||||
buf8Size = 30 * M
|
||||
buf16Size = 10 * M
|
||||
)
|
||||
// TODO: use a better way to limit the input size.
|
||||
if sq := int(math.Sqrt(float64(*limit))); g.n > sq {
|
||||
g.n = sq
|
||||
}
|
||||
size := g.n * g.n
|
||||
a := make([]Input, 0, size)
|
||||
buf8 := make([]byte, 0, buf8Size)
|
||||
buf16 := make([]uint16, 0, buf16Size)
|
||||
|
||||
addInput := func(str string) {
|
||||
buf8 = buf8[len(buf8):]
|
||||
buf16 = buf16[len(buf16):]
|
||||
if len(str) > cap(buf8) {
|
||||
buf8 = make([]byte, 0, buf8Size)
|
||||
}
|
||||
if len(str) > cap(buf16) {
|
||||
buf16 = make([]uint16, 0, buf16Size)
|
||||
}
|
||||
if doNorm {
|
||||
buf8 = norm.NFD.AppendString(buf8, str)
|
||||
} else {
|
||||
buf8 = append(buf8, str...)
|
||||
}
|
||||
buf16 = appendUTF16(buf16, buf8)
|
||||
a = append(a, makeInput(buf8, buf16))
|
||||
}
|
||||
for i := 0; i < g.n; i++ {
|
||||
p1 := g.phrase(i)
|
||||
addInput(p1)
|
||||
for j := 0; j < g.n; j++ {
|
||||
p2 := g.phrase(j)
|
||||
addInput(p1 + p2)
|
||||
}
|
||||
}
|
||||
// permutate
|
||||
rnd := rand.New(rand.NewSource(int64(rand.Int())))
|
||||
for i := range a {
|
||||
j := i + rnd.Intn(len(a)-i)
|
||||
a[i], a[j] = a[j], a[i]
|
||||
a[i].index = i // allow restoring this order if input is used multiple times.
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func appendUTF16(buf []uint16, s []byte) []uint16 {
|
||||
for len(s) > 0 {
|
||||
r, sz := utf8.DecodeRune(s)
|
||||
s = s[sz:]
|
||||
r1, r2 := utf16.EncodeRune(r)
|
||||
if r1 != 0xFFFD {
|
||||
buf = append(buf, uint16(r1), uint16(r2))
|
||||
} else {
|
||||
buf = append(buf, uint16(r))
|
||||
}
|
||||
}
|
||||
return buf
|
||||
}
|
209
vendor/golang.org/x/text/collate/tools/colcmp/icu.go
generated
vendored
Normal file
209
vendor/golang.org/x/text/collate/tools/colcmp/icu.go
generated
vendored
Normal file
|
@ -0,0 +1,209 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build icu
|
||||
|
||||
package main
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -licui18n -licuuc
|
||||
#include <stdlib.h>
|
||||
#include <unicode/ucol.h>
|
||||
#include <unicode/uiter.h>
|
||||
#include <unicode/utypes.h>
|
||||
*/
|
||||
import "C"
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func init() {
|
||||
AddFactory(CollatorFactory{"icu", newUTF16,
|
||||
"Main ICU collator, using native strings."})
|
||||
AddFactory(CollatorFactory{"icu8", newUTF8iter,
|
||||
"ICU collator using ICU iterators to process UTF8."})
|
||||
AddFactory(CollatorFactory{"icu16", newUTF8conv,
|
||||
"ICU collation by first converting UTF8 to UTF16."})
|
||||
}
|
||||
|
||||
func icuCharP(s []byte) *C.char {
|
||||
return (*C.char)(unsafe.Pointer(&s[0]))
|
||||
}
|
||||
|
||||
func icuUInt8P(s []byte) *C.uint8_t {
|
||||
return (*C.uint8_t)(unsafe.Pointer(&s[0]))
|
||||
}
|
||||
|
||||
func icuUCharP(s []uint16) *C.UChar {
|
||||
return (*C.UChar)(unsafe.Pointer(&s[0]))
|
||||
}
|
||||
func icuULen(s []uint16) C.int32_t {
|
||||
return C.int32_t(len(s))
|
||||
}
|
||||
func icuSLen(s []byte) C.int32_t {
|
||||
return C.int32_t(len(s))
|
||||
}
|
||||
|
||||
// icuCollator implements a Collator based on ICU.
|
||||
type icuCollator struct {
|
||||
loc *C.char
|
||||
col *C.UCollator
|
||||
keyBuf []byte
|
||||
}
|
||||
|
||||
const growBufSize = 10 * 1024 * 1024
|
||||
|
||||
func (c *icuCollator) init(locale string) error {
|
||||
err := C.UErrorCode(0)
|
||||
c.loc = C.CString(locale)
|
||||
c.col = C.ucol_open(c.loc, &err)
|
||||
if err > 0 {
|
||||
return fmt.Errorf("failed opening collator for %q", locale)
|
||||
} else if err < 0 {
|
||||
loc := C.ucol_getLocaleByType(c.col, 0, &err)
|
||||
fmt, ok := map[int]string{
|
||||
-127: "warning: using default collator: %s",
|
||||
-128: "warning: using fallback collator: %s",
|
||||
}[int(err)]
|
||||
if ok {
|
||||
log.Printf(fmt, C.GoString(loc))
|
||||
}
|
||||
}
|
||||
c.keyBuf = make([]byte, 0, growBufSize)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *icuCollator) buf() (*C.uint8_t, C.int32_t) {
|
||||
if len(c.keyBuf) == cap(c.keyBuf) {
|
||||
c.keyBuf = make([]byte, 0, growBufSize)
|
||||
}
|
||||
b := c.keyBuf[len(c.keyBuf):cap(c.keyBuf)]
|
||||
return icuUInt8P(b), icuSLen(b)
|
||||
}
|
||||
|
||||
func (c *icuCollator) extendBuf(n C.int32_t) []byte {
|
||||
end := len(c.keyBuf) + int(n)
|
||||
if end > cap(c.keyBuf) {
|
||||
if len(c.keyBuf) == 0 {
|
||||
log.Fatalf("icuCollator: max string size exceeded: %v > %v", n, growBufSize)
|
||||
}
|
||||
c.keyBuf = make([]byte, 0, growBufSize)
|
||||
return nil
|
||||
}
|
||||
b := c.keyBuf[len(c.keyBuf):end]
|
||||
c.keyBuf = c.keyBuf[:end]
|
||||
return b
|
||||
}
|
||||
|
||||
func (c *icuCollator) Close() error {
|
||||
C.ucol_close(c.col)
|
||||
C.free(unsafe.Pointer(c.loc))
|
||||
return nil
|
||||
}
|
||||
|
||||
// icuUTF16 implements the Collator interface.
|
||||
type icuUTF16 struct {
|
||||
icuCollator
|
||||
}
|
||||
|
||||
func newUTF16(locale string) (Collator, error) {
|
||||
c := &icuUTF16{}
|
||||
return c, c.init(locale)
|
||||
}
|
||||
|
||||
func (c *icuUTF16) Compare(a, b Input) int {
|
||||
return int(C.ucol_strcoll(c.col, icuUCharP(a.UTF16), icuULen(a.UTF16), icuUCharP(b.UTF16), icuULen(b.UTF16)))
|
||||
}
|
||||
|
||||
func (c *icuUTF16) Key(s Input) []byte {
|
||||
bp, bn := c.buf()
|
||||
n := C.ucol_getSortKey(c.col, icuUCharP(s.UTF16), icuULen(s.UTF16), bp, bn)
|
||||
if b := c.extendBuf(n); b != nil {
|
||||
return b
|
||||
}
|
||||
return c.Key(s)
|
||||
}
|
||||
|
||||
// icuUTF8iter implements the Collator interface
|
||||
// This implementation wraps the UTF8 string in an iterator
|
||||
// which is passed to the collator.
|
||||
type icuUTF8iter struct {
|
||||
icuCollator
|
||||
a, b C.UCharIterator
|
||||
}
|
||||
|
||||
func newUTF8iter(locale string) (Collator, error) {
|
||||
c := &icuUTF8iter{}
|
||||
return c, c.init(locale)
|
||||
}
|
||||
|
||||
func (c *icuUTF8iter) Compare(a, b Input) int {
|
||||
err := C.UErrorCode(0)
|
||||
C.uiter_setUTF8(&c.a, icuCharP(a.UTF8), icuSLen(a.UTF8))
|
||||
C.uiter_setUTF8(&c.b, icuCharP(b.UTF8), icuSLen(b.UTF8))
|
||||
return int(C.ucol_strcollIter(c.col, &c.a, &c.b, &err))
|
||||
}
|
||||
|
||||
func (c *icuUTF8iter) Key(s Input) []byte {
|
||||
err := C.UErrorCode(0)
|
||||
state := [2]C.uint32_t{}
|
||||
C.uiter_setUTF8(&c.a, icuCharP(s.UTF8), icuSLen(s.UTF8))
|
||||
bp, bn := c.buf()
|
||||
n := C.ucol_nextSortKeyPart(c.col, &c.a, &(state[0]), bp, bn, &err)
|
||||
if n >= bn {
|
||||
// Force failure.
|
||||
if c.extendBuf(n+1) != nil {
|
||||
log.Fatal("expected extension to fail")
|
||||
}
|
||||
return c.Key(s)
|
||||
}
|
||||
return c.extendBuf(n)
|
||||
}
|
||||
|
||||
// icuUTF8conv implements the Collator interface.
|
||||
// This implementation first converts the give UTF8 string
|
||||
// to UTF16 and then calls the main ICU collation function.
|
||||
type icuUTF8conv struct {
|
||||
icuCollator
|
||||
}
|
||||
|
||||
func newUTF8conv(locale string) (Collator, error) {
|
||||
c := &icuUTF8conv{}
|
||||
return c, c.init(locale)
|
||||
}
|
||||
|
||||
func (c *icuUTF8conv) Compare(sa, sb Input) int {
|
||||
a := encodeUTF16(sa.UTF8)
|
||||
b := encodeUTF16(sb.UTF8)
|
||||
return int(C.ucol_strcoll(c.col, icuUCharP(a), icuULen(a), icuUCharP(b), icuULen(b)))
|
||||
}
|
||||
|
||||
func (c *icuUTF8conv) Key(s Input) []byte {
|
||||
a := encodeUTF16(s.UTF8)
|
||||
bp, bn := c.buf()
|
||||
n := C.ucol_getSortKey(c.col, icuUCharP(a), icuULen(a), bp, bn)
|
||||
if b := c.extendBuf(n); b != nil {
|
||||
return b
|
||||
}
|
||||
return c.Key(s)
|
||||
}
|
||||
|
||||
func encodeUTF16(b []byte) []uint16 {
|
||||
a := []uint16{}
|
||||
for len(b) > 0 {
|
||||
r, sz := utf8.DecodeRune(b)
|
||||
b = b[sz:]
|
||||
r1, r2 := utf16.EncodeRune(r)
|
||||
if r1 != 0xFFFD {
|
||||
a = append(a, uint16(r1), uint16(r2))
|
||||
} else {
|
||||
a = append(a, uint16(r))
|
||||
}
|
||||
}
|
||||
return a
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue