Checking in vendor folder for ease of using go get.
This commit is contained in:
parent
7a1251853b
commit
cdb4b5a1d0
3554 changed files with 1270116 additions and 0 deletions
35
vendor/golang.org/x/text/search/index.go
generated
vendored
Normal file
35
vendor/golang.org/x/text/search/index.go
generated
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Note: this file is identical to the file text/collate/index.go. Both files
|
||||
// will be removed when the new colltab package is finished and in use.
|
||||
|
||||
package search
|
||||
|
||||
import "golang.org/x/text/internal/colltab"
|
||||
|
||||
const blockSize = 64
|
||||
|
||||
func getTable(t tableIndex) *colltab.Table {
|
||||
return &colltab.Table{
|
||||
Index: colltab.Trie{
|
||||
Index0: mainLookup[:][blockSize*t.lookupOffset:],
|
||||
Values0: mainValues[:][blockSize*t.valuesOffset:],
|
||||
Index: mainLookup[:],
|
||||
Values: mainValues[:],
|
||||
},
|
||||
ExpandElem: mainExpandElem[:],
|
||||
ContractTries: colltab.ContractTrieSet(mainCTEntries[:]),
|
||||
ContractElem: mainContractElem[:],
|
||||
MaxContractLen: 18,
|
||||
VariableTop: varTop,
|
||||
}
|
||||
}
|
||||
|
||||
// tableIndex carries the per-locale offsets that getTable needs in order to
// derive a locale-specific table from the shared main table. Both offsets are
// expressed in units of blockSize entries.
type tableIndex struct {
	lookupOffset uint32 // block offset into mainLookup
	valuesOffset uint32 // block offset into mainValues
}
|
155
vendor/golang.org/x/text/search/pattern.go
generated
vendored
Normal file
155
vendor/golang.org/x/text/search/pattern.go
generated
vendored
Normal file
|
@ -0,0 +1,155 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package search
|
||||
|
||||
import (
|
||||
"golang.org/x/text/internal/colltab"
|
||||
)
|
||||
|
||||
// TODO: handle variable primary weights?
|
||||
|
||||
func (p *Pattern) deleteEmptyElements() {
|
||||
k := 0
|
||||
for _, e := range p.ce {
|
||||
if !isIgnorable(p.m, e) {
|
||||
p.ce[k] = e
|
||||
k++
|
||||
}
|
||||
}
|
||||
p.ce = p.ce[:k]
|
||||
}
|
||||
|
||||
func isIgnorable(m *Matcher, e colltab.Elem) bool {
|
||||
if e.Primary() > 0 {
|
||||
return false
|
||||
}
|
||||
if e.Secondary() > 0 {
|
||||
if !m.ignoreDiacritics {
|
||||
return false
|
||||
}
|
||||
// Primary value is 0 and ignoreDiacritics is true. In this case we
|
||||
// ignore the tertiary element, as it only pertains to the modifier.
|
||||
return true
|
||||
}
|
||||
// TODO: further distinguish once we have the new implementation.
|
||||
if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 {
|
||||
return false
|
||||
}
|
||||
// TODO: we ignore the Quaternary level for now.
|
||||
return true
|
||||
}
|
||||
|
||||
// TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.
|
||||
|
||||
func (p *Pattern) forwardSearch(it *colltab.Iter) (start, end int) {
|
||||
for start := 0; it.Next(); it.Reset(start) {
|
||||
nextStart := it.End()
|
||||
if end := p.searchOnce(it); end != -1 {
|
||||
return start, end
|
||||
}
|
||||
start = nextStart
|
||||
}
|
||||
return -1, -1
|
||||
}
|
||||
|
||||
func (p *Pattern) anchoredForwardSearch(it *colltab.Iter) (start, end int) {
|
||||
if it.Next() {
|
||||
if end := p.searchOnce(it); end != -1 {
|
||||
return 0, end
|
||||
}
|
||||
}
|
||||
return -1, -1
|
||||
}
|
||||
|
||||
// next advances to the next weight in a pattern. f must return one of the
|
||||
// weights of a collation element. next will advance to the first non-zero
|
||||
// weight and return this weight and true if it exists, or 0, false otherwise.
|
||||
func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) {
|
||||
for *i < len(p.ce) {
|
||||
v := f(p.ce[*i])
|
||||
*i++
|
||||
if v != 0 {
|
||||
// Skip successive ignorable values.
|
||||
for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ {
|
||||
}
|
||||
return v, true
|
||||
}
|
||||
}
|
||||
return 0, false
|
||||
}
|
||||
|
||||
// TODO: remove this function once Elem is internal and Tertiary returns int.
|
||||
func tertiary(e colltab.Elem) int {
|
||||
return int(e.Tertiary())
|
||||
}
|
||||
|
||||
// searchOnce tries to match the pattern s.p at the text position i. s.buf needs
|
||||
// to be filled with collation elements of the first segment, where n is the
|
||||
// number of source bytes consumed for this segment. It will return the end
|
||||
// position of the match or -1.
|
||||
func (p *Pattern) searchOnce(it *colltab.Iter) (end int) {
|
||||
var pLevel [4]int
|
||||
|
||||
m := p.m
|
||||
for {
|
||||
k := 0
|
||||
for ; k < it.N; k++ {
|
||||
if v := it.Elems[k].Primary(); v > 0 {
|
||||
if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w {
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
if !m.ignoreDiacritics {
|
||||
if v := it.Elems[k].Secondary(); v > 0 {
|
||||
if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w {
|
||||
return -1
|
||||
}
|
||||
}
|
||||
} else if it.Elems[k].Primary() == 0 {
|
||||
// We ignore tertiary values of collation elements of the
|
||||
// secondary level.
|
||||
continue
|
||||
}
|
||||
|
||||
// TODO: distinguish between case and width. This will be easier to
|
||||
// implement after we moved to the new collation implementation.
|
||||
if !m.ignoreWidth && !m.ignoreCase {
|
||||
if v := it.Elems[k].Tertiary(); v > 0 {
|
||||
if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w {
|
||||
return -1
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: check quaternary weight
|
||||
}
|
||||
it.Discard() // Remove the current segment from the buffer.
|
||||
|
||||
// Check for completion.
|
||||
switch {
|
||||
// If any of these cases match, we are not at the end.
|
||||
case pLevel[0] < len(p.ce):
|
||||
case !m.ignoreDiacritics && pLevel[1] < len(p.ce):
|
||||
case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce):
|
||||
default:
|
||||
// At this point, both the segment and pattern has matched fully.
|
||||
// However, the segment may still be have trailing modifiers.
|
||||
// This can be verified by another call to next.
|
||||
end = it.End()
|
||||
if it.Next() && it.Elems[0].Primary() == 0 {
|
||||
if !m.ignoreDiacritics {
|
||||
return -1
|
||||
}
|
||||
end = it.End()
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// Fill the buffer with the next batch of collation elements.
|
||||
if !it.Next() {
|
||||
return -1
|
||||
}
|
||||
}
|
||||
}
|
357
vendor/golang.org/x/text/search/pattern_test.go
generated
vendored
Normal file
357
vendor/golang.org/x/text/search/pattern_test.go
generated
vendored
Normal file
|
@ -0,0 +1,357 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package search
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
func TestCompile(t *testing.T) {
|
||||
for i, tc := range []struct {
|
||||
desc string
|
||||
pattern string
|
||||
options []Option
|
||||
n int
|
||||
}{{
|
||||
desc: "empty",
|
||||
pattern: "",
|
||||
n: 0,
|
||||
}, {
|
||||
desc: "single",
|
||||
pattern: "a",
|
||||
n: 1,
|
||||
}, {
|
||||
desc: "keep modifier",
|
||||
pattern: "a\u0300", // U+0300: COMBINING GRAVE ACCENT
|
||||
n: 2,
|
||||
}, {
|
||||
desc: "remove modifier",
|
||||
pattern: "a\u0300", // U+0300: COMBINING GRAVE ACCENT
|
||||
options: []Option{IgnoreDiacritics},
|
||||
n: 1,
|
||||
}, {
|
||||
desc: "single with double collation element",
|
||||
pattern: "ä",
|
||||
n: 2,
|
||||
}, {
|
||||
desc: "leading variable",
|
||||
pattern: " a",
|
||||
n: 2,
|
||||
}, {
|
||||
desc: "trailing variable",
|
||||
pattern: "aa ",
|
||||
n: 3,
|
||||
}, {
|
||||
desc: "leading and trailing variable",
|
||||
pattern: " äb ",
|
||||
n: 5,
|
||||
}, {
|
||||
desc: "keep interior variable",
|
||||
pattern: " ä b ",
|
||||
n: 6,
|
||||
}, {
|
||||
desc: "keep interior variables",
|
||||
pattern: " b ä ",
|
||||
n: 7,
|
||||
}, {
|
||||
desc: "remove ignoreables (zero-weights across the board)",
|
||||
pattern: "\u009Db\u009Dä\u009D", // U+009D: OPERATING SYSTEM COMMAND
|
||||
n: 3,
|
||||
}} {
|
||||
m := New(language.Und, tc.options...)
|
||||
p := m.CompileString(tc.pattern)
|
||||
if len(p.ce) != tc.n {
|
||||
t.Errorf("%d:%s: Compile(%+q): got %d; want %d", i, tc.desc, tc.pattern, len(p.ce), tc.n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNorm(t *testing.T) {
|
||||
// U+0300: COMBINING GRAVE ACCENT (CCC=230)
|
||||
// U+031B: COMBINING HORN (CCC=216)
|
||||
for _, tc := range []struct {
|
||||
desc string
|
||||
a string
|
||||
b string
|
||||
want bool // a and b compile into the same pattern?
|
||||
}{{
|
||||
"simple",
|
||||
"eee\u0300\u031b",
|
||||
"eee\u031b\u0300",
|
||||
true,
|
||||
}, {
|
||||
"large number of modifiers in pattern",
|
||||
strings.Repeat("\u0300", 29) + "\u0318",
|
||||
"\u0318" + strings.Repeat("\u0300", 29),
|
||||
true,
|
||||
}, {
|
||||
"modifier overflow in pattern",
|
||||
strings.Repeat("\u0300", 30) + "\u0318",
|
||||
"\u0318" + strings.Repeat("\u0300", 30),
|
||||
false,
|
||||
}} {
|
||||
m := New(language.Und)
|
||||
a := m.CompileString(tc.a)
|
||||
b := m.CompileString(tc.b)
|
||||
if got := reflect.DeepEqual(a, b); got != tc.want {
|
||||
t.Errorf("Compile(a) == Compile(b) == %v; want %v", got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestForwardSearch(t *testing.T) {
|
||||
for i, tc := range []struct {
|
||||
desc string
|
||||
tag string
|
||||
options []Option
|
||||
pattern string
|
||||
text string
|
||||
want []int
|
||||
}{{
|
||||
// The semantics of an empty search is to match nothing.
|
||||
// TODO: change this to be in line with strings.Index? It is quite a
|
||||
// different beast, so not sure yet.
|
||||
|
||||
desc: "empty pattern and text",
|
||||
tag: "und",
|
||||
pattern: "",
|
||||
text: "",
|
||||
want: nil, // TODO: consider: []int{0, 0},
|
||||
}, {
|
||||
desc: "non-empty pattern and empty text",
|
||||
tag: "und",
|
||||
pattern: " ",
|
||||
text: "",
|
||||
want: nil,
|
||||
}, {
|
||||
desc: "empty pattern and non-empty text",
|
||||
tag: "und",
|
||||
pattern: "",
|
||||
text: "abc",
|
||||
want: nil, // TODO: consider: []int{0, 0, 1, 1, 2, 2, 3, 3},
|
||||
}, {
|
||||
// Variable-only patterns. We don't support variables at the moment,
|
||||
// but verify that, given this, the behavior is indeed as expected.
|
||||
|
||||
desc: "exact match of variable",
|
||||
tag: "und",
|
||||
pattern: " ",
|
||||
text: " ",
|
||||
want: []int{0, 1},
|
||||
}, {
|
||||
desc: "variables not handled by default",
|
||||
tag: "und",
|
||||
pattern: "- ",
|
||||
text: " -",
|
||||
want: nil, // Would be (1, 2) for a median match with variable}.
|
||||
}, {
|
||||
desc: "multiple subsequent identical variables",
|
||||
tag: "und",
|
||||
pattern: " ",
|
||||
text: " ",
|
||||
want: []int{0, 1, 1, 2, 2, 3, 3, 4},
|
||||
}, {
|
||||
desc: "text with variables",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
pattern: "abc",
|
||||
text: "3 abc 3",
|
||||
want: []int{2, 5},
|
||||
}, {
|
||||
desc: "pattern with interior variables",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
pattern: "a b c",
|
||||
text: "3 a b c abc a b c 3",
|
||||
want: []int{2, 7}, // Would have 3 matches using variable.
|
||||
|
||||
// TODO: Different variable handling settings.
|
||||
}, {
|
||||
// Options.
|
||||
|
||||
desc: "match all levels",
|
||||
tag: "und",
|
||||
pattern: "Abc",
|
||||
text: "abcAbcABCÁbcábc",
|
||||
want: []int{3, 6},
|
||||
}, {
|
||||
desc: "ignore diacritics in text",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
pattern: "Abc",
|
||||
text: "Ábc",
|
||||
want: []int{0, 4},
|
||||
}, {
|
||||
desc: "ignore diacritics in pattern",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
pattern: "Ábc",
|
||||
text: "Abc",
|
||||
want: []int{0, 3},
|
||||
}, {
|
||||
desc: "ignore diacritics",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
pattern: "Abc",
|
||||
text: "abcAbcABCÁbcábc",
|
||||
want: []int{3, 6, 9, 13},
|
||||
}, {
|
||||
desc: "ignore case",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreCase},
|
||||
pattern: "Abc",
|
||||
text: "abcAbcABCÁbcábc",
|
||||
want: []int{0, 3, 3, 6, 6, 9},
|
||||
}, {
|
||||
desc: "ignore case and diacritics",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreCase, IgnoreDiacritics},
|
||||
pattern: "Abc",
|
||||
text: "abcAbcABCÁbcábc",
|
||||
want: []int{0, 3, 3, 6, 6, 9, 9, 13, 13, 17},
|
||||
}, {
|
||||
desc: "ignore width to fullwidth",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreWidth},
|
||||
pattern: "abc",
|
||||
text: "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
|
||||
want: []int{4, 13},
|
||||
}, {
|
||||
// TODO: distinguish between case and width.
|
||||
desc: "don't ignore width to fullwidth, ignoring only case",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreCase},
|
||||
pattern: "abc",
|
||||
text: "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
|
||||
want: []int{4, 13},
|
||||
}, {
|
||||
desc: "ignore width to fullwidth and diacritics",
|
||||
tag: "und",
|
||||
options: []Option{IgnoreWidth, IgnoreDiacritics},
|
||||
pattern: "abc",
|
||||
text: "123 \uFF41\uFF42\uFF43 123", // U+FF41-3: FULLWIDTH LATIN SMALL LETTER A-C
|
||||
want: []int{4, 13},
|
||||
}, {
|
||||
desc: "whole grapheme, single rune",
|
||||
tag: "und",
|
||||
pattern: "eee",
|
||||
text: "123 eeé 123",
|
||||
want: nil,
|
||||
}, {
|
||||
// Note: rules on when to apply contractions may, for certain languages,
|
||||
// differ between search and collation. For example, "ch" is not
|
||||
// considered a contraction for the purpose of searching in Spanish.
|
||||
// Therefore, be careful picking this test.
|
||||
desc: "whole grapheme, contractions",
|
||||
tag: "da",
|
||||
pattern: "aba",
|
||||
// Fails at the primary level, because "aa" is a contraction.
|
||||
text: "123 abaa 123",
|
||||
want: []int{},
|
||||
}, {
|
||||
desc: "whole grapheme, trailing modifier",
|
||||
tag: "und",
|
||||
pattern: "eee",
|
||||
text: "123 eee\u0300 123", // U+0300: COMBINING GRAVE ACCENT
|
||||
want: nil,
|
||||
}, {
|
||||
// Language-specific matching.
|
||||
|
||||
desc: "",
|
||||
tag: "da",
|
||||
options: []Option{IgnoreCase},
|
||||
pattern: "Århus",
|
||||
text: "AarhusÅrhus Århus ",
|
||||
want: []int{0, 6, 6, 12, 14, 20},
|
||||
}, {
|
||||
desc: "",
|
||||
tag: "da",
|
||||
options: []Option{IgnoreCase},
|
||||
pattern: "Aarhus",
|
||||
text: "Århus Aarhus",
|
||||
want: []int{0, 6, 7, 13},
|
||||
}, {
|
||||
desc: "",
|
||||
tag: "en", // Å does not match A for English.
|
||||
options: []Option{IgnoreCase},
|
||||
pattern: "Aarhus",
|
||||
text: "Århus",
|
||||
want: nil,
|
||||
}, {
|
||||
desc: "ignore modifier in text",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
tag: "und",
|
||||
pattern: "eee",
|
||||
text: "123 eee\u0300 123", // U+0300: COMBINING GRAVE ACCENT
|
||||
want: []int{4, 9}, // Matches on grapheme boundary.
|
||||
}, {
|
||||
desc: "ignore multiple modifiers in text",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
tag: "und",
|
||||
pattern: "eee",
|
||||
text: "123 eee\u0300\u0300 123", // U+0300: COMBINING GRAVE ACCENT
|
||||
want: []int{4, 11}, // Matches on grapheme boundary.
|
||||
}, {
|
||||
desc: "ignore modifier in pattern",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
tag: "und",
|
||||
pattern: "eee\u0300", // U+0300: COMBINING GRAVE ACCENT
|
||||
text: "123 eee 123",
|
||||
want: []int{4, 7},
|
||||
}, {
|
||||
desc: "ignore multiple modifiers in pattern",
|
||||
options: []Option{IgnoreDiacritics},
|
||||
tag: "und",
|
||||
pattern: "eee\u0300\u0300", // U+0300: COMBINING GRAVE ACCENT
|
||||
text: "123 eee 123",
|
||||
want: []int{4, 7},
|
||||
}, {
|
||||
desc: "match non-normalized pattern",
|
||||
tag: "und",
|
||||
// U+0300: COMBINING GRAVE ACCENT (CCC=230)
|
||||
// U+031B: COMBINING HORN (CCC=216)
|
||||
pattern: "eee\u0300\u031b",
|
||||
text: "123 eee\u031b\u0300 123",
|
||||
want: []int{4, 11},
|
||||
}, {
|
||||
desc: "match non-normalized text",
|
||||
tag: "und",
|
||||
// U+0300: COMBINING GRAVE ACCENT (CCC=230)
|
||||
// U+031B: COMBINING HORN (CCC=216)
|
||||
pattern: "eee\u031b\u0300",
|
||||
text: "123 eee\u0300\u031b 123",
|
||||
want: []int{4, 11},
|
||||
}} {
|
||||
m := New(language.MustParse(tc.tag), tc.options...)
|
||||
p := m.CompileString(tc.pattern)
|
||||
for j := 0; j < len(tc.text); {
|
||||
start, end := p.IndexString(tc.text[j:])
|
||||
if start == -1 && end == -1 {
|
||||
j++
|
||||
continue
|
||||
}
|
||||
start += j
|
||||
end += j
|
||||
j = end
|
||||
if len(tc.want) == 0 {
|
||||
t.Errorf("%d:%s: found unexpected result [%d %d]", i, tc.desc, start, end)
|
||||
break
|
||||
}
|
||||
if tc.want[0] != start || tc.want[1] != end {
|
||||
t.Errorf("%d:%s: got [%d %d]; want %v", i, tc.desc, start, end, tc.want[:2])
|
||||
tc.want = tc.want[2:]
|
||||
break
|
||||
}
|
||||
tc.want = tc.want[2:]
|
||||
}
|
||||
if len(tc.want) != 0 {
|
||||
t.Errorf("%d:%s: %d extra results", i, tc.desc, len(tc.want)/2)
|
||||
}
|
||||
}
|
||||
}
|
237
vendor/golang.org/x/text/search/search.go
generated
vendored
Normal file
237
vendor/golang.org/x/text/search/search.go
generated
vendored
Normal file
|
@ -0,0 +1,237 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run ../collate/maketables.go -cldr=23 -unicode=6.2.0 -types=search,searchjl -package=search
|
||||
|
||||
// Package search provides language-specific search and string matching.
|
||||
//
|
||||
// Natural language matching can be intricate. For example, Danish will insist
|
||||
// "Århus" and "Aarhus" are the same name and Turkish will match I to ı (note
|
||||
// the lack of a dot) in a case-insensitive match. This package handles such
|
||||
// language-specific details.
|
||||
//
|
||||
// Text passed to any of the calls in this package does not need to be
|
||||
// normalized.
|
||||
package search // import "golang.org/x/text/search"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/colltab"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
// An Option configures a Matcher.
|
||||
type Option func(*Matcher)
|
||||
|
||||
var (
|
||||
// WholeWord restricts matches to complete words. The default is to match at
|
||||
// the character level.
|
||||
WholeWord Option = nil
|
||||
|
||||
// Exact requires that two strings are their exact equivalent. For example
|
||||
// å would not match aa in Danish. It overrides any of the ignore options.
|
||||
Exact Option = nil
|
||||
|
||||
// Loose causes case, diacritics and width to be ignored.
|
||||
Loose Option = loose
|
||||
|
||||
// IgnoreCase enables case-insensitive search.
|
||||
IgnoreCase Option = ignoreCase
|
||||
|
||||
// IgnoreDiacritics causes diacritics to be ignored ("ö" == "o").
|
||||
IgnoreDiacritics Option = ignoreDiacritics
|
||||
|
||||
// IgnoreWidth equates narrow with wide variants.
|
||||
IgnoreWidth Option = ignoreWidth
|
||||
)
|
||||
|
||||
// ignoreDiacritics implements the IgnoreDiacritics option.
func ignoreDiacritics(m *Matcher) {
	m.ignoreDiacritics = true
}
|
||||
// ignoreCase implements the IgnoreCase option.
func ignoreCase(m *Matcher) {
	m.ignoreCase = true
}
|
||||
// ignoreWidth implements the IgnoreWidth option.
func ignoreWidth(m *Matcher) {
	m.ignoreWidth = true
}
|
||||
func loose(m *Matcher) {
|
||||
ignoreDiacritics(m)
|
||||
ignoreCase(m)
|
||||
ignoreWidth(m)
|
||||
}
|
||||
|
||||
var (
|
||||
// Supported lists the languages for which search differs from its parent.
|
||||
Supported language.Coverage
|
||||
|
||||
tags []language.Tag
|
||||
)
|
||||
|
||||
func init() {
|
||||
ids := strings.Split(availableLocales, ",")
|
||||
tags = make([]language.Tag, len(ids))
|
||||
for i, s := range ids {
|
||||
tags[i] = language.Raw.MustParse(s)
|
||||
}
|
||||
Supported = language.NewCoverage(tags)
|
||||
}
|
||||
|
||||
// New returns a new Matcher for the given language and options.
|
||||
func New(t language.Tag, opts ...Option) *Matcher {
|
||||
m := &Matcher{
|
||||
w: getTable(locales[colltab.MatchLang(t, tags)]),
|
||||
}
|
||||
for _, f := range opts {
|
||||
f(m)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// A Matcher implements language-specific string matching.
|
||||
type Matcher struct {
|
||||
w colltab.Weighter
|
||||
ignoreCase bool
|
||||
ignoreWidth bool
|
||||
ignoreDiacritics bool
|
||||
}
|
||||
|
||||
// An IndexOption specifies how the Index methods of Pattern or Matcher should
// match the input. Options are bit flags; the Index methods OR together all
// options they are given.
type IndexOption byte

const (
	// Anchor restricts the search to the start (or end for Backwards) of the
	// text.
	Anchor IndexOption = 1 << 0

	// Backwards starts the search from the end of the text.
	Backwards IndexOption = 1 << 1

	// anchorBackwards is the combination of both flags.
	anchorBackwards = Anchor | Backwards
)
|
||||
|
||||
// Index reports the start and end position of the first occurrence of pat in b
|
||||
// or -1, -1 if pat is not present.
|
||||
func (m *Matcher) Index(b, pat []byte, opts ...IndexOption) (start, end int) {
|
||||
// TODO: implement optimized version that does not use a pattern.
|
||||
return m.Compile(pat).Index(b, opts...)
|
||||
}
|
||||
|
||||
// IndexString reports the start and end position of the first occurrence of pat
|
||||
// in s or -1, -1 if pat is not present.
|
||||
func (m *Matcher) IndexString(s, pat string, opts ...IndexOption) (start, end int) {
|
||||
// TODO: implement optimized version that does not use a pattern.
|
||||
return m.CompileString(pat).IndexString(s, opts...)
|
||||
}
|
||||
|
||||
// Equal reports whether a and b are equivalent.
|
||||
func (m *Matcher) Equal(a, b []byte) bool {
|
||||
_, end := m.Index(a, b, Anchor)
|
||||
return end == len(a)
|
||||
}
|
||||
|
||||
// EqualString reports whether a and b are equivalent.
|
||||
func (m *Matcher) EqualString(a, b string) bool {
|
||||
_, end := m.IndexString(a, b, Anchor)
|
||||
return end == len(a)
|
||||
}
|
||||
|
||||
// Compile compiles and returns a pattern that can be used for faster searching.
|
||||
func (m *Matcher) Compile(b []byte) *Pattern {
|
||||
p := &Pattern{m: m}
|
||||
iter := colltab.Iter{Weighter: m.w}
|
||||
for iter.SetInput(b); iter.Next(); {
|
||||
}
|
||||
p.ce = iter.Elems
|
||||
p.deleteEmptyElements()
|
||||
return p
|
||||
}
|
||||
|
||||
// CompileString compiles and returns a pattern that can be used for faster
|
||||
// searching.
|
||||
func (m *Matcher) CompileString(s string) *Pattern {
|
||||
p := &Pattern{m: m}
|
||||
iter := colltab.Iter{Weighter: m.w}
|
||||
for iter.SetInputString(s); iter.Next(); {
|
||||
}
|
||||
p.ce = iter.Elems
|
||||
p.deleteEmptyElements()
|
||||
return p
|
||||
}
|
||||
|
||||
// A Pattern is a compiled search string. It is safe for concurrent use.
|
||||
type Pattern struct {
|
||||
m *Matcher
|
||||
ce []colltab.Elem
|
||||
}
|
||||
|
||||
// Design note (TODO remove):
|
||||
// The cost of retrieving collation elements for each rune, which is used for
|
||||
// search as well, is not trivial. Also, algorithms like Boyer-Moore and
|
||||
// Sunday require some additional precomputing.
|
||||
|
||||
// Index reports the start and end position of the first occurrence of p in b
|
||||
// or -1, -1 if p is not present.
|
||||
func (p *Pattern) Index(b []byte, opts ...IndexOption) (start, end int) {
|
||||
// Pick a large enough buffer such that we likely do not need to allocate
|
||||
// and small enough to not cause too much overhead initializing.
|
||||
var buf [8]colltab.Elem
|
||||
|
||||
it := &colltab.Iter{
|
||||
Weighter: p.m.w,
|
||||
Elems: buf[:0],
|
||||
}
|
||||
it.SetInput(b)
|
||||
|
||||
var optMask IndexOption
|
||||
for _, o := range opts {
|
||||
optMask |= o
|
||||
}
|
||||
|
||||
switch optMask {
|
||||
case 0:
|
||||
return p.forwardSearch(it)
|
||||
case Anchor:
|
||||
return p.anchoredForwardSearch(it)
|
||||
case Backwards, anchorBackwards:
|
||||
panic("TODO: implement")
|
||||
default:
|
||||
panic("unrecognized option")
|
||||
}
|
||||
}
|
||||
|
||||
// IndexString reports the start and end position of the first occurrence of p
|
||||
// in s or -1, -1 if p is not present.
|
||||
func (p *Pattern) IndexString(s string, opts ...IndexOption) (start, end int) {
|
||||
// Pick a large enough buffer such that we likely do not need to allocate
|
||||
// and small enough to not cause too much overhead initializing.
|
||||
var buf [8]colltab.Elem
|
||||
|
||||
it := &colltab.Iter{
|
||||
Weighter: p.m.w,
|
||||
Elems: buf[:0],
|
||||
}
|
||||
it.SetInputString(s)
|
||||
|
||||
var optMask IndexOption
|
||||
for _, o := range opts {
|
||||
optMask |= o
|
||||
}
|
||||
|
||||
switch optMask {
|
||||
case 0:
|
||||
return p.forwardSearch(it)
|
||||
case Anchor:
|
||||
return p.anchoredForwardSearch(it)
|
||||
case Backwards, anchorBackwards:
|
||||
panic("TODO: implement")
|
||||
default:
|
||||
panic("unrecognized option")
|
||||
}
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// - Maybe IndexAll methods (probably not necessary).
|
||||
// - Some way to match patterns in a Reader (a bit tricky).
|
||||
// - Some fold transformer that folds text to comparable text, based on the
|
||||
// search options. This is a common technique, though very different from the
|
||||
// collation-based design of this package. It has a somewhat different use
|
||||
// case, so probably makes sense to support both. Should probably be in a
|
||||
// different package, though, as it uses a completely different kind of tables
|
||||
// (based on norm, cases, width and range tables.)
|
12448
vendor/golang.org/x/text/search/tables.go
generated
vendored
Normal file
12448
vendor/golang.org/x/text/search/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue