Checking in vendor folder for ease of using go get.
This commit is contained in:
parent
7a1251853b
commit
cdb4b5a1d0
3554 changed files with 1270116 additions and 0 deletions
198
vendor/golang.org/x/text/unicode/bidi/bidi.go
generated
vendored
Normal file
198
vendor/golang.org/x/text/unicode/bidi/bidi.go
generated
vendored
Normal file
|
@ -0,0 +1,198 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_trieval.go gen_ranges.go
|
||||
|
||||
// Package bidi contains functionality for bidirectional text support.
|
||||
//
|
||||
// See http://www.unicode.org/reports/tr9.
|
||||
//
|
||||
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
|
||||
// and without notice.
|
||||
package bidi // import "golang.org/x/text/unicode/bidi"
|
||||
|
||||
// TODO:
|
||||
// The following functionality would not be hard to implement, but hinges on
|
||||
// the definition of a Segmenter interface. For now this is up to the user.
|
||||
// - Iterate over paragraphs
|
||||
// - Segmenter to iterate over runs directly from a given text.
|
||||
// Also:
|
||||
// - Transformer for reordering?
|
||||
// - Transformer (validator, really) for Bidi Rule.
|
||||
|
||||
// This API tries to avoid dealing with embedding levels for now. Under the hood
|
||||
// these will be computed, but the question is to which extent the user should
|
||||
// know they exist. We should at some point allow the user to specify an
|
||||
// embedding hierarchy, though.
|
||||
|
||||
// A Direction indicates the overall flow of text.
|
||||
type Direction int
|
||||
|
||||
const (
|
||||
// LeftToRight indicates the text contains no right-to-left characters and
|
||||
// that either there are some left-to-right characters or the option
|
||||
// DefaultDirection(LeftToRight) was passed.
|
||||
LeftToRight Direction = iota
|
||||
|
||||
// RightToLeft indicates the text contains no left-to-right characters and
|
||||
// that either there are some right-to-left characters or the option
|
||||
// DefaultDirection(RightToLeft) was passed.
|
||||
RightToLeft
|
||||
|
||||
// Mixed indicates text contains both left-to-right and right-to-left
|
||||
// characters.
|
||||
Mixed
|
||||
|
||||
// Neutral means that text contains no left-to-right and right-to-left
|
||||
// characters and that no default direction has been set.
|
||||
Neutral
|
||||
)
|
||||
|
||||
type options struct{}
|
||||
|
||||
// An Option is an option for Bidi processing.
|
||||
type Option func(*options)
|
||||
|
||||
// ICU allows the user to define embedding levels. This may be used, for example,
|
||||
// to use hierarchical structure of markup languages to define embeddings.
|
||||
// The following option may be a way to expose this functionality in this API.
|
||||
// // LevelFunc sets a function that associates nesting levels with the given text.
|
||||
// // The levels function will be called with monotonically increasing values for p.
|
||||
// func LevelFunc(levels func(p int) int) Option {
|
||||
// panic("unimplemented")
|
||||
// }
|
||||
|
||||
// DefaultDirection sets the default direction for a Paragraph. The direction is
|
||||
// overridden if the text contains directional characters.
|
||||
func DefaultDirection(d Direction) Option {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// A Paragraph holds a single Paragraph for Bidi processing.
|
||||
type Paragraph struct {
|
||||
// buffers
|
||||
}
|
||||
|
||||
// SetBytes configures p for the given paragraph text. It replaces text
|
||||
// previously set by SetBytes or SetString. If b contains a paragraph separator
|
||||
// it will only process the first paragraph and report the number of bytes
|
||||
// consumed from b including this separator. Error may be non-nil if options are
|
||||
// given.
|
||||
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// SetString configures p for the given paragraph text. It replaces text
|
||||
// previously set by SetBytes or SetString. If b contains a paragraph separator
|
||||
// it will only process the first paragraph and report the number of bytes
|
||||
// consumed from b including this separator. Error may be non-nil if options are
|
||||
// given.
|
||||
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// IsLeftToRight reports whether the principle direction of rendering for this
|
||||
// paragraphs is left-to-right. If this returns false, the principle direction
|
||||
// of rendering is right-to-left.
|
||||
func (p *Paragraph) IsLeftToRight() bool {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Direction returns the direction of the text of this paragraph.
|
||||
//
|
||||
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
|
||||
func (p *Paragraph) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// RunAt reports the Run at the given position of the input text.
|
||||
//
|
||||
// This method can be used for computing line breaks on paragraphs.
|
||||
func (p *Paragraph) RunAt(pos int) Run {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Order computes the visual ordering of all the runs in a Paragraph.
|
||||
func (p *Paragraph) Order() (Ordering, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Line computes the visual ordering of runs for a single line starting and
|
||||
// ending at the given positions in the original text.
|
||||
func (p *Paragraph) Line(start, end int) (Ordering, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
|
||||
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
|
||||
// The methods of an Ordering should only be called by one goroutine at a time.
|
||||
type Ordering struct{}
|
||||
|
||||
// Direction reports the directionality of the runs.
|
||||
//
|
||||
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
|
||||
func (o *Ordering) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// NumRuns returns the number of runs.
|
||||
func (o *Ordering) NumRuns() int {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Run returns the ith run within the ordering.
|
||||
func (o *Ordering) Run(i int) Run {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// TODO: perhaps with options.
|
||||
// // Reorder creates a reader that reads the runes in visual order per character.
|
||||
// // Modifiers remain after the runes they modify.
|
||||
// func (l *Runs) Reorder() io.Reader {
|
||||
// panic("unimplemented")
|
||||
// }
|
||||
|
||||
// A Run is a continuous sequence of characters of a single direction.
|
||||
type Run struct {
|
||||
}
|
||||
|
||||
// String returns the text of the run in its original order.
|
||||
func (r *Run) String() string {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Bytes returns the text of the run in its original order.
|
||||
func (r *Run) Bytes() []byte {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// TODO: methods for
|
||||
// - Display order
|
||||
// - headers and footers
|
||||
// - bracket replacement.
|
||||
|
||||
// Direction reports the direction of the run.
|
||||
func (r *Run) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Position of the Run within the text passed to SetBytes or SetString of the
|
||||
// originating Paragraph value.
|
||||
func (r *Run) Pos() (start, end int) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// AppendReverse reverses the order of characters of in, appends them to out,
|
||||
// and returns the result. Modifiers will still follow the runes they modify.
|
||||
// Brackets are replaced with their counterparts.
|
||||
func AppendReverse(out, in []byte) []byte {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// ReverseString reverses the order of characters in s and returns a new string.
|
||||
// Modifiers will still follow the runes they modify. Brackets are replaced with
|
||||
// their counterparts.
|
||||
func ReverseString(s string) string {
|
||||
panic("unimplemented")
|
||||
}
|
335
vendor/golang.org/x/text/unicode/bidi/bracket.go
generated
vendored
Normal file
335
vendor/golang.org/x/text/unicode/bidi/bracket.go
generated
vendored
Normal file
|
@ -0,0 +1,335 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// This file contains a port of the reference implementation of the
|
||||
// Bidi Parentheses Algorithm:
|
||||
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
|
||||
//
|
||||
// The implementation in this file covers definitions BD14-BD16 and rule N0
|
||||
// of UAX#9.
|
||||
//
|
||||
// Some preprocessing is done for each rune before data is passed to this
|
||||
// algorithm:
|
||||
// - opening and closing brackets are identified
|
||||
// - a bracket pair type, like '(' and ')' is assigned a unique identifier that
|
||||
// is identical for the opening and closing bracket. It is left to do these
|
||||
// mappings.
|
||||
// - The BPA algorithm requires that bracket characters that are canonical
|
||||
// equivalents of each other be able to be substituted for each other.
|
||||
// It is the responsibility of the caller to do this canonicalization.
|
||||
//
|
||||
// In implementing BD16, this implementation departs slightly from the "logical"
|
||||
// algorithm defined in UAX#9. In particular, the stack referenced there
|
||||
// supports operations that go beyond a "basic" stack. An equivalent
|
||||
// implementation based on a linked list is used here.
|
||||
|
||||
// Bidi_Paired_Bracket_Type
|
||||
// BD14. An opening paired bracket is a character whose
|
||||
// Bidi_Paired_Bracket_Type property value is Open.
|
||||
//
|
||||
// BD15. A closing paired bracket is a character whose
|
||||
// Bidi_Paired_Bracket_Type property value is Close.
|
||||
type bracketType byte
|
||||
|
||||
const (
|
||||
bpNone bracketType = iota
|
||||
bpOpen
|
||||
bpClose
|
||||
)
|
||||
|
||||
// bracketPair holds a pair of index values for opening and closing bracket
|
||||
// location of a bracket pair.
|
||||
type bracketPair struct {
|
||||
opener int
|
||||
closer int
|
||||
}
|
||||
|
||||
func (b *bracketPair) String() string {
|
||||
return fmt.Sprintf("(%v, %v)", b.opener, b.closer)
|
||||
}
|
||||
|
||||
// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
|
||||
type bracketPairs []bracketPair
|
||||
|
||||
func (b bracketPairs) Len() int { return len(b) }
|
||||
func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
|
||||
|
||||
// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
|
||||
//
|
||||
// For each rune, it takes the indexes into the original string, the class the
|
||||
// bracket type (in pairTypes) and the bracket identifier (pairValues). It also
|
||||
// takes the direction type for the start-of-sentence and the embedding level.
|
||||
//
|
||||
// The identifiers for bracket types are the rune of the canonicalized opening
|
||||
// bracket for brackets (open or close) or 0 for runes that are not brackets.
|
||||
func resolvePairedBrackets(s *isolatingRunSequence) {
|
||||
p := bracketPairer{
|
||||
sos: s.sos,
|
||||
openers: list.New(),
|
||||
codesIsolatedRun: s.types,
|
||||
indexes: s.indexes,
|
||||
}
|
||||
dirEmbed := L
|
||||
if s.level&1 != 0 {
|
||||
dirEmbed = R
|
||||
}
|
||||
p.locateBrackets(s.p.pairTypes, s.p.pairValues)
|
||||
p.resolveBrackets(dirEmbed, s.p.initialTypes)
|
||||
}
|
||||
|
||||
type bracketPairer struct {
|
||||
sos Class // direction corresponding to start of sequence
|
||||
|
||||
// The following is a restatement of BD 16 using non-algorithmic language.
|
||||
//
|
||||
// A bracket pair is a pair of characters consisting of an opening
|
||||
// paired bracket and a closing paired bracket such that the
|
||||
// Bidi_Paired_Bracket property value of the former equals the latter,
|
||||
// subject to the following constraints.
|
||||
// - both characters of a pair occur in the same isolating run sequence
|
||||
// - the closing character of a pair follows the opening character
|
||||
// - any bracket character can belong at most to one pair, the earliest possible one
|
||||
// - any bracket character not part of a pair is treated like an ordinary character
|
||||
// - pairs may nest properly, but their spans may not overlap otherwise
|
||||
|
||||
// Bracket characters with canonical decompositions are supposed to be
|
||||
// treated as if they had been normalized, to allow normalized and non-
|
||||
// normalized text to give the same result. In this implementation that step
|
||||
// is pushed out to the caller. The caller has to ensure that the pairValue
|
||||
// slices contain the rune of the opening bracket after normalization for
|
||||
// any opening or closing bracket.
|
||||
|
||||
openers *list.List // list of positions for opening brackets
|
||||
|
||||
// bracket pair positions sorted by location of opening bracket
|
||||
pairPositions bracketPairs
|
||||
|
||||
codesIsolatedRun []Class // directional bidi codes for an isolated run
|
||||
indexes []int // array of index values into the original string
|
||||
|
||||
}
|
||||
|
||||
// matchOpener reports whether characters at given positions form a matching
|
||||
// bracket pair.
|
||||
func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
|
||||
return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
|
||||
}
|
||||
|
||||
const maxPairingDepth = 63
|
||||
|
||||
// locateBrackets locates matching bracket pairs according to BD16.
|
||||
//
|
||||
// This implementation uses a linked list instead of a stack, because, while
|
||||
// elements are added at the front (like a push) they are not generally removed
|
||||
// in atomic 'pop' operations, reducing the benefit of the stack archetype.
|
||||
func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
|
||||
// traverse the run
|
||||
// do that explicitly (not in a for-each) so we can record position
|
||||
for i, index := range p.indexes {
|
||||
|
||||
// look at the bracket type for each character
|
||||
if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
|
||||
// continue scanning
|
||||
continue
|
||||
}
|
||||
switch pairTypes[index] {
|
||||
case bpOpen:
|
||||
// check if maximum pairing depth reached
|
||||
if p.openers.Len() == maxPairingDepth {
|
||||
p.openers.Init()
|
||||
return
|
||||
}
|
||||
// remember opener location, most recent first
|
||||
p.openers.PushFront(i)
|
||||
|
||||
case bpClose:
|
||||
// see if there is a match
|
||||
count := 0
|
||||
for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
|
||||
count++
|
||||
opener := elem.Value.(int)
|
||||
if p.matchOpener(pairValues, opener, i) {
|
||||
// if the opener matches, add nested pair to the ordered list
|
||||
p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
|
||||
// remove up to and including matched opener
|
||||
for ; count > 0; count-- {
|
||||
p.openers.Remove(p.openers.Front())
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
sort.Sort(p.pairPositions)
|
||||
// if we get here, the closing bracket matched no openers
|
||||
// and gets ignored
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bracket pairs within an isolating run sequence are processed as units so
|
||||
// that both the opening and the closing paired bracket in a pair resolve to
|
||||
// the same direction.
|
||||
//
|
||||
// N0. Process bracket pairs in an isolating run sequence sequentially in
|
||||
// the logical order of the text positions of the opening paired brackets
|
||||
// using the logic given below. Within this scope, bidirectional types EN
|
||||
// and AN are treated as R.
|
||||
//
|
||||
// Identify the bracket pairs in the current isolating run sequence
|
||||
// according to BD16. For each bracket-pair element in the list of pairs of
|
||||
// text positions:
|
||||
//
|
||||
// a Inspect the bidirectional types of the characters enclosed within the
|
||||
// bracket pair.
|
||||
//
|
||||
// b If any strong type (either L or R) matching the embedding direction is
|
||||
// found, set the type for both brackets in the pair to match the embedding
|
||||
// direction.
|
||||
//
|
||||
// o [ e ] o -> o e e e o
|
||||
//
|
||||
// o [ o e ] -> o e o e e
|
||||
//
|
||||
// o [ NI e ] -> o e NI e e
|
||||
//
|
||||
// c Otherwise, if a strong type (opposite the embedding direction) is
|
||||
// found, test for adjacent strong types as follows: 1 First, check
|
||||
// backwards before the opening paired bracket until the first strong type
|
||||
// (L, R, or sos) is found. If that first preceding strong type is opposite
|
||||
// the embedding direction, then set the type for both brackets in the pair
|
||||
// to that type. 2 Otherwise, set the type for both brackets in the pair to
|
||||
// the embedding direction.
|
||||
//
|
||||
// o [ o ] e -> o o o o e
|
||||
//
|
||||
// o [ o NI ] o -> o o o NI o o
|
||||
//
|
||||
// e [ o ] o -> e e o e o
|
||||
//
|
||||
// e [ o ] e -> e e o e e
|
||||
//
|
||||
// e ( o [ o ] NI ) e -> e e o o o o NI e e
|
||||
//
|
||||
// d Otherwise, do not set the type for the current bracket pair. Note that
|
||||
// if the enclosed text contains no strong types the paired brackets will
|
||||
// both resolve to the same level when resolved individually using rules N1
|
||||
// and N2.
|
||||
//
|
||||
// e ( NI ) o -> e ( NI ) o
|
||||
|
||||
// getStrongTypeN0 maps character's directional code to strong type as required
|
||||
// by rule N0.
|
||||
//
|
||||
// TODO: have separate type for "strong" directionality.
|
||||
func (p *bracketPairer) getStrongTypeN0(index int) Class {
|
||||
switch p.codesIsolatedRun[index] {
|
||||
// in the scope of N0, number types are treated as R
|
||||
case EN, AN, AL, R:
|
||||
return R
|
||||
case L:
|
||||
return L
|
||||
default:
|
||||
return ON
|
||||
}
|
||||
}
|
||||
|
||||
// classifyPairContent reports the strong types contained inside a Bracket Pair,
|
||||
// assuming the given embedding direction.
|
||||
//
|
||||
// It returns ON if no strong type is found. If a single strong type is found,
|
||||
// it returns this this type. Otherwise it returns the embedding direction.
|
||||
//
|
||||
// TODO: use separate type for "strong" directionality.
|
||||
func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
|
||||
dirOpposite := ON
|
||||
for i := loc.opener + 1; i < loc.closer; i++ {
|
||||
dir := p.getStrongTypeN0(i)
|
||||
if dir == ON {
|
||||
continue
|
||||
}
|
||||
if dir == dirEmbed {
|
||||
return dir // type matching embedding direction found
|
||||
}
|
||||
dirOpposite = dir
|
||||
}
|
||||
// return ON if no strong type found, or class opposite to dirEmbed
|
||||
return dirOpposite
|
||||
}
|
||||
|
||||
// classBeforePair determines which strong types are present before a Bracket
|
||||
// Pair. Return R or L if strong type found, otherwise ON.
|
||||
func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
|
||||
for i := loc.opener - 1; i >= 0; i-- {
|
||||
if dir := p.getStrongTypeN0(i); dir != ON {
|
||||
return dir
|
||||
}
|
||||
}
|
||||
// no strong types found, return sos
|
||||
return p.sos
|
||||
}
|
||||
|
||||
// assignBracketType implements rule N0 for a single bracket pair.
|
||||
func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
|
||||
// rule "N0, a", inspect contents of pair
|
||||
dirPair := p.classifyPairContent(loc, dirEmbed)
|
||||
|
||||
// dirPair is now L, R, or N (no strong type found)
|
||||
|
||||
// the following logical tests are performed out of order compared to
|
||||
// the statement of the rules but yield the same results
|
||||
if dirPair == ON {
|
||||
return // case "d" - nothing to do
|
||||
}
|
||||
|
||||
if dirPair != dirEmbed {
|
||||
// case "c": strong type found, opposite - check before (c.1)
|
||||
dirPair = p.classBeforePair(loc)
|
||||
if dirPair == dirEmbed || dirPair == ON {
|
||||
// no strong opposite type found before - use embedding (c.2)
|
||||
dirPair = dirEmbed
|
||||
}
|
||||
}
|
||||
// else: case "b", strong type found matching embedding,
|
||||
// no explicit action needed, as dirPair is already set to embedding
|
||||
// direction
|
||||
|
||||
// set the bracket types to the type found
|
||||
p.setBracketsToType(loc, dirPair, initialTypes)
|
||||
}
|
||||
|
||||
func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
|
||||
p.codesIsolatedRun[loc.opener] = dirPair
|
||||
p.codesIsolatedRun[loc.closer] = dirPair
|
||||
|
||||
for i := loc.opener + 1; i < loc.closer; i++ {
|
||||
index := p.indexes[i]
|
||||
if initialTypes[index] != NSM {
|
||||
break
|
||||
}
|
||||
p.codesIsolatedRun[i] = dirPair
|
||||
}
|
||||
|
||||
for i := loc.closer + 1; i < len(p.indexes); i++ {
|
||||
index := p.indexes[i]
|
||||
if initialTypes[index] != NSM {
|
||||
break
|
||||
}
|
||||
p.codesIsolatedRun[i] = dirPair
|
||||
}
|
||||
}
|
||||
|
||||
// resolveBrackets implements rule N0 for a list of pairs.
|
||||
func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
|
||||
for _, loc := range p.pairPositions {
|
||||
p.assignBracketType(loc, dirEmbed, initialTypes)
|
||||
}
|
||||
}
|
1058
vendor/golang.org/x/text/unicode/bidi/core.go
generated
vendored
Normal file
1058
vendor/golang.org/x/text/unicode/bidi/core.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
224
vendor/golang.org/x/text/unicode/bidi/core_test.go
generated
vendored
Normal file
224
vendor/golang.org/x/text/unicode/bidi/core_test.go
generated
vendored
Normal file
|
@ -0,0 +1,224 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/testtext"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
var testLevels = flag.Bool("levels", false, "enable testing of levels")
|
||||
|
||||
// TestBidiCore performs the tests in BidiTest.txt.
|
||||
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt.
|
||||
func TestBidiCore(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
r := gen.OpenUCDFile("BidiTest.txt")
|
||||
defer r.Close()
|
||||
|
||||
var wantLevels, wantOrder []string
|
||||
p := ucd.New(r, ucd.Part(func(p *ucd.Parser) {
|
||||
s := strings.Split(p.String(0), ":")
|
||||
switch s[0] {
|
||||
case "Levels":
|
||||
wantLevels = strings.Fields(s[1])
|
||||
case "Reorder":
|
||||
wantOrder = strings.Fields(s[1])
|
||||
default:
|
||||
log.Fatalf("Unknown part %q.", s[0])
|
||||
}
|
||||
}))
|
||||
|
||||
for p.Next() {
|
||||
types := []Class{}
|
||||
for _, s := range p.Strings(0) {
|
||||
types = append(types, bidiClass[s])
|
||||
}
|
||||
// We ignore the bracketing part of the algorithm.
|
||||
pairTypes := make([]bracketType, len(types))
|
||||
pairValues := make([]rune, len(types))
|
||||
|
||||
for i := uint(0); i < 3; i++ {
|
||||
if p.Uint(1)&(1<<i) == 0 {
|
||||
continue
|
||||
}
|
||||
lev := level(int(i) - 1)
|
||||
par := newParagraph(types, pairTypes, pairValues, lev)
|
||||
|
||||
if *testLevels {
|
||||
levels := par.getLevels([]int{len(types)})
|
||||
for i, s := range wantLevels {
|
||||
if s == "x" {
|
||||
continue
|
||||
}
|
||||
l, _ := strconv.ParseUint(s, 10, 8)
|
||||
if level(l)&1 != levels[i]&1 {
|
||||
t.Errorf("%s:%d:levels: got %v; want %v", p.String(0), lev, levels, wantLevels)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
order := par.getReordering([]int{len(types)})
|
||||
gotOrder := filterOrder(types, order)
|
||||
if got, want := fmt.Sprint(gotOrder), fmt.Sprint(wantOrder); got != want {
|
||||
t.Errorf("%s:%d:order: got %v; want %v\noriginal %v", p.String(0), lev, got, want, order)
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := p.Err(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
var removeClasses = map[Class]bool{
|
||||
LRO: true,
|
||||
RLO: true,
|
||||
RLE: true,
|
||||
LRE: true,
|
||||
PDF: true,
|
||||
BN: true,
|
||||
}
|
||||
|
||||
// TestBidiCharacters performs the tests in BidiCharacterTest.txt.
|
||||
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiCharacterTest.txt
|
||||
func TestBidiCharacters(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
ucd.Parse(gen.OpenUCDFile("BidiCharacterTest.txt"), func(p *ucd.Parser) {
|
||||
var (
|
||||
types []Class
|
||||
pairTypes []bracketType
|
||||
pairValues []rune
|
||||
parLevel level
|
||||
|
||||
wantLevel = level(p.Int(2))
|
||||
wantLevels = p.Strings(3)
|
||||
wantVisualOrder = p.Strings(4)
|
||||
)
|
||||
|
||||
switch l := p.Int(1); l {
|
||||
case 0, 1:
|
||||
parLevel = level(l)
|
||||
case 2:
|
||||
parLevel = implicitLevel
|
||||
default:
|
||||
// Spec says to ignore unknown parts.
|
||||
}
|
||||
|
||||
runes := p.Runes(0)
|
||||
|
||||
for _, r := range runes {
|
||||
// Assign the bracket type.
|
||||
if d := norm.NFKD.PropertiesString(string(r)).Decomposition(); d != nil {
|
||||
r = []rune(string(d))[0]
|
||||
}
|
||||
p, _ := LookupRune(r)
|
||||
|
||||
// Assign the class for this rune.
|
||||
types = append(types, p.Class())
|
||||
|
||||
switch {
|
||||
case !p.IsBracket():
|
||||
pairTypes = append(pairTypes, bpNone)
|
||||
pairValues = append(pairValues, 0)
|
||||
case p.IsOpeningBracket():
|
||||
pairTypes = append(pairTypes, bpOpen)
|
||||
pairValues = append(pairValues, r)
|
||||
default:
|
||||
pairTypes = append(pairTypes, bpClose)
|
||||
pairValues = append(pairValues, p.reverseBracket(r))
|
||||
}
|
||||
}
|
||||
par := newParagraph(types, pairTypes, pairValues, parLevel)
|
||||
|
||||
// Test results:
|
||||
if got := par.embeddingLevel; got != wantLevel {
|
||||
t.Errorf("%v:level: got %d; want %d", string(runes), got, wantLevel)
|
||||
}
|
||||
|
||||
if *testLevels {
|
||||
gotLevels := getLevelStrings(types, par.getLevels([]int{len(types)}))
|
||||
if got, want := fmt.Sprint(gotLevels), fmt.Sprint(wantLevels); got != want {
|
||||
t.Errorf("%04X %q:%d: got %v; want %v\nval: %x\npair: %v", runes, string(runes), parLevel, got, want, pairValues, pairTypes)
|
||||
}
|
||||
}
|
||||
|
||||
order := par.getReordering([]int{len(types)})
|
||||
order = filterOrder(types, order)
|
||||
if got, want := fmt.Sprint(order), fmt.Sprint(wantVisualOrder); got != want {
|
||||
t.Errorf("%04X %q:%d: got %v; want %v\ngot order: %s", runes, string(runes), parLevel, got, want, reorder(runes, order))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func getLevelStrings(cl []Class, levels []level) []string {
|
||||
var results []string
|
||||
for i, l := range levels {
|
||||
if !removeClasses[cl[i]] {
|
||||
results = append(results, fmt.Sprint(l))
|
||||
} else {
|
||||
results = append(results, "x")
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func filterOrder(cl []Class, order []int) []int {
|
||||
no := []int{}
|
||||
for _, o := range order {
|
||||
if !removeClasses[cl[o]] {
|
||||
no = append(no, o)
|
||||
}
|
||||
}
|
||||
return no
|
||||
}
|
||||
|
||||
func reorder(r []rune, order []int) string {
|
||||
nr := make([]rune, len(order))
|
||||
for i, o := range order {
|
||||
nr[i] = r[o]
|
||||
}
|
||||
return string(nr)
|
||||
}
|
||||
|
||||
// bidiClass names and codes taken from class "bc" in
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
|
||||
var bidiClass = map[string]Class{
|
||||
"AL": AL, // classArabicLetter,
|
||||
"AN": AN, // classArabicNumber,
|
||||
"B": B, // classParagraphSeparator,
|
||||
"BN": BN, // classBoundaryNeutral,
|
||||
"CS": CS, // classCommonSeparator,
|
||||
"EN": EN, // classEuropeanNumber,
|
||||
"ES": ES, // classEuropeanSeparator,
|
||||
"ET": ET, // classEuropeanTerminator,
|
||||
"L": L, // classLeftToRight,
|
||||
"NSM": NSM, // classNonspacingMark,
|
||||
"ON": ON, // classOtherNeutral,
|
||||
"R": R, // classRightToLeft,
|
||||
"S": S, // classSegmentSeparator,
|
||||
"WS": WS, // classWhiteSpace,
|
||||
|
||||
"LRO": LRO, // classLeftToRightOverride,
|
||||
"RLO": RLO, // classRightToLeftOverride,
|
||||
"LRE": LRE, // classLeftToRightEmbedding,
|
||||
"RLE": RLE, // classRightToLeftEmbedding,
|
||||
"PDF": PDF, // classPopDirectionalFormat,
|
||||
"LRI": LRI, // classLeftToRightIsolate,
|
||||
"RLI": RLI, // classRightToLeftIsolate,
|
||||
"FSI": FSI, // classFirstStrongIsolate,
|
||||
"PDI": PDI, // classPopDirectionalIsolate,
|
||||
}
|
133
vendor/golang.org/x/text/unicode/bidi/gen.go
generated
vendored
Normal file
133
vendor/golang.org/x/text/unicode/bidi/gen.go
generated
vendored
Normal file
|
@ -0,0 +1,133 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var outputFile = flag.String("out", "tables.go", "output file")
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
|
||||
gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
|
||||
|
||||
genTables()
|
||||
}
|
||||
|
||||
// bidiClass names and codes taken from class "bc" in
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
|
||||
var bidiClass = map[string]Class{
|
||||
"AL": AL, // ArabicLetter
|
||||
"AN": AN, // ArabicNumber
|
||||
"B": B, // ParagraphSeparator
|
||||
"BN": BN, // BoundaryNeutral
|
||||
"CS": CS, // CommonSeparator
|
||||
"EN": EN, // EuropeanNumber
|
||||
"ES": ES, // EuropeanSeparator
|
||||
"ET": ET, // EuropeanTerminator
|
||||
"L": L, // LeftToRight
|
||||
"NSM": NSM, // NonspacingMark
|
||||
"ON": ON, // OtherNeutral
|
||||
"R": R, // RightToLeft
|
||||
"S": S, // SegmentSeparator
|
||||
"WS": WS, // WhiteSpace
|
||||
|
||||
"FSI": Control,
|
||||
"PDF": Control,
|
||||
"PDI": Control,
|
||||
"LRE": Control,
|
||||
"LRI": Control,
|
||||
"LRO": Control,
|
||||
"RLE": Control,
|
||||
"RLI": Control,
|
||||
"RLO": Control,
|
||||
}
|
||||
|
||||
func genTables() {
|
||||
if numClass > 0x0F {
|
||||
log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
|
||||
}
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteVersionedGoFile(*outputFile, "bidi")
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
t := triegen.NewTrie("bidi")
|
||||
|
||||
// Build data about bracket mapping. These bits need to be or-ed with
|
||||
// any other bits.
|
||||
orMask := map[rune]uint64{}
|
||||
|
||||
xorMap := map[rune]int{}
|
||||
xorMasks := []rune{0} // First value is no-op.
|
||||
|
||||
ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
|
||||
r1 := p.Rune(0)
|
||||
r2 := p.Rune(1)
|
||||
xor := r1 ^ r2
|
||||
if _, ok := xorMap[xor]; !ok {
|
||||
xorMap[xor] = len(xorMasks)
|
||||
xorMasks = append(xorMasks, xor)
|
||||
}
|
||||
entry := uint64(xorMap[xor]) << xorMaskShift
|
||||
switch p.String(2) {
|
||||
case "o":
|
||||
entry |= openMask
|
||||
case "c", "n":
|
||||
default:
|
||||
log.Fatalf("Unknown bracket class %q.", p.String(2))
|
||||
}
|
||||
orMask[r1] = entry
|
||||
})
|
||||
|
||||
w.WriteComment(`
|
||||
xorMasks contains masks to be xor-ed with brackets to get the reverse
|
||||
version.`)
|
||||
w.WriteVar("xorMasks", xorMasks)
|
||||
|
||||
done := map[rune]bool{}
|
||||
|
||||
insert := func(r rune, c Class) {
|
||||
if !done[r] {
|
||||
t.Insert(r, orMask[r]|uint64(c))
|
||||
done[r] = true
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the derived BiDi properties.
|
||||
ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
class, ok := bidiClass[p.String(1)]
|
||||
if !ok {
|
||||
log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
|
||||
}
|
||||
insert(r, class)
|
||||
})
|
||||
visitDefaults(insert)
|
||||
|
||||
// TODO: use sparse blocks. This would reduce table size considerably
|
||||
// from the looks of it.
|
||||
|
||||
sz, err := t.Gen(w)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
w.Size += sz
|
||||
}
|
||||
|
||||
// dummy values to make methods in gen_common compile. The real versions
|
||||
// will be generated by this file to tables.go.
|
||||
var (
|
||||
xorMasks []rune
|
||||
)
|
57
vendor/golang.org/x/text/unicode/bidi/gen_ranges.go
generated
vendored
Normal file
57
vendor/golang.org/x/text/unicode/bidi/gen_ranges.go
generated
vendored
Normal file
|
@ -0,0 +1,57 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
// These tables are hand-extracted from:
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
|
||||
func visitDefaults(fn func(r rune, c Class)) {
|
||||
// first write default values for ranges listed above.
|
||||
visitRunes(fn, AL, []rune{
|
||||
0x0600, 0x07BF, // Arabic
|
||||
0x08A0, 0x08FF, // Arabic Extended-A
|
||||
0xFB50, 0xFDCF, // Arabic Presentation Forms
|
||||
0xFDF0, 0xFDFF,
|
||||
0xFE70, 0xFEFF,
|
||||
0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
|
||||
})
|
||||
visitRunes(fn, R, []rune{
|
||||
0x0590, 0x05FF, // Hebrew
|
||||
0x07C0, 0x089F, // Nko et al.
|
||||
0xFB1D, 0xFB4F,
|
||||
0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
|
||||
0x0001E800, 0x0001EDFF,
|
||||
0x0001EF00, 0x0001EFFF,
|
||||
})
|
||||
visitRunes(fn, ET, []rune{ // European Terminator
|
||||
0x20A0, 0x20Cf, // Currency symbols
|
||||
})
|
||||
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
|
||||
fn(r, BN) // Boundary Neutral
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
|
||||
if p.String(1) == "Default_Ignorable_Code_Point" {
|
||||
fn(p.Rune(0), BN) // Boundary Neutral
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
|
||||
for i := 0; i < len(runes); i += 2 {
|
||||
lo, hi := runes[i], runes[i+1]
|
||||
for j := lo; j <= hi; j++ {
|
||||
fn(j, c)
|
||||
}
|
||||
}
|
||||
}
|
64
vendor/golang.org/x/text/unicode/bidi/gen_trieval.go
generated
vendored
Normal file
64
vendor/golang.org/x/text/unicode/bidi/gen_trieval.go
generated
vendored
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// Class is the Unicode BiDi class. Each rune has a single class.
|
||||
type Class uint
|
||||
|
||||
const (
|
||||
L Class = iota // LeftToRight
|
||||
R // RightToLeft
|
||||
EN // EuropeanNumber
|
||||
ES // EuropeanSeparator
|
||||
ET // EuropeanTerminator
|
||||
AN // ArabicNumber
|
||||
CS // CommonSeparator
|
||||
B // ParagraphSeparator
|
||||
S // SegmentSeparator
|
||||
WS // WhiteSpace
|
||||
ON // OtherNeutral
|
||||
BN // BoundaryNeutral
|
||||
NSM // NonspacingMark
|
||||
AL // ArabicLetter
|
||||
Control // Control LRO - PDI
|
||||
|
||||
numClass
|
||||
|
||||
LRO // LeftToRightOverride
|
||||
RLO // RightToLeftOverride
|
||||
LRE // LeftToRightEmbedding
|
||||
RLE // RightToLeftEmbedding
|
||||
PDF // PopDirectionalFormat
|
||||
LRI // LeftToRightIsolate
|
||||
RLI // RightToLeftIsolate
|
||||
FSI // FirstStrongIsolate
|
||||
PDI // PopDirectionalIsolate
|
||||
|
||||
unknownClass = ^Class(0)
|
||||
)
|
||||
|
||||
var controlToClass = map[rune]Class{
|
||||
0x202D: LRO, // LeftToRightOverride,
|
||||
0x202E: RLO, // RightToLeftOverride,
|
||||
0x202A: LRE, // LeftToRightEmbedding,
|
||||
0x202B: RLE, // RightToLeftEmbedding,
|
||||
0x202C: PDF, // PopDirectionalFormat,
|
||||
0x2066: LRI, // LeftToRightIsolate,
|
||||
0x2067: RLI, // RightToLeftIsolate,
|
||||
0x2068: FSI, // FirstStrongIsolate,
|
||||
0x2069: PDI, // PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// A trie entry has the following bits:
|
||||
// 7..5 XOR mask for brackets
|
||||
// 4 1: Bracket open, 0: Bracket close
|
||||
// 3..0 Class type
|
||||
|
||||
const (
|
||||
openMask = 0x10
|
||||
xorMaskShift = 5
|
||||
)
|
206
vendor/golang.org/x/text/unicode/bidi/prop.go
generated
vendored
Normal file
206
vendor/golang.org/x/text/unicode/bidi/prop.go
generated
vendored
Normal file
|
@ -0,0 +1,206 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// Properties provides access to BiDi properties of runes.
|
||||
type Properties struct {
|
||||
entry uint8
|
||||
last uint8
|
||||
}
|
||||
|
||||
var trie = newBidiTrie(0)
|
||||
|
||||
// TODO: using this for bidirule reduces the running time by about 5%. Consider
|
||||
// if this is worth exposing or if we can find a way to speed up the Class
|
||||
// method.
|
||||
//
|
||||
// // CompactClass is like Class, but maps all of the BiDi control classes
|
||||
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
|
||||
// func (p Properties) CompactClass() Class {
|
||||
// return Class(p.entry & 0x0F)
|
||||
// }
|
||||
|
||||
// Class returns the Bidi class for p.
|
||||
func (p Properties) Class() Class {
|
||||
c := Class(p.entry & 0x0F)
|
||||
if c == Control {
|
||||
c = controlByteToClass[p.last&0xF]
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// IsBracket reports whether the rune is a bracket.
|
||||
func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
|
||||
|
||||
// IsOpeningBracket reports whether the rune is an opening bracket.
|
||||
// IsBracket must return true.
|
||||
func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
|
||||
|
||||
// TODO: find a better API and expose.
|
||||
func (p Properties) reverseBracket(r rune) rune {
|
||||
return xorMasks[p.entry>>xorMaskShift] ^ r
|
||||
}
|
||||
|
||||
var controlByteToClass = [16]Class{
|
||||
0xD: LRO, // U+202D LeftToRightOverride,
|
||||
0xE: RLO, // U+202E RightToLeftOverride,
|
||||
0xA: LRE, // U+202A LeftToRightEmbedding,
|
||||
0xB: RLE, // U+202B RightToLeftEmbedding,
|
||||
0xC: PDF, // U+202C PopDirectionalFormat,
|
||||
0x6: LRI, // U+2066 LeftToRightIsolate,
|
||||
0x7: RLI, // U+2067 RightToLeftIsolate,
|
||||
0x8: FSI, // U+2068 FirstStrongIsolate,
|
||||
0x9: PDI, // U+2069 PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// LookupRune returns properties for r.
|
||||
func LookupRune(r rune) (p Properties, size int) {
|
||||
var buf [4]byte
|
||||
n := utf8.EncodeRune(buf[:], r)
|
||||
return Lookup(buf[:n])
|
||||
}
|
||||
|
||||
// TODO: these lookup methods are based on the generated trie code. The returned
|
||||
// sizes have slightly different semantics from the generated code, in that it
|
||||
// always returns size==1 for an illegal UTF-8 byte (instead of the length
|
||||
// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
|
||||
// leave invalid UTF-8 untouched, in which case it has performance benefits to
|
||||
// do so (without changing the semantics). Bidi requires the semantics used here
|
||||
// for the bidirule implementation to be compatible with the Go semantics.
|
||||
// They ultimately should perhaps be adopted by all trie implementations, for
|
||||
// convenience sake.
|
||||
// This unrolled code also boosts performance of the secure/bidirule package by
|
||||
// about 30%.
|
||||
// So, to remove this code:
|
||||
// - add option to trie generator to define return type.
|
||||
// - always return 1 byte size for ill-formed UTF-8 runes.
|
||||
|
||||
// Lookup returns properties for the first rune in s and the width in bytes of
|
||||
// its encoding. The size will be 0 if s does not hold enough bytes to complete
|
||||
// the encoding.
|
||||
func Lookup(s []byte) (p Properties, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return Properties{entry: bidiValues[c0]}, 1
|
||||
case c0 < 0xC2:
|
||||
return Properties{}, 1
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = bidiIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
|
||||
}
|
||||
// Illegal rune
|
||||
return Properties{}, 1
|
||||
}
|
||||
|
||||
// LookupString returns properties for the first rune in s and the width in
|
||||
// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
|
||||
// complete the encoding.
|
||||
func LookupString(s string) (p Properties, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return Properties{entry: bidiValues[c0]}, 1
|
||||
case c0 < 0xC2:
|
||||
return Properties{}, 1
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = bidiIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
|
||||
}
|
||||
// Illegal rune
|
||||
return Properties{}, 1
|
||||
}
|
53
vendor/golang.org/x/text/unicode/bidi/ranges_test.go
generated
vendored
Normal file
53
vendor/golang.org/x/text/unicode/bidi/ranges_test.go
generated
vendored
Normal file
|
@ -0,0 +1,53 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
// These tables are hand-extracted from:
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
|
||||
func visitDefaults(fn func(r rune, c Class)) {
|
||||
// first write default values for ranges listed above.
|
||||
visitRunes(fn, AL, []rune{
|
||||
0x0600, 0x07BF, // Arabic
|
||||
0x08A0, 0x08FF, // Arabic Extended-A
|
||||
0xFB50, 0xFDCF, // Arabic Presentation Forms
|
||||
0xFDF0, 0xFDFF,
|
||||
0xFE70, 0xFEFF,
|
||||
0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
|
||||
})
|
||||
visitRunes(fn, R, []rune{
|
||||
0x0590, 0x05FF, // Hebrew
|
||||
0x07C0, 0x089F, // Nko et al.
|
||||
0xFB1D, 0xFB4F,
|
||||
0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
|
||||
0x0001E800, 0x0001EDFF,
|
||||
0x0001EF00, 0x0001EFFF,
|
||||
})
|
||||
visitRunes(fn, ET, []rune{ // European Terminator
|
||||
0x20A0, 0x20Cf, // Currency symbols
|
||||
})
|
||||
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
|
||||
fn(r, BN) // Boundary Neutral
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
|
||||
if p.String(1) == "Default_Ignorable_Code_Point" {
|
||||
fn(p.Rune(0), BN) // Boundary Neutral
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
|
||||
for i := 0; i < len(runes); i += 2 {
|
||||
lo, hi := runes[i], runes[i+1]
|
||||
for j := lo; j <= hi; j++ {
|
||||
fn(j, c)
|
||||
}
|
||||
}
|
||||
}
|
1815
vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go
generated
vendored
Normal file
1815
vendor/golang.org/x/text/unicode/bidi/tables10.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
1781
vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go
generated
vendored
Normal file
1781
vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
82
vendor/golang.org/x/text/unicode/bidi/tables_test.go
generated
vendored
Normal file
82
vendor/golang.org/x/text/unicode/bidi/tables_test.go
generated
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/testtext"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var labels = []string{
|
||||
AL: "AL",
|
||||
AN: "AN",
|
||||
B: "B",
|
||||
BN: "BN",
|
||||
CS: "CS",
|
||||
EN: "EN",
|
||||
ES: "ES",
|
||||
ET: "ET",
|
||||
L: "L",
|
||||
NSM: "NSM",
|
||||
ON: "ON",
|
||||
R: "R",
|
||||
S: "S",
|
||||
WS: "WS",
|
||||
|
||||
LRO: "LRO",
|
||||
RLO: "RLO",
|
||||
LRE: "LRE",
|
||||
RLE: "RLE",
|
||||
PDF: "PDF",
|
||||
LRI: "LRI",
|
||||
RLI: "RLI",
|
||||
FSI: "FSI",
|
||||
PDI: "PDI",
|
||||
}
|
||||
|
||||
func TestTables(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
|
||||
r1 := p.Rune(0)
|
||||
want := p.Rune(1)
|
||||
|
||||
e, _ := LookupRune(r1)
|
||||
if got := e.reverseBracket(r1); got != want {
|
||||
t.Errorf("Reverse(%U) = %U; want %U", r1, got, want)
|
||||
}
|
||||
})
|
||||
|
||||
done := map[rune]bool{}
|
||||
test := func(name string, r rune, want string) {
|
||||
str := string(r)
|
||||
e, _ := LookupString(str)
|
||||
if got := labels[e.Class()]; got != want {
|
||||
t.Errorf("%s:%U: got %s; want %s", name, r, got, want)
|
||||
}
|
||||
if e2, sz := LookupRune(r); e != e2 || sz != len(str) {
|
||||
t.Errorf("LookupRune(%U) = %v, %d; want %v, %d", r, e2, e, sz, len(str))
|
||||
}
|
||||
if e2, sz := Lookup([]byte(str)); e != e2 || sz != len(str) {
|
||||
t.Errorf("Lookup(%U) = %v, %d; want %v, %d", r, e2, e, sz, len(str))
|
||||
}
|
||||
done[r] = true
|
||||
}
|
||||
|
||||
// Insert the derived BiDi properties.
|
||||
ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
test("derived", r, p.String(1))
|
||||
})
|
||||
visitDefaults(func(r rune, c Class) {
|
||||
if !done[r] {
|
||||
test("default", r, labels[c])
|
||||
}
|
||||
})
|
||||
|
||||
}
|
60
vendor/golang.org/x/text/unicode/bidi/trieval.go
generated
vendored
Normal file
60
vendor/golang.org/x/text/unicode/bidi/trieval.go
generated
vendored
Normal file
|
@ -0,0 +1,60 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package bidi
|
||||
|
||||
// Class is the Unicode BiDi class. Each rune has a single class.
|
||||
type Class uint
|
||||
|
||||
const (
|
||||
L Class = iota // LeftToRight
|
||||
R // RightToLeft
|
||||
EN // EuropeanNumber
|
||||
ES // EuropeanSeparator
|
||||
ET // EuropeanTerminator
|
||||
AN // ArabicNumber
|
||||
CS // CommonSeparator
|
||||
B // ParagraphSeparator
|
||||
S // SegmentSeparator
|
||||
WS // WhiteSpace
|
||||
ON // OtherNeutral
|
||||
BN // BoundaryNeutral
|
||||
NSM // NonspacingMark
|
||||
AL // ArabicLetter
|
||||
Control // Control LRO - PDI
|
||||
|
||||
numClass
|
||||
|
||||
LRO // LeftToRightOverride
|
||||
RLO // RightToLeftOverride
|
||||
LRE // LeftToRightEmbedding
|
||||
RLE // RightToLeftEmbedding
|
||||
PDF // PopDirectionalFormat
|
||||
LRI // LeftToRightIsolate
|
||||
RLI // RightToLeftIsolate
|
||||
FSI // FirstStrongIsolate
|
||||
PDI // PopDirectionalIsolate
|
||||
|
||||
unknownClass = ^Class(0)
|
||||
)
|
||||
|
||||
var controlToClass = map[rune]Class{
|
||||
0x202D: LRO, // LeftToRightOverride,
|
||||
0x202E: RLO, // RightToLeftOverride,
|
||||
0x202A: LRE, // LeftToRightEmbedding,
|
||||
0x202B: RLE, // RightToLeftEmbedding,
|
||||
0x202C: PDF, // PopDirectionalFormat,
|
||||
0x2066: LRI, // LeftToRightIsolate,
|
||||
0x2067: RLI, // RightToLeftIsolate,
|
||||
0x2068: FSI, // FirstStrongIsolate,
|
||||
0x2069: PDI, // PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// A trie entry has the following bits:
|
||||
// 7..5 XOR mask for brackets
|
||||
// 4 1: Bracket open, 0: Bracket close
|
||||
// 3..0 Class type
|
||||
|
||||
const (
|
||||
openMask = 0x10
|
||||
xorMaskShift = 5
|
||||
)
|
Loading…
Add table
Add a link
Reference in a new issue