go prop 源码

  • 2022-07-15
  • 浏览 (1063)

golang prop 代码

文件路径:/src/vendor/golang.org/x/text/unicode/bidi/prop.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package bidi

import "unicode/utf8"

// Properties provides access to BiDi properties of runes.
type Properties struct {
	entry uint8
	last  uint8
}

var trie = newBidiTrie(0)

// TODO: using this for bidirule reduces the running time by about 5%. Consider
// if this is worth exposing or if we can find a way to speed up the Class
// method.
//
// // CompactClass is like Class, but maps all of the BiDi control classes
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
// func (p Properties) CompactClass() Class {
// 	return Class(p.entry & 0x0F)
// }

// Class returns the Bidi class for p.
func (p Properties) Class() Class {
	c := Class(p.entry & 0x0F)
	if c == Control {
		c = controlByteToClass[p.last&0xF]
	}
	return c
}

// IsBracket reports whether the rune is a bracket.
func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }

// IsOpeningBracket reports whether the rune is an opening bracket.
// IsBracket must return true.
func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }

// TODO: find a better API and expose.
func (p Properties) reverseBracket(r rune) rune {
	return xorMasks[p.entry>>xorMaskShift] ^ r
}

var controlByteToClass = [16]Class{
	0xD: LRO, // U+202D LeftToRightOverride,
	0xE: RLO, // U+202E RightToLeftOverride,
	0xA: LRE, // U+202A LeftToRightEmbedding,
	0xB: RLE, // U+202B RightToLeftEmbedding,
	0xC: PDF, // U+202C PopDirectionalFormat,
	0x6: LRI, // U+2066 LeftToRightIsolate,
	0x7: RLI, // U+2067 RightToLeftIsolate,
	0x8: FSI, // U+2068 FirstStrongIsolate,
	0x9: PDI, // U+2069 PopDirectionalIsolate,
}

// LookupRune returns properties for r.
func LookupRune(r rune) (p Properties, size int) {
	var buf [4]byte
	n := utf8.EncodeRune(buf[:], r)
	return Lookup(buf[:n])
}

// TODO: these lookup methods are based on the generated trie code. The returned
// sizes have slightly different semantics from the generated code, in that it
// always returns size==1 for an illegal UTF-8 byte (instead of the length
// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
// leave invalid UTF-8 untouched, in which case it has performance benefits to
// do so (without changing the semantics). Bidi requires the semantics used here
// for the bidirule implementation to be compatible with the Go semantics.
//  They ultimately should perhaps be adopted by all trie implementations, for
// convenience sake.
// This unrolled code also boosts performance of the secure/bidirule package by
// about 30%.
// So, to remove this code:
//   - add option to trie generator to define return type.
//   - always return 1 byte size for ill-formed UTF-8 runes.

// Lookup returns properties for the first rune in s and the width in bytes of
// its encoding. The size will be 0 if s does not hold enough bytes to complete
// the encoding.
func Lookup(s []byte) (p Properties, sz int) {
	c0 := s[0]
	switch {
	case c0 < 0x80: // is ASCII
		return Properties{entry: bidiValues[c0]}, 1
	case c0 < 0xC2:
		return Properties{}, 1
	case c0 < 0xE0: // 2-byte UTF-8
		if len(s) < 2 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
	case c0 < 0xF0: // 3-byte UTF-8
		if len(s) < 3 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		o := uint32(i)<<6 + uint32(c1)
		i = bidiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
	case c0 < 0xF8: // 4-byte UTF-8
		if len(s) < 4 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		o := uint32(i)<<6 + uint32(c1)
		i = bidiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return Properties{}, 1
		}
		o = uint32(i)<<6 + uint32(c2)
		i = bidiIndex[o]
		c3 := s[3]
		if c3 < 0x80 || 0xC0 <= c3 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
	}
	// Illegal rune
	return Properties{}, 1
}

// LookupString returns properties for the first rune in s and the width in
// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
// complete the encoding.
func LookupString(s string) (p Properties, sz int) {
	c0 := s[0]
	switch {
	case c0 < 0x80: // is ASCII
		return Properties{entry: bidiValues[c0]}, 1
	case c0 < 0xC2:
		return Properties{}, 1
	case c0 < 0xE0: // 2-byte UTF-8
		if len(s) < 2 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
	case c0 < 0xF0: // 3-byte UTF-8
		if len(s) < 3 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		o := uint32(i)<<6 + uint32(c1)
		i = bidiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
	case c0 < 0xF8: // 4-byte UTF-8
		if len(s) < 4 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		o := uint32(i)<<6 + uint32(c1)
		i = bidiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return Properties{}, 1
		}
		o = uint32(i)<<6 + uint32(c2)
		i = bidiIndex[o]
		c3 := s[3]
		if c3 < 0x80 || 0xC0 <= c3 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
	}
	// Illegal rune
	return Properties{}, 1
}

相关信息

go 源码目录

相关文章

go bidi 源码

go bracket 源码

go core 源码

go tables10.0.0 源码

go tables11.0.0 源码

go tables12.0.0 源码

go tables13.0.0 源码

go tables9.0.0 源码

go trieval 源码

0  赞