spring Tokenizer 源码

  • 2022-08-08
  • 浏览 (275)

spring Tokenizer 代码

文件路径:/spring-expression/src/main/java/org/springframework/expression/spel/standard/Tokenizer.java

/*
 * Copyright 2002-2021 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.springframework.expression.spel.standard;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.springframework.expression.spel.InternalParseException;
import org.springframework.expression.spel.SpelMessage;
import org.springframework.expression.spel.SpelParseException;

/**
 * Lex some input data into a stream of tokens that can then be parsed.
 *
 * @author Andy Clement
 * @author Juergen Hoeller
 * @author Phillip Webb
 * @since 3.0
 */
class Tokenizer {

	// If this gets changed, it must remain sorted...
	private static final String[] ALTERNATIVE_OPERATOR_NAMES =
			{"DIV", "EQ", "GE", "GT", "LE", "LT", "MOD", "NE", "NOT"};

	private static final byte[] FLAGS = new byte[256];

	private static final byte IS_DIGIT = 0x01;

	private static final byte IS_HEXDIGIT = 0x02;

	private static final byte IS_ALPHA = 0x04;

	static {
		for (int ch = '0'; ch <= '9'; ch++) {
			FLAGS[ch] |= IS_DIGIT | IS_HEXDIGIT;
		}
		for (int ch = 'A'; ch <= 'F'; ch++) {
			FLAGS[ch] |= IS_HEXDIGIT;
		}
		for (int ch = 'a'; ch <= 'f'; ch++) {
			FLAGS[ch] |= IS_HEXDIGIT;
		}
		for (int ch = 'A'; ch <= 'Z'; ch++) {
			FLAGS[ch] |= IS_ALPHA;
		}
		for (int ch = 'a'; ch <= 'z'; ch++) {
			FLAGS[ch] |= IS_ALPHA;
		}
	}


	private final String expressionString;

	private final char[] charsToProcess;

	private int pos;

	private final int max;

	private final List<Token> tokens = new ArrayList<>();


	public Tokenizer(String inputData) {
		this.expressionString = inputData;
		this.charsToProcess = (inputData + "\0").toCharArray();
		this.max = this.charsToProcess.length;
		this.pos = 0;
	}


	public List<Token> process() {
		while (this.pos < this.max) {
			char ch = this.charsToProcess[this.pos];
			if (isAlphabetic(ch)) {
				lexIdentifier();
			}
			else {
				switch (ch) {
					case '+':
						if (isTwoCharToken(TokenKind.INC)) {
							pushPairToken(TokenKind.INC);
						}
						else {
							pushCharToken(TokenKind.PLUS);
						}
						break;
					case '_': // the other way to start an identifier
						lexIdentifier();
						break;
					case '-':
						if (isTwoCharToken(TokenKind.DEC)) {
							pushPairToken(TokenKind.DEC);
						}
						else {
							pushCharToken(TokenKind.MINUS);
						}
						break;
					case ':':
						pushCharToken(TokenKind.COLON);
						break;
					case '.':
						pushCharToken(TokenKind.DOT);
						break;
					case ',':
						pushCharToken(TokenKind.COMMA);
						break;
					case '*':
						pushCharToken(TokenKind.STAR);
						break;
					case '/':
						pushCharToken(TokenKind.DIV);
						break;
					case '%':
						pushCharToken(TokenKind.MOD);
						break;
					case '(':
						pushCharToken(TokenKind.LPAREN);
						break;
					case ')':
						pushCharToken(TokenKind.RPAREN);
						break;
					case '[':
						pushCharToken(TokenKind.LSQUARE);
						break;
					case '#':
						pushCharToken(TokenKind.HASH);
						break;
					case ']':
						pushCharToken(TokenKind.RSQUARE);
						break;
					case '{':
						pushCharToken(TokenKind.LCURLY);
						break;
					case '}':
						pushCharToken(TokenKind.RCURLY);
						break;
					case '@':
						pushCharToken(TokenKind.BEAN_REF);
						break;
					case '^':
						if (isTwoCharToken(TokenKind.SELECT_FIRST)) {
							pushPairToken(TokenKind.SELECT_FIRST);
						}
						else {
							pushCharToken(TokenKind.POWER);
						}
						break;
					case '!':
						if (isTwoCharToken(TokenKind.NE)) {
							pushPairToken(TokenKind.NE);
						}
						else if (isTwoCharToken(TokenKind.PROJECT)) {
							pushPairToken(TokenKind.PROJECT);
						}
						else {
							pushCharToken(TokenKind.NOT);
						}
						break;
					case '=':
						if (isTwoCharToken(TokenKind.EQ)) {
							pushPairToken(TokenKind.EQ);
						}
						else {
							pushCharToken(TokenKind.ASSIGN);
						}
						break;
					case '&':
						if (isTwoCharToken(TokenKind.SYMBOLIC_AND)) {
							pushPairToken(TokenKind.SYMBOLIC_AND);
						}
						else {
							pushCharToken(TokenKind.FACTORY_BEAN_REF);
						}
						break;
					case '|':
						if (!isTwoCharToken(TokenKind.SYMBOLIC_OR)) {
							raiseParseException(this.pos, SpelMessage.MISSING_CHARACTER, "|");
						}
						pushPairToken(TokenKind.SYMBOLIC_OR);
						break;
					case '?':
						if (isTwoCharToken(TokenKind.SELECT)) {
							pushPairToken(TokenKind.SELECT);
						}
						else if (isTwoCharToken(TokenKind.ELVIS)) {
							pushPairToken(TokenKind.ELVIS);
						}
						else if (isTwoCharToken(TokenKind.SAFE_NAVI)) {
							pushPairToken(TokenKind.SAFE_NAVI);
						}
						else {
							pushCharToken(TokenKind.QMARK);
						}
						break;
					case '$':
						if (isTwoCharToken(TokenKind.SELECT_LAST)) {
							pushPairToken(TokenKind.SELECT_LAST);
						}
						else {
							lexIdentifier();
						}
						break;
					case '>':
						if (isTwoCharToken(TokenKind.GE)) {
							pushPairToken(TokenKind.GE);
						}
						else {
							pushCharToken(TokenKind.GT);
						}
						break;
					case '<':
						if (isTwoCharToken(TokenKind.LE)) {
							pushPairToken(TokenKind.LE);
						}
						else {
							pushCharToken(TokenKind.LT);
						}
						break;
					case '0':
					case '1':
					case '2':
					case '3':
					case '4':
					case '5':
					case '6':
					case '7':
					case '8':
					case '9':
						lexNumericLiteral(ch == '0');
						break;
					case ' ':
					case '\t':
					case '\r':
					case '\n':
						// drift over white space
						this.pos++;
						break;
					case '\'':
						lexQuotedStringLiteral();
						break;
					case '"':
						lexDoubleQuotedStringLiteral();
						break;
					case 0:
						// hit sentinel at end of value
						this.pos++;  // will take us to the end
						break;
					case '\\':
						raiseParseException(this.pos, SpelMessage.UNEXPECTED_ESCAPE_CHAR);
						break;
					default:
						throw new IllegalStateException("Cannot handle (" + (int) ch + ") '" + ch + "'");
				}
			}
		}
		return this.tokens;
	}


	// STRING_LITERAL: '\''! (APOS|~'\'')* '\''!;
	private void lexQuotedStringLiteral() {
		int start = this.pos;
		boolean terminated = false;
		while (!terminated) {
			this.pos++;
			char ch = this.charsToProcess[this.pos];
			if (ch == '\'') {
				// may not be the end if the char after is also a '
				if (this.charsToProcess[this.pos + 1] == '\'') {
					this.pos++;  // skip over that too, and continue
				}
				else {
					terminated = true;
				}
			}
			if (isExhausted()) {
				raiseParseException(start, SpelMessage.NON_TERMINATING_QUOTED_STRING);
			}
		}
		this.pos++;
		this.tokens.add(new Token(TokenKind.LITERAL_STRING, subarray(start, this.pos), start, this.pos));
	}

	// DQ_STRING_LITERAL: '"'! (~'"')* '"'!;
	private void lexDoubleQuotedStringLiteral() {
		int start = this.pos;
		boolean terminated = false;
		while (!terminated) {
			this.pos++;
			char ch = this.charsToProcess[this.pos];
			if (ch == '"') {
				// may not be the end if the char after is also a "
				if (this.charsToProcess[this.pos + 1] == '"') {
					this.pos++;  // skip over that too, and continue
				}
				else {
					terminated = true;
				}
			}
			if (isExhausted()) {
				raiseParseException(start, SpelMessage.NON_TERMINATING_DOUBLE_QUOTED_STRING);
			}
		}
		this.pos++;
		this.tokens.add(new Token(TokenKind.LITERAL_STRING, subarray(start, this.pos), start, this.pos));
	}

	// REAL_LITERAL :
	// ('.' (DECIMAL_DIGIT)+ (EXPONENT_PART)? (REAL_TYPE_SUFFIX)?) |
	// ((DECIMAL_DIGIT)+ '.' (DECIMAL_DIGIT)+ (EXPONENT_PART)? (REAL_TYPE_SUFFIX)?) |
	// ((DECIMAL_DIGIT)+ (EXPONENT_PART) (REAL_TYPE_SUFFIX)?) |
	// ((DECIMAL_DIGIT)+ (REAL_TYPE_SUFFIX));
	// fragment INTEGER_TYPE_SUFFIX : ( 'L' | 'l' );
	// fragment HEX_DIGIT :
	// '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'|'A'|'B'|'C'|'D'|'E'|'F'|'a'|'b'|'c'|'d'|'e'|'f';
	//
	// fragment EXPONENT_PART : 'e' (SIGN)* (DECIMAL_DIGIT)+ | 'E' (SIGN)*
	// (DECIMAL_DIGIT)+ ;
	// fragment SIGN : '+' | '-' ;
	// fragment REAL_TYPE_SUFFIX : 'F' | 'f' | 'D' | 'd';
	// INTEGER_LITERAL
	// : (DECIMAL_DIGIT)+ (INTEGER_TYPE_SUFFIX)?;

	private void lexNumericLiteral(boolean firstCharIsZero) {
		boolean isReal = false;
		int start = this.pos;
		char ch = this.charsToProcess[this.pos + 1];
		boolean isHex = ch == 'x' || ch == 'X';

		// deal with hexadecimal
		if (firstCharIsZero && isHex) {
			this.pos = this.pos + 1;
			do {
				this.pos++;
			}
			while (isHexadecimalDigit(this.charsToProcess[this.pos]));
			if (isChar('L', 'l')) {
				pushHexIntToken(subarray(start + 2, this.pos), true, start, this.pos);
				this.pos++;
			}
			else {
				pushHexIntToken(subarray(start + 2, this.pos), false, start, this.pos);
			}
			return;
		}

		// real numbers must have leading digits

		// Consume first part of number
		do {
			this.pos++;
		}
		while (isDigit(this.charsToProcess[this.pos]));

		// a '.' indicates this number is a real
		ch = this.charsToProcess[this.pos];
		if (ch == '.') {
			isReal = true;
			int dotpos = this.pos;
			// carry on consuming digits
			do {
				this.pos++;
			}
			while (isDigit(this.charsToProcess[this.pos]));
			if (this.pos == dotpos + 1) {
				// the number is something like '3.'. It is really an int but may be
				// part of something like '3.toString()'. In this case process it as
				// an int and leave the dot as a separate token.
				this.pos = dotpos;
				pushIntToken(subarray(start, this.pos), false, start, this.pos);
				return;
			}
		}

		int endOfNumber = this.pos;

		// Now there may or may not be an exponent

		// Is it a long ?
		if (isChar('L', 'l')) {
			if (isReal) {  // 3.4L - not allowed
				raiseParseException(start, SpelMessage.REAL_CANNOT_BE_LONG);
			}
			pushIntToken(subarray(start, endOfNumber), true, start, endOfNumber);
			this.pos++;
		}
		else if (isExponentChar(this.charsToProcess[this.pos])) {
			isReal = true;  // if it wasn't before, it is now
			this.pos++;
			char possibleSign = this.charsToProcess[this.pos];
			if (isSign(possibleSign)) {
				this.pos++;
			}

			// exponent digits
			do {
				this.pos++;
			}
			while (isDigit(this.charsToProcess[this.pos]));
			boolean isFloat = false;
			if (isFloatSuffix(this.charsToProcess[this.pos])) {
				isFloat = true;
				endOfNumber = ++this.pos;
			}
			else if (isDoubleSuffix(this.charsToProcess[this.pos])) {
				endOfNumber = ++this.pos;
			}
			pushRealToken(subarray(start, this.pos), isFloat, start, this.pos);
		}
		else {
			ch = this.charsToProcess[this.pos];
			boolean isFloat = false;
			if (isFloatSuffix(ch)) {
				isReal = true;
				isFloat = true;
				endOfNumber = ++this.pos;
			}
			else if (isDoubleSuffix(ch)) {
				isReal = true;
				endOfNumber = ++this.pos;
			}
			if (isReal) {
				pushRealToken(subarray(start, endOfNumber), isFloat, start, endOfNumber);
			}
			else {
				pushIntToken(subarray(start, endOfNumber), false, start, endOfNumber);
			}
		}
	}

	private void lexIdentifier() {
		int start = this.pos;
		do {
			this.pos++;
		}
		while (isIdentifier(this.charsToProcess[this.pos]));
		char[] subarray = subarray(start, this.pos);

		// Check if this is the alternative (textual) representation of an operator (see
		// alternativeOperatorNames)
		if ((this.pos - start) == 2 || (this.pos - start) == 3) {
			String asString = new String(subarray).toUpperCase();
			int idx = Arrays.binarySearch(ALTERNATIVE_OPERATOR_NAMES, asString);
			if (idx >= 0) {
				pushOneCharOrTwoCharToken(TokenKind.valueOf(asString), start, subarray);
				return;
			}
		}
		this.tokens.add(new Token(TokenKind.IDENTIFIER, subarray, start, this.pos));
	}

	private void pushIntToken(char[] data, boolean isLong, int start, int end) {
		if (isLong) {
			this.tokens.add(new Token(TokenKind.LITERAL_LONG, data, start, end));
		}
		else {
			this.tokens.add(new Token(TokenKind.LITERAL_INT, data, start, end));
		}
	}

	private void pushHexIntToken(char[] data, boolean isLong, int start, int end) {
		if (data.length == 0) {
			if (isLong) {
				raiseParseException(start, SpelMessage.NOT_A_LONG, this.expressionString.substring(start, end + 1));
			}
			else {
				raiseParseException(start, SpelMessage.NOT_AN_INTEGER, this.expressionString.substring(start, end));
			}
		}
		if (isLong) {
			this.tokens.add(new Token(TokenKind.LITERAL_HEXLONG, data, start, end));
		}
		else {
			this.tokens.add(new Token(TokenKind.LITERAL_HEXINT, data, start, end));
		}
	}

	private void pushRealToken(char[] data, boolean isFloat, int start, int end) {
		if (isFloat) {
			this.tokens.add(new Token(TokenKind.LITERAL_REAL_FLOAT, data, start, end));
		}
		else {
			this.tokens.add(new Token(TokenKind.LITERAL_REAL, data, start, end));
		}
	}

	private char[] subarray(int start, int end) {
		return Arrays.copyOfRange(this.charsToProcess, start, end);
	}

	/**
	 * Check if this might be a two character token.
	 */
	private boolean isTwoCharToken(TokenKind kind) {
		return (kind.tokenChars.length == 2 &&
				this.charsToProcess[this.pos] == kind.tokenChars[0] &&
				this.charsToProcess[this.pos + 1] == kind.tokenChars[1]);
	}

	/**
	 * Push a token of just one character in length.
	 */
	private void pushCharToken(TokenKind kind) {
		this.tokens.add(new Token(kind, this.pos, this.pos + 1));
		this.pos++;
	}

	/**
	 * Push a token of two characters in length.
	 */
	private void pushPairToken(TokenKind kind) {
		this.tokens.add(new Token(kind, this.pos, this.pos + 2));
		this.pos += 2;
	}

	private void pushOneCharOrTwoCharToken(TokenKind kind, int pos, char[] data) {
		this.tokens.add(new Token(kind, data, pos, pos + kind.getLength()));
	}

	// ID: ('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'$'|'0'..'9'|DOT_ESCAPED)*;
	private boolean isIdentifier(char ch) {
		return isAlphabetic(ch) || isDigit(ch) || ch == '_' || ch == '$';
	}

	private boolean isChar(char a, char b) {
		char ch = this.charsToProcess[this.pos];
		return ch == a || ch == b;
	}

	private boolean isExponentChar(char ch) {
		return ch == 'e' || ch == 'E';
	}

	private boolean isFloatSuffix(char ch) {
		return ch == 'f' || ch == 'F';
	}

	private boolean isDoubleSuffix(char ch) {
		return ch == 'd' || ch == 'D';
	}

	private boolean isSign(char ch) {
		return ch == '+' || ch == '-';
	}

	private boolean isDigit(char ch) {
		if (ch > 255) {
			return false;
		}
		return (FLAGS[ch] & IS_DIGIT) != 0;
	}

	private boolean isAlphabetic(char ch) {
		if (ch > 255) {
			return false;
		}
		return (FLAGS[ch] & IS_ALPHA) != 0;
	}

	private boolean isHexadecimalDigit(char ch) {
		if (ch > 255) {
			return false;
		}
		return (FLAGS[ch] & IS_HEXDIGIT) != 0;
	}

	private boolean isExhausted() {
		return (this.pos == this.max - 1);
	}

	private void raiseParseException(int start, SpelMessage msg, Object... inserts) {
		throw new InternalParseException(new SpelParseException(this.expressionString, start, msg, inserts));
	}

}

相关信息

spring 源码目录

相关文章

spring InternalSpelExpressionParser 源码

spring SpelCompiler 源码

spring SpelExpression 源码

spring SpelExpressionParser 源码

spring Token 源码

spring TokenKind 源码

spring package-info 源码

0  赞