From c637ee5ca277c5347690d4ba94ce919a969aaebd Mon Sep 17 00:00:00 2001 From: Kenny F Date: Sat, 27 Jul 2019 21:06:25 +0800 Subject: [PATCH] fix(lexer): improved scanner performance --- src/common.ts | 2 +- src/errors.ts | 16 ++- src/lexer/comments.ts | 22 +-- src/lexer/common.ts | 10 +- src/lexer/identifier.ts | 41 +++--- src/lexer/index.ts | 4 +- src/lexer/jsx.ts | 26 ++-- src/lexer/numeric.ts | 85 ++++++----- src/lexer/regexp.ts | 10 +- src/lexer/scan.ts | 302 ++++++++++++++++++++-------------------- src/lexer/string.ts | 34 ++--- src/lexer/template.ts | 16 +-- src/parser.ts | 34 +++-- 13 files changed, 326 insertions(+), 276 deletions(-) diff --git a/src/common.ts b/src/common.ts index d852dc1d..4a3f9868 100644 --- a/src/common.ts +++ b/src/common.ts @@ -202,7 +202,7 @@ export interface ParserState { sourceFile: string | void; assignable: AssignmentKind | DestructuringKind; destructible: AssignmentKind | DestructuringKind; - nextCP: number; + currentChar: number; exportedNames: any; exportedBindings: any; } diff --git a/src/errors.ts b/src/errors.ts index 5368365e..4fa92627 100644 --- a/src/errors.ts +++ b/src/errors.ts @@ -385,6 +385,7 @@ export function report(parser: ParserState, type: Errors, ...params: string[]): export function reportScopeError(scope: any): never { throw new ParseError(scope.index, scope.line, scope.column, scope.type); } + /** * Throws an error at a given position * @@ -395,8 +396,21 @@ export function reportScopeError(scope: any): never { * @param {number} column * @param {Errors} type * @param {...string[]} params - * @returns {never} */ export function reportMessageAt(index: number, line: number, column: number, type: Errors, ...params: string[]): never { throw new ParseError(index, line, column, type, ...params); } + +/** + * Throws an error at a given position + * + * @export + * @param {ParserState} state + * @param {number} index + * @param {number} line + * @param {number} column + * @param {Errors} type + */ +export function reportScannerError(index: number, line: number, column: number, type: Errors): never { + throw new ParseError(index, line, column, type); +} diff --git a/src/lexer/comments.ts b/src/lexer/comments.ts index fb8fbe01..103dfa93 100644 --- a/src/lexer/comments.ts +++ b/src/lexer/comments.ts @@ -1,4 +1,4 @@ -import { nextCP, CharTypes, CharFlags, LexerState, scanNewLine, consumeLineFeed } from './'; +import { advanceChar, CharTypes, CharFlags, LexerState, scanNewLine, consumeLineFeed } from './'; import { Chars } from '../chars'; import { Context, ParserState } from '../common'; import { report, Errors } from '../errors'; @@ -21,7 +21,7 @@ export const CommentTypes = ['SingleLine', 'MultiLine', 'HTMLOpen', 'HTMLClose', export function skipHashBang(parser: ParserState): void { // HashbangComment :: // #! SingleLineCommentChars_opt - if (parser.nextCP === Chars.Hash && parser.source.charCodeAt(parser.index + 1) === Chars.Exclamation) { + if (parser.currentChar === Chars.Hash && parser.source.charCodeAt(parser.index + 1) === Chars.Exclamation) { skipSingleLineComment(parser, LexerState.None, CommentType.HashBang); } } @@ -45,12 +45,12 @@ export function skipSingleHTMLComment( export function skipSingleLineComment(parser: ParserState, state: LexerState, type: CommentType): LexerState { const { index } = parser; while (parser.index < parser.end) { - if (CharTypes[parser.nextCP] & CharFlags.LineTerminator || (parser.nextCP ^ Chars.LineSeparator) <= 1) { + if (CharTypes[parser.currentChar] & CharFlags.LineTerminator || (parser.currentChar ^ Chars.LineSeparator) <= 1) { state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR; scanNewLine(parser); return state; } - nextCP(parser); + advanceChar(parser); } if (parser.onComment) parser.onComment(CommentTypes[type & 0xff], parser.source.slice(index, parser.index), parser, parser.index); @@ -66,9 +66,9 @@ export function skipSingleLineComment(parser: ParserState, state: LexerState, ty export function skipMultiLineComment(parser: ParserState, state: LexerState): LexerState | void { const { index } = parser; while (parser.index < parser.end) { - while (parser.nextCP === Chars.Asterisk) { - if (nextCP(parser) === Chars.Slash) { - nextCP(parser); + while (parser.currentChar === Chars.Asterisk) { + if (advanceChar(parser) === Chars.Slash) { + advanceChar(parser); if (parser.onComment) parser.onComment( CommentTypes[CommentType.Multi & 0xff], @@ -80,17 +80,17 @@ export function skipMultiLineComment(parser: ParserState, state: LexerState): Le } } - if (parser.nextCP === Chars.CarriageReturn) { + if (parser.currentChar === Chars.CarriageReturn) { state |= LexerState.NewLine | LexerState.LastIsCR; scanNewLine(parser); - } else if (parser.nextCP === Chars.LineFeed) { + } else if (parser.currentChar === Chars.LineFeed) { consumeLineFeed(parser, (state & LexerState.LastIsCR) !== 0); state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR; - } else if ((parser.nextCP ^ Chars.LineSeparator) <= 1) { + } else if ((parser.currentChar ^ Chars.LineSeparator) <= 1) { state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR; scanNewLine(parser); } else { - nextCP(parser); + advanceChar(parser); } } diff --git a/src/lexer/common.ts b/src/lexer/common.ts index 52e9c57d..f10a03da 100644 --- a/src/lexer/common.ts +++ b/src/lexer/common.ts @@ -26,9 +26,9 @@ export const enum NumberKind { * * @param parser The parser instance */ -export function nextCP(parser: ParserState): number { +export function advanceChar(parser: ParserState): number { parser.column++; - return (parser.nextCP = parser.source.charCodeAt(++parser.index)); + return (parser.currentChar = parser.source.charCodeAt(++parser.index)); } /** @@ -42,7 +42,7 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): 0 | if ((hi & 0xfc00) !== Chars.LeadSurrogateMin) return 0; const lo = parser.source.charCodeAt(parser.index + 1); if ((lo & 0xfc00) !== 0xdc00) return 0; - hi = parser.nextCP = Chars.NonBMPMin + ((hi & 0x3ff) << 10) + (lo & 0x3ff); + hi = parser.currentChar = Chars.NonBMPMin + ((hi & 0x3ff) << 10) + (lo & 0x3ff); if (((unicodeLookup[(hi >>> 5) + 0] >>> hi) & 31 & 1) === 0) { report(parser, Errors.IllegalCaracter, fromCodePoint(hi)); } @@ -55,7 +55,7 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): 0 | * Use to consume a line feed instead of `scanNewLine`. */ export function consumeLineFeed(parser: ParserState, lastIsCR: boolean): void { - parser.nextCP = parser.source.charCodeAt(++parser.index); + parser.currentChar = parser.source.charCodeAt(++parser.index); parser.flags |= Flags.NewLine; if (!lastIsCR) { parser.column = 0; @@ -65,7 +65,7 @@ export function consumeLineFeed(parser: ParserState, lastIsCR: boolean): void { export function scanNewLine(parser: ParserState): void { parser.flags |= Flags.NewLine; - parser.nextCP = parser.source.charCodeAt(++parser.index); + parser.currentChar = parser.source.charCodeAt(++parser.index); parser.column = 0; parser.line++; } diff --git a/src/lexer/identifier.ts b/src/lexer/identifier.ts index acd8616c..136535e8 100644 --- a/src/lexer/identifier.ts +++ b/src/lexer/identifier.ts @@ -1,9 +1,9 @@ import { ParserState, Context } from '../common'; import { Token, descKeywordTable } from '../token'; import { Chars } from '../chars'; -import { nextCP, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './'; +import { advanceChar, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './'; import { CharTypes, CharFlags, isIdentifierPart, isIdentifierStart } from './charClassifier'; -import { report, reportMessageAt, Errors } from '../errors'; +import { report, reportScannerError, Errors } from '../errors'; /** * Scans identifier @@ -12,9 +12,9 @@ import { report, reportMessageAt, Errors } from '../errors'; * @param context Context masks */ export function scanIdentifier(parser: ParserState, context: Context, isValidAsKeyword: 0 | 1): Token { - while ((CharTypes[nextCP(parser)] & CharFlags.IdentifierPart) !== 0) {} + while ((CharTypes[advanceChar(parser)] & CharFlags.IdentifierPart) !== 0) {} parser.tokenValue = parser.source.slice(parser.tokenPos, parser.index); - return parser.nextCP !== Chars.Backslash && parser.nextCP < 0x7e + return parser.currentChar !== Chars.Backslash && parser.currentChar < 0x7e ? descKeywordTable[parser.tokenValue] || Token.Identifier : scanIdentifierSlowCase(parser, context, 0, isValidAsKeyword); } @@ -49,7 +49,7 @@ export function scanIdentifierSlowCase( let start = parser.index; while (parser.index < parser.end) { - if (parser.nextCP === Chars.Backslash) { + if (parser.currentChar === Chars.Backslash) { parser.tokenValue += parser.source.slice(start, parser.index); hasEscape = 1; const code = scanIdentifierUnicodeEscape(parser); @@ -57,8 +57,8 @@ export function scanIdentifierSlowCase( isValidAsKeyword = isValidAsKeyword && CharTypes[code] & CharFlags.KeywordCandidate; parser.tokenValue += fromCodePoint(code); start = parser.index; - } else if (isIdentifierPart(parser.nextCP) || consumeMultiUnitCodePoint(parser, parser.nextCP)) { - nextCP(parser); + } else if (isIdentifierPart(parser.currentChar) || consumeMultiUnitCodePoint(parser, parser.currentChar)) { + advanceChar(parser); } else { break; } @@ -94,7 +94,7 @@ export function scanIdentifierSlowCase( * @param parser Parser object */ export function scanPrivateName(parser: ParserState): Token { - if (!isIdentifierStart(nextCP(parser))) report(parser, Errors.MissingPrivateName); + if (!isIdentifierStart(advanceChar(parser))) report(parser, Errors.MissingPrivateName); return Token.PrivateField; } @@ -109,8 +109,8 @@ export function scanIdentifierUnicodeEscape(parser: ParserState): number { if (parser.source.charCodeAt(parser.index + 1) !== Chars.LowerU) { report(parser, Errors.InvalidUnicodeEscapeSequence); } - parser.nextCP = parser.source.charCodeAt((parser.index += 2)); - return scanUnicodeEscapeValue(parser); + parser.currentChar = parser.source.charCodeAt((parser.index += 2)); + return scanUnicodeEscape(parser); } /** @@ -118,22 +118,23 @@ export function scanIdentifierUnicodeEscape(parser: ParserState): number { * * @param parser Parser object */ -export function scanUnicodeEscapeValue(parser: ParserState): number { +export function scanUnicodeEscape(parser: ParserState): number { + // Accept both \uxxxx and \u{xxxxxx} let codePoint = 0; - const char = parser.nextCP; + const char = parser.currentChar; // First handle a delimited Unicode escape, e.g. \u{1F4A9} if (char === Chars.LeftBrace) { - const startPos = parser.index; - while (CharTypes[nextCP(parser)] & CharFlags.Hex) { - codePoint = (codePoint << 4) | toHex(parser.nextCP); - if (codePoint > Chars.NonBMPMax) report(parser, Errors.UnicodeOverflow); + const begin = parser.index - 2; + while (CharTypes[advanceChar(parser)] & CharFlags.Hex) { + codePoint = (codePoint << 4) | toHex(parser.currentChar); + if (codePoint > Chars.NonBMPMax) reportScannerError(begin, parser.line, parser.index + 1, Errors.UnicodeOverflow); } // At least 4 characters have to be read - if (codePoint < 1 || (parser.nextCP as number) !== Chars.RightBrace) { - reportMessageAt(startPos, parser.line, startPos - 1, Errors.InvalidHexEscapeSequence); + if (codePoint < 1 || (parser.currentChar as number) !== Chars.RightBrace) { + reportScannerError(begin, parser.line, parser.index - 1, Errors.InvalidHexEscapeSequence); } - nextCP(parser); // consumes '}' + advanceChar(parser); // consumes '}' return codePoint; } @@ -148,7 +149,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number { codePoint = (toHex(char) << 12) | (toHex(char2) << 8) | (toHex(char3) << 4) | toHex(char4); - parser.nextCP = parser.source.charCodeAt((parser.index += 4)); + parser.currentChar = parser.source.charCodeAt((parser.index += 4)); return codePoint; } diff --git a/src/lexer/index.ts b/src/lexer/index.ts index 6789dafd..b833424f 100644 --- a/src/lexer/index.ts +++ b/src/lexer/index.ts @@ -7,7 +7,7 @@ export { CommentType } from './comments'; export { - nextCP, + advanceChar, consumeMultiUnitCodePoint, isExoticECMAScriptWhitespace, fromCodePoint, @@ -23,7 +23,7 @@ export { scanIdentifierSlowCase, scanUnicodeIdentifier, scanPrivateName, - scanUnicodeEscapeValue + scanUnicodeEscape } from './identifier'; export { scanString } from './string'; export { scanNumber } from './numeric'; diff --git a/src/lexer/jsx.ts b/src/lexer/jsx.ts index f99fabed..8c1beec4 100644 --- a/src/lexer/jsx.ts +++ b/src/lexer/jsx.ts @@ -3,7 +3,7 @@ import { Chars } from '../chars'; import { Token } from '../token'; import { ParserState, Context } from '../common'; import { report, Errors } from '../errors'; -import { nextCP, LexerState, TokenLookup } from './'; +import { advanceChar, LexerState, TokenLookup } from './'; import { scanSingleToken } from './scan'; /** @@ -17,7 +17,7 @@ export function scanJSXAttributeValue(parser: ParserState, context: Context): To parser.startColumn = parser.column; parser.startLine = parser.line; parser.token = - CharTypes[parser.nextCP] & CharFlags.StringLiteral + CharTypes[parser.currentChar] & CharFlags.StringLiteral ? scanJSXString(parser) : scanSingleToken(parser, context, LexerState.None); return parser.token; @@ -29,18 +29,18 @@ export function scanJSXAttributeValue(parser: ParserState, context: Context): To * @param parser The parser object */ export function scanJSXString(parser: ParserState): Token { - const quote = parser.nextCP; - let char = nextCP(parser); + const quote = parser.currentChar; + let char = advanceChar(parser); const start = parser.index; while (char !== quote) { if (parser.index >= parser.end) report(parser, Errors.UnterminatedString); - char = nextCP(parser); + char = advanceChar(parser); } // check for unterminated string if (char !== quote) report(parser, Errors.UnterminatedString); parser.tokenValue = parser.source.slice(start, parser.index); - nextCP(parser); // skip the quote + advanceChar(parser); // skip the quote return Token.StringLiteral; } @@ -60,22 +60,22 @@ export function scanJSXToken(parser: ParserState): Token { switch (token) { case Token.LessThan: { - nextCP(parser); - if (parser.nextCP === Chars.Slash) { - nextCP(parser); + advanceChar(parser); + if (parser.currentChar === Chars.Slash) { + advanceChar(parser); return (parser.token = Token.JSXClose); } return (parser.token = Token.LessThan); } case Token.LeftBrace: { - nextCP(parser); + advanceChar(parser); return (parser.token = Token.LeftBrace); } default: // ignore } - while (parser.index < parser.end && (CharTypes[nextCP(parser)] & CharFlags.JSXToken) === 0) {} + while (parser.index < parser.end && (CharTypes[advanceChar(parser)] & CharFlags.JSXToken) === 0) {} parser.tokenValue = parser.source.slice(parser.tokenPos, parser.index); @@ -90,9 +90,9 @@ export function scanJSXToken(parser: ParserState): Token { export function scanJSXIdentifier(parser: ParserState): Token { if ((parser.token & Token.IsIdentifier) === Token.IsIdentifier) { const { index } = parser; - let char = parser.nextCP; + let char = parser.currentChar; while ((CharTypes[char] & (CharFlags.Hyphen | CharFlags.IdentifierPart)) !== 0) { - char = nextCP(parser); + char = advanceChar(parser); } parser.tokenValue += parser.source.slice(index, parser.index); } diff --git a/src/lexer/numeric.ts b/src/lexer/numeric.ts index d849788a..5cf5751b 100644 --- a/src/lexer/numeric.ts +++ b/src/lexer/numeric.ts @@ -1,8 +1,8 @@ import { ParserState, Context, Flags } from '../common'; import { Token } from '../token'; -import { nextCP, toHex, CharTypes, CharFlags, isIdentifierStart, NumberKind } from './'; +import { advanceChar, toHex, CharTypes, CharFlags, isIdentifierStart, NumberKind } from './'; import { Chars } from '../chars'; -import { report, Errors } from '../errors'; +import { report, Errors, reportScannerError } from '../errors'; /** * Scans numeric literal @@ -15,7 +15,7 @@ export function scanNumber(parser: ParserState, context: Context, kind: NumberKi // DecimalLiteral :: // DecimalIntegerLiteral . DecimalDigits_opt // . DecimalDigits - let char = parser.nextCP; + let char = parser.currentChar; let value: any = 0; let digit = 9; let atStart = kind & NumberKind.Float ? 1 : 0; @@ -24,28 +24,28 @@ export function scanNumber(parser: ParserState, context: Context, kind: NumberKi if (kind & NumberKind.Float) { value = '.' + scanDecimalDigitsOrSeparator(parser, char); - char = parser.nextCP; + char = parser.currentChar; // It is a Syntax Error if the MV is not an integer. (dot decimalDigits) if (char === Chars.LowerN) report(parser, Errors.InvalidBigInt); } else { if (char === Chars.Zero) { - char = nextCP(parser); + char = advanceChar(parser); // Hex if ((char | 32) === Chars.LowerX) { kind = NumberKind.Hex; - char = nextCP(parser); // skips 'X', 'x' + char = advanceChar(parser); // skips 'X', 'x' while (CharTypes[char] & (CharFlags.Hex | CharFlags.Underscore)) { if (char === Chars.Underscore) { if (!allowSeparator) report(parser, Errors.ContinuousNumericSeparator); allowSeparator = 0; - char = nextCP(parser); + char = advanceChar(parser); continue; } allowSeparator = 1; value = value * 0x10 + toHex(char); digits++; - char = nextCP(parser); + char = advanceChar(parser); } if (digits < 1 || !allowSeparator) { @@ -54,40 +54,40 @@ export function scanNumber(parser: ParserState, context: Context, kind: NumberKi // Octal } else if ((char | 32) === Chars.LowerO) { kind = NumberKind.Octal; - char = nextCP(parser); // skips 'X', 'x' + char = advanceChar(parser); // skips 'X', 'x' while (CharTypes[char] & (CharFlags.Octal | CharFlags.Underscore)) { if (char === Chars.Underscore) { if (!allowSeparator) { report(parser, Errors.ContinuousNumericSeparator); } allowSeparator = 0; - char = nextCP(parser); + char = advanceChar(parser); continue; } allowSeparator = 1; value = value * 8 + (char - Chars.Zero); digits++; - char = nextCP(parser); + char = advanceChar(parser); } if (digits < 1 || !allowSeparator) { report(parser, digits < 1 ? Errors.MissingHexDigits : Errors.TrailingNumericSeparator); } } else if ((char | 32) === Chars.LowerB) { kind = NumberKind.Binary; - char = nextCP(parser); // skips 'B', 'b' + char = advanceChar(parser); // skips 'B', 'b' while (CharTypes[char] & (CharFlags.Binary | CharFlags.Underscore)) { if (char === Chars.Underscore) { if (!allowSeparator) { report(parser, Errors.ContinuousNumericSeparator); } allowSeparator = 0; - char = nextCP(parser); + char = advanceChar(parser); continue; } allowSeparator = 1; value = value * 2 + (char - Chars.Zero); digits++; - char = nextCP(parser); + char = advanceChar(parser); } if (digits < 1 || !allowSeparator) { report(parser, digits < 1 ? Errors.MissingHexDigits : Errors.TrailingNumericSeparator); @@ -103,7 +103,7 @@ export function scanNumber(parser: ParserState, context: Context, kind: NumberKi break; } value = value * 8 + (char - Chars.Zero); - char = nextCP(parser); + char = advanceChar(parser); } } else if (CharTypes[char] & CharFlags.ImplicitOctalDigits) { if (context & Context.Strict) report(parser, Errors.StrictOctalEscape); @@ -119,18 +119,32 @@ export function scanNumber(parser: ParserState, context: Context, kind: NumberKi if (atStart) { while (digit >= 0 && CharTypes[char] & (CharFlags.Decimal | CharFlags.Underscore)) { if (char === Chars.Underscore) { - char = nextCP(parser); - if (char === Chars.Underscore) report(parser, Errors.ContinuousNumericSeparator); + char = advanceChar(parser); + if (char === Chars.Underscore) { + reportScannerError( + parser.index, + parser.line, + parser.index + 1 /* skips `_` */, + Errors.ContinuousNumericSeparator + ); + } allowSeparator = 1; continue; } allowSeparator = 0; value = 10 * value + (char - Chars.Zero); - char = nextCP(parser); + char = advanceChar(parser); --digit; } - if (allowSeparator) report(parser, Errors.TrailingNumericSeparator); + if (allowSeparator) { + reportScannerError( + parser.index, + parser.line, + parser.index + 1 /* skips `_` */, + Errors.TrailingNumericSeparator + ); + } if (digit >= 0 && !isIdentifierStart(char) && char !== Chars.Period) { // Most numbers are pure decimal integers without fractional component @@ -143,14 +157,14 @@ export function scanNumber(parser: ParserState, context: Context, kind: NumberKi value += scanDecimalDigitsOrSeparator(parser, char); - char = parser.nextCP; + char = parser.currentChar; // Consume any decimal dot and fractional component. if (char === Chars.Period) { - if (nextCP(parser) === Chars.Underscore) report(parser, Errors.Unexpected); + if (advanceChar(parser) === Chars.Underscore) report(parser, Errors.Unexpected); kind = NumberKind.Float; - value += '.' + scanDecimalDigitsOrSeparator(parser, parser.nextCP); - char = parser.nextCP; + value += '.' + scanDecimalDigitsOrSeparator(parser, parser.currentChar); + char = parser.currentChar; } } } @@ -160,14 +174,14 @@ export function scanNumber(parser: ParserState, context: Context, kind: NumberKi if (char === Chars.LowerN && kind & NumberKind.ValidBigIntKind) { isBigInt = 1; - char = nextCP(parser); + char = advanceChar(parser); } else { // Consume any exponential notation. if ((char | 32) === Chars.LowerE) { - char = nextCP(parser); + char = advanceChar(parser); // '-', '+' - if (CharTypes[char] & CharFlags.Exponent) char = nextCP(parser); + if (CharTypes[char] & CharFlags.Exponent) char = advanceChar(parser); const { index } = parser; @@ -177,7 +191,7 @@ export function scanNumber(parser: ParserState, context: Context, kind: NumberKi // Consume exponential digits value += parser.source.substring(end, index) + scanDecimalDigitsOrSeparator(parser, char); - char = parser.nextCP; + char = parser.currentChar; } } @@ -218,18 +232,27 @@ export function scanDecimalDigitsOrSeparator(parser: ParserState, char: number): while (CharTypes[char] & (CharFlags.Decimal | CharFlags.Underscore)) { if (char === Chars.Underscore) { const { index } = parser; - char = nextCP(parser); - if (char === Chars.Underscore) report(parser, Errors.ContinuousNumericSeparator); + char = advanceChar(parser); + if (char === Chars.Underscore) { + reportScannerError( + parser.index, + parser.line, + parser.index + 1 /* skips `_` */, + Errors.ContinuousNumericSeparator + ); + } allowSeparator = 1; ret += parser.source.substring(start, index); start = parser.index; continue; } allowSeparator = 0; - char = nextCP(parser); + char = advanceChar(parser); } - if (allowSeparator) report(parser, Errors.TrailingNumericSeparator); + if (allowSeparator) { + reportScannerError(parser.index, parser.line, parser.index + 1 /* skips `_` */, Errors.TrailingNumericSeparator); + } return ret + parser.source.substring(start, parser.index); } diff --git a/src/lexer/regexp.ts b/src/lexer/regexp.ts index 477d6bc9..bfba36a9 100644 --- a/src/lexer/regexp.ts +++ b/src/lexer/regexp.ts @@ -1,7 +1,7 @@ import { Chars } from '../chars'; import { Context, ParserState } from '../common'; import { Token } from '../token'; -import { nextCP, isIdentifierPart } from './'; +import { advanceChar, isIdentifierPart } from './'; import { report, Errors } from '../errors'; /** @@ -22,8 +22,8 @@ export function scanRegularExpression(parser: ParserState, context: Context): To let preparseState = RegexState.Empty; loop: while (true) { - const ch = parser.nextCP; - nextCP(parser); + const ch = parser.currentChar; + advanceChar(parser); if (preparseState & RegexState.Escape) { preparseState &= ~RegexState.Escape; @@ -68,7 +68,7 @@ export function scanRegularExpression(parser: ParserState, context: Context): To } let mask = RegexFlags.Empty; - let char = parser.nextCP; + let char = parser.currentChar; const { index: flagStart } = parser; @@ -108,7 +108,7 @@ export function scanRegularExpression(parser: ParserState, context: Context): To report(parser, Errors.UnexpectedTokenRegExpFlag); } - char = nextCP(parser); + char = advanceChar(parser); } const flags = parser.source.slice(flagStart, parser.index); diff --git a/src/lexer/scan.ts b/src/lexer/scan.ts index 361955ed..b844c2f8 100644 --- a/src/lexer/scan.ts +++ b/src/lexer/scan.ts @@ -5,7 +5,7 @@ import { ParserState, Context, Flags } from '../common'; import { report, Errors } from '../errors'; import { isIDStart } from '../unicode'; import { - nextCP, + advanceChar, skipSingleLineComment, skipMultiLineComment, skipSingleHTMLComment, @@ -194,7 +194,7 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le parser.colPos = parser.column; parser.linePos = parser.line; - const char = parser.nextCP; + const char = parser.currentChar; if (char <= 0x7e) { const token = TokenLookup[char]; @@ -213,108 +213,126 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le case Token.Complement: case Token.Decorator: case Token.Illegal: - nextCP(parser); + advanceChar(parser); return token; - // Skip over non-EOL whitespace chars. - case Token.WhiteSpace: - nextCP(parser); - break; - - case Token.CarriageReturn: - state |= LexerState.NewLine | LexerState.LastIsCR; - scanNewLine(parser); - break; - - case Token.LineFeed: - consumeLineFeed(parser, (state & LexerState.LastIsCR) !== 0); - state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR; - break; - - // Look for identifier or keyword - case Token.Keyword: - return scanIdentifier(parser, context, /* isValidAsKeyword */ 1); - - // Look for an identifier - case Token.Identifier: - return scanIdentifier(parser, context, /* isValidAsKeyword */ 0); - - // Look for a decimal number. - case Token.NumericLiteral: - return scanNumber(parser, context, NumberKind.Decimal); - - // Look for a string or a template string - case Token.StringLiteral: - return scanString(parser, context, char); + // `<`, `<=`, `<<`, `<<=`, `` + case Token.Assign: { + advanceChar(parser); + if (parser.index >= parser.end) return Token.Assign; + const ch = parser.currentChar; - case Token.EscapedIdentifier: - return scanUnicodeIdentifier(parser, context); + if (ch === Chars.EqualSign) { + if (advanceChar(parser) === Chars.EqualSign) { + advanceChar(parser); + return Token.StrictEqual; + } else { + return Token.LooseEqual; + } + } else if (ch === Chars.GreaterThan) { + advanceChar(parser); + return Token.Arrow; + } - // `#` - case Token.PrivateField: - return scanPrivateName(parser); + return Token.Assign; + } // `!`, `!=`, `!==` case Token.Negate: - if (nextCP(parser) !== Chars.EqualSign) { + if (advanceChar(parser) !== Chars.EqualSign) { return Token.Negate; } - if (nextCP(parser) !== Chars.EqualSign) { + if (advanceChar(parser) !== Chars.EqualSign) { return Token.LooseNotEqual; } - nextCP(parser); + advanceChar(parser); return Token.StrictNotEqual; // `%`, `%=` case Token.Modulo: - if (nextCP(parser) !== Chars.EqualSign) return Token.Modulo; - nextCP(parser); + if (advanceChar(parser) !== Chars.EqualSign) return Token.Modulo; + advanceChar(parser); return Token.ModuloAssign; // `*`, `**`, `*=`, `**=` case Token.Multiply: { - nextCP(parser); + advanceChar(parser); if (parser.index >= parser.end) return Token.Multiply; - const next = parser.nextCP; + const ch = parser.currentChar; - if (next === Chars.EqualSign) { - nextCP(parser); + if (ch === Chars.EqualSign) { + advanceChar(parser); return Token.MultiplyAssign; } - if (next !== Chars.Asterisk) return Token.Multiply; + if (ch !== Chars.Asterisk) return Token.Multiply; - if (nextCP(parser) !== Chars.EqualSign) return Token.Exponentiate; + if (advanceChar(parser) !== Chars.EqualSign) return Token.Exponentiate; - nextCP(parser); + advanceChar(parser); return Token.ExponentiateAssign; } // `^`, `^=` case Token.BitwiseXor: - if (nextCP(parser) !== Chars.EqualSign) return Token.BitwiseXor; - nextCP(parser); + if (advanceChar(parser) !== Chars.EqualSign) return Token.BitwiseXor; + advanceChar(parser); return Token.BitwiseXorAssign; // `+`, `++`, `+=` case Token.Add: { - nextCP(parser); + advanceChar(parser); - const ch = parser.nextCP; + const ch = parser.currentChar; if (ch === Chars.Plus) { - nextCP(parser); + advanceChar(parser); return Token.Increment; } if (ch === Chars.EqualSign) { - nextCP(parser); + advanceChar(parser); return Token.AddAssign; } @@ -323,13 +341,13 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le // `-`, `--`, `-=`, `-->` case Token.Subtract: { - nextCP(parser); + advanceChar(parser); if (parser.index >= parser.end) return Token.Subtract; - const ch = parser.nextCP; + const ch = parser.currentChar; if (ch === Chars.Hyphen) { - nextCP(parser); - if ((state & LexerState.NewLine || isStartOfLine) && parser.nextCP === Chars.GreaterThan) { + advanceChar(parser); + if ((state & LexerState.NewLine || isStartOfLine) && parser.currentChar === Chars.GreaterThan) { if ((context & Context.OptionsWebCompat) === 0) report(parser, Errors.HtmlCommentInWebCompat); state = skipSingleHTMLComment(parser, state, context, CommentType.HTMLClose); continue; @@ -339,7 +357,7 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le } if (ch === Chars.EqualSign) { - nextCP(parser); + advanceChar(parser); return Token.SubtractAssign; } @@ -348,21 +366,21 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le // `/`, `/=`, `/>`, '/*..*/' case Token.Divide: { - nextCP(parser); + advanceChar(parser); if (parser.index < parser.end) { - const ch = parser.nextCP; + const ch = parser.currentChar; if (ch === Chars.Slash) { - nextCP(parser); + advanceChar(parser); state = skipSingleLineComment(parser, state, CommentType.Single); continue; } else if (ch === Chars.Asterisk) { - nextCP(parser); + advanceChar(parser); state = skipMultiLineComment(parser, state) as LexerState; continue; } else if (context & Context.AllowRegExp) { return scanRegularExpression(parser, context); } else if (ch === Chars.EqualSign) { - nextCP(parser); + advanceChar(parser); return Token.DivideAssign; } } @@ -370,76 +388,17 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le return Token.Divide; } - // `<`, `<=`, `<<`, `<<=`, `` - case Token.Assign: { - nextCP(parser); - if (parser.index >= parser.end) return Token.Assign; - const ch = parser.nextCP; - - if (ch === Chars.EqualSign) { - if (nextCP(parser) === Chars.EqualSign) { - nextCP(parser); - return Token.StrictEqual; - } else { - return Token.LooseEqual; - } - } else if (ch === Chars.GreaterThan) { - nextCP(parser); - return Token.Arrow; - } - - return Token.Assign; - } - // `|`, `||`, `|=` case Token.BitwiseOr: { - nextCP(parser); + advanceChar(parser); if (parser.index >= parser.end) return Token.BitwiseOr; - const next = parser.nextCP; + const ch = parser.currentChar; - if (next === Chars.VerticalBar) { - nextCP(parser); + if (ch === Chars.VerticalBar) { + advanceChar(parser); return Token.LogicalOr; - } else if (next === Chars.EqualSign) { - nextCP(parser); + } else if (ch === Chars.EqualSign) { + advanceChar(parser); return Token.BitwiseOrAssign; } @@ -448,33 +407,33 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le // `>`, `>=`, `>>`, `>>>`, `>>=`, `>>>=` case Token.GreaterThan: { - nextCP(parser); + advanceChar(parser); if (parser.index >= parser.end) return Token.GreaterThan; - const ch = parser.nextCP; + const ch = parser.currentChar; if (ch === Chars.EqualSign) { - nextCP(parser); + advanceChar(parser); return Token.GreaterThanOrEqual; } if (ch !== Chars.GreaterThan) return Token.GreaterThan; - nextCP(parser); + advanceChar(parser); if (parser.index < parser.end) { - const ch = parser.nextCP; + const ch = parser.currentChar; if (ch === Chars.GreaterThan) { - if (nextCP(parser) === Chars.EqualSign) { - nextCP(parser); + if (advanceChar(parser) === Chars.EqualSign) { + advanceChar(parser); return Token.LogicalShiftRightAssign; } else { return Token.LogicalShiftRight; } } else if (ch === Chars.EqualSign) { - nextCP(parser); + advanceChar(parser); return Token.ShiftRightAssign; } } @@ -484,17 +443,17 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le // `&`, `&&`, `&=` case Token.BitwiseAnd: { - nextCP(parser); + advanceChar(parser); if (parser.index >= parser.end) return Token.BitwiseAnd; - const ch = parser.nextCP; + const ch = parser.currentChar; if (ch === Chars.Ampersand) { - nextCP(parser); + advanceChar(parser); return Token.LogicalAnd; } if (ch === Chars.EqualSign) { - nextCP(parser); + advanceChar(parser); return Token.BitwiseAndAssign; } @@ -502,17 +461,58 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le } // `.`, `...`, `.123` (numeric literal) case Token.Period: - nextCP(parser); - if ((CharTypes[parser.nextCP] & CharFlags.Decimal) !== 0) + if ((CharTypes[advanceChar(parser)] & CharFlags.Decimal) !== 0) return scanNumber(parser, context, NumberKind.Float | NumberKind.Decimal); - if (parser.nextCP === Chars.Period) { - if (nextCP(parser) === Chars.Period) { - nextCP(parser); + if (parser.currentChar === Chars.Period) { + if (advanceChar(parser) === Chars.Period) { + advanceChar(parser); return Token.Ellipsis; } } return Token.Period; + // Look for identifier or keyword + case Token.Keyword: + return scanIdentifier(parser, context, /* isValidAsKeyword */ 1); + + // Look for an identifier + case Token.Identifier: + return scanIdentifier(parser, context, /* isValidAsKeyword */ 0); + + // Look for a decimal number. + case Token.NumericLiteral: + return scanNumber(parser, context, NumberKind.Decimal); + + // Look for a string literal + case Token.StringLiteral: + return scanString(parser, context, char); + + // Look for a template string + case Token.Template: + return scanTemplate(parser, context); + + // Look for a escaped identifier + case Token.EscapedIdentifier: + return scanUnicodeIdentifier(parser, context); + + // `#` (private name) + case Token.PrivateField: + return scanPrivateName(parser); + + case Token.WhiteSpace: + advanceChar(parser); + break; + + case Token.CarriageReturn: + state |= LexerState.NewLine | LexerState.LastIsCR; + scanNewLine(parser); + break; + + case Token.LineFeed: + consumeLineFeed(parser, (state & LexerState.LastIsCR) !== 0); + state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR; + break; + default: // unreachable } @@ -529,7 +529,7 @@ export function scanSingleToken(parser: ParserState, context: Context, state: Le } if (isExoticECMAScriptWhitespace(char)) { - nextCP(parser); + advanceChar(parser); continue; } diff --git a/src/lexer/string.ts b/src/lexer/string.ts index 49d9d0c2..db9c75f6 100644 --- a/src/lexer/string.ts +++ b/src/lexer/string.ts @@ -2,7 +2,7 @@ import { ParserState, Context, Flags } from '../common'; import { Token } from '../token'; import { Chars } from '../chars'; import { report, Errors } from '../errors'; -import { toHex, nextCP, fromCodePoint, CharTypes, CharFlags } from './'; +import { toHex, advanceChar, fromCodePoint, CharTypes, CharFlags } from './'; // Intentionally negative export const enum Escape { @@ -20,13 +20,13 @@ export function scanString(parser: ParserState, context: Context, quote: number) const { index: start } = parser; let ret: string | void = ''; - let char = nextCP(parser); + let char = advanceChar(parser); let marker = parser.index; // Consumes the quote while ((CharTypes[char] & CharFlags.LineTerminator) === 0) { if (char === quote) { ret += parser.source.slice(marker, parser.index); - nextCP(parser); // skip closing quote + advanceChar(parser); // skip closing quote if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(start, parser.index); parser.tokenValue = ret; return Token.StringLiteral; @@ -34,7 +34,7 @@ export function scanString(parser: ParserState, context: Context, quote: number) if ((char & 8) === 8 && char === Chars.Backslash) { ret += parser.source.slice(marker, parser.index); - char = nextCP(parser); + char = advanceChar(parser); if (char > 0x7e) { ret += fromCodePoint(char); @@ -49,7 +49,7 @@ export function scanString(parser: ParserState, context: Context, quote: number) if (parser.index >= parser.end) report(parser, Errors.UnterminatedString); - char = nextCP(parser); + char = advanceChar(parser); } report(parser, Errors.UnterminatedString); @@ -75,9 +75,9 @@ export function parseEscape(parser: ParserState, context: Context, first: number // Line continuations case Chars.CarriageReturn: { if (parser.index < parser.end) { - if (parser.nextCP === Chars.LineFeed) { + if (parser.currentChar === Chars.LineFeed) { parser.index = parser.index + 1; - parser.nextCP = parser.source.charCodeAt(parser.index); + parser.currentChar = parser.source.charCodeAt(parser.index); } } } @@ -109,7 +109,7 @@ export function parseEscape(parser: ParserState, context: Context, first: number } else if (context & Context.Strict) { return Escape.StrictOctal; } else { - parser.nextCP = next; + parser.currentChar = next; code = (code << 3) | (next - Chars.Zero); index++; column++; @@ -117,7 +117,7 @@ export function parseEscape(parser: ParserState, context: Context, first: number const next = parser.source.charCodeAt(index); if (CharTypes[next] & CharFlags.Octal) { - parser.nextCP = next; + parser.currentChar = next; code = (code << 3) | (next - Chars.Zero); index++; column++; @@ -149,7 +149,7 @@ export function parseEscape(parser: ParserState, context: Context, first: number if (CharTypes[next] & CharFlags.Octal) { code = (code << 3) | (next - Chars.Zero); - parser.nextCP = next; + parser.currentChar = next; parser.index = index; parser.column = column; } @@ -168,10 +168,10 @@ export function parseEscape(parser: ParserState, context: Context, first: number // HexEscapeSequence // \x HexDigit HexDigit case Chars.LowerX: { - const ch1 = nextCP(parser); + const ch1 = advanceChar(parser); if ((CharTypes[ch1] & CharFlags.Hex) === 0) return Escape.InvalidHex; const hi = toHex(ch1); - const ch2 = nextCP(parser); + const ch2 = advanceChar(parser); if ((CharTypes[ch2] & CharFlags.Hex) === 0) return Escape.InvalidHex; const lo = toHex(ch2); @@ -182,16 +182,16 @@ export function parseEscape(parser: ParserState, context: Context, first: number // \u HexDigit HexDigit HexDigit HexDigit // \u { HexDigit HexDigit HexDigit ... } case Chars.LowerU: { - const ch = nextCP(parser); + const ch = advanceChar(parser); - if (parser.nextCP === Chars.LeftBrace) { + if (parser.currentChar === Chars.LeftBrace) { let code = 0; - while ((CharTypes[nextCP(parser)] & CharFlags.Hex) !== 0) { - code = (code << 4) | toHex(parser.nextCP); + while ((CharTypes[advanceChar(parser)] & CharFlags.Hex) !== 0) { + code = (code << 4) | toHex(parser.currentChar); if (code > Chars.NonBMPMax) return Escape.OutOfRange; } - if (parser.nextCP < 1 || (parser.nextCP as number) !== Chars.RightBrace) { + if (parser.currentChar < 1 || (parser.currentChar as number) !== Chars.RightBrace) { return Escape.InvalidHex; } return code; diff --git a/src/lexer/template.ts b/src/lexer/template.ts index 329a9532..e5b26627 100644 --- a/src/lexer/template.ts +++ b/src/lexer/template.ts @@ -1,7 +1,7 @@ import { ParserState, Context } from '../common'; import { Token } from '../token'; import { Chars } from '../chars'; -import { nextCP, fromCodePoint } from './common'; +import { advanceChar, fromCodePoint } from './common'; import { parseEscape, Escape, handleStringError } from './string'; import { report, Errors } from '../errors'; @@ -13,15 +13,15 @@ export function scanTemplate(parser: ParserState, context: Context): Token { let tail = 1; let ret: string | void = ''; - let char = nextCP(parser); + let char = advanceChar(parser); while (char !== Chars.Backtick) { if (char === Chars.Dollar && parser.source.charCodeAt(parser.index + 1) === Chars.LeftBrace) { - nextCP(parser); // Skip: '}' + advanceChar(parser); // Skip: '}' tail = 0; break; } else if ((char & 8) === 8 && char === Chars.Backslash) { - char = nextCP(parser); + char = advanceChar(parser); if (char > 0x7e) { ret += fromCodePoint(char); } else { @@ -43,7 +43,7 @@ export function scanTemplate(parser: ParserState, context: Context): Token { if (char === Chars.CarriageReturn) { if (parser.index < parser.end && parser.source.charCodeAt(parser.index) === Chars.LineFeed) { ret += fromCodePoint(char); - parser.nextCP = parser.source.charCodeAt(++parser.index); + parser.currentChar = parser.source.charCodeAt(++parser.index); } } @@ -54,10 +54,10 @@ export function scanTemplate(parser: ParserState, context: Context): Token { ret += fromCodePoint(char); } if (parser.index >= parser.end) report(parser, Errors.UnterminatedTemplate); - char = nextCP(parser); + char = advanceChar(parser); } - nextCP(parser); // Consume the quote or opening brace + advanceChar(parser); // Consume the quote or opening brace parser.tokenValue = ret; if (tail) { parser.tokenRaw = parser.source.slice(start + 1, parser.index - 1); @@ -94,7 +94,7 @@ function scanBadTemplate(parser: ParserState, ch: number): number { // do nothing } if (parser.index >= parser.end) report(parser, Errors.UnterminatedTemplate); - ch = nextCP(parser); + ch = advanceChar(parser); } return ch; diff --git a/src/parser.ts b/src/parser.ts index 0b98f5f1..25afe054 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -137,7 +137,7 @@ export function create(source: string, sourceFile: string | void, onComment: OnC /** * The code point at the current index */ - nextCP: source.charCodeAt(0), + currentChar: source.charCodeAt(0), /** * https://tc39.es/ecma262/#sec-module-semantics-static-semantics-exportednames @@ -4110,7 +4110,7 @@ export function parseTemplateLiteral( return finishNode(parser, context, start, line, column, { type: 'TemplateLiteral', expressions: [], - quasis: [parseTemplateTail(parser, context)] + quasis: [parseTemplateTail(parser, context, start, line, column)] }); } @@ -4121,12 +4121,18 @@ export function parseTemplateLiteral( * @param context Context masks * @returns {ESTree.TemplateElement} */ -export function parseTemplateTail(parser: ParserState, context: Context): ESTree.TemplateElement { - const { tokenValue, tokenRaw, startPos, linePos, colPos } = parser; +export function parseTemplateTail( + parser: ParserState, + context: Context, + start: number, + line: number, + column: number +): ESTree.TemplateElement { + const { tokenValue, tokenRaw } = parser; consume(parser, context, Token.TemplateTail); - return finishNode(parser, context, startPos, linePos, colPos, { + return finishNode(parser, context, start, line, column, { type: 'TemplateElement', value: { cooked: tokenValue, @@ -4149,7 +4155,7 @@ export function parseTemplate( line: number, column: number ): ESTree.TemplateLiteral { - const quasis = [parseTemplateSpans(parser, context, /* tail */ false)]; + const quasis = [parseTemplateSpans(parser, context, /* tail */ false, start, line, column)]; consume(parser, context | Context.AllowRegExp, Token.TemplateContinuation); const expressions = [ @@ -4166,12 +4172,12 @@ export function parseTemplate( if (parser.token !== Token.RightBrace) report(parser, Errors.InvalidTemplateContinuation); while ((parser.token = scanTemplateTail(parser, context)) !== Token.TemplateTail) { const { tokenPos, linePos, colPos } = parser; - quasis.push(parseTemplateSpans(parser, context, /* tail */ false)); + quasis.push(parseTemplateSpans(parser, context, /* tail */ false, tokenPos, linePos, colPos)); consume(parser, context | Context.AllowRegExp, Token.TemplateContinuation); expressions.push(parseExpressions(parser, context, 0, 1, tokenPos, linePos, colPos)); } - quasis.push(parseTemplateSpans(parser, context, /* tail */ true)); + quasis.push(parseTemplateSpans(parser, context, /* tail */ true, parser.tokenPos, parser.linePos, parser.colPos)); nextToken(parser, context); @@ -4188,9 +4194,15 @@ export function parseTemplate( * @param parser Parser object * @param tail */ -export function parseTemplateSpans(parser: ParserState, context: Context, tail: boolean): ESTree.TemplateElement { - const { tokenPos, linePos, colPos } = parser; - return finishNode(parser, context, tokenPos, linePos, colPos, { +export function parseTemplateSpans( + parser: ParserState, + context: Context, + tail: boolean, + start: number, + line: number, + column: number +): ESTree.TemplateElement { + return finishNode(parser, context, start, line, column, { type: 'TemplateElement', value: { cooked: parser.tokenValue,