Skip to content

Commit

Permalink
fix(lexer): improved identifier scanning performance
Browse files Browse the repository at this point in the history
  • Loading branch information
KFlash committed Jul 27, 2019
1 parent f18f5b4 commit 29c1d3d
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 104 deletions.
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions src/errors.ts
Expand Up @@ -167,7 +167,8 @@ export const enum Errors {
InvalidDotProperty,
UnclosedSpreadElement,
CatchWithoutTry,
FinallyWithoutTry
FinallyWithoutTry,
UnCorrespondingFragmentTag
}

/*@internal*/
Expand Down Expand Up @@ -347,7 +348,8 @@ export const errorMessages: {
[Errors.InvalidDotProperty]: 'Dot property must be an identifier',
[Errors.UnclosedSpreadElement]: 'Encountered invalid input after spread/rest argument',
[Errors.CatchWithoutTry]: 'Catch without try',
[Errors.FinallyWithoutTry]: 'Finally without try'
[Errors.FinallyWithoutTry]: 'Finally without try',
[Errors.UnCorrespondingFragmentTag]: 'Expected corresponding closing tag for JSX fragment'
};

export class ParseError extends SyntaxError {
Expand Down
31 changes: 15 additions & 16 deletions src/lexer/charClassifier.ts
Expand Up @@ -5,21 +5,20 @@ export const enum CharFlags {
None = 0,
IdentifierStart = 1 << 0,
IdentifierPart = 1 << 1,
KeywordCandidate = 1 << 6,
LineTerminator = 1 << 9, // ECMA-262 11.3 Line Terminators
Decimal = 1 << 10,
Octal = 1 << 11,
Hex = 1 << 12,
Binary = 1 << 13,
Exponent = 1 << 15,
BackSlash = 1 << 17,
ImplicitOctalDigits = 1 << 18,
CarriageReturn = 1 << 19,
LineFeed = 1 << 20,
Underscore = 1 << 21,
StringLiteral = 1 << 22,
JSXToken = 1 << 23,
Hyphen = 1 << 24,
KeywordCandidate = 1 << 2,
LineTerminator = 1 << 3, // ECMA-262 11.3 Line Terminators
Decimal = 1 << 4,
Octal = 1 << 5,
Hex = 1 << 6,
Binary = 1 << 7,
Exponent = 1 << 8,
ImplicitOctalDigits = 1 << 9,
CarriageReturn = 1 << 10,
LineFeed = 1 << 11,
Underscore = 1 << 12,
StringLiteral = 1 << 13,
JSXToken = 1 << 14,
Hyphen = 1 << 15,
}

/**
Expand Down Expand Up @@ -118,7 +117,7 @@ export const CharTypes = [
CharFlags.IdentifierStart | CharFlags.IdentifierPart /* 0x59 Y */,
CharFlags.IdentifierStart | CharFlags.IdentifierPart /* 0x5A Z */,
CharFlags.None /* 0x5B */,
CharFlags.BackSlash | CharFlags.IdentifierStart /* 0x5C */,
CharFlags.IdentifierStart /* 0x5C */,
CharFlags.None /* 0x5D */,
CharFlags.None /* 0x5E */,
CharFlags.IdentifierStart | CharFlags.IdentifierPart | CharFlags.Underscore/* 0x5F _ */,
Expand Down
41 changes: 20 additions & 21 deletions src/lexer/identifier.ts
Expand Up @@ -14,11 +14,9 @@ import { report, reportMessageAt, Errors } from '../errors';
export function scanIdentifier(parser: ParserState, context: Context, isValidAsKeyword: 0 | 1): Token {
// Fast path: keep consuming for as long as the CharTypes table flags the value
// returned by nextCP(parser) as IdentifierPart. The loop body is empty because
// all the work happens in the condition (nextCP is defined elsewhere;
// presumably it advances parser.index and refreshes parser.nextCP — confirm).
while ((CharTypes[nextCP(parser)] & CharFlags.IdentifierPart) !== 0) {}
// Raw text scanned so far: from the start of the token up to the current index.
parser.tokenValue = parser.source.slice(parser.tokenPos, parser.index);
// If parser.nextCP is neither a backslash nor >= 0x7e, the identifier is
// finished: return its entry in descKeywordTable if there is one, otherwise
// Token.Identifier.
if ((CharTypes[parser.nextCP] & CharFlags.BackSlash) === 0 && parser.nextCP < 0x7e) {
return descKeywordTable[parser.tokenValue] || Token.Identifier;
}
// Slow path that has to deal with multi unit encoding
return scanIdentifierSlowCase(parser, context, 0, isValidAsKeyword);
// NOTE(review): the statement below is unreachable as written, because the
// return above is unconditional. It expresses the same decision as the
// if/return pair above, but compares parser.nextCP with Chars.Backslash
// directly instead of testing the CharFlags.BackSlash table flag. This looks
// like the pre- and post-change versions of one statement shown together —
// confirm which one is meant to remain.
return parser.nextCP !== Chars.Backslash && parser.nextCP < 0x7e
? descKeywordTable[parser.tokenValue] || Token.Identifier
: scanIdentifierSlowCase(parser, context, 0, isValidAsKeyword);
}

/**
Expand All @@ -28,7 +26,7 @@ export function scanIdentifier(parser: ParserState, context: Context, isValidAsK
* @param context Context masks
*/
export function scanUnicodeIdentifier(parser: ParserState, context: Context): Token {
const cookedChar = scanIdentifierUnicodeEscape(parser) as number;
const cookedChar = scanIdentifierUnicodeEscape(parser);
if (!isIdentifierPart(cookedChar)) report(parser, Errors.InvalidUnicodeEscapeSequence);
parser.tokenValue = fromCodePoint(cookedChar);
return scanIdentifierSlowCase(parser, context, /* hasEscape */ 1, CharTypes[cookedChar] & CharFlags.KeywordCandidate);
Expand All @@ -39,8 +37,8 @@ export function scanUnicodeIdentifier(parser: ParserState, context: Context): To
*
* @param parser Parser object
* @param context Context masks
* @param hasEscape
* @param canBeKeyword
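* @param hasEscape 1 if the identifier contains a unicode escape sequence, otherwise 0
* @param isValidAsKeyword Non-zero while the identifier may still be a keyword; cleared once an escaped character is not a keyword candidate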
*/
export function scanIdentifierSlowCase(
parser: ParserState,
Expand All @@ -49,11 +47,12 @@ export function scanIdentifierSlowCase(
isValidAsKeyword: number
): Token {
let start = parser.index;

while (parser.index < parser.end) {
if (parser.nextCP === Chars.Backslash) {
parser.tokenValue += parser.source.slice(start, parser.index);
hasEscape = 1;
const code = scanIdentifierUnicodeEscape(parser) as number;
const code = scanIdentifierUnicodeEscape(parser);
if (!isIdentifierPart(code)) report(parser, Errors.InvalidUnicodeEscapeSequence);
isValidAsKeyword = isValidAsKeyword && CharTypes[code] & CharFlags.KeywordCandidate;
parser.tokenValue += fromCodePoint(code);
Expand All @@ -72,18 +71,18 @@ export function scanIdentifierSlowCase(
const length = parser.tokenValue.length;

if (isValidAsKeyword && (length >= 2 && length <= 11)) {
const t: Token | undefined = descKeywordTable[parser.tokenValue];
const token: Token | undefined = descKeywordTable[parser.tokenValue];

return t === void 0
return token === void 0
? Token.Identifier
: t === Token.YieldKeyword || !hasEscape
? t
: context & Context.Strict && (t === Token.LetKeyword || t === Token.StaticKeyword)
: token === Token.YieldKeyword || !hasEscape
? token
: context & Context.Strict && (token === Token.LetKeyword || token === Token.StaticKeyword)
? Token.EscapedFutureReserved
: (t & Token.FutureReserved) === Token.FutureReserved
: (token & Token.FutureReserved) === Token.FutureReserved
? context & Context.Strict
? Token.EscapedFutureReserved
: t
: token
: Token.EscapedReserved;
}
return Token.Identifier;
Expand All @@ -104,14 +103,14 @@ export function scanPrivateName(parser: ParserState): Token {
*
* @param parser Parser object
*/
export function scanIdentifierUnicodeEscape(parser: ParserState): number | void {
export function scanIdentifierUnicodeEscape(parser: ParserState): number {
// Check for Unicode escape of the form '\uXXXX'
// and return code point value if valid Unicode escape is found.
if (parser.index + 5 < parser.end && parser.source.charCodeAt(parser.index + 1) === Chars.LowerU) {
parser.nextCP = parser.source.charCodeAt((parser.index += 2));
return scanUnicodeEscapeValue(parser);
if (parser.source.charCodeAt(parser.index + 1) !== Chars.LowerU) {
report(parser, Errors.InvalidUnicodeEscapeSequence);
}
report(parser, Errors.InvalidUnicodeEscapeSequence);
parser.nextCP = parser.source.charCodeAt((parser.index += 2));
return scanUnicodeEscapeValue(parser);
}

/**
Expand Down

0 comments on commit 29c1d3d

Please sign in to comment.