Skip to content

Commit

Permalink
fix(lexer): improved scanner performance
Browse files Browse the repository at this point in the history
  • Loading branch information
KFlash committed Jul 27, 2019
1 parent 0899ad3 commit c637ee5
Show file tree
Hide file tree
Showing 13 changed files with 326 additions and 276 deletions.
2 changes: 1 addition & 1 deletion src/common.ts
Expand Up @@ -202,7 +202,7 @@ export interface ParserState {
sourceFile: string | void;
assignable: AssignmentKind | DestructuringKind;
destructible: AssignmentKind | DestructuringKind;
nextCP: number;
currentChar: number;
exportedNames: any;
exportedBindings: any;
}
Expand Down
16 changes: 15 additions & 1 deletion src/errors.ts
Expand Up @@ -385,6 +385,7 @@ export function report(parser: ParserState, type: Errors, ...params: string[]):
export function reportScopeError(scope: any): never {
  // Unpack the position and error type stored on the scope record, then raise.
  const { index, line, column, type } = scope;
  throw new ParseError(index, line, column, type);
}

/**
* Throws an error at a given position
*
Expand All @@ -395,8 +396,21 @@ export function reportScopeError(scope: any): never {
* @param {number} column
* @param {Errors} type
* @param {...string[]} params
* @returns {never}
*/
export function reportMessageAt(index: number, line: number, column: number, type: Errors, ...params: string[]): never {
  // Build the error first, then raise it; `params` are forwarded unchanged to the constructor.
  const error = new ParseError(index, line, column, type, ...params);
  throw error;
}

/**
 * Raises a `ParseError` for the given source position and error type.
 *
 * Unlike `reportMessageAt`, this variant accepts no extra message parameters.
 *
 * @export
 * @param {number} index
 * @param {number} line
 * @param {number} column
 * @param {Errors} type
 * @returns {never}
 */
export function reportScannerError(index: number, line: number, column: number, type: Errors): never {
  const error = new ParseError(index, line, column, type);
  throw error;
}
22 changes: 11 additions & 11 deletions src/lexer/comments.ts
@@ -1,4 +1,4 @@
import { nextCP, CharTypes, CharFlags, LexerState, scanNewLine, consumeLineFeed } from './';
import { advanceChar, CharTypes, CharFlags, LexerState, scanNewLine, consumeLineFeed } from './';
import { Chars } from '../chars';
import { Context, ParserState } from '../common';
import { report, Errors } from '../errors';
Expand All @@ -21,7 +21,7 @@ export const CommentTypes = ['SingleLine', 'MultiLine', 'HTMLOpen', 'HTMLClose',
export function skipHashBang(parser: ParserState): void {
// HashbangComment ::
// #! SingleLineCommentChars_opt
if (parser.nextCP === Chars.Hash && parser.source.charCodeAt(parser.index + 1) === Chars.Exclamation) {
if (parser.currentChar === Chars.Hash && parser.source.charCodeAt(parser.index + 1) === Chars.Exclamation) {
skipSingleLineComment(parser, LexerState.None, CommentType.HashBang);
}
}
Expand All @@ -45,12 +45,12 @@ export function skipSingleHTMLComment(
export function skipSingleLineComment(parser: ParserState, state: LexerState, type: CommentType): LexerState {
const { index } = parser;
while (parser.index < parser.end) {
if (CharTypes[parser.nextCP] & CharFlags.LineTerminator || (parser.nextCP ^ Chars.LineSeparator) <= 1) {
if (CharTypes[parser.currentChar] & CharFlags.LineTerminator || (parser.currentChar ^ Chars.LineSeparator) <= 1) {
state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
scanNewLine(parser);
return state;
}
nextCP(parser);
advanceChar(parser);
}
if (parser.onComment)
parser.onComment(CommentTypes[type & 0xff], parser.source.slice(index, parser.index), parser, parser.index);
Expand All @@ -66,9 +66,9 @@ export function skipSingleLineComment(parser: ParserState, state: LexerState, ty
export function skipMultiLineComment(parser: ParserState, state: LexerState): LexerState | void {
const { index } = parser;
while (parser.index < parser.end) {
while (parser.nextCP === Chars.Asterisk) {
if (nextCP(parser) === Chars.Slash) {
nextCP(parser);
while (parser.currentChar === Chars.Asterisk) {
if (advanceChar(parser) === Chars.Slash) {
advanceChar(parser);
if (parser.onComment)
parser.onComment(
CommentTypes[CommentType.Multi & 0xff],
Expand All @@ -80,17 +80,17 @@ export function skipMultiLineComment(parser: ParserState, state: LexerState): Le
}
}

if (parser.nextCP === Chars.CarriageReturn) {
if (parser.currentChar === Chars.CarriageReturn) {
state |= LexerState.NewLine | LexerState.LastIsCR;
scanNewLine(parser);
} else if (parser.nextCP === Chars.LineFeed) {
} else if (parser.currentChar === Chars.LineFeed) {
consumeLineFeed(parser, (state & LexerState.LastIsCR) !== 0);
state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
} else if ((parser.nextCP ^ Chars.LineSeparator) <= 1) {
} else if ((parser.currentChar ^ Chars.LineSeparator) <= 1) {
state = (state | LexerState.LastIsCR | LexerState.NewLine) ^ LexerState.LastIsCR;
scanNewLine(parser);
} else {
nextCP(parser);
advanceChar(parser);
}
}

Expand Down
10 changes: 5 additions & 5 deletions src/lexer/common.ts
Expand Up @@ -26,9 +26,9 @@ export const enum NumberKind {
*
* @param parser The parser instance
*/
export function nextCP(parser: ParserState): number {
export function advanceChar(parser: ParserState): number {
parser.column++;
return (parser.nextCP = parser.source.charCodeAt(++parser.index));
return (parser.currentChar = parser.source.charCodeAt(++parser.index));
}

/**
Expand All @@ -42,7 +42,7 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): 0 |
if ((hi & 0xfc00) !== Chars.LeadSurrogateMin) return 0;
const lo = parser.source.charCodeAt(parser.index + 1);
if ((lo & 0xfc00) !== 0xdc00) return 0;
hi = parser.nextCP = Chars.NonBMPMin + ((hi & 0x3ff) << 10) + (lo & 0x3ff);
hi = parser.currentChar = Chars.NonBMPMin + ((hi & 0x3ff) << 10) + (lo & 0x3ff);
if (((unicodeLookup[(hi >>> 5) + 0] >>> hi) & 31 & 1) === 0) {
report(parser, Errors.IllegalCaracter, fromCodePoint(hi));
}
Expand All @@ -55,7 +55,7 @@ export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): 0 |
* Use to consume a line feed instead of `scanNewLine`.
*/
export function consumeLineFeed(parser: ParserState, lastIsCR: boolean): void {
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.currentChar = parser.source.charCodeAt(++parser.index);
parser.flags |= Flags.NewLine;
if (!lastIsCR) {
parser.column = 0;
Expand All @@ -65,7 +65,7 @@ export function consumeLineFeed(parser: ParserState, lastIsCR: boolean): void {

export function scanNewLine(parser: ParserState): void {
parser.flags |= Flags.NewLine;
parser.nextCP = parser.source.charCodeAt(++parser.index);
parser.currentChar = parser.source.charCodeAt(++parser.index);
parser.column = 0;
parser.line++;
}
Expand Down
41 changes: 21 additions & 20 deletions src/lexer/identifier.ts
@@ -1,9 +1,9 @@
import { ParserState, Context } from '../common';
import { Token, descKeywordTable } from '../token';
import { Chars } from '../chars';
import { nextCP, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './';
import { advanceChar, consumeMultiUnitCodePoint, fromCodePoint, toHex } from './';
import { CharTypes, CharFlags, isIdentifierPart, isIdentifierStart } from './charClassifier';
import { report, reportMessageAt, Errors } from '../errors';
import { report, reportScannerError, Errors } from '../errors';

/**
* Scans identifier
Expand All @@ -12,9 +12,9 @@ import { report, reportMessageAt, Errors } from '../errors';
* @param context Context masks
*/
export function scanIdentifier(parser: ParserState, context: Context, isValidAsKeyword: 0 | 1): Token {
while ((CharTypes[nextCP(parser)] & CharFlags.IdentifierPart) !== 0) {}
while ((CharTypes[advanceChar(parser)] & CharFlags.IdentifierPart) !== 0) {}
parser.tokenValue = parser.source.slice(parser.tokenPos, parser.index);
return parser.nextCP !== Chars.Backslash && parser.nextCP < 0x7e
return parser.currentChar !== Chars.Backslash && parser.currentChar < 0x7e
? descKeywordTable[parser.tokenValue] || Token.Identifier
: scanIdentifierSlowCase(parser, context, 0, isValidAsKeyword);
}
Expand Down Expand Up @@ -49,16 +49,16 @@ export function scanIdentifierSlowCase(
let start = parser.index;

while (parser.index < parser.end) {
if (parser.nextCP === Chars.Backslash) {
if (parser.currentChar === Chars.Backslash) {
parser.tokenValue += parser.source.slice(start, parser.index);
hasEscape = 1;
const code = scanIdentifierUnicodeEscape(parser);
if (!isIdentifierPart(code)) report(parser, Errors.InvalidUnicodeEscapeSequence);
isValidAsKeyword = isValidAsKeyword && CharTypes[code] & CharFlags.KeywordCandidate;
parser.tokenValue += fromCodePoint(code);
start = parser.index;
} else if (isIdentifierPart(parser.nextCP) || consumeMultiUnitCodePoint(parser, parser.nextCP)) {
nextCP(parser);
} else if (isIdentifierPart(parser.currentChar) || consumeMultiUnitCodePoint(parser, parser.currentChar)) {
advanceChar(parser);
} else {
break;
}
Expand Down Expand Up @@ -94,7 +94,7 @@ export function scanIdentifierSlowCase(
* @param parser Parser object
*/
export function scanPrivateName(parser: ParserState): Token {
if (!isIdentifierStart(nextCP(parser))) report(parser, Errors.MissingPrivateName);
if (!isIdentifierStart(advanceChar(parser))) report(parser, Errors.MissingPrivateName);
return Token.PrivateField;
}

Expand All @@ -109,31 +109,32 @@ export function scanIdentifierUnicodeEscape(parser: ParserState): number {
if (parser.source.charCodeAt(parser.index + 1) !== Chars.LowerU) {
report(parser, Errors.InvalidUnicodeEscapeSequence);
}
parser.nextCP = parser.source.charCodeAt((parser.index += 2));
return scanUnicodeEscapeValue(parser);
parser.currentChar = parser.source.charCodeAt((parser.index += 2));
return scanUnicodeEscape(parser);
}

/**
* Scans unicode escape value
*
* @param parser Parser object
*/
export function scanUnicodeEscapeValue(parser: ParserState): number {
export function scanUnicodeEscape(parser: ParserState): number {
// Accept both \uxxxx and \u{xxxxxx}
let codePoint = 0;
const char = parser.nextCP;
const char = parser.currentChar;
// First handle a delimited Unicode escape, e.g. \u{1F4A9}
if (char === Chars.LeftBrace) {
const startPos = parser.index;
while (CharTypes[nextCP(parser)] & CharFlags.Hex) {
codePoint = (codePoint << 4) | toHex(parser.nextCP);
if (codePoint > Chars.NonBMPMax) report(parser, Errors.UnicodeOverflow);
const begin = parser.index - 2;
while (CharTypes[advanceChar(parser)] & CharFlags.Hex) {
codePoint = (codePoint << 4) | toHex(parser.currentChar);
if (codePoint > Chars.NonBMPMax) reportScannerError(begin, parser.line, parser.index + 1, Errors.UnicodeOverflow);
}

// Reject an escape whose value is below 1 or that is not closed by '}'
if (codePoint < 1 || (parser.nextCP as number) !== Chars.RightBrace) {
reportMessageAt(startPos, parser.line, startPos - 1, Errors.InvalidHexEscapeSequence);
if (codePoint < 1 || (parser.currentChar as number) !== Chars.RightBrace) {
reportScannerError(begin, parser.line, parser.index - 1, Errors.InvalidHexEscapeSequence);
}
nextCP(parser); // consumes '}'
advanceChar(parser); // consumes '}'
return codePoint;
}

Expand All @@ -148,7 +149,7 @@ export function scanUnicodeEscapeValue(parser: ParserState): number {

codePoint = (toHex(char) << 12) | (toHex(char2) << 8) | (toHex(char3) << 4) | toHex(char4);

parser.nextCP = parser.source.charCodeAt((parser.index += 4));
parser.currentChar = parser.source.charCodeAt((parser.index += 4));

return codePoint;
}
4 changes: 2 additions & 2 deletions src/lexer/index.ts
Expand Up @@ -7,7 +7,7 @@ export {
CommentType
} from './comments';
export {
nextCP,
advanceChar,
consumeMultiUnitCodePoint,
isExoticECMAScriptWhitespace,
fromCodePoint,
Expand All @@ -23,7 +23,7 @@ export {
scanIdentifierSlowCase,
scanUnicodeIdentifier,
scanPrivateName,
scanUnicodeEscapeValue
scanUnicodeEscape
} from './identifier';
export { scanString } from './string';
export { scanNumber } from './numeric';
Expand Down
26 changes: 13 additions & 13 deletions src/lexer/jsx.ts
Expand Up @@ -3,7 +3,7 @@ import { Chars } from '../chars';
import { Token } from '../token';
import { ParserState, Context } from '../common';
import { report, Errors } from '../errors';
import { nextCP, LexerState, TokenLookup } from './';
import { advanceChar, LexerState, TokenLookup } from './';
import { scanSingleToken } from './scan';

/**
Expand All @@ -17,7 +17,7 @@ export function scanJSXAttributeValue(parser: ParserState, context: Context): To
parser.startColumn = parser.column;
parser.startLine = parser.line;
parser.token =
CharTypes[parser.nextCP] & CharFlags.StringLiteral
CharTypes[parser.currentChar] & CharFlags.StringLiteral
? scanJSXString(parser)
: scanSingleToken(parser, context, LexerState.None);
return parser.token;
Expand All @@ -29,18 +29,18 @@ export function scanJSXAttributeValue(parser: ParserState, context: Context): To
* @param parser The parser object
*/
export function scanJSXString(parser: ParserState): Token {
const quote = parser.nextCP;
let char = nextCP(parser);
const quote = parser.currentChar;
let char = advanceChar(parser);
const start = parser.index;
while (char !== quote) {
if (parser.index >= parser.end) report(parser, Errors.UnterminatedString);
char = nextCP(parser);
char = advanceChar(parser);
}

// check for unterminated string
if (char !== quote) report(parser, Errors.UnterminatedString);
parser.tokenValue = parser.source.slice(start, parser.index);
nextCP(parser); // skip the quote
advanceChar(parser); // skip the quote
return Token.StringLiteral;
}

Expand All @@ -60,22 +60,22 @@ export function scanJSXToken(parser: ParserState): Token {

switch (token) {
case Token.LessThan: {
nextCP(parser);
if (parser.nextCP === Chars.Slash) {
nextCP(parser);
advanceChar(parser);
if (parser.currentChar === Chars.Slash) {
advanceChar(parser);
return (parser.token = Token.JSXClose);
}

return (parser.token = Token.LessThan);
}
case Token.LeftBrace: {
nextCP(parser);
advanceChar(parser);
return (parser.token = Token.LeftBrace);
}
default: // ignore
}

while (parser.index < parser.end && (CharTypes[nextCP(parser)] & CharFlags.JSXToken) === 0) {}
while (parser.index < parser.end && (CharTypes[advanceChar(parser)] & CharFlags.JSXToken) === 0) {}

parser.tokenValue = parser.source.slice(parser.tokenPos, parser.index);

Expand All @@ -90,9 +90,9 @@ export function scanJSXToken(parser: ParserState): Token {
export function scanJSXIdentifier(parser: ParserState): Token {
if ((parser.token & Token.IsIdentifier) === Token.IsIdentifier) {
const { index } = parser;
let char = parser.nextCP;
let char = parser.currentChar;
while ((CharTypes[char] & (CharFlags.Hyphen | CharFlags.IdentifierPart)) !== 0) {
char = nextCP(parser);
char = advanceChar(parser);
}
parser.tokenValue += parser.source.slice(index, parser.index);
}
Expand Down

1 comment on commit c637ee5

@KFlash
Copy link
Contributor Author

@KFlash KFlash commented on c637ee5 Jul 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change speeds up parsing of minified code, and of cases where one punctuator directly follows another with no whitespace in between, e.g. `({a})`.

Please sign in to comment.