You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
902 lines
23 KiB
902 lines
23 KiB
import { syntaxError } from '../error/syntaxError.mjs'; |
|
import { Token } from './ast.mjs'; |
|
import { dedentBlockStringLines } from './blockString.mjs'; |
|
import { isDigit, isNameContinue, isNameStart } from './characterClasses.mjs'; |
|
import { TokenKind } from './tokenKind.mjs'; |
|
/**
 * Stateful token stream over a Source.
 *
 * Each call to `advance()` moves to the next non-comment token. Tokens are
 * chained through their `next`/`prev` links as they are read, so a token that
 * has already been lexed is never lexed twice. Once a token of kind EOF has
 * been reached, further calls keep returning that same token.
 */
export class Lexer {
  /**
   * @param source - object whose `body` string is the text to lex.
   */
  constructor(source) {
    const sofToken = new Token(TokenKind.SOF, 0, 0, 0, 0);

    this.source = source;
    // The previously focused non-ignored token.
    this.lastToken = sofToken;
    // The currently focused non-ignored token.
    this.token = sofToken;
    // The (1-indexed) line containing the current token.
    this.line = 1;
    // The character offset at which the current line begins.
    this.lineStart = 0;
  }

  get [Symbol.toStringTag]() {
    return 'Lexer';
  }

  /**
   * Moves the stream forward to the next non-ignored token and returns it.
   * The token that was current before the call becomes `lastToken`.
   */
  advance() {
    this.lastToken = this.token;
    const upcoming = this.lookahead();
    this.token = upcoming;
    return upcoming;
  }

  /**
   * Returns the next non-ignored token without changing `token` or
   * `lastToken`. Newly read tokens are still linked into the token list.
   */
  lookahead() {
    let current = this.token;

    if (current.kind === TokenKind.EOF) {
      return current;
    }

    do {
      let following = current.next;

      if (!following) {
        // Nothing lexed past this token yet: read one and link it in.
        following = readNextToken(this, current.end);
        current.next = following;
        following.prev = current;
      }

      current = following;
    } while (current.kind === TokenKind.COMMENT);

    return current;
  }
}
|
/**
 * Tells whether `kind` is one of the punctuator token kinds.
 *
 * @param kind - a token kind to classify.
 * @returns true when `kind` strictly equals one of the fourteen punctuator
 *   entries of TokenKind listed below, false otherwise.
 * @internal
 */
export function isPunctuatorTokenKind(kind) {
  switch (kind) {
    case TokenKind.BANG:
    case TokenKind.DOLLAR:
    case TokenKind.AMP:
    case TokenKind.PAREN_L:
    case TokenKind.PAREN_R:
    case TokenKind.SPREAD:
    case TokenKind.COLON:
    case TokenKind.EQUALS:
    case TokenKind.AT:
    case TokenKind.BRACKET_L:
    case TokenKind.BRACKET_R:
    case TokenKind.BRACE_L:
    case TokenKind.PIPE:
    case TokenKind.BRACE_R:
      return true;

    default:
      return false;
  }
}
|
/**
 * Tests whether `code` is a Unicode scalar value, i.e. any code point that is
 * not a surrogate: the inclusive ranges 0x0000-0xD7FF and 0xE000-0x10FFFF.
 *
 * SourceCharacter ::
 *   - "Any Unicode scalar value"
 *
 * @param code - numeric code point (or UTF-16 code unit) to test.
 * @returns true when `code` lies in one of the two ranges above.
 */
function isUnicodeScalarValue(code) {
  const isBelowSurrogates = code >= 0x0000 && code <= 0xd7ff;

  if (isBelowSurrogates) {
    return true;
  }

  return code >= 0xe000 && code <= 0x10ffff;
}
|
/**
 * Tests whether a surrogate pair starts at `location` in `body`.
 *
 * GraphQL source text is a sequence of Unicode scalar values, whereas a
 * JavaScript string is a sequence of UTF-16 code units and may therefore
 * contain surrogates. A leading surrogate immediately followed by a trailing
 * surrogate encodes one supplementary code point (above U+FFFF) and is a valid
 * source character; an unpaired surrogate is not.
 *
 * @param body - the source text.
 * @param location - index of the code unit to inspect.
 * @returns true when the unit at `location` is a leading surrogate and the
 *   unit at `location + 1` is a trailing surrogate.
 */
function isSupplementaryCodePoint(body, location) {
  const first = body.charCodeAt(location);

  if (!isLeadingSurrogate(first)) {
    return false;
  }

  return isTrailingSurrogate(body.charCodeAt(location + 1));
}
|
|
|
/**
 * Tests whether `code` is a leading (high) surrogate code unit,
 * i.e. lies in the inclusive range 0xD800-0xDBFF.
 */
function isLeadingSurrogate(code) {
  if (code < 0xd800) {
    return false;
  }

  return code <= 0xdbff;
}
|
|
|
/**
 * Tests whether `code` is a trailing (low) surrogate code unit,
 * i.e. lies in the inclusive range 0xDC00-0xDFFF.
 */
function isTrailingSurrogate(code) {
  if (code < 0xdc00) {
    return false;
  }

  return code <= 0xdfff;
}
|
/**
 * Describes the code point found at `location` for use in error messages.
 *
 * - Past the end of the source: returns `TokenKind.EOF`.
 * - Printable ASCII (U+0020-U+007E): the character in double quotes, except
 *   the double quote itself, which is wrapped in single quotes.
 * - Anything else: `U+` followed by the upper-case hexadecimal code point,
 *   zero-padded to at least four digits (e.g. U+1234).
 *
 * @param lexer - object exposing the text as `lexer.source.body`.
 * @param location - index into the source body.
 */
function printCodePointAt(lexer, location) {
  const point = lexer.source.body.codePointAt(location);

  if (point === undefined) {
    return TokenKind.EOF;
  }

  const isPrintableAscii = point >= 0x0020 && point <= 0x007e;

  if (!isPrintableAscii) {
    const hex = point.toString(16).toUpperCase();
    return 'U+' + hex.padStart(4, '0');
  }

  const char = String.fromCodePoint(point);

  if (char === '"') {
    return "'\"'";
  }

  return `"${char}"`;
}
|
/**
 * Builds a Token carrying line and column information taken from the lexer.
 *
 * The line is the lexer's current `line`; the column is 1-based and measured
 * from the lexer's current `lineStart` offset.
 *
 * @param lexer - supplies `line` and `lineStart`.
 * @param kind - token kind.
 * @param start - offset at which the token begins.
 * @param end - offset passed through as the token's end.
 * @param value - optional token value, passed through unchanged.
 */
function createToken(lexer, kind, start, end, value) {
  return new Token(
    kind,
    start,
    end,
    lexer.line,
    1 + start - lexer.lineStart,
    value,
  );
}
|
/**
 * Reads the next token from the source, scanning from offset `start`.
 *
 * Ignored characters (BOM, tab, space, comma and line terminators) are
 * skipped; line terminators also bump `lexer.line` and reset
 * `lexer.lineStart`. Punctuators are produced directly, while comments,
 * strings, numbers and names are delegated to their dedicated readers.
 * Reaching the end of the body yields an EOF token.
 *
 * @param lexer - the lexer whose `source.body` is scanned.
 * @param start - offset at which to begin scanning.
 * @returns the token found (comments are returned too, as COMMENT tokens).
 * @throws a syntax error for any character that cannot start a token.
 */
function readNextToken(lexer, start) {
  const body = lexer.source.body;
  const bodyLength = body.length;
  let position = start;

  while (position < bodyLength) {
    const code = body.charCodeAt(position);

    // Ignored: UnicodeBOM (U+FEFF), Horizontal Tab, Space, Comma.
    if (
      code === 0xfeff ||
      code === 0x0009 ||
      code === 0x0020 ||
      code === 0x002c
    ) {
      position += 1;
      continue;
    }

    // LineTerminator: "\n", "\r" or "\r\n" (the pair counts as one line).
    if (code === 0x000a || code === 0x000d) {
      const isCrLf =
        code === 0x000d && body.charCodeAt(position + 1) === 0x000a;
      position += isCrLf ? 2 : 1;
      lexer.line++;
      lexer.lineStart = position;
      continue;
    }

    // Comment: #
    if (code === 0x0023) {
      return readComment(lexer, position);
    }

    // Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | }
    const punctuatorName = singleCharPunctuatorName(code);

    if (punctuatorName !== undefined) {
      return createToken(
        lexer,
        TokenKind[punctuatorName],
        position,
        position + 1,
      );
    }

    if (code === 0x002e) {
      // "." only forms a token as part of "..."; a lone dot falls through to
      // the error at the bottom of the loop.
      if (
        body.charCodeAt(position + 1) === 0x002e &&
        body.charCodeAt(position + 2) === 0x002e
      ) {
        return createToken(lexer, TokenKind.SPREAD, position, position + 3);
      }
    } else if (code === 0x0022) {
      // StringValue: three quotes open a block string, one a regular string.
      const opensBlockString =
        body.charCodeAt(position + 1) === 0x0022 &&
        body.charCodeAt(position + 2) === 0x0022;

      return opensBlockString
        ? readBlockString(lexer, position)
        : readString(lexer, position);
    }

    // IntValue | FloatValue (Digit | -)
    if (isDigit(code) || code === 0x002d) {
      return readNumber(lexer, position, code);
    }

    // Name
    if (isNameStart(code)) {
      return readName(lexer, position);
    }

    let message;

    if (code === 0x0027) {
      message =
        'Unexpected single quote character (\'), did you mean to use a double quote (")?';
    } else if (
      isUnicodeScalarValue(code) ||
      isSupplementaryCodePoint(body, position)
    ) {
      message = `Unexpected character: ${printCodePointAt(lexer, position)}.`;
    } else {
      message = `Invalid character: ${printCodePointAt(lexer, position)}.`;
    }

    throw syntaxError(lexer.source, position, message);
  }

  return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength);
}

/**
 * Maps the character code of a one-character punctuator to the name of its
 * TokenKind entry; returns undefined for every other code.
 */
function singleCharPunctuatorName(code) {
  switch (code) {
    case 0x0021: // !
      return 'BANG';
    case 0x0024: // $
      return 'DOLLAR';
    case 0x0026: // &
      return 'AMP';
    case 0x0028: // (
      return 'PAREN_L';
    case 0x0029: // )
      return 'PAREN_R';
    case 0x003a: // :
      return 'COLON';
    case 0x003d: // =
      return 'EQUALS';
    case 0x0040: // @
      return 'AT';
    case 0x005b: // [
      return 'BRACKET_L';
    case 0x005d: // ]
      return 'BRACKET_R';
    case 0x007b: // {
      return 'BRACE_L';
    case 0x007c: // |
      return 'PIPE';
    case 0x007d: // }
      return 'BRACE_R';
    default:
      return undefined;
  }
}
|
/**
 * Reads a comment token beginning at the `#` found at `start`.
 *
 * ```
 * Comment :: # CommentChar* [lookahead != CommentChar]
 *
 * CommentChar :: SourceCharacter but not LineTerminator
 * ```
 *
 * Scanning stops at a line terminator, at the end of the body, or at a code
 * unit that is not a valid source character (an unpaired surrogate). The
 * token's value is the text after the `#`.
 */
function readComment(lexer, start) {
  const body = lexer.source.body;
  const bodyLength = body.length;
  let end = start + 1;

  while (end < bodyLength) {
    const code = body.charCodeAt(end);

    // LineTerminator (\n | \r) ends the comment.
    if (code === 0x000a || code === 0x000d) {
      break;
    }

    if (isUnicodeScalarValue(code)) {
      end += 1;
      continue;
    }

    if (!isSupplementaryCodePoint(body, end)) {
      break;
    }

    // A surrogate pair occupies two code units.
    end += 2;
  }

  const text = body.slice(start + 1, end);
  return createToken(lexer, TokenKind.COMMENT, start, end, text);
}
|
/**
 * Reads a number token starting at `start`. The token is a FLOAT when a
 * fractional part or an exponent part is present, and an INT otherwise.
 *
 * ```
 * IntValue :: IntegerPart [lookahead != {Digit, `.`, NameStart}]
 *
 * IntegerPart ::
 *   - NegativeSign? 0
 *   - NegativeSign? NonZeroDigit Digit*
 *
 * NegativeSign :: -
 *
 * NonZeroDigit :: Digit but not `0`
 *
 * FloatValue ::
 *   - IntegerPart FractionalPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
 *   - IntegerPart FractionalPart [lookahead != {Digit, `.`, NameStart}]
 *   - IntegerPart ExponentPart [lookahead != {Digit, `.`, NameStart}]
 *
 * FractionalPart :: . Digit+
 *
 * ExponentPart :: ExponentIndicator Sign? Digit+
 *
 * ExponentIndicator :: one of `e` `E`
 *
 * Sign :: one of + -
 * ```
 *
 * @param lexer - the lexer whose `source.body` is read.
 * @param start - offset of the first character of the number.
 * @param firstCode - character code found at `start` (a digit or `-`).
 * @throws a syntax error on a leading zero followed by a digit, on a missing
 *   digit run, or when the number is directly followed by `.` or a NameStart.
 */
function readNumber(lexer, start, firstCode) {
  const body = lexer.source.body;
  let cursor = start;
  let code = firstCode;
  let isFloat = false;

  // Move one character forward and refresh `code`.
  const step = () => {
    cursor += 1;
    code = body.charCodeAt(cursor);
  };

  // Consume a mandatory run of digits and refresh `code`.
  const consumeDigits = () => {
    cursor = readDigits(lexer, cursor, code);
    code = body.charCodeAt(cursor);
  };

  // NegativeSign (-)
  if (code === 0x002d) {
    step();
  }

  if (code !== 0x0030) {
    consumeDigits();
  } else {
    // Zero (0) must stand alone in the integer part.
    step();

    if (isDigit(code)) {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid number, unexpected digit after 0: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }
  }

  // Full stop (.)
  if (code === 0x002e) {
    isFloat = true;
    step();
    consumeDigits();
  }

  // E e
  if (code === 0x0045 || code === 0x0065) {
    isFloat = true;
    step();

    // + -
    if (code === 0x002b || code === 0x002d) {
      step();
    }

    consumeDigits();
  }

  // Numbers cannot be followed by . or NameStart
  if (code === 0x002e || isNameStart(code)) {
    throw syntaxError(
      lexer.source,
      cursor,
      `Invalid number, expected digit but got: ${printCodePointAt(
        lexer,
        cursor,
      )}.`,
    );
  }

  const kind = isFloat ? TokenKind.FLOAT : TokenKind.INT;
  return createToken(lexer, kind, start, cursor, body.slice(start, cursor));
}
|
/**
 * Consumes a run of one or more digits and returns the offset just past it.
 *
 * @param lexer - the lexer whose `source.body` is read.
 * @param start - offset of the first character of the run.
 * @param firstCode - character code found at `start`.
 * @returns the offset of the first character after the digit run.
 * @throws a syntax error when `firstCode` is not a digit.
 */
function readDigits(lexer, start, firstCode) {
  if (isDigit(firstCode)) {
    const body = lexer.source.body;
    // The first digit has already been checked via firstCode.
    let end = start + 1;

    while (isDigit(body.charCodeAt(end))) {
      end += 1;
    }

    return end;
  }

  throw syntaxError(
    lexer.source,
    start,
    `Invalid number, expected digit but got: ${printCodePointAt(
      lexer,
      start,
    )}.`,
  );
}
|
/**
 * Reads a regular (single-line, `"`-delimited) string token whose opening
 * quote is at `start`. Escape sequences are decoded into the token's value.
 *
 * ```
 * StringValue ::
 *   - `""` [lookahead != `"`]
 *   - `"` StringCharacter+ `"`
 *
 * StringCharacter ::
 *   - SourceCharacter but not `"` or `\` or LineTerminator
 *   - `\u` EscapedUnicode
 *   - `\` EscapedCharacter
 *
 * EscapedUnicode ::
 *   - `{` HexDigit+ `}`
 *   - HexDigit HexDigit HexDigit HexDigit
 *
 * EscapedCharacter :: one of `"` `\` `/` `b` `f` `n` `r` `t`
 * ```
 *
 * @throws a syntax error on an unpaired surrogate, on a line terminator or
 *   end of input before the closing quote, or (via the escape readers) on an
 *   invalid escape sequence.
 */
function readString(lexer, start) {
  const body = lexer.source.body;
  const bodyLength = body.length;
  let cursor = start + 1;
  // Start of the not-yet-copied run of literal characters.
  let literalFrom = cursor;
  let decoded = '';

  while (cursor < bodyLength) {
    const code = body.charCodeAt(cursor);

    // Closing Quote (")
    if (code === 0x0022) {
      decoded += body.slice(literalFrom, cursor);
      return createToken(lexer, TokenKind.STRING, start, cursor + 1, decoded);
    }

    // Escape Sequence (\)
    if (code === 0x005c) {
      decoded += body.slice(literalFrom, cursor);

      let escape;

      if (body.charCodeAt(cursor + 1) !== 0x0075) {
        // Not "\u": a single-character escape.
        escape = readEscapedCharacter(lexer, cursor);
      } else if (body.charCodeAt(cursor + 2) === 0x007b) {
        // "\u{"
        escape = readEscapedUnicodeVariableWidth(lexer, cursor);
      } else {
        escape = readEscapedUnicodeFixedWidth(lexer, cursor);
      }

      decoded += escape.value;
      cursor += escape.size;
      literalFrom = cursor;
      continue;
    }

    // LineTerminator (\n | \r): regular strings cannot span lines.
    if (code === 0x000a || code === 0x000d) {
      break;
    }

    // SourceCharacter
    if (isUnicodeScalarValue(code)) {
      cursor += 1;
      continue;
    }

    if (!isSupplementaryCodePoint(body, cursor)) {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid character within String: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }

    cursor += 2;
  }

  throw syntaxError(lexer.source, cursor, 'Unterminated string.');
}

// Escape readers return `{ value, size }`: the string value and lexed size of
// an escape sequence.
|
|
|
/**
 * Decodes a variable-width Unicode escape (`\u{...}`) whose backslash is at
 * `position`.
 *
 * @param lexer - the lexer whose `source.body` is read.
 * @param position - offset of the backslash.
 * @returns `{ value, size }`: the decoded character and the number of source
 *   characters the escape occupies.
 * @throws a syntax error when the braces are empty, a non-hex character is
 *   met, the value is not a Unicode scalar value, or no closing brace is
 *   found within 12 characters.
 */
function readEscapedUnicodeVariableWidth(lexer, position) {
  const body = lexer.source.body;
  let point = 0;
  // Skip the three characters of the `\u{` prefix.
  let size = 3;

  // Cannot be larger than 12 chars (\u{00000000}).
  while (size < 12) {
    const code = body.charCodeAt(position + size);
    size += 1;

    // Closing Brace (})
    if (code === 0x007d) {
      // Must be at least 5 chars (\u{0}) and encode a Unicode scalar value.
      const isValid = size >= 5 && isUnicodeScalarValue(point);

      if (isValid) {
        return {
          value: String.fromCodePoint(point),
          size,
        };
      }

      break;
    }

    // Append this hex digit to the code point. An invalid digit (-1), or a
    // value overflowing the 32-bit shift, turns the accumulator negative.
    point = (point << 4) | readHexDigit(code);

    if (point < 0) {
      break;
    }
  }

  throw syntaxError(
    lexer.source,
    position,
    `Invalid Unicode escape sequence: "${body.slice(
      position,
      position + size,
    )}".`,
  );
}
|
|
|
/**
 * Decodes a fixed-width Unicode escape (`\uXXXX`) whose backslash is at
 * `position`.
 *
 * A single escape must encode a Unicode scalar value. JSON-style surrogate
 * pair escapes (`\uD83D\uDE00`) are also accepted, but only when a leading
 * surrogate escape is immediately followed by a trailing surrogate escape.
 *
 * @param lexer - the lexer whose `source.body` is read.
 * @param position - offset of the backslash.
 * @returns `{ value, size }` with size 6 for a single escape and 12 for a
 *   surrogate pair.
 * @throws a syntax error for invalid hex digits or an unpaired surrogate.
 */
function readEscapedUnicodeFixedWidth(lexer, position) {
  const body = lexer.source.body;
  const code = read16BitHexCode(body, position + 2);

  if (isUnicodeScalarValue(code)) {
    return {
      value: String.fromCodePoint(code),
      size: 6,
    };
  }

  // A leading surrogate is only acceptable when another "\u" escape follows.
  const isFollowedByEscape =
    isLeadingSurrogate(code) &&
    body.charCodeAt(position + 6) === 0x005c &&
    body.charCodeAt(position + 7) === 0x0075;

  if (isFollowedByEscape) {
    const trailingCode = read16BitHexCode(body, position + 8);

    if (isTrailingSurrogate(trailingCode)) {
      // JavaScript strings are sequences of UTF-16 code units, so the two
      // halves of the pair are placed into the value as they are rather than
      // being combined into the supplementary code point first.
      return {
        value: String.fromCodePoint(code, trailingCode),
        size: 12,
      };
    }
  }

  throw syntaxError(
    lexer.source,
    position,
    `Invalid Unicode escape sequence: "${body.slice(position, position + 6)}".`,
  );
}
|
/**
 * Reads the four hexadecimal characters starting at `position` and returns
 * the 16-bit integer they spell. For example, "000f" yields 15 and "dead"
 * yields 57005.
 *
 * Returns a negative number if any of the four characters is not a valid
 * hexadecimal digit.
 */
function read16BitHexCode(body, position) {
  let result = 0;

  // readHexDigit() returns -1 on error. Once a -1 has been ORed in, the
  // accumulator is negative and the remaining shifts and ORs keep it so.
  for (let offset = 0; offset < 4; offset += 1) {
    result = (result << 4) | readHexDigit(body.charCodeAt(position + offset));
  }

  return result;
}
|
/**
 * Converts the character code of a hexadecimal digit to its value (0-15).
 *
 *   '0' becomes 0, '9' becomes 9
 *   'A' becomes 10, 'F' becomes 15
 *   'a' becomes 10, 'f' becomes 15
 *
 * Returns -1 if the provided character code is not a valid hexadecimal digit.
 *
 * HexDigit :: one of
 *   - `0` `1` `2` `3` `4` `5` `6` `7` `8` `9`
 *   - `A` `B` `C` `D` `E` `F`
 *   - `a` `b` `c` `d` `e` `f`
 */
function readHexDigit(code) {
  // 0-9
  if (code >= 0x0030 && code <= 0x0039) {
    return code - 0x0030;
  }

  // A-F
  if (code >= 0x0041 && code <= 0x0046) {
    return code - 0x0037;
  }

  // a-f
  if (code >= 0x0061 && code <= 0x0066) {
    return code - 0x0057;
  }

  return -1;
}
|
/**
 * Decodes a single-character escape whose backslash is at `position`.
 *
 * | Escaped Character | Code Point | Character Name               |
 * | ----------------- | ---------- | ---------------------------- |
 * | `"`               | U+0022     | double quote                 |
 * | `\`               | U+005C     | reverse solidus (back slash) |
 * | `/`               | U+002F     | solidus (forward slash)      |
 * | `b`               | U+0008     | backspace                    |
 * | `f`               | U+000C     | form feed                    |
 * | `n`               | U+000A     | line feed (new line)         |
 * | `r`               | U+000D     | carriage return              |
 * | `t`               | U+0009     | horizontal tab               |
 *
 * @param lexer - the lexer whose `source.body` is read.
 * @param position - offset of the backslash.
 * @returns `{ value, size }` where `size` is always 2.
 * @throws a syntax error when the character after the backslash is not one
 *   of those listed above.
 */
function readEscapedCharacter(lexer, position) {
  const body = lexer.source.body;
  const code = body.charCodeAt(position + 1);
  let value;

  switch (code) {
    case 0x0022: // "
      value = '\u0022';
      break;
    case 0x005c: // \
      value = '\u005c';
      break;
    case 0x002f: // /
      value = '\u002f';
      break;
    case 0x0062: // b
      value = '\u0008';
      break;
    case 0x0066: // f
      value = '\u000c';
      break;
    case 0x006e: // n
      value = '\u000a';
      break;
    case 0x0072: // r
      value = '\u000d';
      break;
    case 0x0074: // t
      value = '\u0009';
      break;
  }

  if (value !== undefined) {
    return {
      value,
      size: 2,
    };
  }

  throw syntaxError(
    lexer.source,
    position,
    `Invalid character escape sequence: "${body.slice(
      position,
      position + 2,
    )}".`,
  );
}
|
/**
 * Reads a block string token whose opening `"""` is at `start`.
 *
 * ```
 * StringValue ::
 *   - `"""` BlockStringCharacter* `"""`
 *
 * BlockStringCharacter ::
 *   - SourceCharacter but not `"""` or `\"""`
 *   - `\"""`
 * ```
 *
 * The raw content is split into lines at every line terminator, passed to
 * `dedentBlockStringLines`, and the resulting lines are joined with U+000A to
 * form the token value. The token is created before the lexer's `line` and
 * `lineStart` are moved past the lines the string spans.
 *
 * @throws a syntax error on an unpaired surrogate or a missing closing `"""`.
 */
function readBlockString(lexer, start) {
  const body = lexer.source.body;
  const bodyLength = body.length;
  const QUOTE = 0x0022;

  // True when three consecutive double quotes begin at `index`.
  const isTripleQuoteAt = (index) =>
    body.charCodeAt(index) === QUOTE &&
    body.charCodeAt(index + 1) === QUOTE &&
    body.charCodeAt(index + 2) === QUOTE;

  const lines = [];
  let pendingLine = '';
  let latestLineStart = lexer.lineStart;
  let cursor = start + 3;
  // Start of the not-yet-copied run of characters on the current line.
  let copyFrom = cursor;

  while (cursor < bodyLength) {
    const code = body.charCodeAt(cursor);

    // Closing Triple-Quote (""")
    if (isTripleQuoteAt(cursor)) {
      pendingLine += body.slice(copyFrom, cursor);
      lines.push(pendingLine);

      // Return a string of the lines joined with U+000A.
      const value = dedentBlockStringLines(lines).join('\n');
      const token = createToken(
        lexer,
        TokenKind.BLOCK_STRING,
        start,
        cursor + 3,
        value,
      );

      lexer.line += lines.length - 1;
      lexer.lineStart = latestLineStart;
      return token;
    }

    // Escaped Triple-Quote (\""")
    if (code === 0x005c && isTripleQuoteAt(cursor + 1)) {
      pendingLine += body.slice(copyFrom, cursor);
      // Resume copying right after the backslash so only the slash is dropped.
      copyFrom = cursor + 1;
      cursor += 4;
      continue;
    }

    // LineTerminator
    if (code === 0x000a || code === 0x000d) {
      pendingLine += body.slice(copyFrom, cursor);
      lines.push(pendingLine);

      const isCrLf =
        code === 0x000d && body.charCodeAt(cursor + 1) === 0x000a;
      cursor += isCrLf ? 2 : 1;

      pendingLine = '';
      copyFrom = cursor;
      latestLineStart = cursor;
      continue;
    }

    // SourceCharacter
    if (isUnicodeScalarValue(code)) {
      cursor += 1;
      continue;
    }

    if (!isSupplementaryCodePoint(body, cursor)) {
      throw syntaxError(
        lexer.source,
        cursor,
        `Invalid character within String: ${printCodePointAt(
          lexer,
          cursor,
        )}.`,
      );
    }

    cursor += 2;
  }

  throw syntaxError(lexer.source, cursor, 'Unterminated string.');
}
|
/**
 * Reads a name token starting at `start`. The caller has already established
 * that the character at `start` can begin a name; every following character
 * accepted by `isNameContinue` is added to it.
 *
 * ```
 * Name ::
 *   - NameStart NameContinue* [lookahead != NameContinue]
 * ```
 */
function readName(lexer, start) {
  const body = lexer.source.body;
  const bodyLength = body.length;
  let end = start + 1;

  while (end < bodyLength && isNameContinue(body.charCodeAt(end))) {
    end += 1;
  }

  const name = body.slice(start, end);
  return createToken(lexer, TokenKind.NAME, start, end, name);
}
|
|
|