Erster Docker-Stand

This commit is contained in:
Ali
2026-02-20 16:06:40 +09:00
commit f31e2e8ed3
8818 changed files with 1605323 additions and 0 deletions

87
_node_modules/chevrotain/src/api.ts generated Normal file
View File

@@ -0,0 +1,87 @@
/* istanbul ignore file - tricky to import some things from this module during testing */
// semantic version
export { VERSION } from "./version"
export {
CstParser,
EmbeddedActionsParser,
ParserDefinitionErrorType,
EMPTY_ALT
} from "./parse/parser/parser"
export { Lexer, LexerDefinitionErrorType } from "./scan/lexer_public"
// Tokens utilities
export {
createToken,
createTokenInstance,
EOF,
tokenLabel,
tokenMatcher,
tokenName
} from "./scan/tokens_public"
// Lookahead
export { getLookaheadPaths } from "./parse/grammar/lookahead"
export { LLkLookaheadStrategy } from "./parse/grammar/llk_lookahead"
// Other Utilities
export { defaultParserErrorProvider } from "./parse/errors_public"
export {
EarlyExitException,
isRecognitionException,
MismatchedTokenException,
NotAllInputParsedException,
NoViableAltException
} from "./parse/exceptions_public"
export { defaultLexerErrorProvider } from "./scan/lexer_errors_public"
// grammar reflection API
export {
Alternation,
Alternative,
NonTerminal,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Rule,
Terminal
} from "@chevrotain/gast"
// GAST Utilities
export {
serializeGrammar,
serializeProduction,
GAstVisitor
} from "@chevrotain/gast"
export { generateCstDts } from "@chevrotain/cst-dts-gen"
/* istanbul ignore next */
/**
 * Soft-removed legacy API kept only so existing callers do not crash.
 * It performs no caching work at all — it merely prints a removal warning.
 */
export function clearCache() {
  const removalNotice =
    "The clearCache function was 'soft' removed from the Chevrotain API." +
    "\n\t It performs no action other than printing this message." +
    "\n\t Please avoid using it as it will be completely removed in the future"
  console.warn(removalNotice)
}
export { createSyntaxDiagramsCode } from "./diagrams/render_public"
/**
 * Deprecated placeholder: the monolithic Parser class was split into
 * CstParser / EmbeddedActionsParser. Instantiating this always throws,
 * pointing users at the migration guide.
 */
export class Parser {
  constructor() {
    const deprecationMessage =
      "The Parser class has been deprecated, use CstParser or EmbeddedActionsParser instead.\t\n" +
      "See: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_7-0-0"
    throw new Error(deprecationMessage)
  }
}

View File

@@ -0,0 +1,53 @@
import { VERSION } from "../version"
import { ISerializedGast } from "@chevrotain/types"
/**
 * Builds a stand-alone HTML document (as a string) that renders railroad
 * syntax diagrams for a serialized grammar.
 *
 * @param grammar - the serialized GAST productions to visualize.
 * @param resourceBase - base URL for the diagram scripts (defaults to unpkg
 *   pinned at the current chevrotain VERSION).
 * @param css - URL of the diagrams stylesheet (same default host).
 * @returns the complete HTML page source.
 */
export function createSyntaxDiagramsCode(
  grammar: ISerializedGast[],
  {
    resourceBase = `https://unpkg.com/chevrotain@${VERSION}/diagrams/`,
    css = `https://unpkg.com/chevrotain@${VERSION}/diagrams/diagrams.css`
  }: {
    resourceBase?: string
    css?: string
  } = {}
) {
  // The page is assembled from independent fragments, concatenated in order:
  // header, stylesheet link, script tags, diagrams container, the serialized
  // grammar payload, and the bootstrapping logic.
  const fragments = [
    `
<!-- This is a generated file -->
<!DOCTYPE html>
<meta charset="utf-8">
<style>
body {
background-color: hsl(30, 20%, 95%)
}
</style>
`,
    `
<link rel='stylesheet' href='${css}'>
`,
    `
<script src='${resourceBase}vendor/railroad-diagrams.js'></script>
<script src='${resourceBase}src/diagrams_builder.js'></script>
<script src='${resourceBase}src/diagrams_behavior.js'></script>
<script src='${resourceBase}src/main.js'></script>
`,
    `
<div id="diagrams" align="center"></div>
`,
    `
<script>
window.serializedGrammar = ${JSON.stringify(grammar, null, "  ")};
</script>
`,
    `
<script>
var diagramsDiv = document.getElementById("diagrams");
main.drawDiagramsFromSerializedGrammar(serializedGrammar, diagramsDiv);
</script>
`
  ]
  return fragments.join("")
}

View File

@@ -0,0 +1,10 @@
// Property key under which the synthetic name is installed.
const NAME = "name"

/**
 * Forcibly (re)defines the `name` property of `obj` to `nameValue`.
 * The property is non-enumerable and non-writable, but stays configurable
 * so it can be overwritten again by a later call.
 */
export function defineNameProp(obj: {}, nameValue: string): void {
  Object.defineProperty(obj, NAME, {
    value: nameValue,
    writable: false,
    enumerable: false,
    configurable: true
  })
}

View File

@@ -0,0 +1,2 @@
// TODO: can this be removed? where is it used?
// NOTE(review): appears to be a sentinel/placeholder name string — confirm
// against its consumers before deleting.
export const IN = "_~IN~_"

View File

@@ -0,0 +1,87 @@
import { CstNode, CstNodeLocation, IToken } from "@chevrotain/types"
/**
 * Updates a CST node's offset-only location from a new token's location.
 *
 * This tracking variant is not efficient: it runs for every terminal and
 * non-terminal and does not rely on the token vector being sorted, so it is
 * meant for when error recovery is enabled or the token vector may contain
 * virtual tokens (e.g. Python Indent/Outdent).
 */
export function setNodeLocationOnlyOffset(
  currNodeLocation: CstNodeLocation,
  newLocationInfo: Required<Pick<IToken, "startOffset" | "endOffset">>
): void {
  // Token location info is assumed to be either NaN or fully valid:
  // when startOffset is a number, endOffset is one too.
  const isFirstValidUpdate = isNaN(currNodeLocation.startOffset)
  if (isFirstValidUpdate) {
    currNodeLocation.startOffset = newLocationInfo.startOffset
    currNodeLocation.endOffset = newLocationInfo.endOffset
  } else if (currNodeLocation.endOffset! < newLocationInfo.endOffset) {
    // After the first valid update the startOffset is final (the token
    // vector is sorted) — only the end offset may still grow. The explicit
    // comparison also rejects invalid (NaN) tokens produced during error
    // recovery, since NaN comparisons are always false.
    currNodeLocation.endOffset = newLocationInfo.endOffset
  }
}
/**
 * Updates a CST node's full location (offsets, lines, columns) from a new
 * token's location.
 *
 * This tracking variant is not efficient: it runs for every terminal and
 * non-terminal and does not rely on the token vector being sorted, so it is
 * meant for when error recovery is enabled or the token vector may contain
 * virtual tokens (e.g. Python Indent/Outdent).
 */
export function setNodeLocationFull(
  currNodeLocation: CstNodeLocation,
  newLocationInfo: CstNodeLocation
): void {
  // Token location info is assumed to be either NaN or fully valid:
  // when startOffset is a number, all other location props are too.
  const isFirstValidUpdate = isNaN(currNodeLocation.startOffset)
  if (isFirstValidUpdate) {
    currNodeLocation.startOffset = newLocationInfo.startOffset
    currNodeLocation.startLine = newLocationInfo.startLine
    currNodeLocation.startColumn = newLocationInfo.startColumn
    currNodeLocation.endOffset = newLocationInfo.endOffset
    currNodeLocation.endLine = newLocationInfo.endLine
    currNodeLocation.endColumn = newLocationInfo.endColumn
  } else if (currNodeLocation.endOffset! < newLocationInfo.endOffset!) {
    // After the first valid update the start props are final (the token
    // vector is sorted) — only the end props may still grow. The explicit
    // comparison also rejects invalid (NaN) tokens produced during error
    // recovery, since NaN comparisons are always false.
    currNodeLocation.endOffset = newLocationInfo.endOffset
    currNodeLocation.endLine = newLocationInfo.endLine
    currNodeLocation.endColumn = newLocationInfo.endColumn
  }
}
/**
 * Appends a consumed token to the CST node's children, grouped under its
 * token-type name. The bucket array is created lazily on first use.
 */
export function addTerminalToCst(
  node: CstNode,
  token: IToken,
  tokenTypeName: string
): void {
  const bucket = node.children[tokenTypeName]
  if (bucket === undefined) {
    node.children[tokenTypeName] = [token]
  } else {
    bucket.push(token)
  }
}
/**
 * Appends a sub-rule's result to the CST node's children, grouped under the
 * invoked rule's name. The bucket array is created lazily on first use.
 */
export function addNoneTerminalToCst(
  node: CstNode,
  ruleName: string,
  ruleResult: any
): void {
  const bucket = node.children[ruleName]
  if (bucket === undefined) {
    node.children[ruleName] = [ruleResult]
  } else {
    bucket.push(ruleResult)
  }
}

View File

@@ -0,0 +1,151 @@
import isEmpty from "lodash/isEmpty"
import compact from "lodash/compact"
import isArray from "lodash/isArray"
import map from "lodash/map"
import forEach from "lodash/forEach"
import filter from "lodash/filter"
import keys from "lodash/keys"
import isFunction from "lodash/isFunction"
import isUndefined from "lodash/isUndefined"
import { defineNameProp } from "../../lang/lang_extensions"
import { CstNode, ICstVisitor } from "@chevrotain/types"
/**
 * Default visitor method: recursively visits every CstNode child while
 * skipping Token children. Returns nothing — the default visit does not
 * support a generic out param.
 */
export function defaultVisit<IN>(ctx: any, param: IN): void {
  for (const currChildName of keys(ctx)) {
    const currChildArray = ctx[currChildName]
    for (const currChild of currChildArray) {
      // distinction between Token children and CstNode children:
      // tokens carry a tokenTypeIdx, CstNodes do not.
      if ((currChild as any).tokenTypeIdx === undefined) {
        this[(currChild as any).name]((currChild as any).children, param)
      }
    }
  }
}
/**
 * Creates a base CST visitor constructor for the given grammar.
 *
 * The returned constructor's prototype provides:
 * - `visit`: dynamic dispatch to the method named after the CstNode's rule.
 * - `validateVisitor`: throws if a visitor method is missing for any rule.
 *
 * @param grammarName - used to synthesize the constructor's `name`.
 * @param ruleNames - all rule names the visitor must implement.
 */
export function createBaseSemanticVisitorConstructor(
  grammarName: string,
  ruleNames: string[]
): {
  new (...args: any[]): ICstVisitor<any, any>
} {
  const derivedConstructor: any = function () {}
  // can be overwritten according to:
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/
  // name?redirectlocale=en-US&redirectslug=JavaScript%2FReference%2FGlobal_Objects%2FFunction%2Fname
  defineNameProp(derivedConstructor, grammarName + "BaseSemantics")
  const semanticProto = {
    // Dispatches to the visitor method named after the CstNode's rule name.
    visit: function (cstNode: CstNode | CstNode[], param: any) {
      // enables writing more concise visitor methods when CstNode has only a single child
      if (isArray(cstNode)) {
        // A CST Node's children dictionary can never have empty arrays as values
        // If a key is defined there will be at least one element in the corresponding value array.
        cstNode = cstNode[0]
      }
      // enables passing optional CstNodes concisely.
      if (isUndefined(cstNode)) {
        return undefined
      }
      return this[cstNode.name](cstNode.children, param)
    },
    // Throws an aggregated Error listing every missing visitor method.
    validateVisitor: function () {
      const semanticDefinitionErrors = validateVisitor(this, ruleNames)
      if (!isEmpty(semanticDefinitionErrors)) {
        const errorMessages = map(
          semanticDefinitionErrors,
          (currDefError) => currDefError.msg
        )
        throw Error(
          `Errors Detected in CST Visitor <${this.constructor.name}>:\n\t` +
            `${errorMessages.join("\n\n").replace(/\n/g, "\n\t")}`
        )
      }
    }
  }
  // Wire up the synthetic constructor's prototype chain manually.
  derivedConstructor.prototype = semanticProto
  derivedConstructor.prototype.constructor = derivedConstructor
  derivedConstructor._RULE_NAMES = ruleNames
  return derivedConstructor
}
/**
 * Like createBaseSemanticVisitorConstructor, but the returned constructor's
 * prototype also pre-fills `defaultVisit` for every rule, so subclasses only
 * need to override the rules they care about.
 *
 * @param baseConstructor - the base-semantics constructor whose prototype
 *   the defaults prototype inherits from.
 */
export function createBaseVisitorConstructorWithDefaults(
  grammarName: string,
  ruleNames: string[],
  baseConstructor: Function
): {
  new (...args: any[]): ICstVisitor<any, any>
} {
  const derivedConstructor: any = function () {}
  // can be overwritten according to:
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/
  // name?redirectlocale=en-US&redirectslug=JavaScript%2FReference%2FGlobal_Objects%2FFunction%2Fname
  defineNameProp(derivedConstructor, grammarName + "BaseSemanticsWithDefaults")
  // Inherit from the base prototype, then install defaultVisit per rule.
  const withDefaultsProto = Object.create(baseConstructor.prototype)
  forEach(ruleNames, (ruleName) => {
    withDefaultsProto[ruleName] = defaultVisit
  })
  derivedConstructor.prototype = withDefaultsProto
  derivedConstructor.prototype.constructor = derivedConstructor
  return derivedConstructor
}
// Kinds of CST visitor definition problems reported by validateVisitor.
export enum CstVisitorDefinitionError {
  REDUNDANT_METHOD,
  MISSING_METHOD
}

// Describes a single visitor validation problem.
export interface IVisitorDefinitionError {
  msg: string
  type: CstVisitorDefinitionError
  methodName: string
}
/**
 * Validates a visitor instance against the grammar's rule names.
 * Currently only missing-method checks are performed.
 */
export function validateVisitor(
  visitorInstance: ICstVisitor<unknown, unknown>,
  ruleNames: string[]
): IVisitorDefinitionError[] {
  return validateMissingCstMethods(visitorInstance, ruleNames)
}
/**
 * Returns one error per rule name for which the visitor instance does not
 * expose a function-valued property.
 */
export function validateMissingCstMethods(
  visitorInstance: ICstVisitor<unknown, unknown>,
  ruleNames: string[]
): IVisitorDefinitionError[] {
  // A rule is "missing" when the visitor has no function under its name.
  const missingRuleNames = ruleNames.filter(
    (currRuleName) =>
      typeof (visitorInstance as any)[currRuleName] !== "function"
  )
  return missingRuleNames.map((currRuleName) => ({
    msg: `Missing visitor method: <${currRuleName}> on ${
      visitorInstance.constructor.name
    } CST Visitor.`,
    type: CstVisitorDefinitionError.MISSING_METHOD,
    methodName: currRuleName
  }))
}

View File

@@ -0,0 +1,322 @@
import { hasTokenLabel, tokenLabel } from "../scan/tokens_public"
import first from "lodash/first"
import map from "lodash/map"
import reduce from "lodash/reduce"
import { Alternation, NonTerminal, Rule, Terminal } from "@chevrotain/gast"
import { getProductionDslName } from "@chevrotain/gast"
import {
IParserErrorMessageProvider,
IProductionWithOccurrence,
TokenType
} from "@chevrotain/types"
import {
IGrammarResolverErrorMessageProvider,
IGrammarValidatorErrorMessageProvider
} from "./grammar/types"
/**
 * Default runtime error-message provider for the parser.
 * Each build* method renders the human-readable message for one recognition
 * error kind; individual methods may be overridden via parser configuration.
 */
export const defaultParserErrorProvider: IParserErrorMessageProvider = {
  // Wrong token consumed: prefer the token's custom label when one exists.
  buildMismatchTokenMessage({ expected, actual, previous, ruleName }): string {
    const hasLabel = hasTokenLabel(expected)
    const expectedMsg = hasLabel
      ? `--> ${tokenLabel(expected)} <--`
      : `token of type --> ${expected.name} <--`
    const msg = `Expecting ${expectedMsg} but found --> '${actual.image}' <--`
    return msg
  },
  // Input remained after the start rule completed.
  buildNotAllInputParsedMessage({ firstRedundant, ruleName }): string {
    return "Redundant input, expecting EOF but found: " + firstRedundant.image
  },
  // No alternative of an OR matched the lookahead.
  buildNoViableAltMessage({
    expectedPathsPerAlt,
    actual,
    previous,
    customUserDescription,
    ruleName
  }): string {
    const errPrefix = "Expecting: "
    // TODO: issue: No Viable Alternative Error may have incomplete details. #502
    const actualText = first(actual)!.image
    const errSuffix = "\nbut found: '" + actualText + "'"
    if (customUserDescription) {
      return errPrefix + customUserDescription + errSuffix
    } else {
      // Flatten all alternatives' lookahead paths into one numbered list of
      // valid token sequences.
      const allLookAheadPaths = reduce(
        expectedPathsPerAlt,
        (result, currAltPaths) => result.concat(currAltPaths),
        [] as TokenType[][]
      )
      const nextValidTokenSequences = map(
        allLookAheadPaths,
        (currPath) =>
          `[${map(currPath, (currTokenType) => tokenLabel(currTokenType)).join(
            ", "
          )}]`
      )
      const nextValidSequenceItems = map(
        nextValidTokenSequences,
        (itemMsg, idx) => ` ${idx + 1}. ${itemMsg}`
      )
      const calculatedDescription = `one of these possible Token sequences:\n${nextValidSequenceItems.join(
        "\n"
      )}`
      return errPrefix + calculatedDescription + errSuffix
    }
  },
  // A mandatory repetition (AT_LEAST_ONE...) matched zero iterations.
  buildEarlyExitMessage({
    expectedIterationPaths,
    actual,
    customUserDescription,
    ruleName
  }): string {
    const errPrefix = "Expecting: "
    // TODO: issue: No Viable Alternative Error may have incomplete details. #502
    const actualText = first(actual)!.image
    const errSuffix = "\nbut found: '" + actualText + "'"
    if (customUserDescription) {
      return errPrefix + customUserDescription + errSuffix
    } else {
      const nextValidTokenSequences = map(
        expectedIterationPaths,
        (currPath) =>
          `[${map(currPath, (currTokenType) => tokenLabel(currTokenType)).join(
            ","
          )}]`
      )
      const calculatedDescription =
        `expecting at least one iteration which starts with one of these possible Token sequences::\n ` +
        `<${nextValidTokenSequences.join(" ,")}>`
      return errPrefix + calculatedDescription + errSuffix
    }
  }
}
// Guard the shared default provider against accidental mutation.
Object.freeze(defaultParserErrorProvider)
/**
 * Default error-message provider used while resolving grammar rule
 * references (non-terminals pointing at rule definitions).
 */
export const defaultGrammarResolverErrorProvider: IGrammarResolverErrorMessageProvider =
  {
    // A non-terminal referenced a rule name that was never defined.
    buildRuleNotFoundError(
      topLevelRule: Rule,
      undefinedRule: NonTerminal
    ): string {
      return (
        `Invalid grammar, reference to a rule which is not defined: ->${undefinedRule.nonTerminalName}<-\n` +
        `inside top level rule: ->${topLevelRule.name}<-`
      )
    }
  }
/**
 * Default error-message provider for grammar (structure) validations.
 * Each build* method renders the message for one kind of grammar definition
 * error detected before parsing starts.
 */
export const defaultGrammarValidatorErrorProvider: IGrammarValidatorErrorMessageProvider =
  {
    // The same production (same DSL method, occurrence idx and argument)
    // appears more than once within one rule.
    buildDuplicateFoundError(
      topLevelRule: Rule,
      duplicateProds: IProductionWithOccurrence[]
    ): string {
      // The "argument" that distinguishes a production: token type name for
      // terminals, referenced rule name for non-terminals, otherwise none.
      function getExtraProductionArgument(
        prod: IProductionWithOccurrence
      ): string {
        if (prod instanceof Terminal) {
          return prod.terminalType.name
        } else if (prod instanceof NonTerminal) {
          return prod.nonTerminalName
        } else {
          return ""
        }
      }
      const topLevelName = topLevelRule.name
      const duplicateProd = first(duplicateProds)!
      const index = duplicateProd.idx
      const dslName = getProductionDslName(duplicateProd)
      const extraArgument = getExtraProductionArgument(duplicateProd)
      // idx 0 is the implicit default occurrence and is not displayed.
      const hasExplicitIndex = index > 0
      let msg = `->${dslName}${hasExplicitIndex ? index : ""}<- ${
        extraArgument ? `with argument: ->${extraArgument}<-` : ""
      }
appears more than once (${
        duplicateProds.length
      } times) in the top level rule: ->${topLevelName}<-.
For further details see: https://chevrotain.io/docs/FAQ.html#NUMERICAL_SUFFIXES
`
      // white space trimming time! better to trim afterwards as it allows to use WELL formatted multi line template strings...
      msg = msg.replace(/[ \t]+/g, " ")
      msg = msg.replace(/\s\s+/g, "\n")
      return msg
    },
    // A Terminal (token) and a Non-Terminal (rule) share the same name.
    buildNamespaceConflictError(rule: Rule): string {
      const errMsg =
        `Namespace conflict found in grammar.\n` +
        `The grammar has both a Terminal(Token) and a Non-Terminal(Rule) named: <${rule.name}>.\n` +
        `To resolve this make sure each Terminal and Non-Terminal names are unique\n` +
        `This is easy to accomplish by using the convention that Terminal names start with an uppercase letter\n` +
        `and Non-Terminal names start with a lower case letter.`
      return errMsg
    },
    // Several alternatives of an OR share a common lookahead prefix.
    buildAlternationPrefixAmbiguityError(options: {
      topLevelRule: Rule
      prefixPath: TokenType[]
      ambiguityIndices: number[]
      alternation: Alternation
    }): string {
      const pathMsg = map(options.prefixPath, (currTok) =>
        tokenLabel(currTok)
      ).join(", ")
      // idx 0 is the implicit default occurrence and is not displayed.
      const occurrence =
        options.alternation.idx === 0 ? "" : options.alternation.idx
      const errMsg =
        `Ambiguous alternatives: <${options.ambiguityIndices.join(
          " ,"
        )}> due to common lookahead prefix\n` +
        `in <OR${occurrence}> inside <${options.topLevelRule.name}> Rule,\n` +
        `<${pathMsg}> may appears as a prefix path in all these alternatives.\n` +
        `See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#COMMON_PREFIX\n` +
        `For Further details.`
      return errMsg
    },
    // Several alternatives of an OR cannot be distinguished within the
    // configured maxLookahead.
    buildAlternationAmbiguityError(options: {
      topLevelRule: Rule
      prefixPath: TokenType[]
      ambiguityIndices: number[]
      alternation: Alternation
    }): string {
      const pathMsg = map(options.prefixPath, (currtok) =>
        tokenLabel(currtok)
      ).join(", ")
      // idx 0 is the implicit default occurrence and is not displayed.
      const occurrence =
        options.alternation.idx === 0 ? "" : options.alternation.idx
      let currMessage =
        `Ambiguous Alternatives Detected: <${options.ambiguityIndices.join(
          " ,"
        )}> in <OR${occurrence}>` +
        ` inside <${options.topLevelRule.name}> Rule,\n` +
        `<${pathMsg}> may appears as a prefix path in all these alternatives.\n`
      currMessage =
        currMessage +
        `See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#AMBIGUOUS_ALTERNATIVES\n` +
        `For Further details.`
      return currMessage
    },
    // A repetition whose body can match the empty sequence would loop forever.
    buildEmptyRepetitionError(options: {
      topLevelRule: Rule
      repetition: IProductionWithOccurrence
    }): string {
      let dslName = getProductionDslName(options.repetition)
      if (options.repetition.idx !== 0) {
        dslName += options.repetition.idx
      }
      const errMsg =
        `The repetition <${dslName}> within Rule <${options.topLevelRule.name}> can never consume any tokens.\n` +
        `This could lead to an infinite loop.`
      return errMsg
    },
    // TODO: remove - `errors_public` from nyc.config.js exclude
    //       once this method is fully removed from this file
    buildTokenNameError(options: {
      tokenType: TokenType
      expectedPattern: RegExp
    }): string {
      /* istanbul ignore next */
      return "deprecated"
    },
    // An empty alternative appears anywhere but in the last position of an OR.
    buildEmptyAlternationError(options: {
      topLevelRule: Rule
      alternation: Alternation
      emptyChoiceIdx: number
    }): string {
      const errMsg =
        `Ambiguous empty alternative: <${options.emptyChoiceIdx + 1}>` +
        ` in <OR${options.alternation.idx}> inside <${options.topLevelRule.name}> Rule.\n` +
        `Only the last alternative may be an empty alternative.`
      return errMsg
    },
    // An OR exceeded the supported number of alternatives.
    buildTooManyAlternativesError(options: {
      topLevelRule: Rule
      alternation: Alternation
    }): string {
      const errMsg =
        `An Alternation cannot have more than 256 alternatives:\n` +
        `<OR${options.alternation.idx}> inside <${
          options.topLevelRule.name
        }> Rule.\n has ${
          options.alternation.definition.length + 1
        } alternatives.`
      return errMsg
    },
    // A rule can reach itself (directly or indirectly) without consuming
    // tokens — not supported by an LL parser.
    buildLeftRecursionError(options: {
      topLevelRule: Rule
      leftRecursionPath: Rule[]
    }): string {
      const ruleName = options.topLevelRule.name
      const pathNames = map(
        options.leftRecursionPath,
        (currRule) => currRule.name
      )
      const leftRecursivePath = `${ruleName} --> ${pathNames
        .concat([ruleName])
        .join(" --> ")}`
      const errMsg =
        `Left Recursion found in grammar.\n` +
        `rule: <${ruleName}> can be invoked from itself (directly or indirectly)\n` +
        `without consuming any Tokens. The grammar path that causes this is: \n ${leftRecursivePath}\n` +
        ` To fix this refactor your grammar to remove the left recursion.\n` +
        `see: https://en.wikipedia.org/wiki/LL_parser#Left_factoring.`
      return errMsg
    },
    // TODO: remove - `errors_public` from nyc.config.js exclude
    //       once this method is fully removed from this file
    buildInvalidRuleNameError(options: {
      topLevelRule: Rule
      expectedPattern: RegExp
    }): string {
      /* istanbul ignore next */
      return "deprecated"
    },
    // The same rule name was defined more than once in one grammar.
    buildDuplicateRuleNameError(options: {
      topLevelRule: Rule | string
      grammarName: string
    }): string {
      let ruleName
      if (options.topLevelRule instanceof Rule) {
        ruleName = options.topLevelRule.name
      } else {
        ruleName = options.topLevelRule
      }
      const errMsg = `Duplicate definition, rule: ->${ruleName}<- is already defined in the grammar: ->${options.grammarName}<-`
      return errMsg
    }
  }

View File

@@ -0,0 +1,74 @@
import includes from "lodash/includes"
import {
IToken,
IRecognitionException,
IRecognizerContext
} from "@chevrotain/types"
// Canonical `name` values of the recognition exception classes below.
const MISMATCHED_TOKEN_EXCEPTION = "MismatchedTokenException"
const NO_VIABLE_ALT_EXCEPTION = "NoViableAltException"
const EARLY_EXIT_EXCEPTION = "EarlyExitException"
const NOT_ALL_INPUT_PARSED_EXCEPTION = "NotAllInputParsedException"

const RECOGNITION_EXCEPTION_NAMES = [
  MISMATCHED_TOKEN_EXCEPTION,
  NO_VIABLE_ALT_EXCEPTION,
  EARLY_EXIT_EXCEPTION,
  NOT_ALL_INPUT_PARSED_EXCEPTION
]
Object.freeze(RECOGNITION_EXCEPTION_NAMES)

// hacks to bypass no support for custom Errors in javascript/typescript
/**
 * Returns true when `error` is one of the parser's recognition exceptions.
 * Identification is by `name` because instanceof is unreliable on these
 * hacked custom error classes.
 */
export function isRecognitionException(error: Error) {
  return RECOGNITION_EXCEPTION_NAMES.indexOf(error.name) > -1
}
/**
 * Common base class for all recognition (parsing) exceptions.
 * Subclasses set a distinctive `name` which isRecognitionException matches on.
 */
abstract class RecognitionException
  extends Error
  implements IRecognitionException
{
  context: IRecognizerContext
  // Tokens consumed while re-synchronizing after the error (error recovery).
  resyncedTokens: IToken[] = []
  protected constructor(message: string, public token: IToken) {
    super(message)
    // fix prototype chain when typescript target is ES5
    Object.setPrototypeOf(this, new.target.prototype)
    /* istanbul ignore next - V8 workaround to remove constructor from stacktrace when typescript target is ES5 */
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, this.constructor)
    }
  }
}
// Thrown when the parser consumed a token of an unexpected type.
export class MismatchedTokenException extends RecognitionException {
  constructor(message: string, token: IToken, public previousToken: IToken) {
    super(message, token)
    // `name` (not instanceof) is what isRecognitionException matches on.
    this.name = MISMATCHED_TOKEN_EXCEPTION
  }
}
// Thrown when none of an alternation's alternatives matched the lookahead.
export class NoViableAltException extends RecognitionException {
  constructor(message: string, token: IToken, public previousToken: IToken) {
    super(message, token)
    // `name` (not instanceof) is what isRecognitionException matches on.
    this.name = NO_VIABLE_ALT_EXCEPTION
  }
}
// Thrown when input tokens remain after the start rule has completed.
export class NotAllInputParsedException extends RecognitionException {
  constructor(message: string, token: IToken) {
    super(message, token)
    // `name` (not instanceof) is what isRecognitionException matches on.
    this.name = NOT_ALL_INPUT_PARSED_EXCEPTION
  }
}
// Thrown when a mandatory repetition matched zero iterations.
export class EarlyExitException extends RecognitionException {
  constructor(message: string, token: IToken, public previousToken: IToken) {
    super(message, token)
    // `name` (not instanceof) is what isRecognitionException matches on.
    this.name = EARLY_EXIT_EXCEPTION
  }
}

View File

@@ -0,0 +1,708 @@
import first from "lodash/first"
import isEmpty from "lodash/isEmpty"
import drop from "lodash/drop"
import flatten from "lodash/flatten"
import filter from "lodash/filter"
import reject from "lodash/reject"
import difference from "lodash/difference"
import map from "lodash/map"
import forEach from "lodash/forEach"
import groupBy from "lodash/groupBy"
import reduce from "lodash/reduce"
import pickBy from "lodash/pickBy"
import values from "lodash/values"
import includes from "lodash/includes"
import flatMap from "lodash/flatMap"
import clone from "lodash/clone"
import {
IParserAmbiguousAlternativesDefinitionError,
IParserDuplicatesDefinitionError,
IParserEmptyAlternativeDefinitionError,
ParserDefinitionErrorType
} from "../parser/parser"
import { getProductionDslName, isOptionalProd } from "@chevrotain/gast"
import {
Alternative,
containsPath,
getLookaheadPathsForOptionalProd,
getLookaheadPathsForOr,
getProdType,
isStrictPrefixOfPath
} from "./lookahead"
import { nextPossibleTokensAfter } from "./interpreter"
import {
Alternation,
Alternative as AlternativeGAST,
NonTerminal,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Terminal
} from "@chevrotain/gast"
import { GAstVisitor } from "@chevrotain/gast"
import {
ILookaheadStrategy,
IProduction,
IProductionWithOccurrence,
TokenType,
Rule
} from "@chevrotain/types"
import {
IGrammarValidatorErrorMessageProvider,
IParserDefinitionError
} from "./types"
import dropRight from "lodash/dropRight"
import compact from "lodash/compact"
import { tokenStructuredMatcher } from "../../scan/tokens"
/**
 * Delegates validation to the pluggable lookahead strategy, then tags each
 * returned message with the CUSTOM_LOOKAHEAD_VALIDATION error type so it can
 * be reported like any other parser definition error.
 */
export function validateLookahead(options: {
  lookaheadStrategy: ILookaheadStrategy
  rules: Rule[]
  tokenTypes: TokenType[]
  grammarName: string
}): IParserDefinitionError[] {
  const { lookaheadStrategy, rules, tokenTypes, grammarName } = options
  const lookaheadValidationErrorMessages = lookaheadStrategy.validate({
    rules,
    tokenTypes,
    grammarName
  })
  // Spread after `type` so a strategy-provided `type` wins (same as original).
  return map(lookaheadValidationErrorMessages, (errorMessage) => ({
    type: ParserDefinitionErrorType.CUSTOM_LOOKAHEAD_VALIDATION,
    ...errorMessage
  }))
}
/**
 * Runs the structural grammar validations that apply to every parser:
 * duplicate productions inside a rule, terminal/non-terminal namespace
 * conflicts, alternations with too many alternatives, and duplicate rule
 * names. Results are concatenated in that order.
 */
export function validateGrammar(
  topLevels: Rule[],
  tokenTypes: TokenType[],
  errMsgProvider: IGrammarValidatorErrorMessageProvider,
  grammarName: string
): IParserDefinitionError[] {
  const allErrors: IParserDefinitionError[] = []
  allErrors.push(
    ...flatMap(topLevels, (currTopLevel) =>
      validateDuplicateProductions(currTopLevel, errMsgProvider)
    )
  )
  allErrors.push(
    ...checkTerminalAndNoneTerminalsNameSpace(
      topLevels,
      tokenTypes,
      errMsgProvider
    )
  )
  allErrors.push(
    ...flatMap(topLevels, (curRule) =>
      validateTooManyAlts(curRule, errMsgProvider)
    )
  )
  allErrors.push(
    ...flatMap(topLevels, (curRule) =>
      validateRuleDoesNotAlreadyExist(
        curRule,
        topLevels,
        grammarName,
        errMsgProvider
      )
    )
  )
  return allErrors
}
/**
 * Finds productions that appear more than once within a single rule with the
 * same identity (DSL name + occurrence index + argument) and builds one
 * definition error per duplicated group.
 */
function validateDuplicateProductions(
  topLevelRule: Rule,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserDuplicatesDefinitionError[] {
  // Collect every production in the rule via GAST traversal, then group by
  // identity key; any group with more than one member is a duplicate.
  const collectorVisitor = new OccurrenceValidationCollector()
  topLevelRule.accept(collectorVisitor)
  const allRuleProductions = collectorVisitor.allProductions
  const productionGroups = groupBy(
    allRuleProductions,
    identifyProductionForDuplicates
  )
  const duplicates: any = pickBy(productionGroups, (currGroup) => {
    return currGroup.length > 1
  })
  const errors = map(values(duplicates), (currDuplicates: any) => {
    const firstProd: any = first(currDuplicates)
    const msg = errMsgProvider.buildDuplicateFoundError(
      topLevelRule,
      currDuplicates
    )
    const dslName = getProductionDslName(firstProd)
    const defError: IParserDuplicatesDefinitionError = {
      message: msg,
      type: ParserDefinitionErrorType.DUPLICATE_PRODUCTIONS,
      ruleName: topLevelRule.name,
      dslName: dslName,
      occurrence: firstProd.idx
    }
    // Only attach `parameter` when the production actually has an argument.
    const param = getExtraProductionArgument(firstProd)
    if (param) {
      defError.parameter = param
    }
    return defError
  })
  return errors
}
/**
 * Builds the identity key used for duplicate detection:
 * DSL method name + occurrence index + extra argument, joined by "_#_".
 */
export function identifyProductionForDuplicates(
  prod: IProductionWithOccurrence
): string {
  const dslName = getProductionDslName(prod)
  const extraArg = getExtraProductionArgument(prod)
  return dslName + "_#_" + prod.idx + "_#_" + extraArg
}
/**
 * The distinguishing "argument" of a production: the token type name for
 * terminals, the referenced rule name for non-terminals, otherwise "".
 */
function getExtraProductionArgument(prod: IProductionWithOccurrence): string {
  if (prod instanceof Terminal) {
    return prod.terminalType.name
  }
  if (prod instanceof NonTerminal) {
    return prod.nonTerminalName
  }
  return ""
}
/**
 * GAST visitor that flattens a rule into the list of every production with
 * an occurrence index it contains (used by duplicate-production validation).
 */
export class OccurrenceValidationCollector extends GAstVisitor {
  public allProductions: IProductionWithOccurrence[] = []
  public visitNonTerminal(subrule: NonTerminal): void {
    this.allProductions.push(subrule)
  }
  public visitOption(option: Option): void {
    this.allProductions.push(option)
  }
  public visitRepetitionWithSeparator(manySep: RepetitionWithSeparator): void {
    this.allProductions.push(manySep)
  }
  public visitRepetitionMandatory(atLeastOne: RepetitionMandatory): void {
    this.allProductions.push(atLeastOne)
  }
  public visitRepetitionMandatoryWithSeparator(
    atLeastOneSep: RepetitionMandatoryWithSeparator
  ): void {
    this.allProductions.push(atLeastOneSep)
  }
  public visitRepetition(many: Repetition): void {
    this.allProductions.push(many)
  }
  public visitAlternation(or: Alternation): void {
    this.allProductions.push(or)
  }
  public visitTerminal(terminal: Terminal): void {
    this.allProductions.push(terminal)
  }
}
/**
 * Reports a DUPLICATE_RULE_NAME error when `rule`'s name is defined more
 * than once within `allRules` (the rule itself counts as one occurrence).
 */
export function validateRuleDoesNotAlreadyExist(
  rule: Rule,
  allRules: Rule[],
  className: string,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserDefinitionError[] {
  const errors = []
  // Count every rule sharing this rule's name, including itself.
  let occurrences = 0
  for (const curRule of allRules) {
    if (curRule.name === rule.name) {
      occurrences++
    }
  }
  if (occurrences > 1) {
    const errMsg = errMsgProvider.buildDuplicateRuleNameError({
      topLevelRule: rule,
      grammarName: className
    })
    errors.push({
      message: errMsg,
      type: ParserDefinitionErrorType.DUPLICATE_RULE_NAME,
      ruleName: rule.name
    })
  }
  return errors
}
// TODO: is there anyway to get only the rule names of rules inherited from the super grammars?
// This is not part of the IGrammarErrorProvider because the validation cannot be performed on
// The grammar structure, only at runtime.
/**
 * Reports an INVALID_RULE_OVERRIDE error when `ruleName` is marked as an
 * override but no super grammar defines a rule with that name.
 */
export function validateRuleIsOverridden(
  ruleName: string,
  definedRulesNames: string[],
  className: string
): IParserDefinitionError[] {
  // A valid override shadows an already-defined rule.
  if (includes(definedRulesNames, ruleName)) {
    return []
  }
  const errMsg =
    `Invalid rule override, rule: ->${ruleName}<- cannot be overridden in the grammar: ->${className}<-` +
    `as it is not defined in any of the super grammars `
  return [
    {
      message: errMsg,
      type: ParserDefinitionErrorType.INVALID_RULE_OVERRIDE,
      ruleName: ruleName
    }
  ]
}
/**
 * Detects left recursion: a path from `topRule` back to itself without
 * consuming any tokens. Recursively walks the "first" non-terminals of
 * `currRule`, carrying the visited path for use in the error message.
 *
 * @param path - the non-terminals traversed so far from `topRule`.
 */
export function validateNoLeftRecursion(
  topRule: Rule,
  currRule: Rule,
  errMsgProvider: IGrammarValidatorErrorMessageProvider,
  path: Rule[] = []
): IParserDefinitionError[] {
  const errors: IParserDefinitionError[] = []
  // The rules reachable at the very start of currRule without consuming input.
  const nextNonTerminals = getFirstNoneTerminal(currRule.definition)
  if (isEmpty(nextNonTerminals)) {
    return []
  } else {
    const ruleName = topRule.name
    const foundLeftRecursion = includes(nextNonTerminals, topRule)
    if (foundLeftRecursion) {
      errors.push({
        message: errMsgProvider.buildLeftRecursionError({
          topLevelRule: topRule,
          leftRecursionPath: path
        }),
        type: ParserDefinitionErrorType.LEFT_RECURSION,
        ruleName: ruleName
      })
    }
    // we are only looking for cyclic paths leading back to the specific topRule
    // other cyclic paths are ignored, we still need this difference to avoid infinite loops...
    const validNextSteps = difference(nextNonTerminals, path.concat([topRule]))
    const errorsFromNextSteps = flatMap(validNextSteps, (currRefRule) => {
      const newPath = clone(path)
      newPath.push(currRefRule)
      return validateNoLeftRecursion(
        topRule,
        currRefRule,
        errMsgProvider,
        newPath
      )
    })
    return errors.concat(errorsFromNextSteps)
  }
}
/**
 * Computes the non-terminals that may be entered at the very start of
 * `definition` without consuming any tokens (used for left-recursion checks).
 */
export function getFirstNoneTerminal(definition: IProduction[]): Rule[] {
  let result: Rule[] = []
  if (isEmpty(definition)) {
    return result
  }
  const firstProd = first(definition)
  /* istanbul ignore else */
  if (firstProd instanceof NonTerminal) {
    // Direct rule reference at the start.
    result.push(firstProd.referencedRule)
  } else if (
    firstProd instanceof AlternativeGAST ||
    firstProd instanceof Option ||
    firstProd instanceof RepetitionMandatory ||
    firstProd instanceof RepetitionMandatoryWithSeparator ||
    firstProd instanceof RepetitionWithSeparator ||
    firstProd instanceof Repetition
  ) {
    // Wrapper productions: recurse into their (single) definition.
    result = result.concat(
      getFirstNoneTerminal(<IProduction[]>firstProd.definition)
    )
  } else if (firstProd instanceof Alternation) {
    // each sub definition in alternation is a FLAT
    result = flatten(
      map(firstProd.definition, (currSubDef) =>
        getFirstNoneTerminal((<AlternativeGAST>currSubDef).definition)
      )
    )
  } else if (firstProd instanceof Terminal) {
    // nothing to see, move along
  } else {
    throw Error("non exhaustive match")
  }
  // If the first production may match nothing (optional/repetition), the
  // following productions can also start the rule — include them too.
  const isFirstOptional = isOptionalProd(firstProd)
  const hasMore = definition.length > 1
  if (isFirstOptional && hasMore) {
    const rest = drop(definition)
    return result.concat(getFirstNoneTerminal(rest))
  } else {
    return result
  }
}
// GAST visitor that collects every Alternation (OR) production in a rule.
class OrCollector extends GAstVisitor {
  public alternations: Alternation[] = []
  public visitAlternation(node: Alternation): void {
    this.alternations.push(node)
  }
}
export function validateEmptyOrAlternative(
topLevelRule: Rule,
errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserEmptyAlternativeDefinitionError[] {
const orCollector = new OrCollector()
topLevelRule.accept(orCollector)
const ors = orCollector.alternations
const errors = flatMap<Alternation, IParserEmptyAlternativeDefinitionError>(
ors,
(currOr) => {
const exceptLast = dropRight(currOr.definition)
return flatMap(exceptLast, (currAlternative, currAltIdx) => {
const possibleFirstInAlt = nextPossibleTokensAfter(
[currAlternative],
[],
tokenStructuredMatcher,
1
)
if (isEmpty(possibleFirstInAlt)) {
return [
{
message: errMsgProvider.buildEmptyAlternationError({
topLevelRule: topLevelRule,
alternation: currOr,
emptyChoiceIdx: currAltIdx
}),
type: ParserDefinitionErrorType.NONE_LAST_EMPTY_ALT,
ruleName: topLevelRule.name,
occurrence: currOr.idx,
alternative: currAltIdx + 1
}
]
} else {
return []
}
})
}
)
return errors
}
export function validateAmbiguousAlternationAlternatives(
topLevelRule: Rule,
globalMaxLookahead: number,
errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserAmbiguousAlternativesDefinitionError[] {
const orCollector = new OrCollector()
topLevelRule.accept(orCollector)
let ors = orCollector.alternations
// New Handling of ignoring ambiguities
// - https://github.com/chevrotain/chevrotain/issues/869
ors = reject(ors, (currOr) => currOr.ignoreAmbiguities === true)
const errors = flatMap(ors, (currOr: Alternation) => {
const currOccurrence = currOr.idx
const actualMaxLookahead = currOr.maxLookahead || globalMaxLookahead
const alternatives = getLookaheadPathsForOr(
currOccurrence,
topLevelRule,
actualMaxLookahead,
currOr
)
const altsAmbiguityErrors = checkAlternativesAmbiguities(
alternatives,
currOr,
topLevelRule,
errMsgProvider
)
const altsPrefixAmbiguityErrors = checkPrefixAlternativesAmbiguities(
alternatives,
currOr,
topLevelRule,
errMsgProvider
)
return altsAmbiguityErrors.concat(altsPrefixAmbiguityErrors)
})
return errors
}
/**
 * GAST visitor that gathers every repetition-like production
 * (MANY / MANY_SEP / AT_LEAST_ONE / AT_LEAST_ONE_SEP) in a rule.
 */
export class RepetitionCollector extends GAstVisitor {
  public allProductions: (IProductionWithOccurrence & {
    maxLookahead?: number
  })[] = []

  public visitRepetition(many: Repetition): void {
    this.allProductions.push(many)
  }

  public visitRepetitionWithSeparator(manySep: RepetitionWithSeparator): void {
    this.allProductions.push(manySep)
  }

  public visitRepetitionMandatory(atLeastOne: RepetitionMandatory): void {
    this.allProductions.push(atLeastOne)
  }

  public visitRepetitionMandatoryWithSeparator(
    atLeastOneSep: RepetitionMandatoryWithSeparator
  ): void {
    this.allProductions.push(atLeastOneSep)
  }
}
export function validateTooManyAlts(
topLevelRule: Rule,
errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserDefinitionError[] {
const orCollector = new OrCollector()
topLevelRule.accept(orCollector)
const ors = orCollector.alternations
const errors = flatMap(ors, (currOr) => {
if (currOr.definition.length > 255) {
return [
{
message: errMsgProvider.buildTooManyAlternativesError({
topLevelRule: topLevelRule,
alternation: currOr
}),
type: ParserDefinitionErrorType.TOO_MANY_ALTS,
ruleName: topLevelRule.name,
occurrence: currOr.idx
}
]
} else {
return []
}
})
return errors
}
/**
 * Validates that every repetition-like production can actually consume
 * at least one token, an "empty" repetition would loop forever at runtime.
 */
export function validateSomeNonEmptyLookaheadPath(
  topLevelRules: Rule[],
  maxLookahead: number,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserDefinitionError[] {
  const errors: IParserDefinitionError[] = []
  forEach(topLevelRules, (currTopRule) => {
    const collectorVisitor = new RepetitionCollector()
    currTopRule.accept(collectorVisitor)
    forEach(collectorVisitor.allProductions, (currProd) => {
      const prodType = getProdType(currProd)
      // an explicit maxLookahead on the production overrides the global one
      const actualMaxLookahead = currProd.maxLookahead || maxLookahead
      const paths = getLookaheadPathsForOptionalProd(
        currProd.idx,
        currTopRule,
        prodType,
        actualMaxLookahead
      )
      // paths[0] holds the lookahead paths *inside* the production,
      // all-empty means the repetition can never consume a token
      const pathsInsideProduction = paths[0]
      if (isEmpty(flatten(pathsInsideProduction))) {
        const errMsg = errMsgProvider.buildEmptyRepetitionError({
          topLevelRule: currTopRule,
          repetition: currProd
        })
        errors.push({
          message: errMsg,
          type: ParserDefinitionErrorType.NO_NON_EMPTY_LOOKAHEAD,
          ruleName: currTopRule.name
        })
      }
    })
  })
  return errors
}
/**
 * Describes a single ambiguity: a lookahead token `path` and the
 * indices of the alternatives in which that path may appear.
 */
export interface IAmbiguityDescriptor {
  alts: number[]
  path: TokenType[]
}
/**
 * Finds lookahead paths that appear in more than one alternative of the
 * alternation and builds an AMBIGUOUS_ALTS error for each such path.
 */
function checkAlternativesAmbiguities(
  alternatives: Alternative[],
  alternation: Alternation,
  rule: Rule,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserAmbiguousAlternativesDefinitionError[] {
  // paths already recorded, used to avoid duplicate descriptors
  const foundAmbiguousPaths: Alternative = []
  const identicalAmbiguities: { alts: number[]; path: TokenType[] }[] = []
  forEach(alternatives, (currAlt, currAltIdx) => {
    // ignore (skip) ambiguities with this alternative
    if (alternation.definition[currAltIdx].ignoreAmbiguities === true) {
      return
    }
    forEach(currAlt, (currPath) => {
      const altsCurrPathAppearsIn = [currAltIdx]
      forEach(alternatives, (currOtherAlt, currOtherAltIdx) => {
        if (
          currAltIdx !== currOtherAltIdx &&
          containsPath(currOtherAlt, currPath) &&
          // ignore (skip) ambiguities with this "other" alternative
          alternation.definition[currOtherAltIdx].ignoreAmbiguities !== true
        ) {
          altsCurrPathAppearsIn.push(currOtherAltIdx)
        }
      })
      if (
        altsCurrPathAppearsIn.length > 1 &&
        !containsPath(foundAmbiguousPaths, currPath)
      ) {
        foundAmbiguousPaths.push(currPath)
        identicalAmbiguities.push({
          alts: altsCurrPathAppearsIn,
          path: currPath
        })
      }
    })
  })
  return map(identicalAmbiguities, (currAmbDescriptor) => {
    // one-based indices for the user facing error message
    const ambgIndices = map(
      currAmbDescriptor.alts,
      (currAltIdx) => currAltIdx + 1
    )
    const currMessage = errMsgProvider.buildAlternationAmbiguityError({
      topLevelRule: rule,
      alternation: alternation,
      ambiguityIndices: ambgIndices,
      prefixPath: currAmbDescriptor.path
    })
    return {
      message: currMessage,
      type: ParserDefinitionErrorType.AMBIGUOUS_ALTS,
      ruleName: rule.name,
      occurrence: alternation.idx,
      alternatives: currAmbDescriptor.alts
    }
  })
}
/**
 * Finds cases where a lower-index (higher priority) alternative's
 * lookahead path is a strict prefix of a later alternative's path,
 * which would make the later alternative unreachable.
 */
export function checkPrefixAlternativesAmbiguities(
  alternatives: Alternative[],
  alternation: Alternation,
  rule: Rule,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserAmbiguousAlternativesDefinitionError[] {
  // flatten all lookahead paths, tagging each path with the
  // (zero based) index of its originating alternative
  const pathsAndIndices = flatMap(alternatives, (currAlt, idx) =>
    map(currAlt, (currPath) => {
      return { idx: idx, path: currPath }
    })
  )
  const errors = compact(
    flatMap(pathsAndIndices, (currPathAndIdx) => {
      const alternativeGast = alternation.definition[currPathAndIdx.idx]
      // ignore (skip) ambiguities with this alternative
      if (alternativeGast.ignoreAmbiguities === true) {
        return []
      }
      const targetIdx = currPathAndIdx.idx
      const targetPath = currPathAndIdx.path
      const prefixAmbiguitiesPathsAndIndices = filter(
        pathsAndIndices,
        (searchPathAndIdx) => {
          // prefix ambiguity can only be created from lower idx (higher priority) path
          return (
            // ignore (skip) ambiguities with this "other" alternative
            alternation.definition[searchPathAndIdx.idx].ignoreAmbiguities !==
              true &&
            searchPathAndIdx.idx < targetIdx &&
            // checking for strict prefix because identical lookaheads
            // will be detected using a different validation.
            isStrictPrefixOfPath(searchPathAndIdx.path, targetPath)
          )
        }
      )
      return map(
        prefixAmbiguitiesPathsAndIndices,
        (currAmbPathAndIdx): IParserAmbiguousAlternativesDefinitionError => {
          // one-based indices for the user facing error message
          const ambgIndices = [currAmbPathAndIdx.idx + 1, targetIdx + 1]
          // occurrence index 0 is rendered as an empty string
          const occurrence = alternation.idx === 0 ? "" : alternation.idx
          const message = errMsgProvider.buildAlternationPrefixAmbiguityError({
            topLevelRule: rule,
            alternation: alternation,
            ambiguityIndices: ambgIndices,
            prefixPath: currAmbPathAndIdx.path
          })
          return {
            message: message,
            type: ParserDefinitionErrorType.AMBIGUOUS_PREFIX_ALTS,
            ruleName: rule.name,
            occurrence: occurrence,
            alternatives: ambgIndices
          }
        }
      )
    })
  )
  return errors
}
function checkTerminalAndNoneTerminalsNameSpace(
topLevels: Rule[],
tokenTypes: TokenType[],
errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserDefinitionError[] {
const errors: IParserDefinitionError[] = []
const tokenNames = map(tokenTypes, (currToken) => currToken.name)
forEach(topLevels, (currRule) => {
const currRuleName = currRule.name
if (includes(tokenNames, currRuleName)) {
const errMsg = errMsgProvider.buildNamespaceConflictError(currRule)
errors.push({
message: errMsg,
type: ParserDefinitionErrorType.CONFLICT_TOKENS_RULES_NAMESPACE,
ruleName: currRuleName
})
}
})
return errors
}

View File

@@ -0,0 +1,71 @@
import flatten from "lodash/flatten"
import uniq from "lodash/uniq"
import map from "lodash/map"
import { NonTerminal, Terminal } from "@chevrotain/gast"
import {
isBranchingProd,
isOptionalProd,
isSequenceProd
} from "@chevrotain/gast"
import { IProduction, TokenType } from "@chevrotain/types"
/**
 * Computes the FIRST set of a production: all TokenTypes that may
 * legally appear as its first consumed token.
 */
export function first(prod: IProduction): TokenType[] {
  /* istanbul ignore else */
  if (prod instanceof NonTerminal) {
    // this could in theory cause infinite loops if
    // (1) prod A refs prod B.
    // (2) prod B refs prod A
    // (3) AB can match the empty set
    // in other words a cycle where everything is optional so the first will keep
    // looking ahead for the next optional part and will never exit
    // currently there is no safeguard for this unique edge case because
    // (1) not sure a grammar in which this can happen is useful for anything (productive)
    return first((<NonTerminal>prod).referencedRule)
  }
  if (prod instanceof Terminal) {
    return firstForTerminal(<Terminal>prod)
  }
  if (isSequenceProd(prod)) {
    return firstForSequence(prod)
  }
  if (isBranchingProd(prod)) {
    return firstForBranching(prod)
  }
  throw Error("non exhaustive match")
}
/**
 * FIRST set of a sequence: accumulate each sub-production's FIRST set
 * until reaching the first NON-optional sub-production (it must consume
 * a token, so nothing after it can be "first").
 */
export function firstForSequence(prod: {
  definition: IProduction[]
}): TokenType[] {
  const seq = prod.definition
  let firstSet: TokenType[] = []
  for (let idx = 0; idx < seq.length; idx++) {
    const currSubProd = seq[idx]
    firstSet = firstSet.concat(first(currSubProd))
    if (!isOptionalProd(currSubProd)) {
      // a mandatory production ends the scan
      break
    }
  }
  return uniq(firstSet)
}
/**
 * FIRST set of a branching production: the union of the FIRST sets
 * of all its alternatives.
 */
export function firstForBranching(prod: {
  definition: IProduction[]
}): TokenType[] {
  const perAlternative: TokenType[][] = map(prod.definition, (innerProd) =>
    first(innerProd)
  )
  return uniq(flatten<TokenType>(perAlternative))
}
// FIRST set of a Terminal is simply its own TokenType.
export function firstForTerminal(terminal: Terminal): TokenType[] {
  return [terminal.terminalType]
}

View File

@@ -0,0 +1,68 @@
import { RestWalker } from "./rest"
import { first } from "./first"
import forEach from "lodash/forEach"
import assign from "lodash/assign"
import { IN } from "../constants"
import { Alternative, NonTerminal, Rule, Terminal } from "@chevrotain/gast"
import { IProduction, TokenType } from "@chevrotain/types"
// This ResyncFollowsWalker computes all of the follows required for RESYNC
// (skipping reference production).
export class ResyncFollowsWalker extends RestWalker {
  public follows: Record<string, TokenType[]> = {}

  constructor(private topProd: Rule) {
    super()
  }

  startWalking(): Record<string, TokenType[]> {
    this.walk(this.topProd)
    return this.follows
  }

  walkTerminal(
    terminal: Terminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // terminals do not contribute follows - intentionally a no-op
  }

  walkProdRef(
    refProd: NonTerminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // key identifying this specific rule-reference occurrence inside topProd
    const followName =
      buildBetweenProdsFollowPrefix(refProd.referencedRule, refProd.idx) +
      this.topProd.name
    // everything that may appear after this reference within topProd
    const fullRest: IProduction[] = currRest.concat(prevRest)
    const restProd = new Alternative({ definition: fullRest })
    this.follows[followName] = first(restProd)
  }
}
/**
 * Merges the RESYNC follow sets computed per top-level rule into a
 * single name -> TokenType[] map.
 */
export function computeAllProdsFollows(
  topProductions: Rule[]
): Record<string, TokenType[]> {
  const reSyncFollows: Record<string, TokenType[]> = {}
  forEach(topProductions, (topProd) => {
    const currRefsFollow = new ResyncFollowsWalker(topProd).startWalking()
    assign(reSyncFollows, currRefsFollow)
  })
  return reSyncFollows
}
// Builds the map-key prefix for a rule reference: the referenced rule's
// name, its occurrence index, and the IN separator constant.
export function buildBetweenProdsFollowPrefix(
  inner: Rule,
  occurenceInParent: number
): string {
  return `${inner.name}${occurenceInParent}${IN}`
}
// Builds the map-key prefix for a terminal: the terminal's token name,
// its occurrence index, and the IN separator constant.
export function buildInProdFollowPrefix(terminal: Terminal): string {
  return `${terminal.terminalType.name}${terminal.idx}${IN}`
}

View File

@@ -0,0 +1,51 @@
import { Rule } from "@chevrotain/gast"
import forEach from "lodash/forEach"
import defaults from "lodash/defaults"
import { resolveGrammar as orgResolveGrammar } from "../resolver"
import { validateGrammar as orgValidateGrammar } from "../checks"
import {
defaultGrammarResolverErrorProvider,
defaultGrammarValidatorErrorProvider
} from "../../errors_public"
import { TokenType } from "@chevrotain/types"
import {
IGrammarResolverErrorMessageProvider,
IGrammarValidatorErrorMessageProvider,
IParserDefinitionError
} from "../types"
// Options for `resolveGrammar`. `errMsgProvider` falls back to
// `defaultGrammarResolverErrorProvider` when omitted.
type ResolveGrammarOpts = {
  rules: Rule[]
  errMsgProvider?: IGrammarResolverErrorMessageProvider
}
/**
 * Resolves all rule references in the given grammar, returning any
 * resolution errors (e.g. references to undefined rules).
 */
export function resolveGrammar(
  options: ResolveGrammarOpts
): IParserDefinitionError[] {
  // note: lodash `defaults` also fills the missing property in-place
  const actualOptions: Required<ResolveGrammarOpts> = defaults(options, {
    errMsgProvider: defaultGrammarResolverErrorProvider
  })
  // index the rules by name for the resolver
  const topRulesTable: { [ruleName: string]: Rule } = {}
  for (const rule of options.rules) {
    topRulesTable[rule.name] = rule
  }
  return orgResolveGrammar(topRulesTable, actualOptions.errMsgProvider)
}
/**
 * Runs the full set of grammar validations (see `orgValidateGrammar`).
 *
 * Fix: the original signature *required* `errMsgProvider` even though a
 * default was applied via lodash `defaults`, making that fallback dead
 * code. Widening the property to optional is backward compatible and
 * makes the default reachable; it also avoids mutating the caller's
 * options object.
 */
export function validateGrammar(options: {
  rules: Rule[]
  tokenTypes: TokenType[]
  grammarName: string
  errMsgProvider?: IGrammarValidatorErrorMessageProvider
}): IParserDefinitionError[] {
  // fall back to the default provider when none was supplied
  const errMsgProvider =
    options.errMsgProvider ?? defaultGrammarValidatorErrorProvider
  return orgValidateGrammar(
    options.rules,
    options.tokenTypes,
    errMsgProvider,
    options.grammarName
  )
}

View File

@@ -0,0 +1,612 @@
import { RestWalker } from "./rest"
import _first from "lodash/first"
import isEmpty from "lodash/isEmpty"
import dropRight from "lodash/dropRight"
import drop from "lodash/drop"
import last from "lodash/last"
import forEach from "lodash/forEach"
import clone from "lodash/clone"
import { first } from "./first"
import { TokenMatcher } from "../parser/parser"
import {
Alternation,
Alternative,
NonTerminal,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Rule,
Terminal
} from "@chevrotain/gast"
import {
IGrammarPath,
IProduction,
ISyntacticContentAssistPath,
IToken,
ITokenGrammarPath,
TokenType
} from "@chevrotain/types"
/**
 * Base walker for computing the possible "next" token types after a
 * given grammar path. It follows `path.ruleStack` / `path.occurrenceStack`
 * through nested rule references (popping one entry per level); a
 * subclass decides what to record once the end of the path is reached
 * (see NextAfterTokenWalker).
 */
export abstract class AbstractNextPossibleTokensWalker extends RestWalker {
  protected possibleTokTypes: TokenType[] = []
  protected ruleStack: string[]
  protected occurrenceStack: number[]
  protected nextProductionName = ""
  protected nextProductionOccurrence = 0
  protected found = false
  protected isAtEndOfPath = false
  constructor(protected topProd: Rule, protected path: IGrammarPath) {
    super()
  }
  startWalking(): TokenType[] {
    this.found = false
    if (this.path.ruleStack[0] !== this.topProd.name) {
      throw Error("The path does not start with the walker's top Rule!")
    }
    // immutable for the win
    // reversed so pop() yields the next (deeper) rule in the path
    this.ruleStack = clone(this.path.ruleStack).reverse() // intelij bug requires assertion
    this.occurrenceStack = clone(this.path.occurrenceStack).reverse() // intelij bug requires assertion
    // already verified that the first production is valid, we now seek the 2nd production
    this.ruleStack.pop()
    this.occurrenceStack.pop()
    this.updateExpectedNext()
    this.walk(this.topProd)
    return this.possibleTokTypes
  }
  walk(
    prod: { definition: IProduction[] },
    prevRest: IProduction[] = []
  ): void {
    // stop scanning once we found the path
    if (!this.found) {
      super.walk(prod, prevRest)
    }
  }
  walkProdRef(
    refProd: NonTerminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // found the next production, need to keep walking in it
    if (
      refProd.referencedRule.name === this.nextProductionName &&
      refProd.idx === this.nextProductionOccurrence
    ) {
      const fullRest = currRest.concat(prevRest)
      this.updateExpectedNext()
      this.walk(refProd.referencedRule, <any>fullRest)
    }
  }
  updateExpectedNext(): void {
    // need to consume the Terminal
    if (isEmpty(this.ruleStack)) {
      // must reset nextProductionXXX to avoid walking down another Top Level production while what we are
      // really seeking is the last Terminal...
      this.nextProductionName = ""
      this.nextProductionOccurrence = 0
      this.isAtEndOfPath = true
    } else {
      this.nextProductionName = this.ruleStack.pop()!
      this.nextProductionOccurrence = this.occurrenceStack.pop()!
    }
  }
}
/**
 * Computes the token types that may follow a specific Terminal
 * occurrence at the end of a grammar path.
 */
export class NextAfterTokenWalker extends AbstractNextPossibleTokensWalker {
  private nextTerminalName = ""
  private nextTerminalOccurrence = 0

  constructor(topProd: Rule, protected path: ITokenGrammarPath) {
    super(topProd, path)
    this.nextTerminalName = this.path.lastTok.name
    this.nextTerminalOccurrence = this.path.lastTokOccurrence
  }

  walkTerminal(
    terminal: Terminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    const isTargetTerminal =
      this.isAtEndOfPath &&
      terminal.terminalType.name === this.nextTerminalName &&
      terminal.idx === this.nextTerminalOccurrence
    if (isTargetTerminal && !this.found) {
      // the FIRST set of everything following the terminal is the answer
      const restProd = new Alternative({
        definition: currRest.concat(prevRest)
      })
      this.possibleTokTypes = first(restProd)
      this.found = true
    }
  }
}
// the FIRST token sets of each alternative in an alternation
export type AlternativesFirstTokens = TokenType[][]
// Result of searching for the first Terminal after a repetition.
// Fields stay undefined when the walk does not find the information.
export interface IFirstAfterRepetition {
  token: TokenType | undefined
  occurrence: number | undefined
  isEndOfRule: boolean | undefined
}
/**
 * This walker only "walks" a single "TOP" level in the Grammar Ast, this means
 * it never "follows" production refs
 */
export class AbstractNextTerminalAfterProductionWalker extends RestWalker {
  // populated during the walk by the concrete subclasses
  protected result: IFirstAfterRepetition = {
    token: undefined,
    occurrence: undefined,
    isEndOfRule: undefined
  }
  constructor(protected topRule: Rule, protected occurrence: number) {
    super()
  }
  startWalking(): IFirstAfterRepetition {
    this.walk(this.topRule)
    return this.result
  }
}
/**
 * Finds the first Terminal that immediately follows a specific
 * MANY (Repetition) occurrence in the top rule.
 */
export class NextTerminalAfterManyWalker extends AbstractNextTerminalAfterProductionWalker {
  walkMany(
    manyProd: Repetition,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (manyProd.idx !== this.occurrence) {
      // not the repetition we are looking for, keep walking
      super.walkMany(manyProd, currRest, prevRest)
      return
    }
    // inspect whatever immediately follows this MANY
    const followingProd = _first(currRest.concat(prevRest))
    this.result.isEndOfRule = followingProd === undefined
    if (followingProd instanceof Terminal) {
      this.result.token = followingProd.terminalType
      this.result.occurrence = followingProd.idx
    }
  }
}
/**
 * Finds the first Terminal that immediately follows a specific
 * MANY_SEP (RepetitionWithSeparator) occurrence in the top rule.
 */
export class NextTerminalAfterManySepWalker extends AbstractNextTerminalAfterProductionWalker {
  walkManySep(
    manySepProd: RepetitionWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (manySepProd.idx !== this.occurrence) {
      // not the repetition we are looking for, keep walking
      super.walkManySep(manySepProd, currRest, prevRest)
      return
    }
    // inspect whatever immediately follows this MANY_SEP
    const followingProd = _first(currRest.concat(prevRest))
    this.result.isEndOfRule = followingProd === undefined
    if (followingProd instanceof Terminal) {
      this.result.token = followingProd.terminalType
      this.result.occurrence = followingProd.idx
    }
  }
}
/**
 * Finds the first Terminal that immediately follows a specific
 * AT_LEAST_ONE (RepetitionMandatory) occurrence in the top rule.
 */
export class NextTerminalAfterAtLeastOneWalker extends AbstractNextTerminalAfterProductionWalker {
  walkAtLeastOne(
    atLeastOneProd: RepetitionMandatory,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (atLeastOneProd.idx !== this.occurrence) {
      // not the repetition we are looking for, keep walking
      super.walkAtLeastOne(atLeastOneProd, currRest, prevRest)
      return
    }
    // inspect whatever immediately follows this AT_LEAST_ONE
    const followingProd = _first(currRest.concat(prevRest))
    this.result.isEndOfRule = followingProd === undefined
    if (followingProd instanceof Terminal) {
      this.result.token = followingProd.terminalType
      this.result.occurrence = followingProd.idx
    }
  }
}
// TODO: reduce code duplication in the AfterWalkers
/**
 * Finds the first Terminal that immediately follows a specific
 * AT_LEAST_ONE_SEP (RepetitionMandatoryWithSeparator) occurrence.
 */
export class NextTerminalAfterAtLeastOneSepWalker extends AbstractNextTerminalAfterProductionWalker {
  walkAtLeastOneSep(
    atleastOneSepProd: RepetitionMandatoryWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (atleastOneSepProd.idx !== this.occurrence) {
      // not the repetition we are looking for, keep walking
      super.walkAtLeastOneSep(atleastOneSepProd, currRest, prevRest)
      return
    }
    // inspect whatever immediately follows this AT_LEAST_ONE_SEP
    const followingProd = _first(currRest.concat(prevRest))
    this.result.isEndOfRule = followingProd === undefined
    if (followingProd instanceof Terminal) {
      this.result.token = followingProd.terminalType
      this.result.occurrence = followingProd.idx
    }
  }
}
// A partial token path together with the production suffix that
// remains after it - produced by `possiblePathsFrom`.
export interface PartialPathAndSuffixes {
  partialPath: TokenType[]
  suffixDef: IProduction[]
}
/**
 * Computes all partial token paths (up to `maxLength` tokens) that may be
 * matched starting from `targetDef`, each paired with the suffix
 * definition remaining after the partial path.
 */
export function possiblePathsFrom(
  targetDef: IProduction[],
  maxLength: number,
  currPath: TokenType[] = []
): PartialPathAndSuffixes[] {
  // avoid side effects
  currPath = clone(currPath)
  let result: PartialPathAndSuffixes[] = []
  let i = 0
  // TODO: avoid inner funcs
  // the rest of targetDef following the current (i-th) production
  function remainingPathWith(nextDef: IProduction[]) {
    return nextDef.concat(drop(targetDef, i + 1))
  }
  // TODO: avoid inner funcs
  // recurse into `definition` (plus the remainder of targetDef) and
  // append the discovered paths to `result`
  function getAlternativesForProd(definition: IProduction[]) {
    const alternatives = possiblePathsFrom(
      remainingPathWith(definition),
      maxLength,
      currPath
    )
    return result.concat(alternatives)
  }
  /**
   * Mandatory productions will halt the loop as the paths computed from their recursive calls will already contain the
   * following (rest) of the targetDef.
   *
   * For optional productions (Option/Repetition/...) the loop will continue to represent the paths that do not include
   * the optional production.
   */
  while (currPath.length < maxLength && i < targetDef.length) {
    const prod = targetDef[i]
    /* istanbul ignore else */
    if (prod instanceof Alternative) {
      return getAlternativesForProd(prod.definition)
    } else if (prod instanceof NonTerminal) {
      return getAlternativesForProd(prod.definition)
    } else if (prod instanceof Option) {
      result = getAlternativesForProd(prod.definition)
    } else if (prod instanceof RepetitionMandatory) {
      // one mandatory iteration followed by an optional repetition
      const newDef = prod.definition.concat([
        new Repetition({
          definition: prod.definition
        })
      ])
      return getAlternativesForProd(newDef)
    } else if (prod instanceof RepetitionMandatoryWithSeparator) {
      // one mandatory iteration, then optional (separator + iteration) repeats
      const newDef = [
        new Alternative({ definition: prod.definition }),
        new Repetition({
          definition: [new Terminal({ terminalType: prod.separator })].concat(
            <any>prod.definition
          )
        })
      ]
      return getAlternativesForProd(newDef)
    } else if (prod instanceof RepetitionWithSeparator) {
      // one optional iteration, then optional (separator + iteration) repeats
      const newDef = prod.definition.concat([
        new Repetition({
          definition: [new Terminal({ terminalType: prod.separator })].concat(
            <any>prod.definition
          )
        })
      ])
      result = getAlternativesForProd(newDef)
    } else if (prod instanceof Repetition) {
      // expand one iteration followed by another optional repetition
      const newDef = prod.definition.concat([
        new Repetition({
          definition: prod.definition
        })
      ])
      result = getAlternativesForProd(newDef)
    } else if (prod instanceof Alternation) {
      forEach(prod.definition, (currAlt) => {
        // TODO: this is a limited check for empty alternatives
        // It would prevent a common case of infinite loops during parser initialization.
        // However **in-directly** empty alternatives may still cause issues.
        if (isEmpty(currAlt.definition) === false) {
          result = getAlternativesForProd(currAlt.definition)
        }
      })
      return result
    } else if (prod instanceof Terminal) {
      currPath.push(prod.terminalType)
    } else {
      throw Error("non exhaustive match")
    }
    i++
  }
  result.push({
    partialPath: currPath,
    suffixDef: drop(targetDef, i)
  })
  return result
}
// A single entry on the `possiblePaths` stack in `nextPossibleTokensAfter`:
// the token index consumed so far (`idx`), the remaining definition to
// walk (`def`), and the rule/occurrence stacks leading here.
interface IPathToExamine {
  idx: number
  def: IProduction[]
  ruleStack: string[]
  occurrenceStack: number[]
}
/**
 * Simulates matching `tokenVector` against `initialDef` using an explicit
 * stack of paths (depth-first, FILO) and collects the token types that may
 * legally appear after the last token in the vector - used for syntactic
 * content assist.
 */
export function nextPossibleTokensAfter(
  initialDef: IProduction[],
  tokenVector: IToken[],
  tokMatcher: TokenMatcher,
  maxLookAhead: number
): ISyntacticContentAssistPath[] {
  // sentinel marking the end of a NonTerminal's expanded definition
  const EXIT_NON_TERMINAL: any = "EXIT_NONE_TERMINAL"
  // to avoid creating a new Array each time.
  const EXIT_NON_TERMINAL_ARR = [EXIT_NON_TERMINAL]
  // sentinel separating alternative paths on the stack
  const EXIT_ALTERNATIVE: any = "EXIT_ALTERNATIVE"
  let foundCompletePath = false
  const tokenVectorLength = tokenVector.length
  const minimalAlternativesIndex = tokenVectorLength - maxLookAhead - 1
  const result: ISyntacticContentAssistPath[] = []
  const possiblePaths: IPathToExamine[] = []
  possiblePaths.push({
    idx: -1,
    def: initialDef,
    ruleStack: [],
    occurrenceStack: []
  })
  while (!isEmpty(possiblePaths)) {
    const currPath = possiblePaths.pop()!
    // skip alternatives if no more results can be found (assuming deterministic grammar with fixed lookahead)
    if (currPath === EXIT_ALTERNATIVE) {
      if (
        foundCompletePath &&
        last(possiblePaths)!.idx <= minimalAlternativesIndex
      ) {
        // remove irrelevant alternative
        possiblePaths.pop()
      }
      continue
    }
    const currDef = currPath.def
    const currIdx = currPath.idx
    const currRuleStack = currPath.ruleStack
    const currOccurrenceStack = currPath.occurrenceStack
    // For Example: an empty path could exist in a valid grammar in the case of an EMPTY_ALT
    if (isEmpty(currDef)) {
      continue
    }
    const prod = currDef[0]
    /* istanbul ignore else */
    if (prod === EXIT_NON_TERMINAL) {
      // finished a NonTerminal's definition - pop back to the caller rule
      const nextPath = {
        idx: currIdx,
        def: drop(currDef),
        ruleStack: dropRight(currRuleStack),
        occurrenceStack: dropRight(currOccurrenceStack)
      }
      possiblePaths.push(nextPath)
    } else if (prod instanceof Terminal) {
      /* istanbul ignore else */
      if (currIdx < tokenVectorLength - 1) {
        // still tokens left in the vector - the terminal must match the next one
        const nextIdx = currIdx + 1
        const actualToken = tokenVector[nextIdx]
        if (tokMatcher!(actualToken, prod.terminalType)) {
          const nextPath = {
            idx: nextIdx,
            def: drop(currDef),
            ruleStack: currRuleStack,
            occurrenceStack: currOccurrenceStack
          }
          possiblePaths.push(nextPath)
        }
        // end of the line
      } else if (currIdx === tokenVectorLength - 1) {
        // IGNORE ABOVE ELSE
        // the whole vector was consumed - this terminal is a possible "next" token
        result.push({
          nextTokenType: prod.terminalType,
          nextTokenOccurrence: prod.idx,
          ruleStack: currRuleStack,
          occurrenceStack: currOccurrenceStack
        })
        foundCompletePath = true
      } else {
        throw Error("non exhaustive match")
      }
    } else if (prod instanceof NonTerminal) {
      // expand the referenced rule inline, bracketed by the EXIT sentinel
      const newRuleStack = clone(currRuleStack)
      newRuleStack.push(prod.nonTerminalName)
      const newOccurrenceStack = clone(currOccurrenceStack)
      newOccurrenceStack.push(prod.idx)
      const nextPath = {
        idx: currIdx,
        def: prod.definition.concat(EXIT_NON_TERMINAL_ARR, drop(currDef)),
        ruleStack: newRuleStack,
        occurrenceStack: newOccurrenceStack
      }
      possiblePaths.push(nextPath)
    } else if (prod instanceof Option) {
      // the order of alternatives is meaningful, FILO (Last path will be traversed first).
      const nextPathWithout = {
        idx: currIdx,
        def: drop(currDef),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWithout)
      // required marker to avoid backtracking paths whose higher priority alternatives already matched
      possiblePaths.push(EXIT_ALTERNATIVE)
      const nextPathWith = {
        idx: currIdx,
        def: prod.definition.concat(drop(currDef)),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWith)
    } else if (prod instanceof RepetitionMandatory) {
      // TODO:(THE NEW operators here take a while...) (convert once?)
      // one mandatory iteration followed by an optional repetition
      const secondIteration = new Repetition({
        definition: prod.definition,
        idx: prod.idx
      })
      const nextDef = prod.definition.concat([secondIteration], drop(currDef))
      const nextPath = {
        idx: currIdx,
        def: nextDef,
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPath)
    } else if (prod instanceof RepetitionMandatoryWithSeparator) {
      // TODO:(THE NEW operators here take a while...) (convert once?)
      // one mandatory iteration then optional (separator + iteration) repeats
      const separatorGast = new Terminal({
        terminalType: prod.separator
      })
      const secondIteration = new Repetition({
        definition: [<any>separatorGast].concat(prod.definition),
        idx: prod.idx
      })
      const nextDef = prod.definition.concat([secondIteration], drop(currDef))
      const nextPath = {
        idx: currIdx,
        def: nextDef,
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPath)
    } else if (prod instanceof RepetitionWithSeparator) {
      // the order of alternatives is meaningful, FILO (Last path will be traversed first).
      const nextPathWithout = {
        idx: currIdx,
        def: drop(currDef),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWithout)
      // required marker to avoid backtracking paths whose higher priority alternatives already matched
      possiblePaths.push(EXIT_ALTERNATIVE)
      const separatorGast = new Terminal({
        terminalType: prod.separator
      })
      const nthRepetition = new Repetition({
        definition: [<any>separatorGast].concat(prod.definition),
        idx: prod.idx
      })
      const nextDef = prod.definition.concat([nthRepetition], drop(currDef))
      const nextPathWith = {
        idx: currIdx,
        def: nextDef,
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWith)
    } else if (prod instanceof Repetition) {
      // the order of alternatives is meaningful, FILO (Last path will be traversed first).
      const nextPathWithout = {
        idx: currIdx,
        def: drop(currDef),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWithout)
      // required marker to avoid backtracking paths whose higher priority alternatives already matched
      possiblePaths.push(EXIT_ALTERNATIVE)
      // TODO: an empty repetition will cause infinite loops here, will the parser detect this in selfAnalysis?
      const nthRepetition = new Repetition({
        definition: prod.definition,
        idx: prod.idx
      })
      const nextDef = prod.definition.concat([nthRepetition], drop(currDef))
      const nextPathWith = {
        idx: currIdx,
        def: nextDef,
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWith)
    } else if (prod instanceof Alternation) {
      // the order of alternatives is meaningful, FILO (Last path will be traversed first).
      for (let i = prod.definition.length - 1; i >= 0; i--) {
        const currAlt: any = prod.definition[i]
        const currAltPath = {
          idx: currIdx,
          def: currAlt.definition.concat(drop(currDef)),
          ruleStack: currRuleStack,
          occurrenceStack: currOccurrenceStack
        }
        possiblePaths.push(currAltPath)
        possiblePaths.push(EXIT_ALTERNATIVE)
      }
    } else if (prod instanceof Alternative) {
      possiblePaths.push({
        idx: currIdx,
        def: prod.definition.concat(drop(currDef)),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      })
    } else if (prod instanceof Rule) {
      // last because we should only encounter at most a single one of these per invocation.
      possiblePaths.push(
        expandTopLevelRule(prod, currIdx, currRuleStack, currOccurrenceStack)
      )
    } else {
      throw Error("non exhaustive match")
    }
  }
  return result
}
/**
 * Wraps a top-level Rule into a stack entry for `nextPossibleTokensAfter`,
 * pushing the rule onto (copies of) the rule/occurrence stacks.
 */
function expandTopLevelRule(
  topRule: Rule,
  currIdx: number,
  currRuleStack: string[],
  currOccurrenceStack: number[]
): IPathToExamine {
  const newRuleStack = currRuleStack.concat([topRule.name])
  // top rule is always assumed to have been called with occurrence index 1
  const newCurrOccurrenceStack = currOccurrenceStack.concat([1])
  return {
    idx: currIdx,
    def: topRule.definition,
    ruleStack: newRuleStack,
    occurrenceStack: newCurrOccurrenceStack
  }
}

View File

@@ -0,0 +1,33 @@
// Lookahead keys are 32Bit integers in the form
// TTTTTTTT-ZZZZZZZZZZZZ-YYYY-XXXXXXXX
// XXXXXXXX -> Occurrence Index bitmap (8 bits).
// YYYY -> DSL Method Type bitmap (4 bits).
// ZZZZZZZZZZZZ -> Rule short Index bitmap (12 bits).
// TTTTTTTT -> alternation alternative index bitmap (8 bits).
export const BITS_FOR_METHOD_TYPE = 4
export const BITS_FOR_OCCURRENCE_IDX = 8
export const BITS_FOR_RULE_IDX = 12
// TODO: validation, this means that there may at most 2^8 --> 256 alternatives for an alternation.
export const BITS_FOR_ALT_IDX = 8
// short string used as part of mapping keys.
// being short improves the performance when composing KEYS for maps out of these
// The 5 - 8 bits (16 possible values, are reserved for the DSL method indices)
// shifting left by BITS_FOR_OCCURRENCE_IDX places the method type
// above the occurrence-index bitmap
export const OR_IDX = 1 << BITS_FOR_OCCURRENCE_IDX
export const OPTION_IDX = 2 << BITS_FOR_OCCURRENCE_IDX
export const MANY_IDX = 3 << BITS_FOR_OCCURRENCE_IDX
export const AT_LEAST_ONE_IDX = 4 << BITS_FOR_OCCURRENCE_IDX
export const MANY_SEP_IDX = 5 << BITS_FOR_OCCURRENCE_IDX
export const AT_LEAST_ONE_SEP_IDX = 6 << BITS_FOR_OCCURRENCE_IDX
// this actually returns a number, but it is always used as a string (object prop key)
export function getKeyForAutomaticLookahead(
  ruleIdx: number,
  dslMethodIdx: number,
  occurrence: number
): number {
  // The three components occupy disjoint bit ranges (see the constants above),
  // so OR-ing them yields a unique key per (rule, DSL method, occurrence) triple.
  return ruleIdx | dslMethodIdx | occurrence
}
// Bit offset where the alternation alternative index starts (the top 8 bits of the 32 bit key).
const BITS_START_FOR_ALT_IDX = 32 - BITS_FOR_ALT_IDX

View File

@@ -0,0 +1,140 @@
import {
ILookaheadStrategy,
ILookaheadValidationError,
IOrAlt,
Rule,
TokenType,
OptionalProductionType
} from "@chevrotain/types"
import flatMap from "lodash/flatMap"
import isEmpty from "lodash/isEmpty"
import { defaultGrammarValidatorErrorProvider } from "../errors_public"
import { DEFAULT_PARSER_CONFIG } from "../parser/parser"
import {
validateAmbiguousAlternationAlternatives,
validateEmptyOrAlternative,
validateNoLeftRecursion,
validateSomeNonEmptyLookaheadPath
} from "./checks"
import {
buildAlternativesLookAheadFunc,
buildLookaheadFuncForOptionalProd,
buildLookaheadFuncForOr,
buildSingleAlternativeLookaheadFunction,
getProdType
} from "./lookahead"
import { IParserDefinitionError } from "./types"
/**
 * The default, LL(k) based, lookahead strategy: validates the grammar's
 * lookahead related constraints and builds the runtime lookahead functions.
 */
export class LLkLookaheadStrategy implements ILookaheadStrategy {
  readonly maxLookahead: number

  constructor(options?: { maxLookahead?: number }) {
    // fall back to the parser-wide default when no explicit k was provided
    this.maxLookahead =
      options?.maxLookahead ?? DEFAULT_PARSER_CONFIG.maxLookahead
  }

  validate(options: {
    rules: Rule[]
    tokenTypes: TokenType[]
    grammarName: string
  }): ILookaheadValidationError[] {
    // Left recursion makes the lookahead based checks meaningless,
    // so those are only executed when no left recursion was detected.
    const leftRecursionErrors = this.validateNoLeftRecursion(options.rules)
    if (!isEmpty(leftRecursionErrors)) {
      return leftRecursionErrors
    }
    return leftRecursionErrors.concat(
      this.validateEmptyOrAlternatives(options.rules),
      this.validateAmbiguousAlternationAlternatives(
        options.rules,
        this.maxLookahead
      ),
      this.validateSomeNonEmptyLookaheadPath(options.rules, this.maxLookahead)
    )
  }

  validateNoLeftRecursion(rules: Rule[]): IParserDefinitionError[] {
    return flatMap(rules, (topRule) =>
      validateNoLeftRecursion(
        topRule,
        topRule,
        defaultGrammarValidatorErrorProvider
      )
    )
  }

  validateEmptyOrAlternatives(rules: Rule[]): IParserDefinitionError[] {
    return flatMap(rules, (topRule) =>
      validateEmptyOrAlternative(topRule, defaultGrammarValidatorErrorProvider)
    )
  }

  validateAmbiguousAlternationAlternatives(
    rules: Rule[],
    maxLookahead: number
  ): IParserDefinitionError[] {
    return flatMap(rules, (topRule) =>
      validateAmbiguousAlternationAlternatives(
        topRule,
        maxLookahead,
        defaultGrammarValidatorErrorProvider
      )
    )
  }

  validateSomeNonEmptyLookaheadPath(
    rules: Rule[],
    maxLookahead: number
  ): IParserDefinitionError[] {
    return validateSomeNonEmptyLookaheadPath(
      rules,
      maxLookahead,
      defaultGrammarValidatorErrorProvider
    )
  }

  buildLookaheadForAlternation(options: {
    prodOccurrence: number
    rule: Rule
    maxLookahead: number
    hasPredicates: boolean
    dynamicTokensEnabled: boolean
  }): (orAlts?: IOrAlt<any>[] | undefined) => number | undefined {
    const { prodOccurrence, rule, maxLookahead, hasPredicates, dynamicTokensEnabled } =
      options
    return buildLookaheadFuncForOr(
      prodOccurrence,
      rule,
      maxLookahead,
      hasPredicates,
      dynamicTokensEnabled,
      buildAlternativesLookAheadFunc
    )
  }

  buildLookaheadForOptional(options: {
    prodOccurrence: number
    prodType: OptionalProductionType
    rule: Rule
    maxLookahead: number
    dynamicTokensEnabled: boolean
  }): () => boolean {
    const { prodOccurrence, prodType, rule, maxLookahead, dynamicTokensEnabled } =
      options
    return buildLookaheadFuncForOptionalProd(
      prodOccurrence,
      rule,
      maxLookahead,
      dynamicTokensEnabled,
      getProdType(prodType),
      buildSingleAlternativeLookaheadFunction
    )
  }
}

View File

@@ -0,0 +1,741 @@
import isEmpty from "lodash/isEmpty"
import flatten from "lodash/flatten"
import every from "lodash/every"
import map from "lodash/map"
import forEach from "lodash/forEach"
import has from "lodash/has"
import reduce from "lodash/reduce"
import { possiblePathsFrom } from "./interpreter"
import { RestWalker } from "./rest"
import { Predicate, TokenMatcher } from "../parser/parser"
import {
tokenStructuredMatcher,
tokenStructuredMatcherNoCategories
} from "../../scan/tokens"
import {
Alternation,
Alternative as AlternativeGAST,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator
} from "@chevrotain/gast"
import { GAstVisitor } from "@chevrotain/gast"
import {
IOrAlt,
IProduction,
IProductionWithOccurrence,
LookaheadSequence,
LookaheadProductionType,
Rule,
TokenType,
BaseParser
} from "@chevrotain/types"
// Discriminator for the kinds of grammar productions the lookahead
// computations below must handle.
export enum PROD_TYPE {
  OPTION,
  REPETITION,
  REPETITION_MANDATORY,
  REPETITION_MANDATORY_WITH_SEPARATOR,
  REPETITION_WITH_SEPARATOR,
  ALTERNATION
}
/**
 * Maps a production (either a GAST instance or its string type name)
 * to the matching PROD_TYPE enum value.
 */
export function getProdType(
  prod: IProduction | LookaheadProductionType
): PROD_TYPE {
  /* istanbul ignore else */
  if (prod === "Option" || prod instanceof Option) {
    return PROD_TYPE.OPTION
  } else if (prod === "Repetition" || prod instanceof Repetition) {
    return PROD_TYPE.REPETITION
  } else if (
    prod === "RepetitionMandatory" ||
    prod instanceof RepetitionMandatory
  ) {
    return PROD_TYPE.REPETITION_MANDATORY
  } else if (
    prod === "RepetitionMandatoryWithSeparator" ||
    prod instanceof RepetitionMandatoryWithSeparator
  ) {
    return PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR
  } else if (
    prod === "RepetitionWithSeparator" ||
    prod instanceof RepetitionWithSeparator
  ) {
    return PROD_TYPE.REPETITION_WITH_SEPARATOR
  } else if (prod === "Alternation" || prod instanceof Alternation) {
    return PROD_TYPE.ALTERNATION
  } else {
    throw Error("non exhaustive match")
  }
}
export function getLookaheadPaths(options: {
occurrence: number
rule: Rule
prodType: LookaheadProductionType
maxLookahead: number
}): LookaheadSequence[] {
const { occurrence, rule, prodType, maxLookahead } = options
const type = getProdType(prodType)
if (type === PROD_TYPE.ALTERNATION) {
return getLookaheadPathsForOr(occurrence, rule, maxLookahead)
} else {
return getLookaheadPathsForOptionalProd(
occurrence,
rule,
type,
maxLookahead
)
}
}
/**
 * Builds the runtime lookahead function for an Alternation (OR) by computing
 * the k-token lookahead paths of every alternative and delegating the actual
 * function construction to `laFuncBuilder`.
 */
export function buildLookaheadFuncForOr(
  occurrence: number,
  ruleGrammar: Rule,
  maxLookahead: number,
  hasPredicates: boolean,
  dynamicTokensEnabled: boolean,
  laFuncBuilder: Function
): (orAlts?: IOrAlt<any>[]) => number | undefined {
  const lookAheadPaths = getLookaheadPathsForOr(
    occurrence,
    ruleGrammar,
    maxLookahead
  )
  // when no token categories appear in any path a cheaper matcher suffices
  const matcher = areTokenCategoriesNotUsed(lookAheadPaths)
    ? tokenStructuredMatcherNoCategories
    : tokenStructuredMatcher
  return laFuncBuilder(
    lookAheadPaths,
    hasPredicates,
    matcher,
    dynamicTokensEnabled
  )
}
/**
 * When dealing with an Optional production (OPTION/MANY/2nd iteration of AT_LEAST_ONE/...) we need to compare
 * the lookahead "inside" the production and the lookahead immediately "after" it in the same top level rule (context free).
 *
 * Example: given a production:
 * ABC(DE)?DF
 *
 * The optional '(DE)?' should only be entered if we see 'DE'. A single Token 'D' is not sufficient to distinguish between the two
 * alternatives.
 *
 * @returns A Lookahead function which will return true IFF the parser should parse the Optional production.
 */
export function buildLookaheadFuncForOptionalProd(
  occurrence: number,
  ruleGrammar: Rule,
  k: number,
  dynamicTokensEnabled: boolean,
  prodType: PROD_TYPE,
  lookaheadBuilder: (
    lookAheadSequence: LookaheadSequence,
    tokenMatcher: TokenMatcher,
    dynamicTokensEnabled: boolean
  ) => () => boolean
): () => boolean {
  const lookAheadPaths = getLookaheadPathsForOptionalProd(
    occurrence,
    ruleGrammar,
    prodType,
    k
  )
  // when no token categories appear in any path a cheaper matcher suffices
  const matcher = areTokenCategoriesNotUsed(lookAheadPaths)
    ? tokenStructuredMatcherNoCategories
    : tokenStructuredMatcher
  // only the "inside" alternative (index 0) drives the generated function
  return lookaheadBuilder(lookAheadPaths[0], matcher, dynamicTokensEnabled)
}
// One alternative's lookahead data: each inner array is a single token path.
export type Alternative = TokenType[][]
/**
 * Builds the runtime lookahead function for an Alternation: given the upcoming
 * tokens (and optionally per-alternative GATE predicates) it returns the index
 * of the alternative the parser should take, or undefined when none match.
 *
 * Three variants are generated, chosen once at build time:
 *  1. predicate-aware (hasPredicates) - evaluates GATEs before path matching.
 *  2. single-token fast path - all paths have length 1 and tokens are static,
 *     so a precomputed tokenTypeIdx -> alternative table is used.
 *  3. general path matching - same as (1) minus the predicate handling
 *     (duplicated intentionally for performance).
 */
export function buildAlternativesLookAheadFunc(
  alts: LookaheadSequence[],
  hasPredicates: boolean,
  tokenMatcher: TokenMatcher,
  dynamicTokensEnabled: boolean
): (orAlts: IOrAlt<any>[]) => number | undefined {
  const numOfAlts = alts.length
  const areAllOneTokenLookahead = every(alts, (currAlt) => {
    return every(currAlt, (currPath) => {
      return currPath.length === 1
    })
  })
  // This version takes into account the predicates as well.
  if (hasPredicates) {
    /**
     * @returns {number} - The chosen alternative index
     */
    return function (
      this: BaseParser,
      orAlts: IOrAlt<any>[]
    ): number | undefined {
      // unfortunately the predicates must be extracted every single time
      // as they cannot be cached due to references to parameters(vars) which are no longer valid.
      // note that in the common case of no predicates, no cpu time will be wasted on this (see else block)
      const predicates: (Predicate | undefined)[] = map(
        orAlts,
        (currAlt) => currAlt.GATE
      )
      for (let t = 0; t < numOfAlts; t++) {
        const currAlt = alts[t]
        const currNumOfPaths = currAlt.length
        const currPredicate = predicates[t]
        if (currPredicate !== undefined && currPredicate.call(this) === false) {
          // if the predicate does not match there is no point in checking the paths
          continue
        }
        nextPath: for (let j = 0; j < currNumOfPaths; j++) {
          const currPath = currAlt[j]
          const currPathLength = currPath.length
          for (let i = 0; i < currPathLength; i++) {
            const nextToken = this.LA(i + 1)
            if (tokenMatcher(nextToken, currPath[i]) === false) {
              // mismatch in current path
              // try the next path
              continue nextPath
            }
          }
          // found a full path that matches.
          // this will also work for an empty ALT as the loop will be skipped
          return t
        }
        // none of the paths for the current alternative matched
        // try the next alternative
      }
      // none of the alternatives could be matched
      return undefined
    }
  } else if (areAllOneTokenLookahead && !dynamicTokensEnabled) {
    // optimized (common) case of all the lookaheads paths requiring only
    // a single token lookahead. These Optimizations cannot work if dynamically defined Tokens are used.
    const singleTokenAlts = map(alts, (currAlt) => {
      return flatten(currAlt)
    })
    // precomputed table: tokenTypeIdx (or category idx) -> first alternative
    // whose single-token path accepts it.
    const choiceToAlt = reduce(
      singleTokenAlts,
      (result, currAlt, idx) => {
        forEach(currAlt, (currTokType) => {
          if (!has(result, currTokType.tokenTypeIdx!)) {
            result[currTokType.tokenTypeIdx!] = idx
          }
          forEach(currTokType.categoryMatches!, (currExtendingType) => {
            if (!has(result, currExtendingType)) {
              result[currExtendingType] = idx
            }
          })
        })
        return result
      },
      {} as Record<number, number>
    )
    /**
     * @returns {number} - The chosen alternative index
     */
    return function (this: BaseParser): number {
      const nextToken = this.LA(1)
      return choiceToAlt[nextToken.tokenTypeIdx]
    }
  } else {
    // optimized lookahead without needing to check the predicates at all.
    // this causes code duplication which is intentional to improve performance.
    /**
     * @returns {number} - The chosen alternative index
     */
    return function (this: BaseParser): number | undefined {
      for (let t = 0; t < numOfAlts; t++) {
        const currAlt = alts[t]
        const currNumOfPaths = currAlt.length
        nextPath: for (let j = 0; j < currNumOfPaths; j++) {
          const currPath = currAlt[j]
          const currPathLength = currPath.length
          for (let i = 0; i < currPathLength; i++) {
            const nextToken = this.LA(i + 1)
            if (tokenMatcher(nextToken, currPath[i]) === false) {
              // mismatch in current path
              // try the next path
              continue nextPath
            }
          }
          // found a full path that matches.
          // this will also work for an empty ALT as the loop will be skipped
          return t
        }
        // none of the paths for the current alternative matched
        // try the next alternative
      }
      // none of the alternatives could be matched
      return undefined
    }
  }
}
/**
 * Builds the runtime lookahead function for a single-alternative (optional)
 * production: the returned function answers "should the parser enter the
 * production given the upcoming tokens?".
 */
export function buildSingleAlternativeLookaheadFunction(
  alt: LookaheadSequence,
  tokenMatcher: TokenMatcher,
  dynamicTokensEnabled: boolean
): () => boolean {
  const areAllOneTokenLookahead = every(alt, (currPath) => {
    return currPath.length === 1
  })
  const numOfPaths = alt.length
  // optimized (common) case of all the lookaheads paths requiring only
  // a single token lookahead.
  if (areAllOneTokenLookahead && !dynamicTokensEnabled) {
    const singleTokensTypes = flatten(alt)
    if (
      singleTokensTypes.length === 1 &&
      isEmpty((<any>singleTokensTypes[0]).categoryMatches)
    ) {
      // fastest variant: exactly one expected token type with no categories,
      // so the next token's type index can be compared directly.
      const expectedTokenType = singleTokensTypes[0]
      const expectedTokenUniqueKey = (<any>expectedTokenType).tokenTypeIdx
      return function (this: BaseParser): boolean {
        return this.LA(1).tokenTypeIdx === expectedTokenUniqueKey
      }
    } else {
      // several candidate types (or categories): precompute a
      // tokenTypeIdx -> true table and do a single lookup at runtime.
      const choiceToAlt = reduce(
        singleTokensTypes,
        (result, currTokType, idx) => {
          result[currTokType.tokenTypeIdx!] = true
          forEach(currTokType.categoryMatches!, (currExtendingType) => {
            result[currExtendingType] = true
          })
          return result
        },
        [] as boolean[]
      )
      return function (this: BaseParser): boolean {
        const nextToken = this.LA(1)
        return choiceToAlt[nextToken.tokenTypeIdx] === true
      }
    }
  } else {
    // general case: compare the upcoming tokens against every path in turn.
    return function (this: BaseParser): boolean {
      nextPath: for (let j = 0; j < numOfPaths; j++) {
        const currPath = alt[j]
        const currPathLength = currPath.length
        for (let i = 0; i < currPathLength; i++) {
          const nextToken = this.LA(i + 1)
          if (tokenMatcher(nextToken, currPath[i]) === false) {
            // mismatch in current path
            // try the next path
            continue nextPath
          }
        }
        // found a full path that matches.
        return true
      }
      // none of the paths matched
      return false
    }
  }
}
/**
 * Walks a top level rule to compute the grammar "rest" (everything that may
 * follow) of a target production, identified by occurrence index + prod type.
 */
class RestDefinitionFinderWalker extends RestWalker {
  // NOTE(review): stays unassigned when the target is never found, making
  // startWalking() return undefined — confirm callers tolerate this.
  private restDef: IProduction[]
  constructor(
    private topProd: Rule,
    private targetOccurrence: number,
    private targetProdType: PROD_TYPE
  ) {
    super()
  }
  startWalking(): IProduction[] {
    this.walk(this.topProd)
    return this.restDef
  }
  // Captures currRest + prevRest as the result when `node` is the target.
  private checkIsTarget(
    node: IProductionWithOccurrence,
    expectedProdType: PROD_TYPE,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): boolean {
    if (
      node.idx === this.targetOccurrence &&
      this.targetProdType === expectedProdType
    ) {
      this.restDef = currRest.concat(prevRest)
      return true
    }
    // performance optimization, do not iterate over the entire Grammar ast after we have found the target
    return false
  }
  walkOption(
    optionProd: Option,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (!this.checkIsTarget(optionProd, PROD_TYPE.OPTION, currRest, prevRest)) {
      super.walkOption(optionProd, currRest, prevRest)
    }
  }
  // NOTE(review): the repetition variants below all recurse via
  // super.walkOption rather than their matching super.walkX — presumably
  // intentional (only the shared traversal of the production's definition is
  // needed once the target check failed), but confirm vs RestWalker semantics.
  walkAtLeastOne(
    atLeastOneProd: RepetitionMandatory,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (
      !this.checkIsTarget(
        atLeastOneProd,
        PROD_TYPE.REPETITION_MANDATORY,
        currRest,
        prevRest
      )
    ) {
      super.walkOption(atLeastOneProd, currRest, prevRest)
    }
  }
  walkAtLeastOneSep(
    atLeastOneSepProd: RepetitionMandatoryWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (
      !this.checkIsTarget(
        atLeastOneSepProd,
        PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR,
        currRest,
        prevRest
      )
    ) {
      super.walkOption(atLeastOneSepProd, currRest, prevRest)
    }
  }
  walkMany(
    manyProd: Repetition,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (
      !this.checkIsTarget(manyProd, PROD_TYPE.REPETITION, currRest, prevRest)
    ) {
      super.walkOption(manyProd, currRest, prevRest)
    }
  }
  walkManySep(
    manySepProd: RepetitionWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (
      !this.checkIsTarget(
        manySepProd,
        PROD_TYPE.REPETITION_WITH_SEPARATOR,
        currRest,
        prevRest
      )
    ) {
      super.walkOption(manySepProd, currRest, prevRest)
    }
  }
}
/**
 * Locates the definition of a target production nested inside a top level rule.
 */
class InsideDefinitionFinderVisitor extends GAstVisitor {
  public result: IProduction[] = []
  constructor(
    private targetOccurrence: number,
    private targetProdType: PROD_TYPE,
    private targetRef?: any
  ) {
    super()
  }
  // Records node.definition once the node matches all the search criteria.
  private checkIsTarget(
    node: { definition: IProduction[] } & IProductionWithOccurrence,
    expectedProdName: PROD_TYPE
  ): void {
    if (node.idx !== this.targetOccurrence) {
      return
    }
    if (this.targetProdType !== expectedProdName) {
      return
    }
    // when a concrete reference was provided it must be this exact node
    if (this.targetRef !== undefined && node !== this.targetRef) {
      return
    }
    this.result = node.definition
  }
  public visitOption(node: Option): void {
    this.checkIsTarget(node, PROD_TYPE.OPTION)
  }
  public visitRepetition(node: Repetition): void {
    this.checkIsTarget(node, PROD_TYPE.REPETITION)
  }
  public visitRepetitionMandatory(node: RepetitionMandatory): void {
    this.checkIsTarget(node, PROD_TYPE.REPETITION_MANDATORY)
  }
  public visitRepetitionMandatoryWithSeparator(
    node: RepetitionMandatoryWithSeparator
  ): void {
    this.checkIsTarget(node, PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR)
  }
  public visitRepetitionWithSeparator(node: RepetitionWithSeparator): void {
    this.checkIsTarget(node, PROD_TYPE.REPETITION_WITH_SEPARATOR)
  }
  public visitAlternation(node: Alternation): void {
    this.checkIsTarget(node, PROD_TYPE.ALTERNATION)
  }
}
// Creates an array of `size` distinct empty arrays (no shared references).
function initializeArrayOfArrays(size: number): any[][] {
  return Array.from({ length: size }, () => [])
}
/**
 * A sort of hash function between a Path in the grammar and a string.
 * Note that this returns multiple "hashes" to support the scenario of token categories.
 * - A single path with categories may match multiple **actual** paths.
 */
function pathToHashKeys(path: TokenType[]): string[] {
  // start from a single empty key and extend it token by token;
  // each token multiplies the key set by (1 + number of its categories).
  let keys = [""]
  for (let i = 0; i < path.length; i++) {
    const tokType = path[i]
    const longerKeys = []
    for (let j = 0; j < keys.length; j++) {
      const currShorterKey = keys[j]
      longerKeys.push(currShorterKey + "_" + tokType.tokenTypeIdx)
      for (let t = 0; t < tokType.categoryMatches!.length; t++) {
        const categoriesKeySuffix = "_" + tokType.categoryMatches![t]
        longerKeys.push(currShorterKey + categoriesKeySuffix)
      }
    }
    keys = longerKeys
  }
  return keys
}
/**
 * Checks whether none of `searchPathKeys` was already seen in the key sets of
 * the OTHER alternatives (the alternative at `idx` is excluded).
 * Called from a hot spot.
 */
function isUniquePrefixHash(
  altKnownPathsKeys: Record<string, boolean>[],
  searchPathKeys: string[],
  idx: number
): boolean {
  for (let otherIdx = 0; otherIdx < altKnownPathsKeys.length; otherIdx++) {
    // only compare against the other alternatives
    if (otherIdx === idx) {
      continue
    }
    const knownKeys = altKnownPathsKeys[otherIdx]
    if (searchPathKeys.some((searchKey) => knownKeys[searchKey] === true)) {
      return false
    }
  }
  // none of the searchPathKeys were found in any other alternative
  return true
}
/**
 * Computes, per alternative, the minimal lookahead token sequences (up to
 * length k) that distinguish it from the other alternatives.
 *
 * Works breadth-first on path prefixes: a prefix is finalized once it is
 * unique among the alternatives, cannot be extended any further, or has
 * reached length k; otherwise it is extended by one token and re-examined
 * on the next iteration.
 */
export function lookAheadSequenceFromAlternatives(
  altsDefs: IProduction[],
  k: number
): LookaheadSequence[] {
  const partialAlts = map(altsDefs, (currAlt) =>
    possiblePathsFrom([currAlt], 1)
  )
  const finalResult = initializeArrayOfArrays(partialAlts.length)
  // per-alternative dictionaries of all path hash keys seen so far
  const altsHashes = map(partialAlts, (currAltPaths) => {
    const dict: { [key: string]: boolean } = {}
    forEach(currAltPaths, (item) => {
      const keys = pathToHashKeys(item.partialPath)
      forEach(keys, (currKey) => {
        dict[currKey] = true
      })
    })
    return dict
  })
  let newData = partialAlts
  // maxLookahead loop
  for (let pathLength = 1; pathLength <= k; pathLength++) {
    const currDataset = newData
    newData = initializeArrayOfArrays(currDataset.length)
    // alternatives loop
    for (let altIdx = 0; altIdx < currDataset.length; altIdx++) {
      const currAltPathsAndSuffixes = currDataset[altIdx]
      // paths in current alternative loop
      for (
        let currPathIdx = 0;
        currPathIdx < currAltPathsAndSuffixes.length;
        currPathIdx++
      ) {
        const currPathPrefix = currAltPathsAndSuffixes[currPathIdx].partialPath
        const suffixDef = currAltPathsAndSuffixes[currPathIdx].suffixDef
        const prefixKeys = pathToHashKeys(currPathPrefix)
        const isUnique = isUniquePrefixHash(altsHashes, prefixKeys, altIdx)
        // End of the line for this path.
        if (isUnique || isEmpty(suffixDef) || currPathPrefix.length === k) {
          const currAltResult = finalResult[altIdx]
          // TODO: Can we implement a containsPath using Maps/Dictionaries?
          if (containsPath(currAltResult, currPathPrefix) === false) {
            currAltResult.push(currPathPrefix)
            // Update all new keys for the current path.
            for (let j = 0; j < prefixKeys.length; j++) {
              const currKey = prefixKeys[j]
              altsHashes[altIdx][currKey] = true
            }
          }
        }
        // Expand longer paths
        else {
          const newPartialPathsAndSuffixes = possiblePathsFrom(
            suffixDef,
            pathLength + 1,
            currPathPrefix
          )
          newData[altIdx] = newData[altIdx].concat(newPartialPathsAndSuffixes)
          // Update keys for new known paths
          forEach(newPartialPathsAndSuffixes, (item) => {
            const prefixKeys = pathToHashKeys(item.partialPath)
            forEach(prefixKeys, (key) => {
              altsHashes[altIdx][key] = true
            })
          })
        }
      }
    }
  }
  return finalResult
}
/**
 * Computes the lookahead sequences (one per alternative) for the Alternation
 * identified by `occurrence` (and optionally a concrete `orProd` reference)
 * inside `ruleGrammar`.
 */
export function getLookaheadPathsForOr(
  occurrence: number,
  ruleGrammar: Rule,
  k: number,
  orProd?: Alternation
): LookaheadSequence[] {
  const finder = new InsideDefinitionFinderVisitor(
    occurrence,
    PROD_TYPE.ALTERNATION,
    orProd
  )
  ruleGrammar.accept(finder)
  return lookAheadSequenceFromAlternatives(finder.result, k)
}
/**
 * Computes the lookahead sequences for an optional production as a pair of
 * alternatives: [0] the paths "inside" the production, [1] the paths of the
 * grammar "after" it.
 */
export function getLookaheadPathsForOptionalProd(
  occurrence: number,
  ruleGrammar: Rule,
  prodType: PROD_TYPE,
  k: number
): LookaheadSequence[] {
  // locate the target production's own definition
  const insideDefVisitor = new InsideDefinitionFinderVisitor(
    occurrence,
    prodType
  )
  ruleGrammar.accept(insideDefVisitor)
  // compute the grammar that may follow the target production
  const afterDefWalker = new RestDefinitionFinderWalker(
    ruleGrammar,
    occurrence,
    prodType
  )
  const insideFlat = new AlternativeGAST({
    definition: insideDefVisitor.result
  })
  const afterFlat = new AlternativeGAST({
    definition: afterDefWalker.startWalking()
  })
  return lookAheadSequenceFromAlternatives([insideFlat, afterFlat], k)
}
/**
 * True when `alternative` already holds a path equal to `searchPath`.
 * Tokens match positionally either by identity or via the existing token's
 * category map containing the searched token's type index.
 */
export function containsPath(
  alternative: Alternative,
  searchPath: TokenType[]
): boolean {
  return alternative.some((otherPath) => {
    if (otherPath.length !== searchPath.length) {
      return false
    }
    return otherPath.every((otherTok, j) => {
      const searchTok = searchPath[j]
      return (
        searchTok === otherTok ||
        otherTok.categoryMatchesMap![searchTok.tokenTypeIdx!] !== undefined
      )
    })
  })
}
/**
 * True when `prefix` is a strictly shorter leading segment of `other`,
 * comparing tokens by identity or category-map membership.
 */
export function isStrictPrefixOfPath(
  prefix: TokenType[],
  other: TokenType[]
): boolean {
  if (prefix.length >= other.length) {
    return false
  }
  return prefix.every((tokType, idx) => {
    const otherTokType = other[idx]
    return (
      tokType === otherTokType ||
      otherTokType.categoryMatchesMap![tokType.tokenTypeIdx!]
    )
  })
}
// True when no token anywhere in the lookahead paths carries category matches,
// which enables the cheaper "no categories" token matcher.
export function areTokenCategoriesNotUsed(
  lookAheadPaths: LookaheadSequence[]
): boolean {
  return lookAheadPaths.every((singleAltPaths) =>
    singleAltPaths.every((singlePath) =>
      singlePath.every((token) => (token.categoryMatches ?? []).length === 0)
    )
  )
}

View File

@@ -0,0 +1,59 @@
import {
IParserUnresolvedRefDefinitionError,
ParserDefinitionErrorType
} from "../parser/parser"
import forEach from "lodash/forEach"
import values from "lodash/values"
import { NonTerminal, Rule } from "@chevrotain/gast"
import { GAstVisitor } from "@chevrotain/gast"
import {
IGrammarResolverErrorMessageProvider,
IParserDefinitionError
} from "./types"
/**
 * Resolves every NonTerminal reference in the given top level rules to its
 * Rule object, collecting an error for each reference that cannot be resolved.
 */
export function resolveGrammar(
  topLevels: Record<string, Rule>,
  errMsgProvider: IGrammarResolverErrorMessageProvider
): IParserDefinitionError[] {
  const resolver = new GastRefResolverVisitor(topLevels, errMsgProvider)
  resolver.resolveRefs()
  return resolver.errors
}
/**
 * GAST visitor that wires each NonTerminal's `referencedRule` to the Rule
 * with the matching name, recording an error for unresolvable references.
 */
export class GastRefResolverVisitor extends GAstVisitor {
  public errors: IParserUnresolvedRefDefinitionError[] = []
  private currTopLevel: Rule
  constructor(
    private nameToTopRule: Record<string, Rule>,
    private errMsgProvider: IGrammarResolverErrorMessageProvider
  ) {
    super()
  }
  public resolveRefs(): void {
    for (const prod of Object.values(this.nameToTopRule)) {
      // track the enclosing rule for error reporting
      this.currTopLevel = prod
      prod.accept(this)
    }
  }
  public visitNonTerminal(node: NonTerminal): void {
    const resolvedRule = this.nameToTopRule[node.nonTerminalName]
    if (resolvedRule) {
      node.referencedRule = resolvedRule
      return
    }
    this.errors.push({
      message: this.errMsgProvider.buildRuleNotFoundError(
        this.currTopLevel,
        node
      ),
      type: ParserDefinitionErrorType.UNRESOLVED_SUBRULE_REF,
      ruleName: this.currTopLevel.name,
      unresolvedRefName: node.nonTerminalName
    })
  }
}

View File

@@ -0,0 +1,164 @@
import drop from "lodash/drop"
import forEach from "lodash/forEach"
import {
Alternation,
Alternative,
NonTerminal,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Terminal
} from "@chevrotain/gast"
import { IProduction } from "@chevrotain/types"
/**
 * A Grammar Walker that computes the "remaining" grammar "after" a production in the grammar.
 * Subclasses override the relevant walkX hooks; each hook receives the
 * production plus the rest of the current scope (currRest) and of the
 * enclosing scopes (prevRest).
 */
export abstract class RestWalker {
  walk(prod: { definition: IProduction[] }, prevRest: any[] = []): void {
    forEach(prod.definition, (subProd: IProduction, index) => {
      // everything after the current sub-production in this scope
      const currRest = drop(prod.definition, index + 1)
      /* istanbul ignore else */
      // NOTE(review): the "WithSeparator" variants are tested before plain
      // Repetition — presumably so they are not shadowed via a shared base
      // class; confirm against the @chevrotain/gast class hierarchy.
      if (subProd instanceof NonTerminal) {
        this.walkProdRef(subProd, currRest, prevRest)
      } else if (subProd instanceof Terminal) {
        this.walkTerminal(subProd, currRest, prevRest)
      } else if (subProd instanceof Alternative) {
        this.walkFlat(subProd, currRest, prevRest)
      } else if (subProd instanceof Option) {
        this.walkOption(subProd, currRest, prevRest)
      } else if (subProd instanceof RepetitionMandatory) {
        this.walkAtLeastOne(subProd, currRest, prevRest)
      } else if (subProd instanceof RepetitionMandatoryWithSeparator) {
        this.walkAtLeastOneSep(subProd, currRest, prevRest)
      } else if (subProd instanceof RepetitionWithSeparator) {
        this.walkManySep(subProd, currRest, prevRest)
      } else if (subProd instanceof Repetition) {
        this.walkMany(subProd, currRest, prevRest)
      } else if (subProd instanceof Alternation) {
        this.walkOr(subProd, currRest, prevRest)
      } else {
        throw Error("non exhaustive match")
      }
    })
  }
  // default hooks are no-ops; subclasses override the ones they care about
  walkTerminal(
    terminal: Terminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {}
  walkProdRef(
    refProd: NonTerminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {}
  walkFlat(
    flatProd: Alternative,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABCDEF => after the D the rest is EF
    const fullOrRest = currRest.concat(prevRest)
    this.walk(flatProd, <any>fullOrRest)
  }
  walkOption(
    optionProd: Option,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC(DE)?F => after the (DE)? the rest is F
    const fullOrRest = currRest.concat(prevRest)
    this.walk(optionProd, <any>fullOrRest)
  }
  walkAtLeastOne(
    atLeastOneProd: RepetitionMandatory,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC(DE)+F => after the (DE)+ the rest is (DE)?F
    const fullAtLeastOneRest: IProduction[] = [
      new Option({ definition: atLeastOneProd.definition })
    ].concat(<any>currRest, <any>prevRest)
    this.walk(atLeastOneProd, fullAtLeastOneRest)
  }
  walkAtLeastOneSep(
    atLeastOneSepProd: RepetitionMandatoryWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC DE(,DE)* F => after the (,DE)+ the rest is (,DE)?F
    const fullAtLeastOneSepRest = restForRepetitionWithSeparator(
      atLeastOneSepProd,
      currRest,
      prevRest
    )
    this.walk(atLeastOneSepProd, fullAtLeastOneSepRest)
  }
  walkMany(
    manyProd: Repetition,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC(DE)*F => after the (DE)* the rest is (DE)?F
    const fullManyRest: IProduction[] = [
      new Option({ definition: manyProd.definition })
    ].concat(<any>currRest, <any>prevRest)
    this.walk(manyProd, fullManyRest)
  }
  walkManySep(
    manySepProd: RepetitionWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC (DE(,DE)*)? F => after the (,DE)* the rest is (,DE)?F
    const fullManySepRest = restForRepetitionWithSeparator(
      manySepProd,
      currRest,
      prevRest
    )
    this.walk(manySepProd, fullManySepRest)
  }
  walkOr(
    orProd: Alternation,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC(D|E|F)G => when finding the (D|E|F) the rest is G
    const fullOrRest = currRest.concat(prevRest)
    // walk all different alternatives
    forEach(orProd.definition, (alt) => {
      // wrapping each alternative in a single definition wrapper
      // to avoid errors in computing the rest of that alternative in the invocation to computeInProdFollows
      // (otherwise for OR([alt1,alt2]) alt2 will be considered in 'rest' of alt1
      const prodWrapper = new Alternative({ definition: [alt] })
      this.walk(prodWrapper, <any>fullOrRest)
    })
  }
}
/**
 * Builds the "rest" grammar that follows a repetition-with-separator:
 * an optional "separator then the repeated definition" (one more iteration),
 * followed by the surrounding rest.
 */
function restForRepetitionWithSeparator(
  repSepProd: RepetitionWithSeparator,
  currRest: IProduction[],
  prevRest: IProduction[]
) {
  const optionalNextIteration = new Option({
    definition: [
      new Terminal({ terminalType: repSepProd.separator }) as IProduction
    ].concat(repSepProd.definition)
  }) as IProduction
  const fullRepSepRest: IProduction[] = [optionalNextIteration].concat(
    currRest,
    prevRest
  )
  return fullRepSepRest
}

View File

@@ -0,0 +1,94 @@
import {
Alternation,
IProductionWithOccurrence,
NonTerminal,
Rule,
TokenType
} from "@chevrotain/types"
// Base shape shared by all grammar/parser definition validation errors.
export interface IParserDefinitionError {
  message: string
  type: ParserDefinitionErrorType
  // name of the rule the error originates from, when applicable
  ruleName?: string
}
// NOTE(review): ambient declaration mirroring the ParserDefinitionErrorType
// enum defined in parse/parser/parser — the numeric values must stay in sync.
export declare enum ParserDefinitionErrorType {
  INVALID_RULE_NAME = 0,
  DUPLICATE_RULE_NAME = 1,
  INVALID_RULE_OVERRIDE = 2,
  DUPLICATE_PRODUCTIONS = 3,
  UNRESOLVED_SUBRULE_REF = 4,
  LEFT_RECURSION = 5,
  NONE_LAST_EMPTY_ALT = 6,
  AMBIGUOUS_ALTS = 7,
  CONFLICT_TOKENS_RULES_NAMESPACE = 8,
  INVALID_TOKEN_NAME = 9,
  NO_NON_EMPTY_LOOKAHEAD = 10,
  AMBIGUOUS_PREFIX_ALTS = 11,
  TOO_MANY_ALTS = 12,
  CUSTOM_LOOKAHEAD_VALIDATION = 13
}
// Hooks for constructing the human readable text of each grammar
// validation error kind.
export interface IGrammarValidatorErrorMessageProvider {
  buildDuplicateFoundError(
    topLevelRule: Rule,
    duplicateProds: IProductionWithOccurrence[]
  ): string
  buildNamespaceConflictError(topLevelRule: Rule): string
  buildAlternationPrefixAmbiguityError(options: {
    topLevelRule: Rule
    prefixPath: TokenType[]
    ambiguityIndices: number[]
    alternation: Alternation
  }): string
  buildAlternationAmbiguityError(options: {
    topLevelRule: Rule
    prefixPath: TokenType[]
    ambiguityIndices: number[]
    alternation: Alternation
  }): string
  buildEmptyRepetitionError(options: {
    topLevelRule: Rule
    repetition: IProductionWithOccurrence
  }): string
  /**
   * @deprecated - There are no longer constraints on Token names
   * This method will be removed from the interface in future versions.
   * Providing it will currently have no impact on the runtime.
   */
  buildTokenNameError(options: {
    tokenType: TokenType
    expectedPattern: RegExp
  }): any
  buildEmptyAlternationError(options: {
    topLevelRule: Rule
    alternation: Alternation
    emptyChoiceIdx: number
  }): any
  buildTooManyAlternativesError(options: {
    topLevelRule: Rule
    alternation: Alternation
  }): string
  buildLeftRecursionError(options: {
    topLevelRule: Rule
    leftRecursionPath: Rule[]
  }): string
  /**
   * @deprecated - There are no longer constraints on Rule names
   * This method will be removed from the interface in future versions.
   * Providing it will currently have no impact on the runtime.
   */
  buildInvalidRuleNameError(options: {
    topLevelRule: Rule
    expectedPattern: RegExp
  }): string
  buildDuplicateRuleNameError(options: {
    topLevelRule: Rule | string
    grammarName: string
  }): string
}
// Error message construction hook used while resolving subrule references.
export interface IGrammarResolverErrorMessageProvider {
  buildRuleNotFoundError(topLevelRule: Rule, undefinedRule: NonTerminal): string
}

View File

@@ -0,0 +1,323 @@
import isEmpty from "lodash/isEmpty"
import map from "lodash/map"
import forEach from "lodash/forEach"
import values from "lodash/values"
import has from "lodash/has"
import clone from "lodash/clone"
import { toFastProperties } from "@chevrotain/utils"
import { computeAllProdsFollows } from "../grammar/follow"
import { createTokenInstance, EOF } from "../../scan/tokens_public"
import {
defaultGrammarValidatorErrorProvider,
defaultParserErrorProvider
} from "../errors_public"
import {
resolveGrammar,
validateGrammar
} from "../grammar/gast/gast_resolver_public"
import {
CstNode,
IParserConfig,
IRecognitionException,
IRuleConfig,
IToken,
TokenType,
TokenVocabulary
} from "@chevrotain/types"
import { Recoverable } from "./traits/recoverable"
import { LooksAhead } from "./traits/looksahead"
import { TreeBuilder } from "./traits/tree_builder"
import { LexerAdapter } from "./traits/lexer_adapter"
import { RecognizerApi } from "./traits/recognizer_api"
import { RecognizerEngine } from "./traits/recognizer_engine"
import { ErrorHandler } from "./traits/error_handler"
import { MixedInParser } from "./traits/parser_traits"
import { ContentAssist } from "./traits/context_assist"
import { GastRecorder } from "./traits/gast_recorder"
import { PerformanceTracer } from "./traits/perf_tracer"
import { applyMixins } from "./utils/apply_mixins"
import { IParserDefinitionError } from "../grammar/types"
import { Rule } from "@chevrotain/gast"
import { IParserConfigInternal, ParserMethodInternal } from "./types"
import { validateLookahead } from "../grammar/checks"
// Sentinel token returned when the parser looks past the end of the token
// vector. All position fields are NaN because it exists in no input text.
export const END_OF_FILE = createTokenInstance(
  EOF,
  "",
  NaN,
  NaN,
  NaN,
  NaN,
  NaN,
  NaN
)
// Frozen so the shared sentinel instance can never be mutated.
Object.freeze(END_OF_FILE)

// Signature of the predicate deciding whether a concrete token matches a
// TokenType (e.g. strict matching vs. token-category aware matching).
export type TokenMatcher = (token: IToken, tokType: TokenType) => boolean

// Default values for every IParserConfig property except `lookaheadStrategy`
// (which has no serializable default - see initLooksAhead).
export const DEFAULT_PARSER_CONFIG: Required<
  Omit<IParserConfigInternal, "lookaheadStrategy">
> = Object.freeze({
  recoveryEnabled: false,
  maxLookahead: 3,
  dynamicTokensEnabled: false,
  outputCst: true,
  errorMessageProvider: defaultParserErrorProvider,
  nodeLocationTracking: "none",
  traceInitPerf: false,
  skipValidations: false
})

// Default values for the per-rule (RULE DSL method) configuration.
export const DEFAULT_RULE_CONFIG: Required<IRuleConfig<any>> = Object.freeze({
  recoveryValueFunc: () => undefined,
  resyncEnabled: true
})
/**
 * Categories of parser **definition** (design time) errors that may be
 * detected during `performSelfAnalysis`, e.g. duplicate rule names or
 * ambiguous alternatives. These are distinct from runtime recognition errors.
 */
export enum ParserDefinitionErrorType {
  INVALID_RULE_NAME = 0,
  DUPLICATE_RULE_NAME = 1,
  INVALID_RULE_OVERRIDE = 2,
  DUPLICATE_PRODUCTIONS = 3,
  UNRESOLVED_SUBRULE_REF = 4,
  LEFT_RECURSION = 5,
  NONE_LAST_EMPTY_ALT = 6,
  AMBIGUOUS_ALTS = 7,
  CONFLICT_TOKENS_RULES_NAMESPACE = 8,
  INVALID_TOKEN_NAME = 9,
  NO_NON_EMPTY_LOOKAHEAD = 10,
  AMBIGUOUS_PREFIX_ALTS = 11,
  TOO_MANY_ALTS = 12,
  CUSTOM_LOOKAHEAD_VALIDATION = 13
}
// Definition error for a DSL method (e.g. CONSUME1) appearing more than once
// with the same occurrence index inside a single rule.
export interface IParserDuplicatesDefinitionError
  extends IParserDefinitionError {
  dslName: string
  occurrence: number
  parameter?: string
}
// Definition error for an empty alternative that is not the **last**
// alternative of an OR (only the last alternative may be empty).
export interface IParserEmptyAlternativeDefinitionError
  extends IParserDefinitionError {
  occurrence: number
  alternative: number
}
// Definition error for OR alternatives that cannot be disambiguated within
// the configured lookahead.
export interface IParserAmbiguousAlternativesDefinitionError
  extends IParserDefinitionError {
  occurrence: number | string
  alternatives: number[]
}
// Definition error for a SUBRULE referencing a rule that does not exist.
export interface IParserUnresolvedRefDefinitionError
  extends IParserDefinitionError {
  unresolvedRefName: string
}
// Snapshot of the parser's mutable state, saved/restored by error recovery
// and backtracking logic.
export interface IParserState {
  errors: IRecognitionException[]
  lexerState: any
  RULE_STACK: number[]
  CST_STACK: CstNode[]
}
// A GATE condition: returns true when the guarded alternative may be taken.
export type Predicate = () => boolean
/**
 * Convenience factory for an empty alternative inside an `OR`.
 *
 * Produces a function that always yields the provided `value`
 * (or `undefined` when no value was given), to be used as an ALT
 * implementation that consumes no tokens.
 */
export function EMPTY_ALT(): () => undefined
export function EMPTY_ALT<T>(value: T): () => T
export function EMPTY_ALT(value: any = undefined) {
  return () => value
}
/**
 * The base Parser class from which `CstParser` and `EmbeddedActionsParser`
 * derive. Most of its capabilities are composed from the trait classes
 * mixed in via the `applyMixins` call below (see parser_traits).
 */
export class Parser {
  // Set this flag to true if you don't want the Parser to throw error when problems in it's definition are detected.
  // (normally during the parser's constructor).
  // This is a design time flag, it will not affect the runtime error handling of the parser, just design time errors,
  // for example: duplicate rule names, referencing an unresolved subrule, ect...
  // This flag should not be enabled during normal usage, it is used in special situations, for example when
  // needing to display the parser definition errors in some GUI(online playground).
  static DEFER_DEFINITION_ERRORS_HANDLING: boolean = false
  /**
   * @deprecated use the **instance** method with the same name instead
   */
  static performSelfAnalysis(parserInstance: Parser): void {
    throw Error(
      "The **static** `performSelfAnalysis` method has been deprecated." +
        "\t\nUse the **instance** method with the same name instead."
    )
  }
  /**
   * Records the grammar (builds the GAST), resolves subrule references,
   * validates the grammar, and pre-computes lookahead functions.
   * Must be invoked exactly once, at the end of a concrete parser's
   * constructor, after all rules were defined.
   *
   * Throws if definition errors were found (unless
   * `DEFER_DEFINITION_ERRORS_HANDLING` is enabled).
   */
  public performSelfAnalysis(this: MixedInParser): void {
    this.TRACE_INIT("performSelfAnalysis", () => {
      let defErrorsMsgs
      this.selfAnalysisDone = true
      const className = this.className
      this.TRACE_INIT("toFastProps", () => {
        // Without this voodoo magic the parser would be x3-x4 slower
        // It seems it is better to invoke `toFastProperties` **before**
        // Any manipulations of the `this` object done during the recording phase.
        toFastProperties(this)
      })
      this.TRACE_INIT("Grammar Recording", () => {
        try {
          this.enableRecording()
          // Building the GAST
          forEach(this.definedRulesNames, (currRuleName) => {
            const wrappedRule = (this as any)[
              currRuleName
            ] as ParserMethodInternal<unknown[], unknown>
            const originalGrammarAction = wrappedRule["originalGrammarAction"]
            let recordedRuleGast!: Rule
            this.TRACE_INIT(`${currRuleName} Rule`, () => {
              recordedRuleGast = this.topLevelRuleRecord(
                currRuleName,
                originalGrammarAction
              )
            })
            this.gastProductionsCache[currRuleName] = recordedRuleGast
          })
        } finally {
          // Always restore the real DSL methods, even if recording threw.
          this.disableRecording()
        }
      })
      let resolverErrors: IParserDefinitionError[] = []
      this.TRACE_INIT("Grammar Resolving", () => {
        // Links each NonTerminal reference to its actual Rule object.
        resolverErrors = resolveGrammar({
          rules: values(this.gastProductionsCache)
        })
        this.definitionErrors = this.definitionErrors.concat(resolverErrors)
      })
      this.TRACE_INIT("Grammar Validations", () => {
        // only perform additional grammar validations IFF no resolving errors have occurred.
        // as unresolved grammar may lead to unhandled runtime exceptions in the follow up validations.
        if (isEmpty(resolverErrors) && this.skipValidations === false) {
          const validationErrors = validateGrammar({
            rules: values(this.gastProductionsCache),
            tokenTypes: values(this.tokensMap),
            errMsgProvider: defaultGrammarValidatorErrorProvider,
            grammarName: className
          })
          const lookaheadValidationErrors = validateLookahead({
            lookaheadStrategy: this.lookaheadStrategy,
            rules: values(this.gastProductionsCache),
            tokenTypes: values(this.tokensMap),
            grammarName: className
          })
          this.definitionErrors = this.definitionErrors.concat(
            validationErrors,
            lookaheadValidationErrors
          )
        }
      })
      // this analysis may fail if the grammar is not perfectly valid
      if (isEmpty(this.definitionErrors)) {
        // The results of these computations are not needed unless error recovery is enabled.
        if (this.recoveryEnabled) {
          this.TRACE_INIT("computeAllProdsFollows", () => {
            const allFollows = computeAllProdsFollows(
              values(this.gastProductionsCache)
            )
            this.resyncFollows = allFollows
          })
        }
        this.TRACE_INIT("ComputeLookaheadFunctions", () => {
          this.lookaheadStrategy.initialize?.({
            rules: values(this.gastProductionsCache)
          })
          this.preComputeLookaheadFunctions(values(this.gastProductionsCache))
        })
      }
      if (
        !Parser.DEFER_DEFINITION_ERRORS_HANDLING &&
        !isEmpty(this.definitionErrors)
      ) {
        defErrorsMsgs = map(
          this.definitionErrors,
          (defError) => defError.message
        )
        throw new Error(
          `Parser Definition Errors detected:\n ${defErrorsMsgs.join(
            "\n-------------------------------\n"
          )}`
        )
      }
    })
  }
  // Grammar definition (design time) errors accumulated during self analysis.
  definitionErrors: IParserDefinitionError[] = []
  // Guards against feeding input before performSelfAnalysis was invoked.
  selfAnalysisDone = false
  // When true, grammar validations (but not resolving) are skipped.
  protected skipValidations: boolean
  /**
   * Initializes all the mixed-in traits. The `that` cast is required because
   * the trait init methods only exist on the fully mixed-in type.
   */
  constructor(tokenVocabulary: TokenVocabulary, config: IParserConfig) {
    const that: MixedInParser = this as any
    that.initErrorHandler(config)
    that.initLexerAdapter()
    that.initLooksAhead(config)
    that.initRecognizerEngine(tokenVocabulary, config)
    that.initRecoverable(config)
    that.initTreeBuilder(config)
    that.initContentAssist()
    that.initGastRecorder(config)
    that.initPerformanceTracer(config)
    if (has(config, "ignoredIssues")) {
      throw new Error(
        "The <ignoredIssues> IParserConfig property has been deprecated.\n\t" +
          "Please use the <IGNORE_AMBIGUITIES> flag on the relevant DSL method instead.\n\t" +
          "See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#IGNORING_AMBIGUITIES\n\t" +
          "For further details."
      )
    }
    this.skipValidations = has(config, "skipValidations")
      ? (config.skipValidations as boolean) // casting assumes the end user passing the correct type
      : DEFAULT_PARSER_CONFIG.skipValidations
  }
}
// Compose the Parser's full capabilities by copying each trait class's
// prototype members onto Parser.prototype (classic mixin pattern).
applyMixins(Parser, [
  Recoverable,
  LooksAhead,
  TreeBuilder,
  LexerAdapter,
  RecognizerEngine,
  RecognizerApi,
  ErrorHandler,
  ContentAssist,
  GastRecorder,
  PerformanceTracer
])
/**
 * A Parser variant that automatically outputs a Concrete Syntax Tree.
 * The user supplied config is shallow-copied (never mutated) and
 * `outputCst` is forced to `true`.
 */
export class CstParser extends Parser {
  constructor(
    tokenVocabulary: TokenVocabulary,
    config: IParserConfigInternal = DEFAULT_PARSER_CONFIG
  ) {
    super(tokenVocabulary, { ...clone(config), outputCst: true })
  }
}
/**
 * A Parser variant for grammars with embedded user actions (no automatic
 * CST output). The user supplied config is shallow-copied (never mutated)
 * and `outputCst` is forced to `false`.
 */
export class EmbeddedActionsParser extends Parser {
  constructor(
    tokenVocabulary: TokenVocabulary,
    config: IParserConfigInternal = DEFAULT_PARSER_CONFIG
  ) {
    super(tokenVocabulary, { ...clone(config), outputCst: false })
  }
}

View File

@@ -0,0 +1,52 @@
import {
ISyntacticContentAssistPath,
IToken,
ITokenGrammarPath,
TokenType
} from "@chevrotain/types"
import {
NextAfterTokenWalker,
nextPossibleTokensAfter
} from "../../grammar/interpreter"
import first from "lodash/first"
import isUndefined from "lodash/isUndefined"
import { MixedInParser } from "./parser_traits"
/**
 * Trait exposing content-assist (syntactic auto-complete) capabilities.
 */
export class ContentAssist {
  initContentAssist() {}

  /**
   * Computes the syntactic paths of possible next tokens after
   * `precedingInput`, starting from the rule named `startRuleName`.
   *
   * Throws when no rule with that name exists in the recorded grammar.
   */
  public computeContentAssist(
    this: MixedInParser,
    startRuleName: string,
    precedingInput: IToken[]
  ): ISyntacticContentAssistPath[] {
    const startRuleGast = this.gastProductionsCache[startRuleName]
    if (startRuleGast === undefined) {
      throw Error(`Rule ->${startRuleName}<- does not exist in this grammar.`)
    }
    return nextPossibleTokensAfter(
      [startRuleGast],
      precedingInput,
      this.tokenMatcher,
      this.maxLookahead
    )
  }

  // TODO: should this be a member method or a utility? it does not have any state or usage of 'this'...
  // TODO: should this be more explicitly part of the public API?
  /**
   * Walks the grammar along the provided `grammarPath` and returns the
   * TokenTypes that may legally appear next.
   */
  public getNextPossibleTokenTypes(
    this: MixedInParser,
    grammarPath: ITokenGrammarPath
  ): TokenType[] {
    const topRuleName = first(grammarPath.ruleStack)!
    const topProduction = this.getGAstProductions()[topRuleName]
    return new NextAfterTokenWalker(topProduction, grammarPath).startWalking()
  }
}

View File

@@ -0,0 +1,123 @@
import {
IParserConfig,
IParserErrorMessageProvider,
IRecognitionException
} from "@chevrotain/types"
import {
EarlyExitException,
isRecognitionException,
NoViableAltException
} from "../../exceptions_public"
import has from "lodash/has"
import clone from "lodash/clone"
import {
getLookaheadPathsForOptionalProd,
getLookaheadPathsForOr,
PROD_TYPE
} from "../../grammar/lookahead"
import { MixedInParser } from "./parser_traits"
import { DEFAULT_PARSER_CONFIG } from "../parser"
/**
 * Trait responsible for runtime parsing errors.
 */
export class ErrorHandler {
  // Internal mutable list of recognition errors; exposed (as a copy)
  // via the `errors` getter.
  _errors: IRecognitionException[]
  errorMessageProvider: IParserErrorMessageProvider
  // Initializes the error list and the (possibly user supplied)
  // error message provider.
  initErrorHandler(config: IParserConfig) {
    this._errors = []
    this.errorMessageProvider = has(config, "errorMessageProvider")
      ? (config.errorMessageProvider as IParserErrorMessageProvider) // assumes end user provides the correct config value/type
      : DEFAULT_PARSER_CONFIG.errorMessageProvider
  }
  // Attaches the current rule-stack context to a RecognitionException,
  // records it, and returns it. Throws if given a non-recognition error.
  SAVE_ERROR(
    this: MixedInParser,
    error: IRecognitionException
  ): IRecognitionException {
    if (isRecognitionException(error)) {
      error.context = {
        ruleStack: this.getHumanReadableRuleStack(),
        ruleOccurrenceStack: clone(this.RULE_OCCURRENCE_STACK)
      }
      this._errors.push(error)
      return error
    } else {
      throw Error("Trying to save an Error which is not a RecognitionException")
    }
  }
  // Returns a shallow copy so callers cannot mutate the internal list.
  get errors(): IRecognitionException[] {
    return clone(this._errors)
  }
  set errors(newErrors: IRecognitionException[]) {
    this._errors = newErrors
  }
  // TODO: consider caching the error message computed information
  // Builds, saves and throws an EarlyExitException for a repetition
  // (MANY/AT_LEAST_ONE/...) that could not match even one iteration.
  raiseEarlyExitException(
    this: MixedInParser,
    occurrence: number,
    prodType: PROD_TYPE,
    userDefinedErrMsg: string | undefined
  ): never {
    const ruleName = this.getCurrRuleFullName()
    const ruleGrammar = this.getGAstProductions()[ruleName]
    const lookAheadPathsPerAlternative = getLookaheadPathsForOptionalProd(
      occurrence,
      ruleGrammar,
      prodType,
      this.maxLookahead
    )
    const insideProdPaths = lookAheadPathsPerAlternative[0]
    const actualTokens = []
    for (let i = 1; i <= this.maxLookahead; i++) {
      actualTokens.push(this.LA(i))
    }
    const msg = this.errorMessageProvider.buildEarlyExitMessage({
      expectedIterationPaths: insideProdPaths,
      actual: actualTokens,
      previous: this.LA(0),
      customUserDescription: userDefinedErrMsg,
      ruleName: ruleName
    })
    throw this.SAVE_ERROR(new EarlyExitException(msg, this.LA(1), this.LA(0)))
  }
  // TODO: consider caching the error message computed information
  // Builds, saves and throws a NoViableAltException for an OR in which
  // no alternative matched the upcoming tokens.
  raiseNoAltException(
    this: MixedInParser,
    occurrence: number,
    errMsgTypes: string | undefined
  ): never {
    const ruleName = this.getCurrRuleFullName()
    const ruleGrammar = this.getGAstProductions()[ruleName]
    // TODO: getLookaheadPathsForOr can be slow for large enough maxLookahead and certain grammars, consider caching ?
    const lookAheadPathsPerAlternative = getLookaheadPathsForOr(
      occurrence,
      ruleGrammar,
      this.maxLookahead
    )
    const actualTokens = []
    for (let i = 1; i <= this.maxLookahead; i++) {
      actualTokens.push(this.LA(i))
    }
    const previousToken = this.LA(0)
    const errMsg = this.errorMessageProvider.buildNoViableAltMessage({
      expectedPathsPerAlt: lookAheadPathsPerAlternative,
      actual: actualTokens,
      previous: previousToken,
      customUserDescription: errMsgTypes,
      ruleName: this.getCurrRuleFullName()
    })
    throw this.SAVE_ERROR(
      new NoViableAltException(errMsg, this.LA(1), previousToken)
    )
  }
}

View File

@@ -0,0 +1,446 @@
import {
AtLeastOneSepMethodOpts,
ConsumeMethodOpts,
CstNode,
DSLMethodOpts,
DSLMethodOptsWithErr,
GrammarAction,
IOrAlt,
IParserConfig,
IProduction,
IToken,
ManySepMethodOpts,
OrMethodOpts,
SubruleMethodOpts,
TokenType
} from "@chevrotain/types"
import peek from "lodash/last"
import isArray from "lodash/isArray"
import some from "lodash/some"
import forEach from "lodash/forEach"
import isFunction from "lodash/isFunction"
import has from "lodash/has"
import { MixedInParser } from "./parser_traits"
import {
Alternation,
Alternative,
NonTerminal,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Rule,
Terminal
} from "@chevrotain/gast"
import { Lexer } from "../../../scan/lexer_public"
import { augmentTokenTypes, hasShortKeyProperty } from "../../../scan/tokens"
import { createToken, createTokenInstance } from "../../../scan/tokens_public"
import { END_OF_FILE } from "../parser"
import { BITS_FOR_OCCURRENCE_IDX } from "../../grammar/keys"
import { ParserMethodInternal } from "../types"
// A production that may expose a mutable `definition` array (the recorder
// appends child productions into it).
type ProdWithDef = IProduction & { definition?: IProduction[] }
// Placeholder returned from recorded DSL methods in embedded-actions mode.
const RECORDING_NULL_OBJECT = {
  description: "This Object indicates the Parser is during Recording Phase"
}
Object.freeze(RECORDING_NULL_OBJECT)
// Readability flag: marks recordProd calls for productions with a separator.
const HANDLE_SEPARATOR = true
// Highest occurrence index encodable in the automatic lookahead cache keys.
const MAX_METHOD_IDX = Math.pow(2, BITS_FOR_OCCURRENCE_IDX) - 1
// A TokenType that can never be matched (Lexer.NA), used only to construct
// the recording-phase placeholder token below.
const RFT = createToken({ name: "RECORDING_PHASE_TOKEN", pattern: Lexer.NA })
augmentTokenTypes([RFT])
// Placeholder IToken returned from CONSUME during the recording phase.
const RECORDING_PHASE_TOKEN = createTokenInstance(
  RFT,
  "This IToken indicates the Parser is in Recording Phase\n\t" +
    "" +
    "See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details",
  // Using "-1" instead of NaN (as in EOF) because an actual number is less likely to
  // cause errors if the output of LA or CONSUME would be (incorrectly) used during the recording phase.
  -1,
  -1,
  -1,
  -1,
  -1,
  -1
)
Object.freeze(RECORDING_PHASE_TOKEN)
// Placeholder CstNode returned from SUBRULE during recording (CST mode).
const RECORDING_PHASE_CSTNODE: CstNode = {
  name:
    "This CSTNode indicates the Parser is in Recording Phase\n\t" +
    "See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details",
  children: {}
}
/**
 * This trait handles the creation of the GAST structure for Chevrotain Grammars
 */
export class GastRecorder {
  // Stack of productions currently being recorded (innermost last);
  // index 0 is always the top level Rule.
  recordingProdStack: ProdWithDef[]
  RECORDING_PHASE: boolean
  initGastRecorder(this: MixedInParser, config: IParserConfig): void {
    this.recordingProdStack = []
    this.RECORDING_PHASE = false
  }
  // Swaps the public parsing DSL methods with recording implementations
  // (installed as **own** instance properties, shadowing the prototype).
  enableRecording(this: MixedInParser): void {
    this.RECORDING_PHASE = true
    this.TRACE_INIT("Enable Recording", () => {
      /**
       * Warning Dark Voodoo Magic upcoming!
       * We are "replacing" the public parsing DSL methods API
       * With **new** alternative implementations on the Parser **instance**
       *
       * So far this is the only way I've found to avoid performance regressions during parsing time.
       * - Approx 30% performance regression was measured on Chrome 75 Canary when attempting to replace the "internal"
       * implementations directly instead.
       */
      for (let i = 0; i < 10; i++) {
        const idx = i > 0 ? i : ""
        this[`CONSUME${idx}` as "CONSUME"] = function (arg1, arg2) {
          return this.consumeInternalRecord(arg1, i, arg2)
        }
        this[`SUBRULE${idx}` as "SUBRULE"] = function (arg1, arg2) {
          return this.subruleInternalRecord(arg1, i, arg2) as any
        }
        this[`OPTION${idx}` as "OPTION"] = function (arg1) {
          return this.optionInternalRecord(arg1, i)
        }
        this[`OR${idx}` as "OR"] = function (arg1) {
          return this.orInternalRecord(arg1, i)
        }
        this[`MANY${idx}` as "MANY"] = function (arg1) {
          this.manyInternalRecord(i, arg1)
        }
        this[`MANY_SEP${idx}` as "MANY_SEP"] = function (arg1) {
          this.manySepFirstInternalRecord(i, arg1)
        }
        this[`AT_LEAST_ONE${idx}` as "AT_LEAST_ONE"] = function (arg1) {
          this.atLeastOneInternalRecord(i, arg1)
        }
        this[`AT_LEAST_ONE_SEP${idx}` as "AT_LEAST_ONE_SEP"] = function (arg1) {
          this.atLeastOneSepFirstInternalRecord(i, arg1)
        }
      }
      // DSL methods with the idx(suffix) as an argument
      this[`consume`] = function (idx, arg1, arg2) {
        return this.consumeInternalRecord(arg1, idx, arg2)
      }
      this[`subrule`] = function (idx, arg1, arg2) {
        return this.subruleInternalRecord(arg1, idx, arg2) as any
      }
      this[`option`] = function (idx, arg1) {
        return this.optionInternalRecord(arg1, idx)
      }
      this[`or`] = function (idx, arg1) {
        return this.orInternalRecord(arg1, idx)
      }
      this[`many`] = function (idx, arg1) {
        this.manyInternalRecord(idx, arg1)
      }
      this[`atLeastOne`] = function (idx, arg1) {
        this.atLeastOneInternalRecord(idx, arg1)
      }
      this.ACTION = this.ACTION_RECORD
      this.BACKTRACK = this.BACKTRACK_RECORD
      this.LA = this.LA_RECORD
    })
  }
  // Restores the real DSL methods by deleting the instance-level shadows.
  disableRecording(this: MixedInParser) {
    this.RECORDING_PHASE = false
    // By deleting these **instance** properties, any future invocation
    // will be deferred to the original methods on the **prototype** object
    // This seems to get rid of any incorrect optimizations that V8 may
    // do during the recording phase.
    this.TRACE_INIT("Deleting Recording methods", () => {
      const that: any = this
      for (let i = 0; i < 10; i++) {
        const idx = i > 0 ? i : ""
        delete that[`CONSUME${idx}`]
        delete that[`SUBRULE${idx}`]
        delete that[`OPTION${idx}`]
        delete that[`OR${idx}`]
        delete that[`MANY${idx}`]
        delete that[`MANY_SEP${idx}`]
        delete that[`AT_LEAST_ONE${idx}`]
        delete that[`AT_LEAST_ONE_SEP${idx}`]
      }
      delete that[`consume`]
      delete that[`subrule`]
      delete that[`option`]
      delete that[`or`]
      delete that[`many`]
      delete that[`atLeastOne`]
      delete that.ACTION
      delete that.BACKTRACK
      delete that.LA
    })
  }
  // Parser methods are called inside an ACTION?
  // Maybe try/catch/finally on ACTIONS while disabling the recorders state changes?
  // @ts-expect-error -- noop place holder
  ACTION_RECORD<T>(this: MixedInParser, impl: () => T): T {
    // NO-OP during recording
  }
  // Executing backtracking logic will break our recording logic assumptions
  BACKTRACK_RECORD<T>(
    grammarRule: (...args: any[]) => T,
    args?: any[]
  ): () => boolean {
    return () => true
  }
  // LA is part of the official API and may be used for custom lookahead logic
  // by end users who may forget to wrap it in ACTION or inside a GATE
  LA_RECORD(howMuch: number): IToken {
    // We cannot use the RECORD_PHASE_TOKEN here because someone may depend
    // On LA return EOF at the end of the input so an infinite loop may occur.
    return END_OF_FILE
  }
  // Records a single top level Rule's GAST by invoking its original
  // grammar action with the recording DSL methods installed.
  topLevelRuleRecord(name: string, def: Function): Rule {
    try {
      const newTopLevelRule = new Rule({ definition: [], name: name })
      newTopLevelRule.name = name
      this.recordingProdStack.push(newTopLevelRule)
      def.call(this)
      this.recordingProdStack.pop()
      return newTopLevelRule
    } catch (originalError) {
      if (originalError.KNOWN_RECORDER_ERROR !== true) {
        try {
          // Enrich unexpected errors with a pointer to the recording docs.
          originalError.message =
            originalError.message +
            '\n\t This error was thrown during the "grammar recording phase" For more info see:\n\t' +
            "https://chevrotain.io/docs/guide/internals.html#grammar-recording"
        } catch (mutabilityError) {
          // We may not be able to modify the original error object
          throw originalError
        }
      }
      throw originalError
    }
  }
  // Implementation of parsing DSL
  // Records an OPTION production.
  optionInternalRecord<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
    occurrence: number
  ): OUT {
    return recordProd.call(this, Option, actionORMethodDef, occurrence)
  }
  // Records an AT_LEAST_ONE production.
  atLeastOneInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    recordProd.call(this, RepetitionMandatory, actionORMethodDef, occurrence)
  }
  // Records an AT_LEAST_ONE_SEP production (has a separator).
  atLeastOneSepFirstInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    recordProd.call(
      this,
      RepetitionMandatoryWithSeparator,
      options,
      occurrence,
      HANDLE_SEPARATOR
    )
  }
  // Records a MANY production.
  manyInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    recordProd.call(this, Repetition, actionORMethodDef, occurrence)
  }
  // Records a MANY_SEP production (has a separator).
  manySepFirstInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    options: ManySepMethodOpts<OUT>
  ): void {
    recordProd.call(
      this,
      RepetitionWithSeparator,
      options,
      occurrence,
      HANDLE_SEPARATOR
    )
  }
  // Records an OR production (alternation).
  orInternalRecord<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>,
    occurrence: number
  ): T {
    return recordOrProd.call(this, altsOrOpts, occurrence)
  }
  // Records a SUBRULE reference as an (initially unresolved) NonTerminal.
  subruleInternalRecord<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    occurrence: number,
    options?: SubruleMethodOpts<ARGS>
  ): R | CstNode {
    assertMethodIdxIsValid(occurrence)
    if (!ruleToCall || has(ruleToCall, "ruleName") === false) {
      const error: any = new Error(
        `<SUBRULE${getIdxSuffix(occurrence)}> argument is invalid` +
          ` expecting a Parser method reference but got: <${JSON.stringify(
            ruleToCall
          )}>` +
          `\n inside top level rule: <${
            (<Rule>this.recordingProdStack[0]).name
          }>`
      )
      error.KNOWN_RECORDER_ERROR = true
      throw error
    }
    const prevProd: any = peek(this.recordingProdStack)
    const ruleName = ruleToCall.ruleName
    const newNoneTerminal = new NonTerminal({
      idx: occurrence,
      nonTerminalName: ruleName,
      label: options?.LABEL,
      // The resolving of the `referencedRule` property will be done once all the Rule's GASTs have been created
      referencedRule: undefined
    })
    prevProd.definition.push(newNoneTerminal)
    return this.outputCst ? RECORDING_PHASE_CSTNODE : <any>RECORDING_NULL_OBJECT
  }
  // Records a CONSUME as a Terminal and returns the placeholder token.
  consumeInternalRecord(
    this: MixedInParser,
    tokType: TokenType,
    occurrence: number,
    options?: ConsumeMethodOpts
  ): IToken {
    assertMethodIdxIsValid(occurrence)
    if (!hasShortKeyProperty(tokType)) {
      const error: any = new Error(
        `<CONSUME${getIdxSuffix(occurrence)}> argument is invalid` +
          ` expecting a TokenType reference but got: <${JSON.stringify(
            tokType
          )}>` +
          `\n inside top level rule: <${
            (<Rule>this.recordingProdStack[0]).name
          }>`
      )
      error.KNOWN_RECORDER_ERROR = true
      throw error
    }
    const prevProd: any = peek(this.recordingProdStack)
    const newNoneTerminal = new Terminal({
      idx: occurrence,
      terminalType: tokType,
      label: options?.LABEL
    })
    prevProd.definition.push(newNoneTerminal)
    return RECORDING_PHASE_TOKEN
  }
}
// Shared recording implementation for all "nesting" DSL methods
// (OPTION / MANY / AT_LEAST_ONE and their _SEP variants).
// Creates the matching GAST production, pushes it onto the recording stack,
// runs the user's grammar action (which records the nested contents), then
// attaches the new production to its parent.
// Invoked via `.call(this, ...)` with the MixedInParser as `this`.
function recordProd(
  prodConstructor: any,
  mainProdArg: any,
  occurrence: number,
  handleSep: boolean = false
): any {
  assertMethodIdxIsValid(occurrence)
  const prevProd: any = peek(this.recordingProdStack)
  // The argument may be the grammar action itself or a DEF options object.
  const grammarAction = isFunction(mainProdArg) ? mainProdArg : mainProdArg.DEF
  const newProd = new prodConstructor({ definition: [], idx: occurrence })
  if (handleSep) {
    newProd.separator = mainProdArg.SEP
  }
  if (has(mainProdArg, "MAX_LOOKAHEAD")) {
    newProd.maxLookahead = mainProdArg.MAX_LOOKAHEAD
  }
  this.recordingProdStack.push(newProd)
  grammarAction.call(this)
  prevProd.definition.push(newProd)
  this.recordingProdStack.pop()
  return RECORDING_NULL_OBJECT
}
// Recording implementation for OR (alternation) DSL methods.
// Builds an Alternation whose children are flat Alternatives, one per ALT,
// recording each alternative's contents by invoking its ALT function.
// Invoked via `.call(this, ...)` with the MixedInParser as `this`.
function recordOrProd(mainProdArg: any, occurrence: number): any {
  assertMethodIdxIsValid(occurrence)
  const prevProd: any = peek(this.recordingProdStack)
  // Only an array of alternatives
  const hasOptions = isArray(mainProdArg) === false
  const alts: IOrAlt<unknown>[] =
    hasOptions === false ? mainProdArg : mainProdArg.DEF
  const newOrProd = new Alternation({
    definition: [],
    idx: occurrence,
    ignoreAmbiguities: hasOptions && mainProdArg.IGNORE_AMBIGUITIES === true
  })
  if (has(mainProdArg, "MAX_LOOKAHEAD")) {
    newOrProd.maxLookahead = mainProdArg.MAX_LOOKAHEAD
  }
  // Predicate (GATE) presence forces runtime predicate evaluation later.
  const hasPredicates = some(alts, (currAlt: any) => isFunction(currAlt.GATE))
  newOrProd.hasPredicates = hasPredicates
  prevProd.definition.push(newOrProd)
  forEach(alts, (currAlt) => {
    const currAltFlat = new Alternative({ definition: [] })
    newOrProd.definition.push(currAltFlat)
    if (has(currAlt, "IGNORE_AMBIGUITIES")) {
      currAltFlat.ignoreAmbiguities = currAlt.IGNORE_AMBIGUITIES as boolean // assumes end user provides the correct config value/type
    }
    // **implicit** ignoreAmbiguities due to usage of gate
    else if (has(currAlt, "GATE")) {
      currAltFlat.ignoreAmbiguities = true
    }
    this.recordingProdStack.push(currAltFlat)
    currAlt.ALT.call(this)
    this.recordingProdStack.pop()
  })
  return RECORDING_NULL_OBJECT
}
// Maps a DSL method occurrence index to its method-name suffix:
// occurrence 0 uses the bare name (e.g. <CONSUME>), others append the index.
function getIdxSuffix(idx: number): string {
  if (idx === 0) {
    return ""
  }
  return String(idx)
}
// Validates that a DSL method occurrence index fits the range encodable in
// the automatic lookahead cache keys; throws a recorder-flagged error
// (KNOWN_RECORDER_ERROR) otherwise.
function assertMethodIdxIsValid(idx: number): void {
  const isValid = idx >= 0 && idx <= MAX_METHOD_IDX
  if (isValid) {
    return
  }
  // The stack trace will contain all the needed details
  const error: any = new Error(
    `Invalid DSL Method idx value: <${idx}>\n\t` +
      `Idx value must be a none negative value smaller than ${
        MAX_METHOD_IDX + 1
      }`
  )
  error.KNOWN_RECORDER_ERROR = true
  throw error
}

View File

@@ -0,0 +1,86 @@
import { END_OF_FILE } from "../parser"
import { IToken } from "@chevrotain/types"
import { MixedInParser } from "./parser_traits"
/**
 * Trait responsible abstracting over the interaction with Lexer output (Token vector).
 *
 * This could be generalized to support other kinds of lexers, e.g.
 * - Just in Time Lexing / Lexer-Less parsing.
 * - Streaming Lexer.
 */
export class LexerAdapter {
  // The token vector produced by the Lexer.
  tokVector: IToken[]
  // Cached length of tokVector (avoids repeated property lookups).
  tokVectorLength: number
  // Index of the last consumed token; -1 means nothing consumed yet.
  currIdx: number

  initLexerAdapter() {
    this.tokVector = []
    this.tokVectorLength = 0
    this.currIdx = -1
  }

  /** Feeds a new token vector into the parser and resets its state. */
  set input(newInput: IToken[]) {
    // @ts-ignore - `this parameter` not supported in setters/getters
    //   - https://www.typescriptlang.org/docs/handbook/functions.html#this-parameters
    if (this.selfAnalysisDone !== true) {
      throw Error(
        `Missing <performSelfAnalysis> invocation at the end of the Parser's constructor.`
      )
    }
    // @ts-ignore - `this parameter` not supported in setters/getters
    //   - https://www.typescriptlang.org/docs/handbook/functions.html#this-parameters
    this.reset()
    this.tokVector = newInput
    this.tokVectorLength = newInput.length
  }

  get input(): IToken[] {
    return this.tokVector
  }

  // skips a token and returns the next token
  SKIP_TOKEN(this: MixedInParser): IToken {
    const canSkip = this.currIdx <= this.tokVector.length - 2
    if (!canSkip) {
      return END_OF_FILE
    }
    this.consumeToken()
    return this.LA(1)
  }

  // Lexer (accessing Token vector) related methods which can be overridden to implement lazy lexers
  // or lexers dependent on parser context.
  LA(this: MixedInParser, howMuch: number): IToken {
    const soughtIdx = this.currIdx + howMuch
    const inBounds = soughtIdx >= 0 && soughtIdx < this.tokVectorLength
    return inBounds ? this.tokVector[soughtIdx] : END_OF_FILE
  }

  consumeToken(this: MixedInParser) {
    this.currIdx += 1
  }

  exportLexerState(this: MixedInParser): number {
    return this.currIdx
  }

  importLexerState(this: MixedInParser, newState: number) {
    this.currIdx = newState
  }

  resetLexerState(this: MixedInParser): void {
    this.importLexerState(-1)
  }

  moveToTerminatedState(this: MixedInParser): void {
    this.importLexerState(this.tokVector.length - 1)
  }

  getLexerPosition(this: MixedInParser): number {
    return this.exportLexerState()
  }
}

View File

@@ -0,0 +1,270 @@
import forEach from "lodash/forEach"
import has from "lodash/has"
import { DEFAULT_PARSER_CONFIG } from "../parser"
import {
ILookaheadStrategy,
IParserConfig,
OptionalProductionType
} from "@chevrotain/types"
import {
AT_LEAST_ONE_IDX,
AT_LEAST_ONE_SEP_IDX,
getKeyForAutomaticLookahead,
MANY_IDX,
MANY_SEP_IDX,
OPTION_IDX,
OR_IDX
} from "../../grammar/keys"
import { MixedInParser } from "./parser_traits"
import {
Alternation,
GAstVisitor,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Rule
} from "@chevrotain/gast"
import { getProductionDslName } from "@chevrotain/gast"
import { LLkLookaheadStrategy } from "../../grammar/llk_lookahead"
/**
 * Trait responsible for the lookahead related utilities and optimizations.
 */
export class LooksAhead {
  maxLookahead: number
  // Cache of pre-computed lookahead functions keyed by encoded
  // (rule, dsl-method, occurrence) keys - see getKeyForAutomaticLookahead.
  lookAheadFuncsCache: any
  dynamicTokensEnabled: boolean
  lookaheadStrategy: ILookaheadStrategy
  // Reads lookahead related configuration (falling back to defaults)
  // and prepares the lookahead function cache.
  initLooksAhead(config: IParserConfig) {
    this.dynamicTokensEnabled = has(config, "dynamicTokensEnabled")
      ? (config.dynamicTokensEnabled as boolean) // assumes end user provides the correct config value/type
      : DEFAULT_PARSER_CONFIG.dynamicTokensEnabled
    this.maxLookahead = has(config, "maxLookahead")
      ? (config.maxLookahead as number) // assumes end user provides the correct config value/type
      : DEFAULT_PARSER_CONFIG.maxLookahead
    this.lookaheadStrategy = has(config, "lookaheadStrategy")
      ? (config.lookaheadStrategy as ILookaheadStrategy) // assumes end user provides the correct config value/type
      : new LLkLookaheadStrategy({ maxLookahead: this.maxLookahead })
    this.lookAheadFuncsCache = new Map()
  }
  // Pre-computes and caches a lookahead function for every alternation and
  // optional production (OPTION / MANY / AT_LEAST_ONE / _SEP variants)
  // in every rule, so no lookahead is built at parse time.
  preComputeLookaheadFunctions(this: MixedInParser, rules: Rule[]): void {
    forEach(rules, (currRule) => {
      this.TRACE_INIT(`${currRule.name} Rule Lookahead`, () => {
        const {
          alternation,
          repetition,
          option,
          repetitionMandatory,
          repetitionMandatoryWithSeparator,
          repetitionWithSeparator
        } = collectMethods(currRule)
        // Alternations need a dedicated builder (predicates / alt choice).
        forEach(alternation, (currProd) => {
          const prodIdx = currProd.idx === 0 ? "" : currProd.idx
          this.TRACE_INIT(`${getProductionDslName(currProd)}${prodIdx}`, () => {
            const laFunc = this.lookaheadStrategy.buildLookaheadForAlternation({
              prodOccurrence: currProd.idx,
              rule: currRule,
              maxLookahead: currProd.maxLookahead || this.maxLookahead,
              hasPredicates: currProd.hasPredicates,
              dynamicTokensEnabled: this.dynamicTokensEnabled
            })
            const key = getKeyForAutomaticLookahead(
              this.fullRuleNameToShort[currRule.name],
              OR_IDX,
              currProd.idx
            )
            this.setLaFuncCache(key, laFunc)
          })
        })
        forEach(repetition, (currProd) => {
          this.computeLookaheadFunc(
            currRule,
            currProd.idx,
            MANY_IDX,
            "Repetition",
            currProd.maxLookahead,
            getProductionDslName(currProd)
          )
        })
        forEach(option, (currProd) => {
          this.computeLookaheadFunc(
            currRule,
            currProd.idx,
            OPTION_IDX,
            "Option",
            currProd.maxLookahead,
            getProductionDslName(currProd)
          )
        })
        forEach(repetitionMandatory, (currProd) => {
          this.computeLookaheadFunc(
            currRule,
            currProd.idx,
            AT_LEAST_ONE_IDX,
            "RepetitionMandatory",
            currProd.maxLookahead,
            getProductionDslName(currProd)
          )
        })
        forEach(repetitionMandatoryWithSeparator, (currProd) => {
          this.computeLookaheadFunc(
            currRule,
            currProd.idx,
            AT_LEAST_ONE_SEP_IDX,
            "RepetitionMandatoryWithSeparator",
            currProd.maxLookahead,
            getProductionDslName(currProd)
          )
        })
        forEach(repetitionWithSeparator, (currProd) => {
          this.computeLookaheadFunc(
            currRule,
            currProd.idx,
            MANY_SEP_IDX,
            "RepetitionWithSeparator",
            currProd.maxLookahead,
            getProductionDslName(currProd)
          )
        })
      })
    })
  }
  // Builds and caches the lookahead function for one optional production.
  computeLookaheadFunc(
    this: MixedInParser,
    rule: Rule,
    prodOccurrence: number,
    prodKey: number,
    prodType: OptionalProductionType,
    prodMaxLookahead: number | undefined,
    dslMethodName: string
  ): void {
    this.TRACE_INIT(
      `${dslMethodName}${prodOccurrence === 0 ? "" : prodOccurrence}`,
      () => {
        const laFunc = this.lookaheadStrategy.buildLookaheadForOptional({
          prodOccurrence,
          rule,
          maxLookahead: prodMaxLookahead || this.maxLookahead,
          dynamicTokensEnabled: this.dynamicTokensEnabled,
          prodType
        })
        const key = getKeyForAutomaticLookahead(
          this.fullRuleNameToShort[rule.name],
          prodKey,
          prodOccurrence
        )
        this.setLaFuncCache(key, laFunc)
      }
    )
  }
  // this actually returns a number, but it is always used as a string (object prop key)
  getKeyForAutomaticLookahead(
    this: MixedInParser,
    dslMethodIdx: number,
    occurrence: number
  ): number {
    const currRuleShortName: any = this.getLastExplicitRuleShortName()
    return getKeyForAutomaticLookahead(
      currRuleShortName,
      dslMethodIdx,
      occurrence
    )
  }
  getLaFuncFromCache(this: MixedInParser, key: number): Function {
    return this.lookAheadFuncsCache.get(key)
  }
  /* istanbul ignore next */
  setLaFuncCache(this: MixedInParser, key: number, value: Function): void {
    this.lookAheadFuncsCache.set(key, value)
  }
}
/**
 * GAST visitor that collects all the DSL method productions
 * (OPTION / OR / MANY / ... ) appearing inside a single Rule,
 * grouped by production type.
 */
class DslMethodsCollectorVisitor extends GAstVisitor {
  // Single source of truth for the empty collection state. Previously this
  // literal was duplicated between the field initializer and reset().
  private static emptyDslMethods() {
    return {
      option: [] as Option[],
      alternation: [] as Alternation[],
      repetition: [] as Repetition[],
      repetitionWithSeparator: [] as RepetitionWithSeparator[],
      repetitionMandatory: [] as RepetitionMandatory[],
      repetitionMandatoryWithSeparator: [] as RepetitionMandatoryWithSeparator[]
    }
  }

  // The productions collected so far, grouped by production type.
  public dslMethods: {
    option: Option[]
    alternation: Alternation[]
    repetition: Repetition[]
    repetitionWithSeparator: RepetitionWithSeparator[]
    repetitionMandatory: RepetitionMandatory[]
    repetitionMandatoryWithSeparator: RepetitionMandatoryWithSeparator[]
  } = DslMethodsCollectorVisitor.emptyDslMethods()

  /** Clears all collected productions so the instance can be reused. */
  reset() {
    this.dslMethods = DslMethodsCollectorVisitor.emptyDslMethods()
  }

  public visitOption(option: Option): void {
    this.dslMethods.option.push(option)
  }
  public visitRepetitionWithSeparator(manySep: RepetitionWithSeparator): void {
    this.dslMethods.repetitionWithSeparator.push(manySep)
  }
  public visitRepetitionMandatory(atLeastOne: RepetitionMandatory): void {
    this.dslMethods.repetitionMandatory.push(atLeastOne)
  }
  public visitRepetitionMandatoryWithSeparator(
    atLeastOneSep: RepetitionMandatoryWithSeparator
  ): void {
    this.dslMethods.repetitionMandatoryWithSeparator.push(atLeastOneSep)
  }
  public visitRepetition(many: Repetition): void {
    this.dslMethods.repetition.push(many)
  }
  public visitAlternation(or: Alternation): void {
    this.dslMethods.alternation.push(or)
  }
}
// Singleton visitor instance, reused across collectMethods invocations.
const collectorVisitor = new DslMethodsCollectorVisitor()

/**
 * Collects all DSL method productions (OPTION / OR / MANY / MANY_SEP /
 * AT_LEAST_ONE / AT_LEAST_ONE_SEP) of a single rule, grouped by type.
 */
export function collectMethods(rule: Rule): {
  option: Option[]
  alternation: Alternation[]
  repetition: Repetition[]
  repetitionWithSeparator: RepetitionWithSeparator[]
  repetitionMandatory: RepetitionMandatory[]
  repetitionMandatoryWithSeparator: RepetitionMandatoryWithSeparator[]
} {
  collectorVisitor.reset()
  rule.accept(collectorVisitor)
  const dslMethods = collectorVisitor.dslMethods
  // reset again so the shared singleton does not keep the last rule's
  // productions alive (we already hold our own reference to the result)
  collectorVisitor.reset()
  // the collected structure matches the declared return type exactly,
  // so the previous <any> cast was unnecessary
  return dslMethods
}

View File

@@ -0,0 +1,58 @@
import { ErrorHandler } from "./error_handler"
import { LexerAdapter } from "./lexer_adapter"
import { LooksAhead } from "./looksahead"
import { RecognizerApi } from "./recognizer_api"
import { RecognizerEngine } from "./recognizer_engine"
import { Recoverable } from "./recoverable"
import { TreeBuilder } from "./tree_builder"
import {
Parser as ParserConstructorImpel,
CstParser as CstParserConstructorImpel,
EmbeddedActionsParser as EmbeddedActionsParserConstructorImpl
} from "../parser"
import * as defs from "@chevrotain/types"
import { ContentAssist } from "./context_assist"
import { GastRecorder } from "./gast_recorder"
import { PerformanceTracer } from "./perf_tracer"
/**
* This Type combines all the Parser traits.
* It is used in all traits in the "this type assertion"
* - https://github.com/Microsoft/TypeScript/wiki/What%27s-new-in-TypeScript#specifying-the-type-of-this-for-functions
* This enables strong Type Checks inside trait methods that invoke methods from other traits.
* This pattern is very similar to "self types" in Scala.
* - https://docs.scala-lang.org/tour/self-types.html
*/
// Intersection of the concrete Parser implementation with the API of every
// trait that gets mixed into it.
export type MixedInParser = ParserConstructorImpel &
  ErrorHandler &
  LexerAdapter &
  LooksAhead &
  RecognizerApi &
  RecognizerEngine &
  Recoverable &
  TreeBuilder &
  ContentAssist &
  GastRecorder &
  PerformanceTracer
// Constructor type exposing the public CstParser API to end users.
interface MixedInCstParserConstructor {
  new (
    tokenVocabulary: defs.TokenVocabulary,
    config?: defs.IParserConfig
  ): defs.CstParser
}
// The cast is needed because the implementation class only acquires its full
// API through the mixed-in traits (see MixedInParser above), which the
// compiler cannot see on the class declaration itself.
export const CstParser: MixedInCstParserConstructor = <any>(
  CstParserConstructorImpel
)
// Constructor type exposing the public EmbeddedActionsParser API to end users.
interface MixedInEmbeddedActionsParserConstructor {
  new (
    tokenVocabulary: defs.TokenVocabulary,
    config?: defs.IParserConfig
  ): defs.EmbeddedActionsParser
}
// Cast for the same reason as CstParser above: the traits are not visible
// on the implementation class's declared type.
export const EmbeddedActionsParser: MixedInEmbeddedActionsParserConstructor =
  <any>EmbeddedActionsParserConstructorImpl

View File

@@ -0,0 +1,54 @@
import { IParserConfig } from "@chevrotain/types"
import has from "lodash/has"
import { timer } from "@chevrotain/utils"
import { MixedInParser } from "./parser_traits"
import { DEFAULT_PARSER_CONFIG } from "../parser"
/**
* Trait responsible for runtime parsing errors.
*/
/**
 * Trait responsible for tracing the duration of the parser's initialization
 * phases (via TRACE_INIT). Tracing is driven by the `traceInitPerf` config
 * option: `true` enables unlimited-depth tracing, a number enables tracing
 * limited to that nesting depth, `false`/absent disables it.
 */
export class PerformanceTracer {
  traceInitPerf: boolean | number
  traceInitMaxIdent: number
  traceInitIndent: number

  initPerformanceTracer(config: IParserConfig) {
    if (has(config, "traceInitPerf")) {
      const userTraceInitPerf = config.traceInitPerf
      if (typeof userTraceInitPerf === "number") {
        // a numeric option both enables tracing (> 0) and caps the depth
        this.traceInitMaxIdent = userTraceInitPerf
        this.traceInitPerf = userTraceInitPerf > 0
      } else {
        this.traceInitMaxIdent = Infinity
        // assumes end user provides the correct config value/type
        this.traceInitPerf = userTraceInitPerf as boolean
      }
    } else {
      this.traceInitMaxIdent = 0
      this.traceInitPerf = DEFAULT_PARSER_CONFIG.traceInitPerf
    }
    // start at -1 so the outermost TRACE_INIT call runs at depth 0
    this.traceInitIndent = -1
  }

  /**
   * Runs `phaseImpl`, logging entry/exit (with nesting-based indentation and
   * elapsed time) when tracing is enabled and within the configured depth.
   * Returns whatever `phaseImpl` returns.
   */
  TRACE_INIT<T>(this: MixedInParser, phaseDesc: string, phaseImpl: () => T): T {
    // No need to optimize this using a NOOP pattern because
    // it is not called in a hot spot...
    if (this.traceInitPerf !== true) {
      return phaseImpl()
    }
    this.traceInitIndent++
    const indent = new Array(this.traceInitIndent + 1).join("\t")
    if (this.traceInitIndent < this.traceInitMaxIdent) {
      console.log(`${indent}--> <${phaseDesc}>`)
    }
    const { time, value } = timer(phaseImpl)
    /* istanbul ignore next - Difficult to reproduce specific performance behavior (>10ms) in tests */
    const traceMethod = time > 10 ? console.warn : console.log
    if (this.traceInitIndent < this.traceInitMaxIdent) {
      traceMethod(`${indent}<-- <${phaseDesc}> time: ${time}ms`)
    }
    this.traceInitIndent--
    return value
  }
}

View File

@@ -0,0 +1,720 @@
import {
AtLeastOneSepMethodOpts,
ConsumeMethodOpts,
DSLMethodOpts,
DSLMethodOptsWithErr,
GrammarAction,
IOrAlt,
IRuleConfig,
ISerializedGast,
IToken,
ManySepMethodOpts,
OrMethodOpts,
SubruleMethodOpts,
TokenType
} from "@chevrotain/types"
import values from "lodash/values"
import includes from "lodash/includes"
import { isRecognitionException } from "../../exceptions_public"
import { DEFAULT_RULE_CONFIG, ParserDefinitionErrorType } from "../parser"
import { defaultGrammarValidatorErrorProvider } from "../../errors_public"
import { validateRuleIsOverridden } from "../../grammar/checks"
import { MixedInParser } from "./parser_traits"
import { Rule, serializeGrammar } from "@chevrotain/gast"
import { IParserDefinitionError } from "../../grammar/types"
import { ParserMethodInternal } from "../types"
/**
* This trait is responsible for implementing the public API
* for defining Chevrotain parsers, i.e:
* - CONSUME
* - RULE
* - OPTION
* - ...
*/
export class RecognizerApi {
  /** Executes a grammar action callback with the parser bound as `this`. */
  ACTION<T>(this: MixedInParser, impl: () => T): T {
    return impl.call(this)
  }

  // --------------------------------------------------------------------
  // Lower-case DSL variants: identical to their upper-case counterparts
  // but take the occurrence index as an explicit first argument instead of
  // encoding it in the method name.
  // Consistency fix: void-returning variants (many / atLeastOne) no longer
  // use `return` on a void expression, matching their MANYx / AT_LEAST_ONEx
  // siblings.
  // --------------------------------------------------------------------
  consume(
    this: MixedInParser,
    idx: number,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, idx, options)
  }
  subrule<ARGS extends unknown[], R>(
    this: MixedInParser,
    idx: number,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, idx, options)
  }
  option<OUT>(
    this: MixedInParser,
    idx: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, idx)
  }
  or(
    this: MixedInParser,
    idx: number,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>
  ): any {
    return this.orInternal(altsOrOpts, idx)
  }
  many(
    this: MixedInParser,
    idx: number,
    actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
  ): void {
    this.manyInternal(idx, actionORMethodDef)
  }
  atLeastOne(
    this: MixedInParser,
    idx: number,
    actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
  ): void {
    this.atLeastOneInternal(idx, actionORMethodDef)
  }

  // --------------------------------------------------------------------
  // CONSUME / CONSUME1..9 — consume a single Token of the given type.
  // The numeric suffix is the occurrence index forwarded to consumeInternal.
  // --------------------------------------------------------------------
  CONSUME(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 0, options)
  }
  CONSUME1(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 1, options)
  }
  CONSUME2(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 2, options)
  }
  CONSUME3(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 3, options)
  }
  CONSUME4(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 4, options)
  }
  CONSUME5(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 5, options)
  }
  CONSUME6(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 6, options)
  }
  CONSUME7(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 7, options)
  }
  CONSUME8(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 8, options)
  }
  CONSUME9(
    this: MixedInParser,
    tokType: TokenType,
    options?: ConsumeMethodOpts
  ): IToken {
    return this.consumeInternal(tokType, 9, options)
  }

  // --------------------------------------------------------------------
  // SUBRULE / SUBRULE1..9 — invoke another rule of this parser.
  // --------------------------------------------------------------------
  SUBRULE<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 0, options)
  }
  SUBRULE1<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 1, options)
  }
  SUBRULE2<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 2, options)
  }
  SUBRULE3<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 3, options)
  }
  SUBRULE4<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 4, options)
  }
  SUBRULE5<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 5, options)
  }
  SUBRULE6<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 6, options)
  }
  SUBRULE7<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 7, options)
  }
  SUBRULE8<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 8, options)
  }
  SUBRULE9<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    return this.subruleInternal(ruleToCall, 9, options)
  }

  // --------------------------------------------------------------------
  // OPTION / OPTION1..9 — optional production; returns the action's result
  // or undefined when the option was not entered.
  // --------------------------------------------------------------------
  OPTION<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 0)
  }
  OPTION1<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 1)
  }
  OPTION2<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 2)
  }
  OPTION3<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 3)
  }
  OPTION4<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 4)
  }
  OPTION5<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 5)
  }
  OPTION6<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 6)
  }
  OPTION7<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 7)
  }
  OPTION8<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 8)
  }
  OPTION9<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): OUT | undefined {
    return this.optionInternal(actionORMethodDef, 9)
  }

  // --------------------------------------------------------------------
  // OR / OR1..9 — alternation; returns the chosen alternative's result.
  // --------------------------------------------------------------------
  OR<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 0)
  }
  OR1<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 1)
  }
  OR2<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 2)
  }
  OR3<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 3)
  }
  OR4<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 4)
  }
  OR5<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 5)
  }
  OR6<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 6)
  }
  OR7<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 7)
  }
  OR8<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 8)
  }
  OR9<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
  ): T {
    return this.orInternal(altsOrOpts, 9)
  }

  // --------------------------------------------------------------------
  // MANY / MANY1..9 — zero-or-more repetition.
  // --------------------------------------------------------------------
  MANY<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(0, actionORMethodDef)
  }
  MANY1<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(1, actionORMethodDef)
  }
  MANY2<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(2, actionORMethodDef)
  }
  MANY3<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(3, actionORMethodDef)
  }
  MANY4<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(4, actionORMethodDef)
  }
  MANY5<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(5, actionORMethodDef)
  }
  MANY6<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(6, actionORMethodDef)
  }
  MANY7<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(7, actionORMethodDef)
  }
  MANY8<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(8, actionORMethodDef)
  }
  MANY9<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    this.manyInternal(9, actionORMethodDef)
  }

  // --------------------------------------------------------------------
  // MANY_SEP / MANY_SEP1..9 — zero-or-more repetition with a separator.
  // --------------------------------------------------------------------
  MANY_SEP<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(0, options)
  }
  MANY_SEP1<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(1, options)
  }
  MANY_SEP2<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(2, options)
  }
  MANY_SEP3<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(3, options)
  }
  MANY_SEP4<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(4, options)
  }
  MANY_SEP5<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(5, options)
  }
  MANY_SEP6<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(6, options)
  }
  MANY_SEP7<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(7, options)
  }
  MANY_SEP8<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(8, options)
  }
  MANY_SEP9<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
    this.manySepFirstInternal(9, options)
  }

  // --------------------------------------------------------------------
  // AT_LEAST_ONE / AT_LEAST_ONE1..9 — one-or-more repetition.
  // Consistency fix: AT_LEAST_ONE1 previously used `return` on the void
  // delegation while all its siblings did not.
  // --------------------------------------------------------------------
  AT_LEAST_ONE<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(0, actionORMethodDef)
  }
  AT_LEAST_ONE1<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(1, actionORMethodDef)
  }
  AT_LEAST_ONE2<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(2, actionORMethodDef)
  }
  AT_LEAST_ONE3<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(3, actionORMethodDef)
  }
  AT_LEAST_ONE4<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(4, actionORMethodDef)
  }
  AT_LEAST_ONE5<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(5, actionORMethodDef)
  }
  AT_LEAST_ONE6<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(6, actionORMethodDef)
  }
  AT_LEAST_ONE7<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(7, actionORMethodDef)
  }
  AT_LEAST_ONE8<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(8, actionORMethodDef)
  }
  AT_LEAST_ONE9<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    this.atLeastOneInternal(9, actionORMethodDef)
  }

  // --------------------------------------------------------------------
  // AT_LEAST_ONE_SEP / ..1..9 — one-or-more repetition with a separator.
  // --------------------------------------------------------------------
  AT_LEAST_ONE_SEP<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(0, options)
  }
  AT_LEAST_ONE_SEP1<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(1, options)
  }
  AT_LEAST_ONE_SEP2<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(2, options)
  }
  AT_LEAST_ONE_SEP3<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(3, options)
  }
  AT_LEAST_ONE_SEP4<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(4, options)
  }
  AT_LEAST_ONE_SEP5<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(5, options)
  }
  AT_LEAST_ONE_SEP6<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(6, options)
  }
  AT_LEAST_ONE_SEP7<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(7, options)
  }
  AT_LEAST_ONE_SEP8<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(8, options)
  }
  AT_LEAST_ONE_SEP9<OUT>(
    this: MixedInParser,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    this.atLeastOneSepFirstInternal(9, options)
  }

  /**
   * Defines a new grammar rule. Records a DUPLICATE_RULE_NAME definition
   * error (without throwing) if a rule of the same name already exists,
   * then installs the wrapped rule implementation as a property of the
   * parser instance and returns it.
   */
  RULE<T>(
    this: MixedInParser,
    name: string,
    implementation: (...implArgs: any[]) => T,
    config: IRuleConfig<T> = DEFAULT_RULE_CONFIG
  ): (idxInCallingRule?: number, ...args: any[]) => T | any {
    if (includes(this.definedRulesNames, name)) {
      const errMsg =
        defaultGrammarValidatorErrorProvider.buildDuplicateRuleNameError({
          topLevelRule: name,
          grammarName: this.className
        })
      const error = {
        message: errMsg,
        type: ParserDefinitionErrorType.DUPLICATE_RULE_NAME,
        ruleName: name
      }
      this.definitionErrors.push(error)
    }
    this.definedRulesNames.push(name)
    const ruleImplementation = this.defineRule(name, implementation, config)
    ;(this as any)[name] = ruleImplementation
    return ruleImplementation
  }

  /**
   * Replaces an inherited rule's implementation. Records definition errors
   * if no rule of that name exists to be overridden.
   */
  OVERRIDE_RULE<T>(
    this: MixedInParser,
    name: string,
    impl: (...implArgs: any[]) => T,
    config: IRuleConfig<T> = DEFAULT_RULE_CONFIG
  ): (idxInCallingRule?: number, ...args: any[]) => T {
    const ruleErrors: IParserDefinitionError[] = validateRuleIsOverridden(
      name,
      this.definedRulesNames,
      this.className
    )
    this.definitionErrors = this.definitionErrors.concat(ruleErrors)
    const ruleImplementation = this.defineRule(name, impl, config)
    ;(this as any)[name] = ruleImplementation
    return ruleImplementation
  }

  /**
   * Returns a predicate that speculatively parses `grammarRule`:
   * true if it parses without a recognition exception, false otherwise.
   * Parser state is saved before and restored after the attempt.
   */
  BACKTRACK<T>(
    this: MixedInParser,
    grammarRule: (...args: any[]) => T,
    args?: any[]
  ): () => boolean {
    return function () {
      // save org state
      this.isBackTrackingStack.push(1)
      const orgState = this.saveRecogState()
      try {
        grammarRule.apply(this, args)
        // if no exception was thrown we have succeed parsing the rule.
        return true
      } catch (e) {
        if (isRecognitionException(e)) {
          return false
        } else {
          throw e
        }
      } finally {
        this.reloadRecogState(orgState)
        this.isBackTrackingStack.pop()
      }
    }
  }

  // GAST export APIs
  public getGAstProductions(this: MixedInParser): Record<string, Rule> {
    return this.gastProductionsCache
  }
  public getSerializedGastProductions(this: MixedInParser): ISerializedGast[] {
    return serializeGrammar(values(this.gastProductionsCache))
  }
}

View File

@@ -0,0 +1,860 @@
import {
AtLeastOneSepMethodOpts,
ConsumeMethodOpts,
DSLMethodOpts,
DSLMethodOptsWithErr,
GrammarAction,
IOrAlt,
IParserConfig,
IRuleConfig,
IToken,
ManySepMethodOpts,
OrMethodOpts,
ParserMethod,
SubruleMethodOpts,
TokenType,
TokenTypeDictionary,
TokenVocabulary
} from "@chevrotain/types"
import isEmpty from "lodash/isEmpty"
import isArray from "lodash/isArray"
import flatten from "lodash/flatten"
import every from "lodash/every"
import uniq from "lodash/uniq"
import isObject from "lodash/isObject"
import has from "lodash/has"
import values from "lodash/values"
import reduce from "lodash/reduce"
import clone from "lodash/clone"
import {
AT_LEAST_ONE_IDX,
AT_LEAST_ONE_SEP_IDX,
BITS_FOR_METHOD_TYPE,
BITS_FOR_OCCURRENCE_IDX,
MANY_IDX,
MANY_SEP_IDX,
OPTION_IDX,
OR_IDX
} from "../../grammar/keys"
import {
isRecognitionException,
MismatchedTokenException,
NotAllInputParsedException
} from "../../exceptions_public"
import { PROD_TYPE } from "../../grammar/lookahead"
import {
AbstractNextTerminalAfterProductionWalker,
NextTerminalAfterAtLeastOneSepWalker,
NextTerminalAfterAtLeastOneWalker,
NextTerminalAfterManySepWalker,
NextTerminalAfterManyWalker
} from "../../grammar/interpreter"
import { DEFAULT_RULE_CONFIG, IParserState, TokenMatcher } from "../parser"
import { IN_RULE_RECOVERY_EXCEPTION } from "./recoverable"
import { EOF } from "../../../scan/tokens_public"
import { MixedInParser } from "./parser_traits"
import {
augmentTokenTypes,
isTokenType,
tokenStructuredMatcher,
tokenStructuredMatcherNoCategories
} from "../../../scan/tokens"
import { Rule } from "@chevrotain/gast"
import { ParserMethodInternal } from "../types"
/**
* This trait is responsible for the runtime parsing engine
* Used by the official API (recognizer_api.ts)
*/
export class RecognizerEngine {
  // non-empty while a BACKTRACK predicate is executing (see recognizer_api.ts)
  isBackTrackingStack: boolean[]
  className: string
  // short (numeric) names of the rules currently being invoked
  RULE_STACK: number[]
  RULE_OCCURRENCE_STACK: number[]
  definedRulesNames: string[]
  // token name -> TokenType, normalized from whichever vocabulary form was provided
  tokensMap: { [fqn: string]: TokenType }
  gastProductionsCache: Record<string, Rule>
  shortRuleNameToFull: Record<string, string>
  fullRuleNameToShort: Record<string, number>
  // The shortName Index must be coded "after" the first 8bits to enable building unique lookahead keys
  ruleShortNameIdx: number
  tokenMatcher: TokenMatcher
  subruleIdx: number
  /**
   * Initializes the runtime parsing engine's state and normalizes the
   * provided Token vocabulary (array / multi-mode lexer definition /
   * dictionary) into the internal `tokensMap`. Also selects the token
   * matcher implementation depending on whether token categories are used.
   */
  initRecognizerEngine(
    tokenVocabulary: TokenVocabulary,
    config: IParserConfig
  ) {
    this.className = this.constructor.name
    // TODO: would using an ES6 Map or plain object be faster (CST building scenario)
    this.shortRuleNameToFull = {}
    this.fullRuleNameToShort = {}
    // starts at 256 so short names never collide with the bits reserved for
    // method type / occurrence (see the comment on ruleShortNameIdx)
    this.ruleShortNameIdx = 256
    this.tokenMatcher = tokenStructuredMatcherNoCategories
    this.subruleIdx = 0
    this.definedRulesNames = []
    this.tokensMap = {}
    this.isBackTrackingStack = []
    this.RULE_STACK = []
    this.RULE_OCCURRENCE_STACK = []
    this.gastProductionsCache = {}
    // fail fast on configuration styles removed in earlier major versions
    if (has(config, "serializedGrammar")) {
      throw Error(
        "The Parser's configuration can no longer contain a <serializedGrammar> property.\n" +
          "\tSee: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_6-0-0\n" +
          "\tFor Further details."
      )
    }
    if (isArray(tokenVocabulary)) {
      // This only checks for Token vocabularies provided as arrays.
      // That is good enough because the main objective is to detect users of pre-V4.0 APIs
      // rather than all edge cases of empty Token vocabularies.
      if (isEmpty(tokenVocabulary as any[])) {
        throw Error(
          "A Token Vocabulary cannot be empty.\n" +
            "\tNote that the first argument for the parser constructor\n" +
            "\tis no longer a Token vector (since v4.0)."
        )
      }
      // a startOffset property suggests actual IToken instances (a token
      // vector) were passed instead of TokenTypes
      if (typeof (tokenVocabulary as any[])[0].startOffset === "number") {
        throw Error(
          "The Parser constructor no longer accepts a token vector as the first argument.\n" +
            "\tSee: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_4-0-0\n" +
            "\tFor Further details."
        )
      }
    }
    // normalize the three supported vocabulary forms into tokensMap
    if (isArray(tokenVocabulary)) {
      this.tokensMap = reduce(
        tokenVocabulary,
        (acc, tokType: TokenType) => {
          acc[tokType.name] = tokType
          return acc
        },
        {} as { [tokenName: string]: TokenType }
      )
    } else if (
      has(tokenVocabulary, "modes") &&
      every(flatten(values((<any>tokenVocabulary).modes)), isTokenType)
    ) {
      // multi-mode lexer definition: flatten all modes and dedupe
      const allTokenTypes = flatten(values((<any>tokenVocabulary).modes))
      const uniqueTokens = uniq(allTokenTypes)
      this.tokensMap = <any>reduce(
        uniqueTokens,
        (acc, tokType: TokenType) => {
          acc[tokType.name] = tokType
          return acc
        },
        {} as { [tokenName: string]: TokenType }
      )
    } else if (isObject(tokenVocabulary)) {
      this.tokensMap = clone(tokenVocabulary as TokenTypeDictionary)
    } else {
      throw new Error(
        "<tokensDictionary> argument must be An Array of Token constructors," +
          " A dictionary of Token constructors or an IMultiModeLexerDefinition"
      )
    }
    // always add EOF to the tokenNames -> constructors map. it is useful to assure all the input has been
    // parsed with a clear error message ("expecting EOF but found ...")
    this.tokensMap["EOF"] = EOF
    const allTokenTypes = has(tokenVocabulary, "modes")
      ? flatten(values((<any>tokenVocabulary).modes))
      : values(tokenVocabulary)
    // the category-aware matcher is only needed when some token actually
    // declares category matches
    const noTokenCategoriesUsed = every(allTokenTypes, (tokenConstructor) =>
      isEmpty(tokenConstructor.categoryMatches)
    )
    this.tokenMatcher = noTokenCategoriesUsed
      ? tokenStructuredMatcherNoCategories
      : tokenStructuredMatcher
    // Because ES2015+ syntax should be supported for creating Token classes
    // We cannot assume that the Token classes were created using the "extendToken" utilities
    // Therefore we must augment the Token classes both on Lexer initialization and on Parser initialization
    augmentTokenTypes(values(this.tokensMap))
  }
  /**
   * Wraps a user-supplied rule implementation with rule-invocation state
   * management and error-recovery handling, registers the rule's short
   * (numeric) name, and returns the wrapped callable.
   * Throws if called after self analysis has completed.
   */
  defineRule<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleName: string,
    impl: (...args: ARGS) => R,
    config: IRuleConfig<R>
  ): ParserMethodInternal<ARGS, R> {
    if (this.selfAnalysisDone) {
      throw Error(
        `Grammar rule <${ruleName}> may not be defined after the 'performSelfAnalysis' method has been called'\n` +
          `Make sure that all grammar rule definitions are done before 'performSelfAnalysis' is called.`
      )
    }
    const resyncEnabled: boolean = has(config, "resyncEnabled")
      ? (config.resyncEnabled as boolean) // assumes end user provides the correct config value/type
      : DEFAULT_RULE_CONFIG.resyncEnabled
    const recoveryValueFunc = has(config, "recoveryValueFunc")
      ? (config.recoveryValueFunc as () => R) // assumes end user provides the correct config value/type
      : DEFAULT_RULE_CONFIG.recoveryValueFunc
    // performance optimization: Use small integers as keys for the longer human readable "full" rule names.
    // this greatly improves Map access time (as much as 8% for some performance benchmarks).
    const shortName =
      this.ruleShortNameIdx << (BITS_FOR_METHOD_TYPE + BITS_FOR_OCCURRENCE_IDX)
    this.ruleShortNameIdx++
    this.shortRuleNameToFull[shortName] = ruleName
    this.fullRuleNameToShort[ruleName] = shortName
    let invokeRuleWithTry: ParserMethod<ARGS, R>
    // Micro optimization, only check the condition **once** on rule definition
    // instead of **every single** rule invocation.
    // NOTE(review): the inner function names appear swapped — the CST-producing
    // branch is named "invokeRuleWithTry" while the non-CST branch is named
    // "invokeRuleWithTryCst". Behavior is unaffected (names only surface in
    // stack traces) — confirm before renaming.
    if (this.outputCst === true) {
      invokeRuleWithTry = function invokeRuleWithTry(
        this: MixedInParser,
        ...args: ARGS
      ): R {
        try {
          this.ruleInvocationStateUpdate(shortName, ruleName, this.subruleIdx)
          impl.apply(this, args)
          const cst = this.CST_STACK[this.CST_STACK.length - 1]
          this.cstPostRule(cst)
          return cst as unknown as R
        } catch (e) {
          return this.invokeRuleCatch(e, resyncEnabled, recoveryValueFunc) as R
        } finally {
          this.ruleFinallyStateUpdate()
        }
      }
    } else {
      invokeRuleWithTry = function invokeRuleWithTryCst(
        this: MixedInParser,
        ...args: ARGS
      ): R {
        try {
          this.ruleInvocationStateUpdate(shortName, ruleName, this.subruleIdx)
          return impl.apply(this, args)
        } catch (e) {
          return this.invokeRuleCatch(e, resyncEnabled, recoveryValueFunc) as R
        } finally {
          this.ruleFinallyStateUpdate()
        }
      }
    }
    // expose the rule name and the raw grammar action on the wrapper
    const wrappedGrammarRule: ParserMethodInternal<ARGS, R> = Object.assign(
      invokeRuleWithTry as any,
      { ruleName, originalGrammarAction: impl }
    )
    return wrappedGrammarRule
  }
  /**
   * Handles an error thrown while a rule implementation was executing.
   * Recognition exceptions may be recovered from via re-sync (when enabled
   * and possible) or absorbed at the outermost rule; otherwise they are
   * re-thrown to be handled further up the rule stack. Non-recognition
   * errors are always re-thrown.
   */
  invokeRuleCatch(
    this: MixedInParser,
    e: Error,
    resyncEnabledConfig: boolean,
    recoveryValueFunc: Function
  ): unknown {
    const isFirstInvokedRule = this.RULE_STACK.length === 1
    // note the reSync is always enabled for the first rule invocation, because we must always be able to
    // reSync with EOF and just output some INVALID ParseTree
    // during backtracking reSync recovery is disabled, otherwise we can't be certain the backtracking
    // path is really the most valid one
    const reSyncEnabled =
      resyncEnabledConfig && !this.isBackTracking() && this.recoveryEnabled
    if (isRecognitionException(e)) {
      const recogError: any = e
      if (reSyncEnabled) {
        const reSyncTokType = this.findReSyncTokenType()
        if (this.isInCurrentRuleReSyncSet(reSyncTokType)) {
          // recovery possible inside the current rule: skip tokens up to the
          // re-sync point and produce a partial (recovered) result
          recogError.resyncedTokens = this.reSyncTo(reSyncTokType)
          if (this.outputCst) {
            const partialCstResult: any =
              this.CST_STACK[this.CST_STACK.length - 1]
            partialCstResult.recoveredNode = true
            return partialCstResult
          } else {
            return recoveryValueFunc(e)
          }
        } else {
          if (this.outputCst) {
            // attach the partial CST so an ancestor rule can still use it
            const partialCstResult: any =
              this.CST_STACK[this.CST_STACK.length - 1]
            partialCstResult.recoveredNode = true
            recogError.partialCstResult = partialCstResult
          }
          // to be handled Further up the call stack
          throw recogError
        }
      } else if (isFirstInvokedRule) {
        // otherwise a Redundant input error will be created as well and we cannot guarantee that this is indeed the case
        this.moveToTerminatedState()
        // the parser should never throw one of its own errors outside its flow.
        // even if error recovery is disabled
        return recoveryValueFunc(e)
      } else {
        // to be recovered Further up the call stack
        throw recogError
      }
    } else {
      // some other Error type which we don't know how to handle (for example a built in JavaScript Error)
      throw e
    }
  }
// Implementation of parsing DSL
/** Runtime implementation behind OPTION / OPTION1..9 and `option`. */
optionInternal<OUT>(
  this: MixedInParser,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
  occurrence: number
): OUT | undefined {
  const laKey = this.getKeyForAutomaticLookahead(OPTION_IDX, occurrence)
  return this.optionInternalLogic(actionORMethodDef, occurrence, laKey)
}
/**
 * Executes an optional production: runs the grammar action only when the
 * (possibly gate-combined) lookahead approves; otherwise yields undefined.
 */
optionInternalLogic<OUT>(
  this: MixedInParser,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
  occurrence: number,
  key: number
): OUT | undefined {
  let laFunc = this.getLaFuncFromCache(key)
  let grammarAction: GrammarAction<OUT>
  if (typeof actionORMethodDef === "function") {
    grammarAction = actionORMethodDef
  } else {
    grammarAction = actionORMethodDef.DEF
    const gate = actionORMethodDef.GATE
    if (gate !== undefined) {
      // both the user's GATE predicate and the automatic lookahead must
      // approve for the optional production to be entered
      const pureLookahead = laFunc
      laFunc = () => {
        return gate.call(this) && pureLookahead.call(this)
      }
    }
  }
  if (laFunc.call(this) === true) {
    return grammarAction.call(this)
  }
  return undefined
}
/** Runtime implementation behind AT_LEAST_ONE / AT_LEAST_ONE1..9 and `atLeastOne`. */
atLeastOneInternal<OUT>(
  this: MixedInParser,
  prodOccurrence: number,
  actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
): void {
  const key = this.getKeyForAutomaticLookahead(
    AT_LEAST_ONE_IDX,
    prodOccurrence
  )
  return this.atLeastOneInternalLogic(prodOccurrence, actionORMethodDef, key)
}
  /**
   * Executes a one-or-more repetition: performs the first (mandatory)
   * iteration — raising an early-exit exception if the lookahead rejects
   * it — then keeps repeating while the lookahead approves, and finally
   * gives in-repetition error recovery a chance to resume the repetition.
   */
  atLeastOneInternalLogic<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>,
    key: number
  ): void {
    let lookAheadFunc = this.getLaFuncFromCache(key)
    let action
    if (typeof actionORMethodDef !== "function") {
      action = actionORMethodDef.DEF
      const predicate = actionORMethodDef.GATE
      // predicate present
      if (predicate !== undefined) {
        // combine the user GATE with the automatic lookahead: both must approve
        const orgLookaheadFunction = lookAheadFunc
        lookAheadFunc = () => {
          return predicate.call(this) && orgLookaheadFunction.call(this)
        }
      }
    } else {
      action = actionORMethodDef
    }
    if ((<Function>lookAheadFunc).call(this) === true) {
      // doSingleRepetition reports whether progress was made; stop when the
      // repetition is "stuck" to avoid an infinite loop
      let notStuck = this.doSingleRepetition(action)
      while (
        (<Function>lookAheadFunc).call(this) === true &&
        notStuck === true
      ) {
        notStuck = this.doSingleRepetition(action)
      }
    } else {
      // the first iteration is mandatory — failing its lookahead is an error
      throw this.raiseEarlyExitException(
        prodOccurrence,
        PROD_TYPE.REPETITION_MANDATORY,
        (<DSLMethodOptsWithErr<OUT>>actionORMethodDef).ERR_MSG
      )
    }
    // note that while it may seem that this can cause an error because by using a recursive call to
    // AT_LEAST_ONE we change the grammar to AT_LEAST_TWO, AT_LEAST_THREE ... , the possible recursive call
    // from the tryInRepetitionRecovery(...) will only happen IFF there really are TWO/THREE/.... items.
    // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
    this.attemptInRepetitionRecovery(
      this.atLeastOneInternal,
      [prodOccurrence, actionORMethodDef],
      <any>lookAheadFunc,
      AT_LEAST_ONE_IDX,
      prodOccurrence,
      NextTerminalAfterAtLeastOneWalker
    )
  }
atLeastOneSepFirstInternal<OUT>(
this: MixedInParser,
prodOccurrence: number,
options: AtLeastOneSepMethodOpts<OUT>
): void {
const laKey = this.getKeyForAutomaticLookahead(
AT_LEAST_ONE_SEP_IDX,
prodOccurrence
)
this.atLeastOneSepFirstInternalLogic(prodOccurrence, options, laKey)
}
  /**
   * Shared implementation of AT_LEAST_ONE_SEP (mandatory separated repetition).
   * The first iteration is entered via the adaptive lookahead function; each
   * subsequent iteration is entered whenever the separator token appears next.
   * Raises an early-exit error when the first iteration cannot be entered.
   */
  atLeastOneSepFirstInternalLogic<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    options: AtLeastOneSepMethodOpts<OUT>,
    key: number
  ): void {
    const action = options.DEF
    const separator = options.SEP
    const firstIterationLookaheadFunc = this.getLaFuncFromCache(key)
    // 1st iteration
    if (firstIterationLookaheadFunc.call(this) === true) {
      ;(<GrammarAction<OUT>>action).call(this)
      // TODO: Optimization can move this function construction into "attemptInRepetitionRecovery"
      // because it is only needed in error recovery scenarios.
      const separatorLookAheadFunc = () => {
        return this.tokenMatcher(this.LA(1), separator)
      }
      // 2nd..nth iterations
      while (this.tokenMatcher(this.LA(1), separator) === true) {
        // note that this CONSUME will never enter recovery because
        // the separatorLookAheadFunc checks that the separator really does exist.
        this.CONSUME(separator)
        // No need for checking infinite loop here due to consuming the separator.
        ;(<GrammarAction<OUT>>action).call(this)
      }
      // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
      this.attemptInRepetitionRecovery(
        this.repetitionSepSecondInternal,
        [
          prodOccurrence,
          separator,
          separatorLookAheadFunc,
          action,
          NextTerminalAfterAtLeastOneSepWalker
        ],
        separatorLookAheadFunc,
        AT_LEAST_ONE_SEP_IDX,
        prodOccurrence,
        NextTerminalAfterAtLeastOneSepWalker
      )
    } else {
      // not even one iteration could be entered -> mandatory repetition violated
      throw this.raiseEarlyExitException(
        prodOccurrence,
        PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR,
        options.ERR_MSG
      )
    }
  }
manyInternal<OUT>(
this: MixedInParser,
prodOccurrence: number,
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): void {
const laKey = this.getKeyForAutomaticLookahead(MANY_IDX, prodOccurrence)
return this.manyInternalLogic(prodOccurrence, actionORMethodDef, laKey)
}
manyInternalLogic<OUT>(
this: MixedInParser,
prodOccurrence: number,
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
key: number
) {
let lookaheadFunction = this.getLaFuncFromCache(key)
let action
if (typeof actionORMethodDef !== "function") {
action = actionORMethodDef.DEF
const predicate = actionORMethodDef.GATE
// predicate present
if (predicate !== undefined) {
const orgLookaheadFunction = lookaheadFunction
lookaheadFunction = () => {
return predicate.call(this) && orgLookaheadFunction.call(this)
}
}
} else {
action = actionORMethodDef
}
let notStuck = true
while (lookaheadFunction.call(this) === true && notStuck === true) {
notStuck = this.doSingleRepetition(action)
}
// Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
this.attemptInRepetitionRecovery(
this.manyInternal,
[prodOccurrence, actionORMethodDef],
<any>lookaheadFunction,
MANY_IDX,
prodOccurrence,
NextTerminalAfterManyWalker,
// The notStuck parameter is only relevant when "attemptInRepetitionRecovery"
// is invoked from manyInternal, in the MANY_SEP case and AT_LEAST_ONE[_SEP]
// An infinite loop cannot occur as:
// - Either the lookahead is guaranteed to consume something (Single Token Separator)
// - AT_LEAST_ONE by definition is guaranteed to consume something (or error out).
notStuck
)
}
manySepFirstInternal<OUT>(
this: MixedInParser,
prodOccurrence: number,
options: ManySepMethodOpts<OUT>
): void {
const laKey = this.getKeyForAutomaticLookahead(MANY_SEP_IDX, prodOccurrence)
this.manySepFirstInternalLogic(prodOccurrence, options, laKey)
}
  /**
   * Shared implementation of MANY_SEP (zero-or-more separated repetition).
   * The first iteration is entered via the adaptive lookahead function; each
   * subsequent iteration is entered whenever the separator token appears next.
   * Unlike AT_LEAST_ONE_SEP, entering zero iterations is valid here.
   */
  manySepFirstInternalLogic<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    options: ManySepMethodOpts<OUT>,
    key: number
  ): void {
    const action = options.DEF
    const separator = options.SEP
    const firstIterationLaFunc = this.getLaFuncFromCache(key)
    // 1st iteration
    if (firstIterationLaFunc.call(this) === true) {
      action.call(this)
      // only needed for the (optional) error recovery attempt below
      const separatorLookAheadFunc = () => {
        return this.tokenMatcher(this.LA(1), separator)
      }
      // 2nd..nth iterations
      while (this.tokenMatcher(this.LA(1), separator) === true) {
        // note that this CONSUME will never enter recovery because
        // the separatorLookAheadFunc checks that the separator really does exist.
        this.CONSUME(separator)
        // No need for checking infinite loop here due to consuming the separator.
        action.call(this)
      }
      // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
      this.attemptInRepetitionRecovery(
        this.repetitionSepSecondInternal,
        [
          prodOccurrence,
          separator,
          separatorLookAheadFunc,
          action,
          NextTerminalAfterManySepWalker
        ],
        separatorLookAheadFunc,
        MANY_SEP_IDX,
        prodOccurrence,
        NextTerminalAfterManySepWalker
      )
    }
  }
  /**
   * Continues a separated repetition (2nd..nth iterations) after a re-sync
   * error recovery. Consumes `separator` + action pairs while the separator
   * appears next, then attempts another recovery round.
   */
  repetitionSepSecondInternal<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    separator: TokenType,
    separatorLookAheadFunc: () => boolean,
    action: GrammarAction<OUT>,
    nextTerminalAfterWalker: typeof AbstractNextTerminalAfterProductionWalker
  ): void {
    while (separatorLookAheadFunc()) {
      // note that this CONSUME will never enter recovery because
      // the separatorLookAheadFunc checks that the separator really does exist.
      this.CONSUME(separator)
      action.call(this)
    }
    // we can only arrive to this function after an error
    // has occurred (hence the name 'second') so the following
    // IF will always be entered, its possible to remove it...
    // however it is kept to avoid confusion and be consistent.
    // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
    /* istanbul ignore else */
    this.attemptInRepetitionRecovery(
      this.repetitionSepSecondInternal,
      [
        prodOccurrence,
        separator,
        separatorLookAheadFunc,
        action,
        nextTerminalAfterWalker
      ],
      separatorLookAheadFunc,
      AT_LEAST_ONE_SEP_IDX,
      prodOccurrence,
      nextTerminalAfterWalker
    )
  }
doSingleRepetition(this: MixedInParser, action: Function): any {
const beforeIteration = this.getLexerPosition()
action.call(this)
const afterIteration = this.getLexerPosition()
// This boolean will indicate if this repetition progressed
// or if we are "stuck" (potential infinite loop in the repetition).
return afterIteration > beforeIteration
}
  /**
   * Core implementation of OR: selects an alternative using the (cached)
   * adaptive lookahead function and invokes the chosen alternative's ALT action.
   * When no alternative is viable, `raiseNoAltException` is invoked — it
   * presumably throws (there is no return after it) — TODO confirm.
   */
  orInternal<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>,
    occurrence: number
  ): T {
    const laKey = this.getKeyForAutomaticLookahead(OR_IDX, occurrence)
    // OR may be invoked with a plain alternatives array or an options object holding DEF
    const alts = isArray(altsOrOpts) ? altsOrOpts : altsOrOpts.DEF
    const laFunc = this.getLaFuncFromCache(laKey)
    // the lookahead function yields the index of the alternative to take (or undefined)
    const altIdxToTake = laFunc.call(this, alts)
    if (altIdxToTake !== undefined) {
      const chosenAlternative: any = alts[altIdxToTake]
      return chosenAlternative.ALT.call(this)
    }
    this.raiseNoAltException(
      occurrence,
      (altsOrOpts as OrMethodOpts<unknown>).ERR_MSG
    )
  }
ruleFinallyStateUpdate(this: MixedInParser): void {
this.RULE_STACK.pop()
this.RULE_OCCURRENCE_STACK.pop()
// NOOP when cst is disabled
this.cstFinallyStateUpdate()
if (this.RULE_STACK.length === 0 && this.isAtEndOfInput() === false) {
const firstRedundantTok = this.LA(1)
const errMsg = this.errorMessageProvider.buildNotAllInputParsedMessage({
firstRedundant: firstRedundantTok,
ruleName: this.getCurrRuleFullName()
})
this.SAVE_ERROR(new NotAllInputParsedException(errMsg, firstRedundantTok))
}
}
  /**
   * Core implementation of SUBRULE: invokes the sub-rule implementation,
   * records its CST result under options.LABEL (or the rule name), and
   * funnels any thrown exception through `subruleInternalError`.
   */
  subruleInternal<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    idx: number,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    let ruleResult
    try {
      const args = options !== undefined ? options.ARGS : undefined
      // track which occurrence of the sub-rule is being invoked
      this.subruleIdx = idx
      ruleResult = ruleToCall.apply(this, args)
      this.cstPostNonTerminal(
        ruleResult,
        options !== undefined && options.LABEL !== undefined
          ? options.LABEL
          : ruleToCall.ruleName
      )
      return ruleResult
    } catch (e) {
      // subruleInternalError always rethrows; `throw` keeps the control flow explicit
      throw this.subruleInternalError(e, options, ruleToCall.ruleName)
    }
  }
subruleInternalError(
this: MixedInParser,
e: any,
options: SubruleMethodOpts<unknown[]> | undefined,
ruleName: string
): void {
if (isRecognitionException(e) && e.partialCstResult !== undefined) {
this.cstPostNonTerminal(
e.partialCstResult,
options !== undefined && options.LABEL !== undefined
? options.LABEL
: ruleName
)
delete e.partialCstResult
}
throw e
}
consumeInternal(
this: MixedInParser,
tokType: TokenType,
idx: number,
options: ConsumeMethodOpts | undefined
): IToken {
let consumedToken!: IToken
try {
const nextToken = this.LA(1)
if (this.tokenMatcher(nextToken, tokType) === true) {
this.consumeToken()
consumedToken = nextToken
} else {
this.consumeInternalError(tokType, nextToken, options)
}
} catch (eFromConsumption) {
consumedToken = this.consumeInternalRecovery(
tokType,
idx,
eFromConsumption
)
}
this.cstPostTerminal(
options !== undefined && options.LABEL !== undefined
? options.LABEL
: tokType.name,
consumedToken
)
return consumedToken
}
consumeInternalError(
this: MixedInParser,
tokType: TokenType,
nextToken: IToken,
options: ConsumeMethodOpts | undefined
): void {
let msg
const previousToken = this.LA(0)
if (options !== undefined && options.ERR_MSG) {
msg = options.ERR_MSG
} else {
msg = this.errorMessageProvider.buildMismatchTokenMessage({
expected: tokType,
actual: nextToken,
previous: previousToken,
ruleName: this.getCurrRuleFullName()
})
}
throw this.SAVE_ERROR(
new MismatchedTokenException(msg, nextToken, previousToken)
)
}
  /**
   * Attempts "in-rule" recovery (single token insertion/deletion) for a failed
   * CONSUME. Returns the recovered token on success, otherwise rethrows —
   * either the original consumption error (to trigger re-sync recovery) or
   * whatever unexpected error occurred during the recovery attempt.
   */
  consumeInternalRecovery(
    this: MixedInParser,
    tokType: TokenType,
    idx: number,
    eFromConsumption: Error
  ): IToken {
    // no recovery allowed during backtracking, otherwise backtracking may recover invalid syntax and accept it
    // but the original syntax could have been parsed successfully without any backtracking + recovery
    if (
      this.recoveryEnabled &&
      // TODO: more robust checking of the exception type. Perhaps Typescript extending expressions?
      eFromConsumption.name === "MismatchedTokenException" &&
      !this.isBackTracking()
    ) {
      const follows = this.getFollowsForInRuleRecovery(<any>tokType, idx)
      try {
        return this.tryInRuleRecovery(<any>tokType, follows)
      } catch (eFromInRuleRecovery) {
        if (eFromInRuleRecovery.name === IN_RULE_RECOVERY_EXCEPTION) {
          // failed in RuleRecovery.
          // throw the original error in order to trigger reSync error recovery
          throw eFromConsumption
        } else {
          throw eFromInRuleRecovery
        }
      }
    } else {
      throw eFromConsumption
    }
  }
saveRecogState(this: MixedInParser): IParserState {
// errors is a getter which will clone the errors array
const savedErrors = this.errors
const savedRuleStack = clone(this.RULE_STACK)
return {
errors: savedErrors,
lexerState: this.exportLexerState(),
RULE_STACK: savedRuleStack,
CST_STACK: this.CST_STACK
}
}
  /**
   * Restores parser state previously captured by `saveRecogState`.
   * Note: CST_STACK is not restored here.
   */
  reloadRecogState(this: MixedInParser, newState: IParserState) {
    this.errors = newState.errors
    this.importLexerState(newState.lexerState)
    this.RULE_STACK = newState.RULE_STACK
  }
  /**
   * Bookkeeping performed at the start of every rule invocation:
   * pushes the occurrence index and rule short-name onto their stacks
   * and (when CST output is enabled) opens a new CST node.
   */
  ruleInvocationStateUpdate(
    this: MixedInParser,
    shortName: number,
    fullName: string,
    idxInCallingRule: number
  ): void {
    this.RULE_OCCURRENCE_STACK.push(idxInCallingRule)
    this.RULE_STACK.push(shortName)
    // NOOP when cst is disabled
    this.cstInvocationStateUpdate(fullName)
  }
isBackTracking(this: MixedInParser): boolean {
return this.isBackTrackingStack.length !== 0
}
getCurrRuleFullName(this: MixedInParser): string {
const shortName = this.getLastExplicitRuleShortName()
return this.shortRuleNameToFull[shortName]
}
  // Maps a rule's numeric short-name back to its full (string) name.
  shortRuleNameToFullName(this: MixedInParser, shortName: number) {
    return this.shortRuleNameToFull[shortName]
  }
  // true when the next token is the (virtual) EOF token, i.e. all input was consumed.
  public isAtEndOfInput(this: MixedInParser): boolean {
    return this.tokenMatcher(this.LA(1), EOF)
  }
public reset(this: MixedInParser): void {
this.resetLexerState()
this.subruleIdx = 0
this.isBackTrackingStack = []
this.errors = []
this.RULE_STACK = []
// TODO: extract a specific reset for TreeBuilder trait
this.CST_STACK = []
this.RULE_OCCURRENCE_STACK = []
}
}

View File

@@ -0,0 +1,471 @@
import {
createTokenInstance,
EOF,
tokenMatcher
} from "../../../scan/tokens_public"
import {
AbstractNextTerminalAfterProductionWalker,
IFirstAfterRepetition
} from "../../grammar/interpreter"
import isEmpty from "lodash/isEmpty"
import dropRight from "lodash/dropRight"
import flatten from "lodash/flatten"
import map from "lodash/map"
import find from "lodash/find"
import has from "lodash/has"
import includes from "lodash/includes"
import clone from "lodash/clone"
import {
IParserConfig,
IToken,
ITokenGrammarPath,
TokenType
} from "@chevrotain/types"
import { MismatchedTokenException } from "../../exceptions_public"
import { IN } from "../../constants"
import { MixedInParser } from "./parser_traits"
import { DEFAULT_PARSER_CONFIG } from "../parser"
// Sentinel follow-key used for the top-most rule on the stack;
// its follow set is simply [EOF] (see getFollowSetFromFollowKey).
export const EOF_FOLLOW_KEY: any = {}
/**
 * Identifies the follow set of a specific sub-rule invocation:
 * which rule was invoked (`ruleName`), at which occurrence index
 * (`idxInCallingRule`), and from inside which rule (`inRule`).
 */
export interface IFollowKey {
  ruleName: string
  idxInCallingRule: number
  inRule: string
}
// `name` value used to identify InRuleRecoveryException without relying on instanceof.
export const IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException"
// Internal control-flow exception: thrown when single-token insertion/deletion
// ("in-rule") recovery is not possible.
export class InRuleRecoveryException extends Error {
  constructor(message: string) {
    super(message)
    this.name = IN_RULE_RECOVERY_EXCEPTION
  }
}
/**
 * This trait is responsible for the error recovery and fault tolerant logic.
 * Two complementary strategies are implemented:
 * - "in-rule" recovery: single token insertion or deletion (tryInRuleRecovery).
 * - "re-sync" recovery: skipping tokens until a token from a follow set is
 *   reached (tryInRepetitionRecovery / reSyncTo).
 */
export class Recoverable {
  // true when the `recoveryEnabled` parser config option is on.
  recoveryEnabled: boolean
  // cache: lookahead key -> info about the first terminal after a repetition.
  firstAfterRepMap: Record<string, IFirstAfterRepetition>
  // maps a computed follow-key name (ruleName + idx + IN + inRule) to re-sync token types.
  resyncFollows: Record<string, TokenType[]>
  // Initializes recovery state from the parser config.
  initRecoverable(config: IParserConfig) {
    this.firstAfterRepMap = {}
    this.resyncFollows = {}
    this.recoveryEnabled = has(config, "recoveryEnabled")
      ? (config.recoveryEnabled as boolean) // assumes end user provides the correct config value/type
      : DEFAULT_PARSER_CONFIG.recoveryEnabled
    // performance optimization, NOOP will be inlined which
    // effectively means that this optional feature does not exist
    // when not used.
    if (this.recoveryEnabled) {
      this.attemptInRepetitionRecovery = attemptInRepetitionRecovery
    }
  }
  // Creates the synthetic token used for single-token-insertion recovery.
  // All position fields are NaN as the token does not exist in the input.
  public getTokenToInsert(tokType: TokenType): IToken {
    const tokToInsert = createTokenInstance(
      tokType,
      "",
      NaN,
      NaN,
      NaN,
      NaN,
      NaN,
      NaN
    )
    tokToInsert.isInsertedInRecovery = true
    return tokToInsert
  }
  // Extension hook: subclasses may veto insertion of specific token types.
  public canTokenTypeBeInsertedInRecovery(tokType: TokenType): boolean {
    return true
  }
  // Extension hook: subclasses may veto deletion of specific token types.
  public canTokenTypeBeDeletedInRecovery(tokType: TokenType): boolean {
    return true
  }
  /**
   * Attempts re-sync recovery inside a repetition: skips tokens until either
   * the expected token / another iteration becomes possible (success) or a
   * re-sync point from the follow sets is passed (failure -> state restored).
   */
  tryInRepetitionRecovery(
    this: MixedInParser,
    grammarRule: Function,
    grammarRuleArgs: any[],
    lookAheadFunc: () => boolean,
    expectedTokType: TokenType
  ): void {
    // TODO: can the resyncTokenType be cached?
    const reSyncTokType = this.findReSyncTokenType()
    const savedLexerState = this.exportLexerState()
    const resyncedTokens: IToken[] = []
    let passedResyncPoint = false
    const nextTokenWithoutResync = this.LA(1)
    let currToken = this.LA(1)
    const generateErrorMessage = () => {
      const previousToken = this.LA(0)
      // we are preemptively re-syncing before an error has been detected, therefor we must reproduce
      // the error that would have been thrown
      const msg = this.errorMessageProvider.buildMismatchTokenMessage({
        expected: expectedTokType,
        actual: nextTokenWithoutResync,
        previous: previousToken,
        ruleName: this.getCurrRuleFullName()
      })
      const error = new MismatchedTokenException(
        msg,
        nextTokenWithoutResync,
        this.LA(0)
      )
      // the first token here will be the original cause of the error, this is not part of the resyncedTokens property.
      error.resyncedTokens = dropRight(resyncedTokens)
      this.SAVE_ERROR(error)
    }
    while (!passedResyncPoint) {
      // re-synced to a point where we can safely exit the repetition/
      if (this.tokenMatcher(currToken, expectedTokType)) {
        generateErrorMessage()
        return // must return here to avoid reverting the inputIdx
      } else if (lookAheadFunc.call(this)) {
        // we skipped enough tokens so we can resync right back into another iteration of the repetition grammar rule
        generateErrorMessage()
        // recursive invocation in other to support multiple re-syncs in the same top level repetition grammar rule
        grammarRule.apply(this, grammarRuleArgs)
        return // must return here to avoid reverting the inputIdx
      } else if (this.tokenMatcher(currToken, reSyncTokType)) {
        passedResyncPoint = true
      } else {
        currToken = this.SKIP_TOKEN()
        this.addToResyncTokens(currToken, resyncedTokens)
      }
    }
    // we were unable to find a CLOSER point to resync inside the Repetition, reset the state.
    // The parsing exception we were trying to prevent will happen in the NEXT parsing step. it may be handled by
    // "between rules" resync recovery later in the flow.
    this.importLexerState(savedLexerState)
  }
  /**
   * Decides whether re-sync recovery inside a repetition is worth attempting.
   * Returns false when there is nothing to recover from, when backtracking,
   * or when the cheaper in-rule recovery would succeed instead.
   */
  shouldInRepetitionRecoveryBeTried(
    this: MixedInParser,
    expectTokAfterLastMatch: TokenType,
    nextTokIdx: number,
    notStuck: boolean | undefined
  ): boolean {
    // Edge case of arriving from a MANY repetition which is stuck
    // Attempting recovery in this case could cause an infinite loop
    if (notStuck === false) {
      return false
    }
    // no need to recover, next token is what we expect...
    if (this.tokenMatcher(this.LA(1), expectTokAfterLastMatch)) {
      return false
    }
    // error recovery is disabled during backtracking as it can make the parser ignore a valid grammar path
    // and prefer some backtracking path that includes recovered errors.
    if (this.isBackTracking()) {
      return false
    }
    // if we can perform inRule recovery (single token insertion or deletion) we always prefer that recovery algorithm
    // because if it works, it makes the least amount of changes to the input stream (greedy algorithm)
    //noinspection RedundantIfStatementJS
    if (
      this.canPerformInRuleRecovery(
        expectTokAfterLastMatch,
        this.getFollowsForInRuleRecovery(expectTokAfterLastMatch, nextTokIdx)
      )
    ) {
      return false
    }
    return true
  }
  // Error Recovery functionality
  // Computes the token types that may legally follow `tokType` at this point
  // in the grammar (used to validate single token insertion).
  getFollowsForInRuleRecovery(
    this: MixedInParser,
    tokType: TokenType,
    tokIdxInRule: number
  ): TokenType[] {
    const grammarPath = this.getCurrentGrammarPath(tokType, tokIdxInRule)
    const follows = this.getNextPossibleTokenTypes(grammarPath)
    return follows
  }
  /**
   * Performs in-rule recovery: prefers single token insertion, falls back to
   * single token deletion, and throws InRuleRecoveryException when neither applies.
   */
  tryInRuleRecovery(
    this: MixedInParser,
    expectedTokType: TokenType,
    follows: TokenType[]
  ): IToken {
    if (this.canRecoverWithSingleTokenInsertion(expectedTokType, follows)) {
      const tokToInsert = this.getTokenToInsert(expectedTokType)
      return tokToInsert
    }
    if (this.canRecoverWithSingleTokenDeletion(expectedTokType)) {
      const nextTok = this.SKIP_TOKEN()
      this.consumeToken()
      return nextTok
    }
    throw new InRuleRecoveryException("sad sad panda")
  }
  // true when either single token insertion or deletion could recover here.
  canPerformInRuleRecovery(
    this: MixedInParser,
    expectedToken: TokenType,
    follows: TokenType[]
  ): boolean {
    return (
      this.canRecoverWithSingleTokenInsertion(expectedToken, follows) ||
      this.canRecoverWithSingleTokenDeletion(expectedToken)
    )
  }
  // Single token insertion is possible when the mismatched token is a legal
  // follow of the (virtually inserted) expected token.
  canRecoverWithSingleTokenInsertion(
    this: MixedInParser,
    expectedTokType: TokenType,
    follows: TokenType[]
  ): boolean {
    if (!this.canTokenTypeBeInsertedInRecovery(expectedTokType)) {
      return false
    }
    // must know the possible following tokens to perform single token insertion
    if (isEmpty(follows)) {
      return false
    }
    const mismatchedTok = this.LA(1)
    const isMisMatchedTokInFollows =
      find(follows, (possibleFollowsTokType: TokenType) => {
        return this.tokenMatcher(mismatchedTok, possibleFollowsTokType)
      }) !== undefined
    return isMisMatchedTokInFollows
  }
  // Single token deletion is possible when the token AFTER the mismatched one
  // is exactly what was expected.
  canRecoverWithSingleTokenDeletion(
    this: MixedInParser,
    expectedTokType: TokenType
  ): boolean {
    if (!this.canTokenTypeBeDeletedInRecovery(expectedTokType)) {
      return false
    }
    const isNextTokenWhatIsExpected = this.tokenMatcher(
      this.LA(2),
      expectedTokType
    )
    return isNextTokenWhatIsExpected
  }
  // true when the given token type is in the current rule's re-sync follow set.
  isInCurrentRuleReSyncSet(
    this: MixedInParser,
    tokenTypeIdx: TokenType
  ): boolean {
    const followKey = this.getCurrFollowKey()
    const currentRuleReSyncSet = this.getFollowSetFromFollowKey(followKey)
    return includes(currentRuleReSyncSet, tokenTypeIdx)
  }
  // Scans forward through the input for the first token matching any
  // re-sync token type from the flattened follow sets.
  findReSyncTokenType(this: MixedInParser): TokenType {
    const allPossibleReSyncTokTypes = this.flattenFollowSet()
    // this loop will always terminate as EOF is always in the follow stack and also always (virtually) in the input
    let nextToken = this.LA(1)
    let k = 2
    while (true) {
      const foundMatch = find(allPossibleReSyncTokTypes, (resyncTokType) => {
        const canMatch = tokenMatcher(nextToken, resyncTokType)
        return canMatch
      })
      if (foundMatch !== undefined) {
        return foundMatch
      }
      nextToken = this.LA(k)
      k++
    }
  }
  // Builds the follow-key describing the current (innermost) sub-rule invocation.
  getCurrFollowKey(this: MixedInParser): IFollowKey {
    // the length is at least one as we always add the ruleName to the stack before invoking the rule.
    if (this.RULE_STACK.length === 1) {
      return EOF_FOLLOW_KEY
    }
    const currRuleShortName = this.getLastExplicitRuleShortName()
    const currRuleIdx = this.getLastExplicitRuleOccurrenceIndex()
    const prevRuleShortName = this.getPreviousExplicitRuleShortName()
    return {
      ruleName: this.shortRuleNameToFullName(currRuleShortName),
      idxInCallingRule: currRuleIdx,
      inRule: this.shortRuleNameToFullName(prevRuleShortName)
    }
  }
  // Builds one follow-key per entry in the rule stack (EOF key for the top-level rule).
  buildFullFollowKeyStack(this: MixedInParser): IFollowKey[] {
    const explicitRuleStack = this.RULE_STACK
    const explicitOccurrenceStack = this.RULE_OCCURRENCE_STACK
    return map(explicitRuleStack, (ruleName, idx) => {
      if (idx === 0) {
        return EOF_FOLLOW_KEY
      }
      return {
        ruleName: this.shortRuleNameToFullName(ruleName),
        idxInCallingRule: explicitOccurrenceStack[idx],
        inRule: this.shortRuleNameToFullName(explicitRuleStack[idx - 1])
      }
    })
  }
  // Union of the follow sets of every rule on the stack.
  flattenFollowSet(this: MixedInParser): TokenType[] {
    const followStack = map(this.buildFullFollowKeyStack(), (currKey) => {
      return this.getFollowSetFromFollowKey(currKey)
    })
    return <any>flatten(followStack)
  }
  // Resolves a follow-key to its pre-computed follow set (EOF for the sentinel key).
  getFollowSetFromFollowKey(
    this: MixedInParser,
    followKey: IFollowKey
  ): TokenType[] {
    if (followKey === EOF_FOLLOW_KEY) {
      return [EOF]
    }
    const followName =
      followKey.ruleName + followKey.idxInCallingRule + IN + followKey.inRule
    return this.resyncFollows[followName]
  }
  // It does not make any sense to include a virtual EOF token in the list of resynced tokens
  // as EOF does not really exist and thus does not contain any useful information (line/column numbers)
  addToResyncTokens(
    this: MixedInParser,
    token: IToken,
    resyncTokens: IToken[]
  ): IToken[] {
    if (!this.tokenMatcher(token, EOF)) {
      resyncTokens.push(token)
    }
    return resyncTokens
  }
  // Skips tokens until `tokType` is reached; returns the skipped tokens
  // (excluding the last one, which is not part of the error).
  reSyncTo(this: MixedInParser, tokType: TokenType): IToken[] {
    const resyncedTokens: IToken[] = []
    let nextTok = this.LA(1)
    while (this.tokenMatcher(nextTok, tokType) === false) {
      nextTok = this.SKIP_TOKEN()
      this.addToResyncTokens(nextTok, resyncedTokens)
    }
    // the last token is not part of the error.
    return dropRight(resyncedTokens)
  }
  attemptInRepetitionRecovery(
    this: MixedInParser,
    prodFunc: Function,
    args: any[],
    lookaheadFunc: () => boolean,
    dslMethodIdx: number,
    prodOccurrence: number,
    nextToksWalker: typeof AbstractNextTerminalAfterProductionWalker,
    notStuck?: boolean
  ): void {
    // by default this is a NO-OP
    // The actual implementation is with the function(not method) below
  }
  // Describes the current position in the grammar as a path of rule names,
  // occurrence indices and the last (expected) token.
  getCurrentGrammarPath(
    this: MixedInParser,
    tokType: TokenType,
    tokIdxInRule: number
  ): ITokenGrammarPath {
    const pathRuleStack: string[] = this.getHumanReadableRuleStack()
    const pathOccurrenceStack: number[] = clone(this.RULE_OCCURRENCE_STACK)
    const grammarPath: any = {
      ruleStack: pathRuleStack,
      occurrenceStack: pathOccurrenceStack,
      lastTok: tokType,
      lastTokOccurrence: tokIdxInRule
    }
    return grammarPath
  }
  // The rule stack with short-names resolved to full (string) rule names.
  getHumanReadableRuleStack(this: MixedInParser): string[] {
    return map(this.RULE_STACK, (currShortName) =>
      this.shortRuleNameToFullName(currShortName)
    )
  }
}
/**
 * The real (non NOOP) implementation of `Recoverable.attemptInRepetitionRecovery`;
 * it is assigned onto the parser instance by `initRecoverable` only when
 * recovery is enabled. Determines the terminal expected AFTER the repetition
 * and, when re-sync recovery looks worthwhile, delegates to
 * `tryInRepetitionRecovery`.
 */
export function attemptInRepetitionRecovery(
  this: MixedInParser,
  prodFunc: Function,
  args: any[],
  lookaheadFunc: () => boolean,
  dslMethodIdx: number,
  prodOccurrence: number,
  nextToksWalker: typeof AbstractNextTerminalAfterProductionWalker,
  notStuck?: boolean
): void {
  const key = this.getKeyForAutomaticLookahead(dslMethodIdx, prodOccurrence)
  let firstAfterRepInfo = this.firstAfterRepMap[key]
  // lazily compute (and cache) the first terminal after this repetition
  if (firstAfterRepInfo === undefined) {
    const currRuleName = this.getCurrRuleFullName()
    const ruleGrammar = this.getGAstProductions()[currRuleName]
    const walker: AbstractNextTerminalAfterProductionWalker =
      new nextToksWalker(ruleGrammar, prodOccurrence)
    firstAfterRepInfo = walker.startWalking()
    this.firstAfterRepMap[key] = firstAfterRepInfo
  }
  let expectTokAfterLastMatch = firstAfterRepInfo.token
  let nextTokIdx = firstAfterRepInfo.occurrence
  const isEndOfRule = firstAfterRepInfo.isEndOfRule
  // special edge case of a TOP most repetition after which the input should END.
  // this will force an attempt for inRule recovery in that scenario.
  if (
    this.RULE_STACK.length === 1 &&
    isEndOfRule &&
    expectTokAfterLastMatch === undefined
  ) {
    expectTokAfterLastMatch = EOF
    nextTokIdx = 1
  }
  // We don't have anything to re-sync to...
  // this condition was extracted from `shouldInRepetitionRecoveryBeTried` to act as a type-guard
  if (expectTokAfterLastMatch === undefined || nextTokIdx === undefined) {
    return
  }
  if (
    this.shouldInRepetitionRecoveryBeTried(
      expectTokAfterLastMatch,
      nextTokIdx,
      notStuck
    )
  ) {
    // TODO: performance optimization: instead of passing the original args here, we modify
    // the args param (or create a new one) and make sure the lookahead func is explicitly provided
    // to avoid searching the cache for it once more.
    this.tryInRepetitionRecovery(
      prodFunc,
      args,
      lookaheadFunc,
      expectTokAfterLastMatch
    )
  }
}

View File

@@ -0,0 +1,278 @@
import {
addNoneTerminalToCst,
addTerminalToCst,
setNodeLocationFull,
setNodeLocationOnlyOffset
} from "../../cst/cst"
import noop from "lodash/noop"
import has from "lodash/has"
import keys from "lodash/keys"
import isUndefined from "lodash/isUndefined"
import {
createBaseSemanticVisitorConstructor,
createBaseVisitorConstructorWithDefaults
} from "../../cst/cst_visitor"
import {
CstNode,
CstNodeLocation,
ICstVisitor,
IParserConfig,
IToken,
nodeLocationTrackingOptions
} from "@chevrotain/types"
import { MixedInParser } from "./parser_traits"
import { DEFAULT_PARSER_CONFIG } from "../parser"
/**
 * This trait is responsible for the CST building logic.
 * Most of its methods are dynamically replaced (with noop or specialized
 * variants) in `initTreeBuilder` depending on the `outputCst` and
 * `nodeLocationTracking` config options.
 */
export class TreeBuilder {
  // whether a CST should be built at all.
  outputCst: boolean
  // stack of CST nodes currently under construction (innermost last).
  CST_STACK: CstNode[]
  baseCstVisitorConstructor: Function
  baseCstVisitorWithDefaultsConstructor: Function
  // dynamically assigned Methods
  setNodeLocationFromNode: (
    nodeLocation: CstNodeLocation,
    locationInformation: CstNodeLocation
  ) => void
  setNodeLocationFromToken: (
    nodeLocation: CstNodeLocation,
    locationInformation: CstNodeLocation
  ) => void
  cstPostRule: (this: MixedInParser, ruleCstNode: CstNode) => void
  setInitialNodeLocation: (cstNode: CstNode) => void
  nodeLocationTracking: nodeLocationTrackingOptions
  // Wires up the method variants matching the chosen config:
  // "full" / "onlyOffset" / "none" location tracking, crossed with
  // recovery enabled/disabled (recovery requires per-token updates).
  initTreeBuilder(this: MixedInParser, config: IParserConfig) {
    this.CST_STACK = []
    // outputCst is no longer exposed/defined in the pubic API
    this.outputCst = (config as any).outputCst
    this.nodeLocationTracking = has(config, "nodeLocationTracking")
      ? (config.nodeLocationTracking as nodeLocationTrackingOptions) // assumes end user provides the correct config value/type
      : DEFAULT_PARSER_CONFIG.nodeLocationTracking
    if (!this.outputCst) {
      this.cstInvocationStateUpdate = noop
      this.cstFinallyStateUpdate = noop
      this.cstPostTerminal = noop
      this.cstPostNonTerminal = noop
      this.cstPostRule = noop
    } else {
      if (/full/i.test(this.nodeLocationTracking)) {
        if (this.recoveryEnabled) {
          this.setNodeLocationFromToken = setNodeLocationFull
          this.setNodeLocationFromNode = setNodeLocationFull
          this.cstPostRule = noop
          this.setInitialNodeLocation = this.setInitialNodeLocationFullRecovery
        } else {
          this.setNodeLocationFromToken = noop
          this.setNodeLocationFromNode = noop
          this.cstPostRule = this.cstPostRuleFull
          this.setInitialNodeLocation = this.setInitialNodeLocationFullRegular
        }
      } else if (/onlyOffset/i.test(this.nodeLocationTracking)) {
        if (this.recoveryEnabled) {
          this.setNodeLocationFromToken = <any>setNodeLocationOnlyOffset
          this.setNodeLocationFromNode = <any>setNodeLocationOnlyOffset
          this.cstPostRule = noop
          this.setInitialNodeLocation =
            this.setInitialNodeLocationOnlyOffsetRecovery
        } else {
          this.setNodeLocationFromToken = noop
          this.setNodeLocationFromNode = noop
          this.cstPostRule = this.cstPostRuleOnlyOffset
          this.setInitialNodeLocation =
            this.setInitialNodeLocationOnlyOffsetRegular
        }
      } else if (/none/i.test(this.nodeLocationTracking)) {
        this.setNodeLocationFromToken = noop
        this.setNodeLocationFromNode = noop
        this.cstPostRule = noop
        this.setInitialNodeLocation = noop
      } else {
        throw Error(
          `Invalid <nodeLocationTracking> config option: "${config.nodeLocationTracking}"`
        )
      }
    }
  }
  // "onlyOffset" + recovery: offsets start as NaN and are filled in token by token.
  setInitialNodeLocationOnlyOffsetRecovery(
    this: MixedInParser,
    cstNode: any
  ): void {
    cstNode.location = {
      startOffset: NaN,
      endOffset: NaN
    }
  }
  setInitialNodeLocationOnlyOffsetRegular(
    this: MixedInParser,
    cstNode: any
  ): void {
    cstNode.location = {
      // without error recovery the starting Location of a new CstNode is guaranteed
      // To be the next Token's startOffset (for valid inputs).
      // For invalid inputs there won't be any CSTOutput so this potential
      // inaccuracy does not matter
      startOffset: this.LA(1).startOffset,
      endOffset: NaN
    }
  }
  // "full" + recovery: all location fields start as NaN and are filled in token by token.
  setInitialNodeLocationFullRecovery(this: MixedInParser, cstNode: any): void {
    cstNode.location = {
      startOffset: NaN,
      startLine: NaN,
      startColumn: NaN,
      endOffset: NaN,
      endLine: NaN,
      endColumn: NaN
    }
  }
  /**
   * @see setInitialNodeLocationOnlyOffsetRegular for explanation why this work
   * @param cstNode
   */
  setInitialNodeLocationFullRegular(this: MixedInParser, cstNode: any): void {
    const nextToken = this.LA(1)
    cstNode.location = {
      startOffset: nextToken.startOffset,
      startLine: nextToken.startLine,
      startColumn: nextToken.startColumn,
      endOffset: NaN,
      endLine: NaN,
      endColumn: NaN
    }
  }
  // Opens a new CST node for a rule invocation and pushes it on the stack.
  cstInvocationStateUpdate(this: MixedInParser, fullRuleName: string): void {
    const cstNode: CstNode = {
      name: fullRuleName,
      children: Object.create(null)
    }
    this.setInitialNodeLocation(cstNode)
    this.CST_STACK.push(cstNode)
  }
  // Closes the current rule's CST node.
  cstFinallyStateUpdate(this: MixedInParser): void {
    this.CST_STACK.pop()
  }
  // Finalizes a rule node's full location from the last consumed token.
  cstPostRuleFull(this: MixedInParser, ruleCstNode: CstNode): void {
    // casts to `required<CstNodeLocation>` are safe because `cstPostRuleFull` should only be invoked when full location is enabled
    const prevToken = this.LA(0) as Required<CstNodeLocation>
    const loc = ruleCstNode.location as Required<CstNodeLocation>
    // If this condition is true it means we consumed at least one Token
    // In this CstNode.
    if (loc.startOffset <= prevToken.startOffset === true) {
      loc.endOffset = prevToken.endOffset
      loc.endLine = prevToken.endLine
      loc.endColumn = prevToken.endColumn
    }
    // "empty" CstNode edge case
    else {
      loc.startOffset = NaN
      loc.startLine = NaN
      loc.startColumn = NaN
    }
  }
  // Finalizes a rule node's offset-only location from the last consumed token.
  cstPostRuleOnlyOffset(this: MixedInParser, ruleCstNode: CstNode): void {
    const prevToken = this.LA(0)
    // `location' is not null because `cstPostRuleOnlyOffset` will only be invoked when location tracking is enabled.
    const loc = ruleCstNode.location!
    // If this condition is true it means we consumed at least one Token
    // In this CstNode.
    if (loc.startOffset <= prevToken.startOffset === true) {
      loc.endOffset = prevToken.endOffset
    }
    // "empty" CstNode edge case
    else {
      loc.startOffset = NaN
    }
  }
  // Records a consumed token under `key` in the current CST node.
  cstPostTerminal(
    this: MixedInParser,
    key: string,
    consumedToken: IToken
  ): void {
    const rootCst = this.CST_STACK[this.CST_STACK.length - 1]
    addTerminalToCst(rootCst, consumedToken, key)
    // This is only used when **both** error recovery and CST Output are enabled.
    this.setNodeLocationFromToken(rootCst.location!, <any>consumedToken)
  }
  // Records a finished sub-rule's CST node under `ruleName` in the parent node.
  cstPostNonTerminal(
    this: MixedInParser,
    ruleCstResult: CstNode,
    ruleName: string
  ): void {
    const preCstNode = this.CST_STACK[this.CST_STACK.length - 1]
    addNoneTerminalToCst(preCstNode, ruleName, ruleCstResult)
    // This is only used when **both** error recovery and CST Output are enabled.
    this.setNodeLocationFromNode(preCstNode.location!, ruleCstResult.location!)
  }
  // Lazily creates (and caches) the semantic CST visitor base class.
  getBaseCstVisitorConstructor<IN = any, OUT = any>(
    this: MixedInParser
  ): {
    new (...args: any[]): ICstVisitor<IN, OUT>
  } {
    if (isUndefined(this.baseCstVisitorConstructor)) {
      const newBaseCstVisitorConstructor = createBaseSemanticVisitorConstructor(
        this.className,
        keys(this.gastProductionsCache)
      )
      this.baseCstVisitorConstructor = newBaseCstVisitorConstructor
      return newBaseCstVisitorConstructor
    }
    return <any>this.baseCstVisitorConstructor
  }
  // Lazily creates (and caches) the CST visitor base class with default visit methods.
  getBaseCstVisitorConstructorWithDefaults<IN = any, OUT = any>(
    this: MixedInParser
  ): {
    new (...args: any[]): ICstVisitor<IN, OUT>
  } {
    if (isUndefined(this.baseCstVisitorWithDefaultsConstructor)) {
      const newConstructor = createBaseVisitorConstructorWithDefaults(
        this.className,
        keys(this.gastProductionsCache),
        this.getBaseCstVisitorConstructor()
      )
      this.baseCstVisitorWithDefaultsConstructor = newConstructor
      return newConstructor
    }
    return <any>this.baseCstVisitorWithDefaultsConstructor
  }
  // Numeric short-name of the rule currently on top of the rule stack.
  getLastExplicitRuleShortName(this: MixedInParser): number {
    const ruleStack = this.RULE_STACK
    return ruleStack[ruleStack.length - 1]
  }
  // Numeric short-name of the rule that invoked the current rule.
  getPreviousExplicitRuleShortName(this: MixedInParser): number {
    const ruleStack = this.RULE_STACK
    return ruleStack[ruleStack.length - 2]
  }
  // Occurrence index of the current rule invocation inside its caller.
  getLastExplicitRuleOccurrenceIndex(this: MixedInParser): number {
    const occurrenceStack = this.RULE_OCCURRENCE_STACK
    return occurrenceStack[occurrenceStack.length - 1]
  }
}

View File

@@ -0,0 +1,16 @@
/**
* Helper common type definitions
 * Particularly useful when extending the public API
* to include additional **internal** properties.
*/
import { IParserConfig, ParserMethod } from "@chevrotain/types"
/**
 * A ParserMethod augmented with internal bookkeeping properties
 * the parser attaches when the rule is defined.
 */
export type ParserMethodInternal<ARGS extends unknown[], R> = ParserMethod<
  ARGS,
  R
> & {
  ruleName: string
  originalGrammarAction: Function
}
/** IParserConfig with the internal `outputCst` flag made mandatory. */
export type IParserConfigInternal = IParserConfig & { outputCst: boolean }

View File

@@ -0,0 +1,28 @@
/**
 * Copies every own prototype member (method or accessor) of each base
 * constructor onto the derived constructor's prototype.
 * Accessors are transferred via their property descriptors so getters and
 * setters are preserved rather than being invoked during the copy.
 * The `constructor` property is intentionally skipped.
 */
export function applyMixins(derivedCtor: any, baseCtors: any[]) {
  for (const baseCtor of baseCtors) {
    const baseProto = baseCtor.prototype
    for (const propName of Object.getOwnPropertyNames(baseProto)) {
      if (propName === "constructor") {
        continue
      }
      const descriptor = Object.getOwnPropertyDescriptor(baseProto, propName)
      // Handle Accessors
      if (descriptor && (descriptor.get || descriptor.set)) {
        Object.defineProperty(derivedCtor.prototype, propName, descriptor)
      } else {
        derivedCtor.prototype[propName] = baseProto[propName]
      }
    }
  }
}

1169
_node_modules/chevrotain/src/scan/lexer.ts generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,21 @@
import { ILexerErrorMessageProvider, IToken } from "@chevrotain/types"
/**
 * Default error-message builders used by the Lexer when no custom
 * ILexerErrorMessageProvider is supplied in the lexer configuration.
 */
export const defaultLexerErrorProvider: ILexerErrorMessageProvider = {
  buildUnableToPopLexerModeMessage(token: IToken): string {
    return `Unable to pop Lexer Mode after encountering Token ->${token.image}<- The Mode Stack is empty`
  },
  buildUnexpectedCharactersMessage(
    fullText: string,
    startOffset: number,
    length: number,
    line?: number,
    column?: number
  ): string {
    const badChar = fullText.charAt(startOffset)
    return `unexpected character: ->${badChar}<- at offset: ${startOffset}, skipped ${length} characters.`
  }
}

View File

@@ -0,0 +1,969 @@
import {
analyzeTokenTypes,
charCodeToOptimizedIndex,
cloneEmptyGroups,
DEFAULT_MODE,
IAnalyzeResult,
IPatternConfig,
LineTerminatorOptimizedTester,
performRuntimeChecks,
performWarningRuntimeChecks,
SUPPORT_STICKY,
validatePatterns
} from "./lexer"
import noop from "lodash/noop"
import isEmpty from "lodash/isEmpty"
import isArray from "lodash/isArray"
import last from "lodash/last"
import reject from "lodash/reject"
import map from "lodash/map"
import forEach from "lodash/forEach"
import keys from "lodash/keys"
import isUndefined from "lodash/isUndefined"
import identity from "lodash/identity"
import assign from "lodash/assign"
import reduce from "lodash/reduce"
import clone from "lodash/clone"
import { PRINT_WARNING, timer, toFastProperties } from "@chevrotain/utils"
import { augmentTokenTypes } from "./tokens"
import {
CustomPatternMatcherFunc,
CustomPatternMatcherReturn,
ILexerConfig,
ILexerDefinitionError,
ILexingError,
IMultiModeLexerDefinition,
IToken,
TokenType
} from "@chevrotain/types"
import { defaultLexerErrorProvider } from "./lexer_errors_public"
import { clearRegExpParserCache } from "./reg_exp_parser"
export interface ILexingResult {
tokens: IToken[]
groups: { [groupName: string]: IToken[] }
errors: ILexingError[]
}
export enum LexerDefinitionErrorType {
MISSING_PATTERN,
INVALID_PATTERN,
EOI_ANCHOR_FOUND,
UNSUPPORTED_FLAGS_FOUND,
DUPLICATE_PATTERNS_FOUND,
INVALID_GROUP_TYPE_FOUND,
PUSH_MODE_DOES_NOT_EXIST,
MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE,
MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY,
MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST,
LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED,
SOI_ANCHOR_FOUND,
EMPTY_MATCH_PATTERN,
NO_LINE_BREAKS_FLAGS,
UNREACHABLE_PATTERN,
IDENTIFY_TERMINATOR,
CUSTOM_LINE_BREAK,
MULTI_MODE_LEXER_LONGER_ALT_NOT_IN_CURRENT_MODE
}
export interface IRegExpExec {
exec: CustomPatternMatcherFunc
}
const DEFAULT_LEXER_CONFIG: Required<ILexerConfig> = {
deferDefinitionErrorsHandling: false,
positionTracking: "full",
lineTerminatorsPattern: /\n|\r\n?/g,
lineTerminatorCharacters: ["\n", "\r"],
ensureOptimizations: false,
safeMode: false,
errorMessageProvider: defaultLexerErrorProvider,
traceInitPerf: false,
skipValidations: false,
recoveryEnabled: true
}
Object.freeze(DEFAULT_LEXER_CONFIG)
export class Lexer {
  // Group marker: tokens matched by a SKIPPED pattern are discarded entirely.
  public static SKIPPED =
    "This marks a skipped Token pattern, this means each token identified by it will" +
    "be consumed and then thrown into oblivion, this can be used to for example to completely ignore whitespace."
  // Pattern marker for TokenTypes that are never matched directly.
  public static NA = /NOT_APPLICABLE/
  public lexerDefinitionErrors: ILexerDefinitionError[] = []
  public lexerDefinitionWarning: ILexerDefinitionError[] = []
  // Per-mode ordered list of analyzed pattern configurations.
  protected patternIdxToConfig: Record<string, IPatternConfig[]> = {}
  // Per-mode "first char" optimization table: char-code bucket -> candidate patterns.
  protected charCodeToPatternIdxToConfig: {
    [modeName: string]: { [charCode: number]: IPatternConfig[] }
  } = {}
  protected modes: string[] = []
  protected defaultMode!: string
  protected emptyGroups: { [groupName: string]: IToken } = {}
  private config: Required<ILexerConfig>
  private trackStartLines: boolean = true
  private trackEndLines: boolean = true
  // true when at least one TokenType uses a custom (function/exec) pattern.
  private hasCustom: boolean = false
  private canModeBeOptimized: Record<string, boolean> = {}
  // Perf tracing state (see TRACE_INIT below).
  private traceInitPerf!: boolean | number
  private traceInitMaxIdent!: number
  private traceInitIndent: number
  /**
   * @param lexerDefinition - A single mode (array of TokenTypes) or a full
   *        IMultiModeLexerDefinition.
   * @param config - Lexer configuration, merged over DEFAULT_LEXER_CONFIG.
   * @throws When the definition contains errors (unless
   *         `deferDefinitionErrorsHandling` is enabled) or when invalid /
   *         mutually exclusive config options are detected.
   */
  constructor(
    protected lexerDefinition: TokenType[] | IMultiModeLexerDefinition,
    config: ILexerConfig = DEFAULT_LEXER_CONFIG
  ) {
    if (typeof config === "boolean") {
      throw Error(
        "The second argument to the Lexer constructor is now an ILexerConfig Object.\n" +
          "a boolean 2nd argument is no longer supported"
      )
    }
    // todo: defaults func?
    this.config = assign({}, DEFAULT_LEXER_CONFIG, config) as any
    const traceInitVal = this.config.traceInitPerf
    if (traceInitVal === true) {
      this.traceInitMaxIdent = Infinity
      this.traceInitPerf = true
    } else if (typeof traceInitVal === "number") {
      this.traceInitMaxIdent = traceInitVal
      this.traceInitPerf = true
    }
    this.traceInitIndent = -1
    this.TRACE_INIT("Lexer Constructor", () => {
      let actualDefinition!: IMultiModeLexerDefinition
      let hasOnlySingleMode = true
      this.TRACE_INIT("Lexer Config handling", () => {
        if (
          this.config.lineTerminatorsPattern ===
          DEFAULT_LEXER_CONFIG.lineTerminatorsPattern
        ) {
          // optimized built-in implementation for the defaults definition of lineTerminators
          this.config.lineTerminatorsPattern = LineTerminatorOptimizedTester
        } else {
          if (
            this.config.lineTerminatorCharacters ===
            DEFAULT_LEXER_CONFIG.lineTerminatorCharacters
          ) {
            throw Error(
              "Error: Missing <lineTerminatorCharacters> property on the Lexer config.\n" +
                "\tFor details See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#MISSING_LINE_TERM_CHARS"
            )
          }
        }
        if (config.safeMode && config.ensureOptimizations) {
          throw Error(
            '"safeMode" and "ensureOptimizations" flags are mutually exclusive.'
          )
        }
        this.trackStartLines = /full|onlyStart/i.test(
          this.config.positionTracking
        )
        this.trackEndLines = /full/i.test(this.config.positionTracking)
        // Convert SingleModeLexerDefinition into a IMultiModeLexerDefinition.
        if (isArray(lexerDefinition)) {
          actualDefinition = {
            modes: { defaultMode: clone(lexerDefinition) },
            defaultMode: DEFAULT_MODE
          }
        } else {
          // no conversion needed, input should already be a IMultiModeLexerDefinition
          hasOnlySingleMode = false
          actualDefinition = clone(<IMultiModeLexerDefinition>lexerDefinition)
        }
      })
      if (this.config.skipValidations === false) {
        this.TRACE_INIT("performRuntimeChecks", () => {
          this.lexerDefinitionErrors = this.lexerDefinitionErrors.concat(
            performRuntimeChecks(
              actualDefinition,
              this.trackStartLines,
              this.config.lineTerminatorCharacters
            )
          )
        })
        this.TRACE_INIT("performWarningRuntimeChecks", () => {
          this.lexerDefinitionWarning = this.lexerDefinitionWarning.concat(
            performWarningRuntimeChecks(
              actualDefinition,
              this.trackStartLines,
              this.config.lineTerminatorCharacters
            )
          )
        })
      }
      // for extra robustness to avoid throwing a non-informative error message
      actualDefinition.modes = actualDefinition.modes
        ? actualDefinition.modes
        : {}
      // an error of undefined TokenTypes will be detected in "performRuntimeChecks" above.
      // this transformation is to increase robustness in the case of partially invalid lexer definition.
      forEach(actualDefinition.modes, (currModeValue, currModeName) => {
        actualDefinition.modes[currModeName] = reject<TokenType>(
          currModeValue,
          (currTokType) => isUndefined(currTokType)
        )
      })
      const allModeNames = keys(actualDefinition.modes)
      forEach(
        actualDefinition.modes,
        (currModDef: TokenType[], currModName) => {
          this.TRACE_INIT(`Mode: <${currModName}> processing`, () => {
            this.modes.push(currModName)
            if (this.config.skipValidations === false) {
              this.TRACE_INIT(`validatePatterns`, () => {
                this.lexerDefinitionErrors = this.lexerDefinitionErrors.concat(
                  validatePatterns(currModDef, allModeNames)
                )
              })
            }
            // If definition errors were encountered, the analysis phase may fail unexpectedly.
            // Considering a lexer with definition errors may never be used, there is no point
            // to performing the analysis anyhow...
            if (isEmpty(this.lexerDefinitionErrors)) {
              augmentTokenTypes(currModDef)
              let currAnalyzeResult!: IAnalyzeResult
              this.TRACE_INIT(`analyzeTokenTypes`, () => {
                currAnalyzeResult = analyzeTokenTypes(currModDef, {
                  lineTerminatorCharacters:
                    this.config.lineTerminatorCharacters,
                  positionTracking: config.positionTracking,
                  ensureOptimizations: config.ensureOptimizations,
                  safeMode: config.safeMode,
                  tracer: this.TRACE_INIT
                })
              })
              this.patternIdxToConfig[currModName] =
                currAnalyzeResult.patternIdxToConfig
              this.charCodeToPatternIdxToConfig[currModName] =
                currAnalyzeResult.charCodeToPatternIdxToConfig
              this.emptyGroups = assign(
                {},
                this.emptyGroups,
                currAnalyzeResult.emptyGroups
              ) as any
              this.hasCustom = currAnalyzeResult.hasCustom || this.hasCustom
              this.canModeBeOptimized[currModName] =
                currAnalyzeResult.canBeOptimized
            }
          })
        }
      )
      this.defaultMode = actualDefinition.defaultMode
      if (
        !isEmpty(this.lexerDefinitionErrors) &&
        !this.config.deferDefinitionErrorsHandling
      ) {
        const allErrMessages = map(this.lexerDefinitionErrors, (error) => {
          return error.message
        })
        const allErrMessagesString = allErrMessages.join(
          "-----------------------\n"
        )
        throw new Error(
          "Errors detected in definition of Lexer:\n" + allErrMessagesString
        )
      }
      // Only print warnings if there are no errors, to avoid polluting the output
      // when the definition errors above will be thrown anyhow.
      forEach(this.lexerDefinitionWarning, (warningDescriptor) => {
        PRINT_WARNING(warningDescriptor.message)
      })
      this.TRACE_INIT("Choosing sub-methods implementations", () => {
        // Choose the relevant internal implementations for this specific parser.
        // These implementations should be in-lined by the JavaScript engine
        // to provide optimal performance in each scenario.
        if (SUPPORT_STICKY) {
          this.chopInput = <any>identity
          this.match = this.matchWithTest
        } else {
          this.updateLastIndex = noop
          this.match = this.matchWithExec
        }
        if (hasOnlySingleMode) {
          this.handleModes = noop
        }
        if (this.trackStartLines === false) {
          this.computeNewColumn = identity
        }
        if (this.trackEndLines === false) {
          this.updateTokenEndLineColumnLocation = noop
        }
        if (/full/i.test(this.config.positionTracking)) {
          this.createTokenInstance = this.createFullToken
        } else if (/onlyStart/i.test(this.config.positionTracking)) {
          this.createTokenInstance = this.createStartOnlyToken
        } else if (/onlyOffset/i.test(this.config.positionTracking)) {
          this.createTokenInstance = this.createOffsetOnlyToken
        } else {
          throw Error(
            `Invalid <positionTracking> config option: "${this.config.positionTracking}"`
          )
        }
        if (this.hasCustom) {
          this.addToken = this.addTokenUsingPush
          this.handlePayload = this.handlePayloadWithCustom
        } else {
          this.addToken = this.addTokenUsingMemberAccess
          this.handlePayload = this.handlePayloadNoCustom
        }
      })
      this.TRACE_INIT("Failed Optimization Warnings", () => {
        const unOptimizedModes = reduce(
          this.canModeBeOptimized,
          (cannotBeOptimized, canBeOptimized, modeName) => {
            if (canBeOptimized === false) {
              cannotBeOptimized.push(modeName)
            }
            return cannotBeOptimized
          },
          [] as string[]
        )
        if (config.ensureOptimizations && !isEmpty(unOptimizedModes)) {
          throw Error(
            `Lexer Modes: < ${unOptimizedModes.join(
              ", "
            )} > cannot be optimized.\n` +
              '\t Disable the "ensureOptimizations" lexer config flag to silently ignore this and run the lexer in an un-optimized mode.\n' +
              "\t Or inspect the console log for details on how to resolve these issues."
          )
        }
      })
      this.TRACE_INIT("clearRegExpParserCache", () => {
        clearRegExpParserCache()
      })
      this.TRACE_INIT("toFastProperties", () => {
        toFastProperties(this)
      })
    })
  }
public tokenize(
text: string,
initialMode: string = this.defaultMode
): ILexingResult {
if (!isEmpty(this.lexerDefinitionErrors)) {
const allErrMessages = map(this.lexerDefinitionErrors, (error) => {
return error.message
})
const allErrMessagesString = allErrMessages.join(
"-----------------------\n"
)
throw new Error(
"Unable to Tokenize because Errors detected in definition of Lexer:\n" +
allErrMessagesString
)
}
return this.tokenizeInternal(text, initialMode)
}
// There is quite a bit of duplication between this and "tokenizeInternalLazy"
// This is intentional due to performance considerations.
// this method also used quite a bit of `!` none null assertions because it is too optimized
// for `tsc` to always understand it is "safe"
private tokenizeInternal(text: string, initialMode: string): ILexingResult {
let i,
j,
k,
matchAltImage,
longerAlt,
matchedImage: string | null,
payload,
altPayload,
imageLength,
group,
tokType,
newToken: IToken,
errLength,
droppedChar,
msg,
match
const orgText = text
const orgLength = orgText.length
let offset = 0
let matchedTokensIndex = 0
// initializing the tokensArray to the "guessed" size.
// guessing too little will still reduce the number of array re-sizes on pushes.
// guessing too large (Tested by guessing x4 too large) may cost a bit more of memory
// but would still have a faster runtime by avoiding (All but one) array resizing.
const guessedNumberOfTokens = this.hasCustom
? 0 // will break custom token pattern APIs the matchedTokens array will contain undefined elements.
: Math.floor(text.length / 10)
const matchedTokens = new Array(guessedNumberOfTokens)
const errors: ILexingError[] = []
let line = this.trackStartLines ? 1 : undefined
let column = this.trackStartLines ? 1 : undefined
const groups: any = cloneEmptyGroups(this.emptyGroups)
const trackLines = this.trackStartLines
const lineTerminatorPattern = this.config.lineTerminatorsPattern
let currModePatternsLength = 0
let patternIdxToConfig: IPatternConfig[] = []
let currCharCodeToPatternIdxToConfig: {
[charCode: number]: IPatternConfig[]
} = []
const modeStack: string[] = []
const emptyArray: IPatternConfig[] = []
Object.freeze(emptyArray)
let getPossiblePatterns!: (charCode: number) => IPatternConfig[]
function getPossiblePatternsSlow() {
return patternIdxToConfig
}
function getPossiblePatternsOptimized(charCode: number): IPatternConfig[] {
const optimizedCharIdx = charCodeToOptimizedIndex(charCode)
const possiblePatterns =
currCharCodeToPatternIdxToConfig[optimizedCharIdx]
if (possiblePatterns === undefined) {
return emptyArray
} else {
return possiblePatterns
}
}
const pop_mode = (popToken: IToken) => {
// TODO: perhaps avoid this error in the edge case there is no more input?
if (
modeStack.length === 1 &&
// if we have both a POP_MODE and a PUSH_MODE this is in-fact a "transition"
// So no error should occur.
popToken.tokenType.PUSH_MODE === undefined
) {
// if we try to pop the last mode there lexer will no longer have ANY mode.
// thus the pop is ignored, an error will be created and the lexer will continue parsing in the previous mode.
const msg =
this.config.errorMessageProvider.buildUnableToPopLexerModeMessage(
popToken
)
errors.push({
offset: popToken.startOffset,
line: popToken.startLine,
column: popToken.startColumn,
length: popToken.image.length,
message: msg
})
} else {
modeStack.pop()
const newMode = last(modeStack)!
patternIdxToConfig = this.patternIdxToConfig[newMode]
currCharCodeToPatternIdxToConfig =
this.charCodeToPatternIdxToConfig[newMode]
currModePatternsLength = patternIdxToConfig.length
const modeCanBeOptimized =
this.canModeBeOptimized[newMode] && this.config.safeMode === false
if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
getPossiblePatterns = getPossiblePatternsOptimized
} else {
getPossiblePatterns = getPossiblePatternsSlow
}
}
}
function push_mode(this: Lexer, newMode: string) {
modeStack.push(newMode)
currCharCodeToPatternIdxToConfig =
this.charCodeToPatternIdxToConfig[newMode]
patternIdxToConfig = this.patternIdxToConfig[newMode]
currModePatternsLength = patternIdxToConfig.length
currModePatternsLength = patternIdxToConfig.length
const modeCanBeOptimized =
this.canModeBeOptimized[newMode] && this.config.safeMode === false
if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
getPossiblePatterns = getPossiblePatternsOptimized
} else {
getPossiblePatterns = getPossiblePatternsSlow
}
}
// this pattern seems to avoid a V8 de-optimization, although that de-optimization does not
// seem to matter performance wise.
push_mode.call(this, initialMode)
let currConfig!: IPatternConfig
const recoveryEnabled = this.config.recoveryEnabled
while (offset < orgLength) {
matchedImage = null
const nextCharCode = orgText.charCodeAt(offset)
const chosenPatternIdxToConfig = getPossiblePatterns(nextCharCode)
const chosenPatternsLength = chosenPatternIdxToConfig.length
for (i = 0; i < chosenPatternsLength; i++) {
currConfig = chosenPatternIdxToConfig[i]
const currPattern = currConfig.pattern
payload = null
// manually in-lined because > 600 chars won't be in-lined in V8
const singleCharCode = currConfig.short
if (singleCharCode !== false) {
if (nextCharCode === singleCharCode) {
// single character string
matchedImage = currPattern as string
}
} else if (currConfig.isCustom === true) {
match = (currPattern as IRegExpExec).exec(
orgText,
offset,
matchedTokens,
groups
)
if (match !== null) {
matchedImage = match[0]
if ((match as CustomPatternMatcherReturn).payload !== undefined) {
payload = (match as CustomPatternMatcherReturn).payload
}
} else {
matchedImage = null
}
} else {
this.updateLastIndex(currPattern as RegExp, offset)
matchedImage = this.match(currPattern as RegExp, text, offset)
}
if (matchedImage !== null) {
// even though this pattern matched we must try a another longer alternative.
// this can be used to prioritize keywords over identifiers
longerAlt = currConfig.longerAlt
if (longerAlt !== undefined) {
// TODO: micro optimize, avoid extra prop access
// by saving/linking longerAlt on the original config?
const longerAltLength = longerAlt.length
for (k = 0; k < longerAltLength; k++) {
const longerAltConfig = patternIdxToConfig[longerAlt[k]]
const longerAltPattern = longerAltConfig.pattern
altPayload = null
// single Char can never be a longer alt so no need to test it.
// manually in-lined because > 600 chars won't be in-lined in V8
if (longerAltConfig.isCustom === true) {
match = (longerAltPattern as IRegExpExec).exec(
orgText,
offset,
matchedTokens,
groups
)
if (match !== null) {
matchAltImage = match[0]
if (
(match as CustomPatternMatcherReturn).payload !== undefined
) {
altPayload = (match as CustomPatternMatcherReturn).payload
}
} else {
matchAltImage = null
}
} else {
this.updateLastIndex(longerAltPattern as RegExp, offset)
matchAltImage = this.match(
longerAltPattern as RegExp,
text,
offset
)
}
if (matchAltImage && matchAltImage.length > matchedImage.length) {
matchedImage = matchAltImage
payload = altPayload
currConfig = longerAltConfig
// Exit the loop early after matching one of the longer alternatives
// The first matched alternative takes precedence
break
}
}
}
break
}
}
// successful match
if (matchedImage !== null) {
imageLength = matchedImage.length
group = currConfig.group
if (group !== undefined) {
tokType = currConfig.tokenTypeIdx
// TODO: "offset + imageLength" and the new column may be computed twice in case of "full" location information inside
// createFullToken method
newToken = this.createTokenInstance(
matchedImage,
offset,
tokType,
currConfig.tokenType,
line,
column,
imageLength
)
this.handlePayload(newToken, payload)
// TODO: optimize NOOP in case there are no special groups?
if (group === false) {
matchedTokensIndex = this.addToken(
matchedTokens,
matchedTokensIndex,
newToken
)
} else {
groups[group].push(newToken)
}
}
text = this.chopInput(text, imageLength)
offset = offset + imageLength
// TODO: with newlines the column may be assigned twice
column = this.computeNewColumn(column!, imageLength)
if (trackLines === true && currConfig.canLineTerminator === true) {
let numOfLTsInMatch = 0
let foundTerminator
let lastLTEndOffset: number
lineTerminatorPattern.lastIndex = 0
do {
foundTerminator = lineTerminatorPattern.test(matchedImage)
if (foundTerminator === true) {
lastLTEndOffset = lineTerminatorPattern.lastIndex - 1
numOfLTsInMatch++
}
} while (foundTerminator === true)
if (numOfLTsInMatch !== 0) {
line = line! + numOfLTsInMatch
column = imageLength - lastLTEndOffset!
this.updateTokenEndLineColumnLocation(
newToken!,
group!,
lastLTEndOffset!,
numOfLTsInMatch,
line,
column,
imageLength
)
}
}
// will be NOOP if no modes present
this.handleModes(currConfig, pop_mode, push_mode, newToken!)
} else {
// error recovery, drop characters until we identify a valid token's start point
const errorStartOffset = offset
const errorLine = line
const errorColumn = column
let foundResyncPoint = recoveryEnabled === false
while (foundResyncPoint === false && offset < orgLength) {
// Identity Func (when sticky flag is enabled)
text = this.chopInput(text, 1)
offset++
for (j = 0; j < currModePatternsLength; j++) {
const currConfig = patternIdxToConfig[j]
const currPattern = currConfig.pattern
// manually in-lined because > 600 chars won't be in-lined in V8
const singleCharCode = currConfig.short
if (singleCharCode !== false) {
if (orgText.charCodeAt(offset) === singleCharCode) {
// single character string
foundResyncPoint = true
}
} else if (currConfig.isCustom === true) {
foundResyncPoint =
(currPattern as IRegExpExec).exec(
orgText,
offset,
matchedTokens,
groups
) !== null
} else {
this.updateLastIndex(currPattern as RegExp, offset)
foundResyncPoint = (currPattern as RegExp).exec(text) !== null
}
if (foundResyncPoint === true) {
break
}
}
}
errLength = offset - errorStartOffset
// at this point we either re-synced or reached the end of the input text
msg = this.config.errorMessageProvider.buildUnexpectedCharactersMessage(
orgText,
errorStartOffset,
errLength,
errorLine,
errorColumn
)
errors.push({
offset: errorStartOffset,
line: errorLine,
column: errorColumn,
length: errLength,
message: msg
})
if (recoveryEnabled === false) {
break
}
}
}
// if we do have custom patterns which push directly into the
// TODO: custom tokens should not push directly??
if (!this.hasCustom) {
// if we guessed a too large size for the tokens array this will shrink it to the right size.
matchedTokens.length = matchedTokensIndex
}
return {
tokens: matchedTokens,
groups: groups,
errors: errors
}
}
  /**
   * Applies the mode-stack changes (pop and/or push) requested by the
   * just-matched token's configuration.
   * Replaced with `noop` at construction time for single-mode lexers.
   */
  private handleModes(
    config: IPatternConfig,
    pop_mode: (tok: IToken) => void,
    push_mode: (this: Lexer, pushMode: string) => void,
    newToken: IToken
  ) {
    if (config.pop === true) {
      // need to save the PUSH_MODE property as if the mode is popped
      // patternIdxToPopMode is updated to reflect the new mode after popping the stack
      const pushMode = config.push
      pop_mode(newToken)
      if (pushMode !== undefined) {
        push_mode.call(this, pushMode)
      }
    } else if (config.push !== undefined) {
      push_mode.call(this, config.push)
    }
  }
  // Drops `length` chars from the start of the remaining input.
  // Replaced with `identity` at construction time when sticky regexps are supported.
  private chopInput(text: string, length: number): string {
    return text.substring(length)
  }
  // Advances a (sticky) regexp's matching position.
  // Replaced with `noop` at construction time when sticky regexps are unsupported.
  private updateLastIndex(regExp: RegExp, newLastIndex: number): void {
    regExp.lastIndex = newLastIndex
  }
  // TODO: decrease this under 600 characters? inspect stripping comments option in TSC compiler
  /**
   * Adjusts the endLine/endColumn of a token whose image contains line
   * terminators. Replaced with `noop` when end-line tracking is disabled.
   */
  private updateTokenEndLineColumnLocation(
    newToken: IToken,
    group: string | false,
    lastLTIdx: number,
    numOfLTsInMatch: number,
    line: number,
    column: number,
    imageLength: number
  ): void {
    let lastCharIsLT, fixForEndingInLT
    if (group !== undefined) {
      // a none skipped multi line Token, need to update endLine/endColumn
      lastCharIsLT = lastLTIdx === imageLength - 1
      fixForEndingInLT = lastCharIsLT ? -1 : 0
      if (!(numOfLTsInMatch === 1 && lastCharIsLT === true)) {
        // if a token ends in a LT that last LT only affects the line numbering of following Tokens
        newToken.endLine = line + fixForEndingInLT
        // the last LT in a token does not affect the endColumn either as the [columnStart ... columnEnd)
        // inclusive to exclusive range.
        newToken.endColumn = column - 1 + -fixForEndingInLT
      }
      // else single LT in the last character of a token, no need to modify the endLine/EndColumn
    }
  }
  // Default column advance by the matched image's length.
  // Replaced with `identity` at construction time when line tracking is disabled.
  private computeNewColumn(oldColumn: number, imageLength: number) {
    return oldColumn + imageLength
  }
  // Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
  /* istanbul ignore next - place holder */
  private createTokenInstance!: (...args: any[]) => IToken
  // Token factory for "onlyOffset" position tracking: no line/column info at all.
  private createOffsetOnlyToken(
    image: string,
    startOffset: number,
    tokenTypeIdx: number,
    tokenType: TokenType
  ) {
    return {
      image,
      startOffset,
      tokenTypeIdx,
      tokenType
    }
  }
  // Token factory for "onlyStart" position tracking: start line/column only.
  private createStartOnlyToken(
    image: string,
    startOffset: number,
    tokenTypeIdx: number,
    tokenType: TokenType,
    startLine: number,
    startColumn: number
  ) {
    return {
      image,
      startOffset,
      startLine,
      startColumn,
      tokenTypeIdx,
      tokenType
    }
  }
  // Token factory for "full" position tracking: start and end offsets/lines/columns.
  // end positions are computed assuming a single-line image; multi-line images
  // are corrected afterwards by updateTokenEndLineColumnLocation.
  private createFullToken(
    image: string,
    startOffset: number,
    tokenTypeIdx: number,
    tokenType: TokenType,
    startLine: number,
    startColumn: number,
    imageLength: number
  ): IToken {
    return {
      image,
      startOffset,
      endOffset: startOffset + imageLength - 1,
      startLine,
      endLine: startLine,
      startColumn,
      endColumn: startColumn + imageLength - 1,
      tokenTypeIdx,
      tokenType
    }
  }
  // Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
  /* istanbul ignore next - place holder */
  private addToken!: (
    tokenVector: IToken[],
    index: number,
    tokenToAdd: IToken
  ) => number
  // Used when custom patterns exist: custom patterns may push into the vector
  // themselves, so plain `push` keeps the array dense.
  private addTokenUsingPush(
    tokenVector: IToken[],
    index: number,
    tokenToAdd: IToken
  ): number {
    tokenVector.push(tokenToAdd)
    return index
  }
  // Used with the pre-sized (guessed length) token array: direct index
  // assignment, returning the advanced index.
  private addTokenUsingMemberAccess(
    tokenVector: IToken[],
    index: number,
    tokenToAdd: IToken
  ): number {
    tokenVector[index] = tokenToAdd
    index++
    return index
  }
// Place holder, will be replaced by the correct variant according to the hasCustom flag option at runtime.
private handlePayload: (token: IToken, payload: any) => void
private handlePayloadNoCustom(token: IToken, payload: any): void {}
private handlePayloadWithCustom(token: IToken, payload: any): void {
if (payload !== null) {
token.payload = payload
}
}
  // place holder to be replaced with chosen alternative at runtime
  private match!: (
    pattern: RegExp,
    text: string,
    offset: number
  ) => string | null
  // Variant used when the sticky ('y') RegExp flag is supported:
  // `test` advances `lastIndex`, so the matched image is sliced out of the text.
  private matchWithTest(
    pattern: RegExp,
    text: string,
    offset: number
  ): string | null {
    const found = pattern.test(text)
    if (found === true) {
      return text.substring(offset, pattern.lastIndex)
    }
    return null
  }
  // Fallback variant (no sticky support): `text` has already been chopped to
  // the current offset, so a plain `exec` from its start is used.
  private matchWithExec(pattern: RegExp, text: string): string | null {
    const regExpArray = pattern.exec(text)
    return regExpArray !== null ? regExpArray[0] : null
  }
  // Duplicated from the parser's perf trace trait to allow future extraction
  // of the lexer to a separate package.
  /**
   * Runs `phaseImpl`, optionally logging the phase name and duration
   * (indented by nesting depth) when `traceInitPerf` is enabled.
   * @returns whatever `phaseImpl` returns.
   */
  TRACE_INIT = <T>(phaseDesc: string, phaseImpl: () => T): T => {
    // No need to optimize this using NOOP pattern because
    // It is not called in a hot spot...
    if (this.traceInitPerf === true) {
      this.traceInitIndent++
      const indent = new Array(this.traceInitIndent + 1).join("\t")
      if (this.traceInitIndent < this.traceInitMaxIdent) {
        console.log(`${indent}--> <${phaseDesc}>`)
      }
      const { time, value } = timer(phaseImpl)
      /* istanbul ignore next - Difficult to reproduce specific performance behavior (>10ms) in tests */
      const traceMethod = time > 10 ? console.warn : console.log
      if (this.traceInitIndent < this.traceInitMaxIdent) {
        traceMethod(`${indent}<-- <${phaseDesc}> time: ${time}ms`)
      }
      this.traceInitIndent--
      return value
    } else {
      return phaseImpl()
    }
  }
}

View File

@@ -0,0 +1,320 @@
import {
Alternative,
Atom,
BaseRegExpVisitor,
Character,
Disjunction,
Group,
Set,
Term,
VERSION
} from "regexp-to-ast"
import isArray from "lodash/isArray"
import every from "lodash/every"
import forEach from "lodash/forEach"
import find from "lodash/find"
import values from "lodash/values"
import includes from "lodash/includes"
import { PRINT_ERROR, PRINT_WARNING } from "@chevrotain/utils"
import { ASTNode, getRegExpAst } from "./reg_exp_parser"
import { charCodeToOptimizedIndex, minOptimizationVal } from "./lexer"
// Sentinel message thrown/recognized to detect the (unsupported) complement-set case.
const complementErrorMessage =
  "Complement Sets are not supported for first char optimization"
// Prefix shared by all "first char optimization disabled" warnings/errors.
export const failedOptimizationPrefixMsg =
  'Unable to use "first char" lexer optimizations:\n'
/**
 * Computes the optimized "first char" code buckets a RegExp can start with.
 * Returns an empty array when the optimization is not possible — e.g. the
 * pattern uses complement sets, or the regexp-to-ast library fails to parse it
 * (a warning/error is printed in those cases, guided by `ensureOptimizations`).
 */
export function getOptimizedStartCodesIndices(
  regExp: RegExp,
  ensureOptimizations = false
): number[] {
  try {
    const ast = getRegExpAst(regExp)
    const firstChars = firstCharOptimizedIndices(
      ast.value,
      {},
      ast.flags.ignoreCase
    )
    return firstChars
  } catch (e) {
    /* istanbul ignore next */
    // Testing this relies on the regexp-to-ast library having a bug... */
    // TODO: only the else branch needs to be ignored, try to fix with newer prettier / tsc
    if (e.message === complementErrorMessage) {
      if (ensureOptimizations) {
        PRINT_WARNING(
          `${failedOptimizationPrefixMsg}` +
            `\tUnable to optimize: < ${regExp.toString()} >\n` +
            "\tComplement Sets cannot be automatically optimized.\n" +
            "\tThis will disable the lexer's first char optimizations.\n" +
            "\tSee: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#COMPLEMENT for details."
        )
      }
    } else {
      let msgSuffix = ""
      if (ensureOptimizations) {
        msgSuffix =
          "\n\tThis will disable the lexer's first char optimizations.\n" +
          "\tSee: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#REGEXP_PARSING for details."
      }
      PRINT_ERROR(
        `${failedOptimizationPrefixMsg}\n` +
          `\tFailed parsing: < ${regExp.toString()} >\n` +
          `\tUsing the regexp-to-ast library version: ${VERSION}\n` +
          "\tPlease open an issue at: https://github.com/bd82/regexp-to-ast/issues" +
          msgSuffix
      )
    }
  }
  return []
}
/**
 * Recursively collects the optimized "first char" indices that a match of the
 * given regexp AST node may start with.
 *
 * @param ast - the regexp AST node being analyzed.
 * @param result - accumulator mapping optimizedIdx -> optimizedIdx, mutated in place.
 * @param ignoreCase - whether the originating regexp has the `i` flag.
 * @returns the values currently accumulated in `result`.
 * @throws Error(complementErrorMessage) when a complement Set is encountered,
 *   as those cannot be optimized.
 */
export function firstCharOptimizedIndices(
  ast: ASTNode,
  result: { [charCode: number]: number },
  ignoreCase: boolean
): number[] {
  switch (ast.type) {
    // a|b|c --> any alternative may start the match, collect from all of them
    case "Disjunction":
      for (let i = 0; i < ast.value.length; i++) {
        firstCharOptimizedIndices(ast.value[i], result, ignoreCase)
      }
      break
    case "Alternative":
      const terms = ast.value
      for (let i = 0; i < terms.length; i++) {
        const term = terms[i]
        // skip terms that cannot affect the first char results
        switch (term.type) {
          case "EndAnchor":
          // A group back reference cannot affect potential starting char.
          // because if a back reference is the first production than automatically
          // the group being referenced has had to come BEFORE so its codes have already been added
          case "GroupBackReference":
          // assertions do not affect potential starting codes
          case "Lookahead":
          case "NegativeLookahead":
          case "StartAnchor":
          case "WordBoundary":
          case "NonWordBoundary":
            continue
        }
        const atom = term
        switch (atom.type) {
          case "Character":
            addOptimizedIdxToResult(atom.value, result, ignoreCase)
            break
          case "Set":
            if (atom.complement === true) {
              throw Error(complementErrorMessage)
            }
            forEach(atom.value, (code) => {
              if (typeof code === "number") {
                addOptimizedIdxToResult(code, result, ignoreCase)
              } else {
                // range
                const range = code as any
                // cannot apply the large-range bucket optimization when
                // ignoreCase is enabled: each code must be case-flipped individually
                if (ignoreCase === true) {
                  for (
                    let rangeCode = range.from;
                    rangeCode <= range.to;
                    rangeCode++
                  ) {
                    addOptimizedIdxToResult(rangeCode, result, ignoreCase)
                  }
                }
                // Optimization (2 orders of magnitude less work for very large ranges)
                else {
                  // handle unoptimized values
                  for (
                    let rangeCode = range.from;
                    rangeCode <= range.to && rangeCode < minOptimizationVal;
                    rangeCode++
                  ) {
                    addOptimizedIdxToResult(rangeCode, result, ignoreCase)
                  }
                  // Less common charCode where we optimize for faster init time, by using larger "buckets"
                  if (range.to >= minOptimizationVal) {
                    const minUnOptVal =
                      range.from >= minOptimizationVal
                        ? range.from
                        : minOptimizationVal
                    const maxUnOptVal = range.to
                    const minOptIdx = charCodeToOptimizedIndex(minUnOptVal)
                    const maxOptIdx = charCodeToOptimizedIndex(maxUnOptVal)
                    for (
                      let currOptIdx = minOptIdx;
                      currOptIdx <= maxOptIdx;
                      currOptIdx++
                    ) {
                      result[currOptIdx] = currOptIdx
                    }
                  }
                }
              }
            })
            break
          case "Group":
            firstCharOptimizedIndices(atom.value, result, ignoreCase)
            break
          /* istanbul ignore next */
          default:
            throw Error("Non Exhaustive Match")
        }
        // reached a mandatory production, no more **start** codes can be found on this alternative
        const isOptionalQuantifier =
          atom.quantifier !== undefined && atom.quantifier.atLeast === 0
        if (
          // A group may be optional due to empty contents /(?:)/
          // or if everything inside it is optional /((a)?)/
          (atom.type === "Group" && isWholeOptional(atom) === false) ||
          // If this term is not a group it may only be optional if it has an optional quantifier
          (atom.type !== "Group" && isOptionalQuantifier === false)
        ) {
          break
        }
      }
      break
    /* istanbul ignore next */
    default:
      throw Error("non exhaustive match!")
  }
  // console.log(Object.keys(result).length)
  return values(result)
}
/**
 * Records the optimized index of `code` in `result`, and additionally the
 * index of its opposite-case variant when ignoreCase is active.
 */
function addOptimizedIdxToResult(
  code: number,
  result: { [charCode: number]: number },
  ignoreCase: boolean
) {
  const optimizedIdx = charCodeToOptimizedIndex(code)
  result[optimizedIdx] = optimizedIdx
  if (ignoreCase) {
    handleIgnoreCase(code, result)
  }
}
/**
 * Adds the optimized index of the case-flipped variant of `code` to `result`:
 * the upper-cased form when it differs from the original char, otherwise the
 * lower-cased form when that one differs.
 */
function handleIgnoreCase(
  code: number,
  result: { [charCode: number]: number }
) {
  const char = String.fromCharCode(code)
  const upper = char.toUpperCase()
  // pick whichever case conversion actually changes the character
  const flipped = upper !== char ? upper : char.toLowerCase()
  if (flipped !== char) {
    const optimizedCharIdx = charCodeToOptimizedIndex(flipped.charCodeAt(0))
    result[optimizedCharIdx] = optimizedCharIdx
  }
}
/**
 * Returns the first entry of the Set node (single char code or a range) that
 * overlaps any of the target char codes, or undefined when none overlaps.
 */
function findCode(setNode: Set, targetCharCodes: number[]) {
  return setNode.value.find((codeOrRange) => {
    if (typeof codeOrRange === "number") {
      return targetCharCodes.includes(codeOrRange)
    }
    // range entry: overlap exists when some target falls inside [from, to]
    const range = codeOrRange as any
    return targetCharCodes.some(
      (targetCode) => range.from <= targetCode && targetCode <= range.to
    )
  })
}
/**
 * Can this AST node match the empty string? True when the node's own
 * quantifier allows zero repetitions, or when every child is itself
 * wholly optional.
 */
function isWholeOptional(ast: any): boolean {
  const quantifier = (ast as Atom).quantifier
  if (quantifier != null && quantifier.atLeast === 0) {
    return true
  }
  if (!ast.value) {
    return false
  }
  if (Array.isArray(ast.value)) {
    return ast.value.every(isWholeOptional)
  }
  return isWholeOptional(ast.value)
}
/**
 * Regexp AST visitor that determines whether any of the given target char
 * codes can appear in an actual match of the visited pattern.
 */
class CharCodeFinder extends BaseRegExpVisitor {
  found: boolean = false

  constructor(private targetCharCodes: number[]) {
    super()
  }

  visitChildren(node: ASTNode) {
    // short-circuit: no need to keep scanning once a match was found
    if (this.found === true) {
      return
    }

    // Lookahead assertions do not actually consume any characters, so
    // finding a charCode inside one does not mean the regexp can contain
    // it in a match — dispatch them specially instead of descending.
    if (node.type === "Lookahead") {
      this.visitLookahead(node)
      return
    }
    if (node.type === "NegativeLookahead") {
      this.visitNegativeLookahead(node)
      return
    }

    super.visitChildren(node)
  }

  visitCharacter(node: Character) {
    if (this.targetCharCodes.includes(node.value)) {
      this.found = true
    }
  }

  visitSet(node: Set) {
    const overlaps = findCode(node, this.targetCharCodes) !== undefined
    // a complement set ([^...]) can match exactly when at least one target
    // code is NOT listed in the set
    if (node.complement ? !overlaps : overlaps) {
      this.found = true
    }
  }
}
export function canMatchCharCode(
charCodes: number[],
pattern: RegExp | string
) {
if (pattern instanceof RegExp) {
const ast = getRegExpAst(pattern)
const charCodeFinder = new CharCodeFinder(charCodes)
charCodeFinder.visit(ast)
return charCodeFinder.found
} else {
return (
find(<any>pattern, (char) => {
return includes(charCodes, (<string>char).charCodeAt(0))
}) !== undefined
)
}
}

View File

@@ -0,0 +1,34 @@
import {
Alternative,
Assertion,
Atom,
Disjunction,
RegExpParser,
RegExpPattern
} from "regexp-to-ast"
// Cache of parsed regexp ASTs, keyed by the regexp's full string form
// (source + flags), e.g. "/abc/i".
let regExpAstCache: { [regex: string]: RegExpPattern } = {}
const regExpParser = new RegExpParser()
// this should be moved to regexp-to-ast
export type ASTNode =
  | RegExpPattern
  | Disjunction
  | Alternative
  | Assertion
  | Atom
/**
 * Parses a RegExp into its AST representation, memoizing results by the
 * regexp's string form (source + flags) in the module-level cache.
 */
export function getRegExpAst(regExp: RegExp): RegExpPattern {
  const cacheKey = regExp.toString()
  if (!Object.prototype.hasOwnProperty.call(regExpAstCache, cacheKey)) {
    regExpAstCache[cacheKey] = regExpParser.pattern(cacheKey)
  }
  return regExpAstCache[cacheKey]
}
// Resets the module-level regexp AST cache.
export function clearRegExpParserCache() {
  regExpAstCache = {}
}

View File

@@ -0,0 +1,165 @@
import isEmpty from "lodash/isEmpty"
import compact from "lodash/compact"
import isArray from "lodash/isArray"
import flatten from "lodash/flatten"
import difference from "lodash/difference"
import map from "lodash/map"
import forEach from "lodash/forEach"
import has from "lodash/has"
import includes from "lodash/includes"
import clone from "lodash/clone"
import { IToken, TokenType } from "@chevrotain/types"
/**
 * Does `tokInstance` match `tokConstructor`? True on an exact tokenTypeIdx
 * hit, or when the constructor is a category (parent) whose
 * categoryMatchesMap contains the instance's idx.
 */
export function tokenStructuredMatcher(
  tokInstance: IToken,
  tokConstructor: TokenType
) {
  const instanceType = tokInstance.tokenTypeIdx
  return (
    instanceType === tokConstructor.tokenTypeIdx ||
    (tokConstructor.isParent === true &&
      tokConstructor.categoryMatchesMap![instanceType] === true)
  )
}
// Optimized tokenMatcher for grammars that use no token categories.
// Being so tiny it is much more likely to be in-lined, avoiding the
// function call overhead of the full matcher.
export function tokenStructuredMatcherNoCategories(
  token: IToken,
  tokType: TokenType
) {
  // identity of the numeric type idx is the entire check
  const sameIdx = token.tokenTypeIdx === tokType.tokenTypeIdx
  return sameIdx
}
// Monotonically increasing numeric id handed out to each new TokenType (starts at 1).
export let tokenShortNameIdx = 1
// Reverse lookup: numeric tokenTypeIdx -> the TokenType it was assigned to.
export const tokenIdxToClass: { [tokenIdx: number]: TokenType } = {}
/**
 * Initializes the runtime properties (tokenTypeIdx, categoryMatches,
 * categoryMatchesMap, isParent) on the given TokenTypes and all of their
 * (transitive) parent categories.
 */
export function augmentTokenTypes(tokenTypes: TokenType[]): void {
  // collect the parent Token Types as well.
  const allRelevantTypes = expandCategories(tokenTypes)
  // add required tokenType and categoryMatches properties
  assignTokenDefaultProps(allRelevantTypes)
  // fill up the categoryMatches
  assignCategoriesMapProp(allRelevantTypes)
  assignCategoriesTokensProp(allRelevantTypes)
  for (const tokType of allRelevantTypes) {
    tokType.isParent = tokType.categoryMatches!.length > 0
  }
}
/**
 * Returns the input TokenTypes plus every TokenType reachable through
 * (transitive) CATEGORIES links — inputs first, newly discovered categories
 * appended breadth-first.
 */
export function expandCategories(tokenTypes: TokenType[]): TokenType[] {
  let result = clone(tokenTypes)
  let categories = tokenTypes
  let searching = true
  while (searching) {
    // gather all categories referenced by the current frontier
    categories = compact(
      flatten(map(categories, (currTokType) => currTokType.CATEGORIES))
    )
    // keep only categories we have not seen before
    const newCategories = categories.filter((cat) => !result.includes(cat))
    result = result.concat(newCategories)
    if (newCategories.length > 0) {
      categories = newCategories
    } else {
      searching = false
    }
  }
  return result
}
/**
 * Ensures every TokenType carries the runtime properties the lexer/parser
 * rely on: a unique tokenTypeIdx, a normalized CATEGORIES array, and empty
 * categoryMatches / categoryMatchesMap containers.
 */
export function assignTokenDefaultProps(tokenTypes: TokenType[]): void {
  for (const currTokType of tokenTypes) {
    // allocate and register a new numeric id only once per TokenType
    if (!hasShortKeyProperty(currTokType)) {
      tokenIdxToClass[tokenShortNameIdx] = currTokType
      ;(currTokType as any).tokenTypeIdx = tokenShortNameIdx
      tokenShortNameIdx++
    }
    // CATEGORIES? : TokenType | TokenType[] — normalize a single value to an array
    if (
      hasCategoriesProperty(currTokType) &&
      !isArray(currTokType.CATEGORIES)
      // &&
      // !isUndefined(currTokType.CATEGORIES.PATTERN)
    ) {
      currTokType.CATEGORIES = [currTokType.CATEGORIES as unknown as TokenType]
    }
    if (!hasCategoriesProperty(currTokType)) {
      currTokType.CATEGORIES = []
    }
    if (!hasExtendingTokensTypesProperty(currTokType)) {
      currTokType.categoryMatches = []
    }
    if (!hasExtendingTokensTypesMapProperty(currTokType)) {
      currTokType.categoryMatchesMap = {}
    }
  }
}
/**
 * Rebuilds each TokenType's categoryMatches array from the keys of its
 * already-populated categoryMatchesMap.
 */
export function assignCategoriesTokensProp(tokenTypes: TokenType[]): void {
  for (const currTokType of tokenTypes) {
    // reset first to avoid duplications on repeated augmentation
    currTokType.categoryMatches = []
    const matchesMap = currTokType.categoryMatchesMap!
    for (const key of Object.keys(matchesMap)) {
      currTokType.categoryMatches!.push(
        tokenIdxToClass[key as unknown as number].tokenTypeIdx!
      )
    }
  }
}
/**
 * Populates categoryMatchesMap for every TokenType by walking its
 * CATEGORIES hierarchy.
 */
export function assignCategoriesMapProp(tokenTypes: TokenType[]): void {
  for (const currTokType of tokenTypes) {
    singleAssignCategoriesToksMap([], currTokType)
  }
}
/**
 * Recursive helper for assignCategoriesMapProp: marks every TokenType on the
 * current descent `path` as matched by `nextNode`, then recurses into
 * `nextNode`'s own categories.
 */
export function singleAssignCategoriesToksMap(
  path: TokenType[],
  nextNode: TokenType
): void {
  for (const pathNode of path) {
    nextNode.categoryMatchesMap![pathNode.tokenTypeIdx!] = true
  }
  // the extended path is identical for every category, build it once
  const newPath = path.concat(nextNode)
  forEach(nextNode.CATEGORIES, (nextCategory) => {
    // avoids infinite loops due to cyclic categories.
    if (!newPath.includes(nextCategory)) {
      singleAssignCategoriesToksMap(newPath, nextCategory)
    }
  })
}
// The predicates below detect whether a TokenType already carries each of the
// runtime properties added by augmentTokenTypes (own-property checks).

export function hasShortKeyProperty(tokType: TokenType): boolean {
  return Object.prototype.hasOwnProperty.call(tokType, "tokenTypeIdx")
}

export function hasCategoriesProperty(tokType: TokenType): boolean {
  return Object.prototype.hasOwnProperty.call(tokType, "CATEGORIES")
}

export function hasExtendingTokensTypesProperty(tokType: TokenType): boolean {
  return Object.prototype.hasOwnProperty.call(tokType, "categoryMatches")
}

export function hasExtendingTokensTypesMapProperty(
  tokType: TokenType
): boolean {
  return Object.prototype.hasOwnProperty.call(tokType, "categoryMatchesMap")
}

export function isTokenType(tokType: TokenType): boolean {
  return Object.prototype.hasOwnProperty.call(tokType, "tokenTypeIdx")
}

View File

@@ -0,0 +1 @@
// The numeric tokenTypeIdx value reserved for the built-in EOF token type.
export const EOF_TOKEN_TYPE = 1

View File

@@ -0,0 +1,123 @@
import isString from "lodash/isString"
import has from "lodash/has"
import isUndefined from "lodash/isUndefined"
import { Lexer } from "./lexer_public"
import { augmentTokenTypes, tokenStructuredMatcher } from "./tokens"
import { IToken, ITokenConfig, TokenType } from "@chevrotain/types"
/**
 * Returns the most human-friendly name available for a TokenType:
 * its explicit LABEL when one was provided, its name otherwise.
 */
export function tokenLabel(tokType: TokenType): string {
  return hasTokenLabel(tokType) ? tokType.LABEL : tokType.name
}
/**
 * Returns the `name` property of the given TokenType.
 */
export function tokenName(tokType: TokenType): string {
  return tokType.name
}
/**
 * Type guard: does this TokenType carry a non-empty custom LABEL?
 */
export function hasTokenLabel(
  obj: TokenType
): obj is TokenType & Pick<Required<TokenType>, "LABEL"> {
  return typeof obj.LABEL === "string" && obj.LABEL.length > 0
}
// ITokenConfig property keys — used with lodash `has` in createTokenInternal
// to detect which optional settings the user actually provided.
const PARENT = "parent"
const CATEGORIES = "categories"
const LABEL = "label"
const GROUP = "group"
const PUSH_MODE = "push_mode"
const POP_MODE = "pop_mode"
const LONGER_ALT = "longer_alt"
const LINE_BREAKS = "line_breaks"
const START_CHARS_HINT = "start_chars_hint"
/**
 * Creates (and registers) a new TokenType from the given configuration.
 *
 * @param config - the token's name, pattern and optional settings.
 * @returns the newly created TokenType.
 */
export function createToken(config: ITokenConfig): TokenType {
  return createTokenInternal(config)
}
/**
 * Does the actual work for createToken: copies the recognized configuration
 * properties onto a fresh TokenType object and runs augmentation on it.
 *
 * @param config - the user supplied token configuration.
 * @returns the fully initialized TokenType.
 */
function createTokenInternal(config: ITokenConfig): TokenType {
  const pattern = config.pattern
  const tokenType: TokenType = <any>{}
  tokenType.name = config.name

  if (pattern !== undefined) {
    tokenType.PATTERN = pattern
  }

  if (has(config, PARENT)) {
    // NOTE(review): intentionally throws a bare string (not an Error), kept
    // as-is to avoid changing the exception type existing callers may rely on.
    throw (
      "The parent property is no longer supported.\n" +
      "See: https://github.com/chevrotain/chevrotain/issues/564#issuecomment-349062346 for details."
    )
  }

  if (has(config, CATEGORIES)) {
    // casting to ANY as this will be fixed inside `augmentTokenTypes``
    tokenType.CATEGORIES = <any>config[CATEGORIES]
  }

  augmentTokenTypes([tokenType])

  // the remaining optional properties are copied over verbatim
  // (config key -> TokenType property name)
  const passThrough: Array<[string, string]> = [
    [LABEL, "LABEL"],
    [GROUP, "GROUP"],
    [POP_MODE, "POP_MODE"],
    [PUSH_MODE, "PUSH_MODE"],
    [LONGER_ALT, "LONGER_ALT"],
    [LINE_BREAKS, "LINE_BREAKS"],
    [START_CHARS_HINT, "START_CHARS_HINT"]
  ]
  for (const [configKey, tokenTypeKey] of passThrough) {
    if (has(config, configKey)) {
      ;(tokenType as any)[tokenTypeKey] = (config as any)[configKey]
    }
  }

  return tokenType
}
// The built-in EOF TokenType; its pattern is Lexer.NA (no pattern to lex).
export const EOF = createToken({ name: "EOF", pattern: Lexer.NA })
// NOTE(review): createToken already invokes augmentTokenTypes internally, so
// this extra call appears redundant but is harmless (augmentation is guarded).
augmentTokenTypes([EOF])
/**
 * Manually constructs an IToken instance of the given TokenType with the
 * provided image and position information.
 */
export function createTokenInstance(
  tokType: TokenType,
  image: string,
  startOffset: number,
  endOffset: number,
  startLine: number,
  endLine: number,
  startColumn: number,
  endColumn: number
): IToken {
  const token: IToken = {
    image,
    startOffset,
    endOffset,
    startLine,
    endLine,
    startColumn,
    endColumn,
    // the numeric idx is read off the (augmented) TokenType
    tokenTypeIdx: (tokType as any).tokenTypeIdx,
    tokenType: tokType
  }
  return token
}
/**
 * Checks whether `token` matches `tokType`, either directly or via the
 * token-category mechanism (categoryMatchesMap).
 */
export function tokenMatcher(token: IToken, tokType: TokenType): boolean {
  return tokenStructuredMatcher(token, tokType)
}

View File

@@ -0,0 +1,46 @@
/**
 * An inclusive numeric range [start, end].
 */
export interface IRange {
  start: number
  end: number
  /** is `num` inside this range (inclusive on both ends)? */
  contains(num: number): boolean
  /** does this range fully cover `other` (boundaries may be equal)? */
  containsRange(other: IRange): boolean
  /** inverse perspective of containsRange. */
  isContainedInRange(other: IRange): boolean
  /** full cover with strictly smaller/larger boundaries on both sides. */
  strictlyContainsRange(other: IRange): boolean
  /** inverse perspective of strictlyContainsRange. */
  isStrictlyContainedInRange(other: IRange): boolean
}
/**
 * Concrete inclusive numeric range. Construction fails fast when the range
 * is invalid (negative start, or end before start).
 */
export class Range implements IRange {
  constructor(public start: number, public end: number) {
    if (!isValidRange(start, end)) {
      throw new Error("INVALID RANGE")
    }
  }

  /** inclusive membership test */
  contains(num: number): boolean {
    return num >= this.start && num <= this.end
  }

  /** does this range fully cover `other` (boundaries may touch)? */
  containsRange(other: IRange): boolean {
    return other.start >= this.start && other.end <= this.end
  }

  isContainedInRange(other: IRange): boolean {
    return other.containsRange(this)
  }

  /** full cover with strictly smaller boundaries on both sides */
  strictlyContainsRange(other: IRange): boolean {
    return other.start > this.start && other.end < this.end
  }

  isStrictlyContainedInRange(other: IRange): boolean {
    return other.strictlyContainsRange(this)
  }
}
/**
 * A range is valid when it does not start below zero and does not end
 * before it starts.
 */
export function isValidRange(start: number, end: number): boolean {
  // negated comparisons (rather than >=) preserve the original's
  // behavior for NaN inputs
  return !(start < 0) && !(end < start)
}

4
_node_modules/chevrotain/src/version.ts generated Normal file
View File

@@ -0,0 +1,4 @@
// needs a separate module as this is required inside chevrotain productive code
// and also in the entry point for webpack(api.ts).
// A separate file avoids cyclic dependencies and webpack errors.
// NOTE: single source of truth for the library's version string (re-exported by api.ts).
export const VERSION = "10.5.0"