Erster Docker-Stand
This commit is contained in:
87
_node_modules/chevrotain/src/api.ts
generated
Normal file
87
_node_modules/chevrotain/src/api.ts
generated
Normal file
@@ -0,0 +1,87 @@
|
||||
/* istanbul ignore file - tricky to import some things from this module during testing */
|
||||
|
||||
// semantic version
|
||||
export { VERSION } from "./version"
|
||||
|
||||
export {
|
||||
CstParser,
|
||||
EmbeddedActionsParser,
|
||||
ParserDefinitionErrorType,
|
||||
EMPTY_ALT
|
||||
} from "./parse/parser/parser"
|
||||
|
||||
export { Lexer, LexerDefinitionErrorType } from "./scan/lexer_public"
|
||||
|
||||
// Tokens utilities
|
||||
export {
|
||||
createToken,
|
||||
createTokenInstance,
|
||||
EOF,
|
||||
tokenLabel,
|
||||
tokenMatcher,
|
||||
tokenName
|
||||
} from "./scan/tokens_public"
|
||||
|
||||
// Lookahead
|
||||
|
||||
export { getLookaheadPaths } from "./parse/grammar/lookahead"
|
||||
|
||||
export { LLkLookaheadStrategy } from "./parse/grammar/llk_lookahead"
|
||||
|
||||
// Other Utilities
|
||||
|
||||
export { defaultParserErrorProvider } from "./parse/errors_public"
|
||||
|
||||
export {
|
||||
EarlyExitException,
|
||||
isRecognitionException,
|
||||
MismatchedTokenException,
|
||||
NotAllInputParsedException,
|
||||
NoViableAltException
|
||||
} from "./parse/exceptions_public"
|
||||
|
||||
export { defaultLexerErrorProvider } from "./scan/lexer_errors_public"
|
||||
|
||||
// grammar reflection API
|
||||
export {
|
||||
Alternation,
|
||||
Alternative,
|
||||
NonTerminal,
|
||||
Option,
|
||||
Repetition,
|
||||
RepetitionMandatory,
|
||||
RepetitionMandatoryWithSeparator,
|
||||
RepetitionWithSeparator,
|
||||
Rule,
|
||||
Terminal
|
||||
} from "@chevrotain/gast"
|
||||
|
||||
// GAST Utilities
|
||||
|
||||
export {
|
||||
serializeGrammar,
|
||||
serializeProduction,
|
||||
GAstVisitor
|
||||
} from "@chevrotain/gast"
|
||||
|
||||
export { generateCstDts } from "@chevrotain/cst-dts-gen"
|
||||
|
||||
/* istanbul ignore next */
|
||||
export function clearCache() {
|
||||
console.warn(
|
||||
"The clearCache function was 'soft' removed from the Chevrotain API." +
|
||||
"\n\t It performs no action other than printing this message." +
|
||||
"\n\t Please avoid using it as it will be completely removed in the future"
|
||||
)
|
||||
}
|
||||
|
||||
export { createSyntaxDiagramsCode } from "./diagrams/render_public"
|
||||
|
||||
export class Parser {
|
||||
constructor() {
|
||||
throw new Error(
|
||||
"The Parser class has been deprecated, use CstParser or EmbeddedActionsParser instead.\t\n" +
|
||||
"See: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_7-0-0"
|
||||
)
|
||||
}
|
||||
}
|
||||
53
_node_modules/chevrotain/src/diagrams/render_public.ts
generated
Normal file
53
_node_modules/chevrotain/src/diagrams/render_public.ts
generated
Normal file
@@ -0,0 +1,53 @@
|
||||
import { VERSION } from "../version"
|
||||
import { ISerializedGast } from "@chevrotain/types"
|
||||
|
||||
export function createSyntaxDiagramsCode(
|
||||
grammar: ISerializedGast[],
|
||||
{
|
||||
resourceBase = `https://unpkg.com/chevrotain@${VERSION}/diagrams/`,
|
||||
css = `https://unpkg.com/chevrotain@${VERSION}/diagrams/diagrams.css`
|
||||
}: {
|
||||
resourceBase?: string
|
||||
css?: string
|
||||
} = {}
|
||||
) {
|
||||
const header = `
|
||||
<!-- This is a generated file -->
|
||||
<!DOCTYPE html>
|
||||
<meta charset="utf-8">
|
||||
<style>
|
||||
body {
|
||||
background-color: hsl(30, 20%, 95%)
|
||||
}
|
||||
</style>
|
||||
|
||||
`
|
||||
const cssHtml = `
|
||||
<link rel='stylesheet' href='${css}'>
|
||||
`
|
||||
|
||||
const scripts = `
|
||||
<script src='${resourceBase}vendor/railroad-diagrams.js'></script>
|
||||
<script src='${resourceBase}src/diagrams_builder.js'></script>
|
||||
<script src='${resourceBase}src/diagrams_behavior.js'></script>
|
||||
<script src='${resourceBase}src/main.js'></script>
|
||||
`
|
||||
const diagramsDiv = `
|
||||
<div id="diagrams" align="center"></div>
|
||||
`
|
||||
const serializedGrammar = `
|
||||
<script>
|
||||
window.serializedGrammar = ${JSON.stringify(grammar, null, " ")};
|
||||
</script>
|
||||
`
|
||||
|
||||
const initLogic = `
|
||||
<script>
|
||||
var diagramsDiv = document.getElementById("diagrams");
|
||||
main.drawDiagramsFromSerializedGrammar(serializedGrammar, diagramsDiv);
|
||||
</script>
|
||||
`
|
||||
return (
|
||||
header + cssHtml + scripts + diagramsDiv + serializedGrammar + initLogic
|
||||
)
|
||||
}
|
||||
10
_node_modules/chevrotain/src/lang/lang_extensions.ts
generated
Normal file
10
_node_modules/chevrotain/src/lang/lang_extensions.ts
generated
Normal file
@@ -0,0 +1,10 @@
|
||||
const NAME = "name"
|
||||
|
||||
export function defineNameProp(obj: {}, nameValue: string): void {
|
||||
Object.defineProperty(obj, NAME, {
|
||||
enumerable: false,
|
||||
configurable: true,
|
||||
writable: false,
|
||||
value: nameValue
|
||||
})
|
||||
}
|
||||
2
_node_modules/chevrotain/src/parse/constants.ts
generated
Normal file
2
_node_modules/chevrotain/src/parse/constants.ts
generated
Normal file
@@ -0,0 +1,2 @@
|
||||
// TODO: can this be removed? where is it used?
|
||||
export const IN = "_~IN~_"
|
||||
87
_node_modules/chevrotain/src/parse/cst/cst.ts
generated
Normal file
87
_node_modules/chevrotain/src/parse/cst/cst.ts
generated
Normal file
@@ -0,0 +1,87 @@
|
||||
import { CstNode, CstNodeLocation, IToken } from "@chevrotain/types"
|
||||
|
||||
/**
|
||||
* This nodeLocation tracking is not efficient and should only be used
|
||||
* when error recovery is enabled or the Token Vector contains virtual Tokens
|
||||
* (e.g, Python Indent/Outdent)
|
||||
* As it executes the calculation for every single terminal/nonTerminal
|
||||
* and does not rely on the fact the token vector is **sorted**
|
||||
*/
|
||||
export function setNodeLocationOnlyOffset(
|
||||
currNodeLocation: CstNodeLocation,
|
||||
newLocationInfo: Required<Pick<IToken, "startOffset" | "endOffset">>
|
||||
): void {
|
||||
// First (valid) update for this cst node
|
||||
if (isNaN(currNodeLocation.startOffset) === true) {
|
||||
// assumption1: Token location information is either NaN or a valid number
|
||||
// assumption2: Token location information is fully valid if it exist
|
||||
// (both start/end offsets exist and are numbers).
|
||||
currNodeLocation.startOffset = newLocationInfo.startOffset
|
||||
currNodeLocation.endOffset = newLocationInfo.endOffset
|
||||
}
|
||||
// Once the startOffset has been updated with a valid number it should never receive
|
||||
// any farther updates as the Token vector is sorted.
|
||||
// We still have to check this this condition for every new possible location info
|
||||
// because with error recovery enabled we may encounter invalid tokens (NaN location props)
|
||||
else if (currNodeLocation.endOffset! < newLocationInfo.endOffset === true) {
|
||||
currNodeLocation.endOffset = newLocationInfo.endOffset
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This nodeLocation tracking is not efficient and should only be used
|
||||
* when error recovery is enabled or the Token Vector contains virtual Tokens
|
||||
* (e.g, Python Indent/Outdent)
|
||||
* As it executes the calculation for every single terminal/nonTerminal
|
||||
* and does not rely on the fact the token vector is **sorted**
|
||||
*/
|
||||
export function setNodeLocationFull(
|
||||
currNodeLocation: CstNodeLocation,
|
||||
newLocationInfo: CstNodeLocation
|
||||
): void {
|
||||
// First (valid) update for this cst node
|
||||
if (isNaN(currNodeLocation.startOffset) === true) {
|
||||
// assumption1: Token location information is either NaN or a valid number
|
||||
// assumption2: Token location information is fully valid if it exist
|
||||
// (all start/end props exist and are numbers).
|
||||
currNodeLocation.startOffset = newLocationInfo.startOffset
|
||||
currNodeLocation.startColumn = newLocationInfo.startColumn
|
||||
currNodeLocation.startLine = newLocationInfo.startLine
|
||||
currNodeLocation.endOffset = newLocationInfo.endOffset
|
||||
currNodeLocation.endColumn = newLocationInfo.endColumn
|
||||
currNodeLocation.endLine = newLocationInfo.endLine
|
||||
}
|
||||
// Once the start props has been updated with a valid number it should never receive
|
||||
// any farther updates as the Token vector is sorted.
|
||||
// We still have to check this this condition for every new possible location info
|
||||
// because with error recovery enabled we may encounter invalid tokens (NaN location props)
|
||||
else if (currNodeLocation.endOffset! < newLocationInfo.endOffset! === true) {
|
||||
currNodeLocation.endOffset = newLocationInfo.endOffset
|
||||
currNodeLocation.endColumn = newLocationInfo.endColumn
|
||||
currNodeLocation.endLine = newLocationInfo.endLine
|
||||
}
|
||||
}
|
||||
|
||||
export function addTerminalToCst(
|
||||
node: CstNode,
|
||||
token: IToken,
|
||||
tokenTypeName: string
|
||||
): void {
|
||||
if (node.children[tokenTypeName] === undefined) {
|
||||
node.children[tokenTypeName] = [token]
|
||||
} else {
|
||||
node.children[tokenTypeName].push(token)
|
||||
}
|
||||
}
|
||||
|
||||
export function addNoneTerminalToCst(
|
||||
node: CstNode,
|
||||
ruleName: string,
|
||||
ruleResult: any
|
||||
): void {
|
||||
if (node.children[ruleName] === undefined) {
|
||||
node.children[ruleName] = [ruleResult]
|
||||
} else {
|
||||
node.children[ruleName].push(ruleResult)
|
||||
}
|
||||
}
|
||||
151
_node_modules/chevrotain/src/parse/cst/cst_visitor.ts
generated
Normal file
151
_node_modules/chevrotain/src/parse/cst/cst_visitor.ts
generated
Normal file
@@ -0,0 +1,151 @@
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import compact from "lodash/compact"
|
||||
import isArray from "lodash/isArray"
|
||||
import map from "lodash/map"
|
||||
import forEach from "lodash/forEach"
|
||||
import filter from "lodash/filter"
|
||||
import keys from "lodash/keys"
|
||||
import isFunction from "lodash/isFunction"
|
||||
import isUndefined from "lodash/isUndefined"
|
||||
import { defineNameProp } from "../../lang/lang_extensions"
|
||||
import { CstNode, ICstVisitor } from "@chevrotain/types"
|
||||
|
||||
export function defaultVisit<IN>(ctx: any, param: IN): void {
|
||||
const childrenNames = keys(ctx)
|
||||
const childrenNamesLength = childrenNames.length
|
||||
for (let i = 0; i < childrenNamesLength; i++) {
|
||||
const currChildName = childrenNames[i]
|
||||
const currChildArray = ctx[currChildName]
|
||||
const currChildArrayLength = currChildArray.length
|
||||
for (let j = 0; j < currChildArrayLength; j++) {
|
||||
const currChild: any = currChildArray[j]
|
||||
// distinction between Tokens Children and CstNode children
|
||||
if (currChild.tokenTypeIdx === undefined) {
|
||||
this[currChild.name](currChild.children, param)
|
||||
}
|
||||
}
|
||||
}
|
||||
// defaultVisit does not support generic out param
|
||||
}
|
||||
|
||||
export function createBaseSemanticVisitorConstructor(
|
||||
grammarName: string,
|
||||
ruleNames: string[]
|
||||
): {
|
||||
new (...args: any[]): ICstVisitor<any, any>
|
||||
} {
|
||||
const derivedConstructor: any = function () {}
|
||||
|
||||
// can be overwritten according to:
|
||||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/
|
||||
// name?redirectlocale=en-US&redirectslug=JavaScript%2FReference%2FGlobal_Objects%2FFunction%2Fname
|
||||
defineNameProp(derivedConstructor, grammarName + "BaseSemantics")
|
||||
|
||||
const semanticProto = {
|
||||
visit: function (cstNode: CstNode | CstNode[], param: any) {
|
||||
// enables writing more concise visitor methods when CstNode has only a single child
|
||||
if (isArray(cstNode)) {
|
||||
// A CST Node's children dictionary can never have empty arrays as values
|
||||
// If a key is defined there will be at least one element in the corresponding value array.
|
||||
cstNode = cstNode[0]
|
||||
}
|
||||
|
||||
// enables passing optional CstNodes concisely.
|
||||
if (isUndefined(cstNode)) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
return this[cstNode.name](cstNode.children, param)
|
||||
},
|
||||
|
||||
validateVisitor: function () {
|
||||
const semanticDefinitionErrors = validateVisitor(this, ruleNames)
|
||||
if (!isEmpty(semanticDefinitionErrors)) {
|
||||
const errorMessages = map(
|
||||
semanticDefinitionErrors,
|
||||
(currDefError) => currDefError.msg
|
||||
)
|
||||
throw Error(
|
||||
`Errors Detected in CST Visitor <${this.constructor.name}>:\n\t` +
|
||||
`${errorMessages.join("\n\n").replace(/\n/g, "\n\t")}`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
derivedConstructor.prototype = semanticProto
|
||||
derivedConstructor.prototype.constructor = derivedConstructor
|
||||
|
||||
derivedConstructor._RULE_NAMES = ruleNames
|
||||
|
||||
return derivedConstructor
|
||||
}
|
||||
|
||||
export function createBaseVisitorConstructorWithDefaults(
|
||||
grammarName: string,
|
||||
ruleNames: string[],
|
||||
baseConstructor: Function
|
||||
): {
|
||||
new (...args: any[]): ICstVisitor<any, any>
|
||||
} {
|
||||
const derivedConstructor: any = function () {}
|
||||
|
||||
// can be overwritten according to:
|
||||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/
|
||||
// name?redirectlocale=en-US&redirectslug=JavaScript%2FReference%2FGlobal_Objects%2FFunction%2Fname
|
||||
defineNameProp(derivedConstructor, grammarName + "BaseSemanticsWithDefaults")
|
||||
|
||||
const withDefaultsProto = Object.create(baseConstructor.prototype)
|
||||
forEach(ruleNames, (ruleName) => {
|
||||
withDefaultsProto[ruleName] = defaultVisit
|
||||
})
|
||||
|
||||
derivedConstructor.prototype = withDefaultsProto
|
||||
derivedConstructor.prototype.constructor = derivedConstructor
|
||||
|
||||
return derivedConstructor
|
||||
}
|
||||
|
||||
export enum CstVisitorDefinitionError {
|
||||
REDUNDANT_METHOD,
|
||||
MISSING_METHOD
|
||||
}
|
||||
|
||||
export interface IVisitorDefinitionError {
|
||||
msg: string
|
||||
type: CstVisitorDefinitionError
|
||||
methodName: string
|
||||
}
|
||||
|
||||
export function validateVisitor(
|
||||
visitorInstance: ICstVisitor<unknown, unknown>,
|
||||
ruleNames: string[]
|
||||
): IVisitorDefinitionError[] {
|
||||
const missingErrors = validateMissingCstMethods(visitorInstance, ruleNames)
|
||||
|
||||
return missingErrors
|
||||
}
|
||||
|
||||
export function validateMissingCstMethods(
|
||||
visitorInstance: ICstVisitor<unknown, unknown>,
|
||||
ruleNames: string[]
|
||||
): IVisitorDefinitionError[] {
|
||||
const missingRuleNames = filter(ruleNames, (currRuleName) => {
|
||||
return isFunction((visitorInstance as any)[currRuleName]) === false
|
||||
})
|
||||
|
||||
const errors: IVisitorDefinitionError[] = map(
|
||||
missingRuleNames,
|
||||
(currRuleName) => {
|
||||
return {
|
||||
msg: `Missing visitor method: <${currRuleName}> on ${<any>(
|
||||
visitorInstance.constructor.name
|
||||
)} CST Visitor.`,
|
||||
type: CstVisitorDefinitionError.MISSING_METHOD,
|
||||
methodName: currRuleName
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
return compact<IVisitorDefinitionError>(errors)
|
||||
}
|
||||
322
_node_modules/chevrotain/src/parse/errors_public.ts
generated
Normal file
322
_node_modules/chevrotain/src/parse/errors_public.ts
generated
Normal file
@@ -0,0 +1,322 @@
|
||||
import { hasTokenLabel, tokenLabel } from "../scan/tokens_public"
|
||||
import first from "lodash/first"
|
||||
import map from "lodash/map"
|
||||
import reduce from "lodash/reduce"
|
||||
import { Alternation, NonTerminal, Rule, Terminal } from "@chevrotain/gast"
|
||||
import { getProductionDslName } from "@chevrotain/gast"
|
||||
import {
|
||||
IParserErrorMessageProvider,
|
||||
IProductionWithOccurrence,
|
||||
TokenType
|
||||
} from "@chevrotain/types"
|
||||
import {
|
||||
IGrammarResolverErrorMessageProvider,
|
||||
IGrammarValidatorErrorMessageProvider
|
||||
} from "./grammar/types"
|
||||
|
||||
export const defaultParserErrorProvider: IParserErrorMessageProvider = {
|
||||
buildMismatchTokenMessage({ expected, actual, previous, ruleName }): string {
|
||||
const hasLabel = hasTokenLabel(expected)
|
||||
const expectedMsg = hasLabel
|
||||
? `--> ${tokenLabel(expected)} <--`
|
||||
: `token of type --> ${expected.name} <--`
|
||||
|
||||
const msg = `Expecting ${expectedMsg} but found --> '${actual.image}' <--`
|
||||
|
||||
return msg
|
||||
},
|
||||
|
||||
buildNotAllInputParsedMessage({ firstRedundant, ruleName }): string {
|
||||
return "Redundant input, expecting EOF but found: " + firstRedundant.image
|
||||
},
|
||||
|
||||
buildNoViableAltMessage({
|
||||
expectedPathsPerAlt,
|
||||
actual,
|
||||
previous,
|
||||
customUserDescription,
|
||||
ruleName
|
||||
}): string {
|
||||
const errPrefix = "Expecting: "
|
||||
// TODO: issue: No Viable Alternative Error may have incomplete details. #502
|
||||
const actualText = first(actual)!.image
|
||||
const errSuffix = "\nbut found: '" + actualText + "'"
|
||||
|
||||
if (customUserDescription) {
|
||||
return errPrefix + customUserDescription + errSuffix
|
||||
} else {
|
||||
const allLookAheadPaths = reduce(
|
||||
expectedPathsPerAlt,
|
||||
(result, currAltPaths) => result.concat(currAltPaths),
|
||||
[] as TokenType[][]
|
||||
)
|
||||
const nextValidTokenSequences = map(
|
||||
allLookAheadPaths,
|
||||
(currPath) =>
|
||||
`[${map(currPath, (currTokenType) => tokenLabel(currTokenType)).join(
|
||||
", "
|
||||
)}]`
|
||||
)
|
||||
const nextValidSequenceItems = map(
|
||||
nextValidTokenSequences,
|
||||
(itemMsg, idx) => ` ${idx + 1}. ${itemMsg}`
|
||||
)
|
||||
const calculatedDescription = `one of these possible Token sequences:\n${nextValidSequenceItems.join(
|
||||
"\n"
|
||||
)}`
|
||||
|
||||
return errPrefix + calculatedDescription + errSuffix
|
||||
}
|
||||
},
|
||||
|
||||
buildEarlyExitMessage({
|
||||
expectedIterationPaths,
|
||||
actual,
|
||||
customUserDescription,
|
||||
ruleName
|
||||
}): string {
|
||||
const errPrefix = "Expecting: "
|
||||
// TODO: issue: No Viable Alternative Error may have incomplete details. #502
|
||||
const actualText = first(actual)!.image
|
||||
const errSuffix = "\nbut found: '" + actualText + "'"
|
||||
|
||||
if (customUserDescription) {
|
||||
return errPrefix + customUserDescription + errSuffix
|
||||
} else {
|
||||
const nextValidTokenSequences = map(
|
||||
expectedIterationPaths,
|
||||
(currPath) =>
|
||||
`[${map(currPath, (currTokenType) => tokenLabel(currTokenType)).join(
|
||||
","
|
||||
)}]`
|
||||
)
|
||||
const calculatedDescription =
|
||||
`expecting at least one iteration which starts with one of these possible Token sequences::\n ` +
|
||||
`<${nextValidTokenSequences.join(" ,")}>`
|
||||
|
||||
return errPrefix + calculatedDescription + errSuffix
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Object.freeze(defaultParserErrorProvider)
|
||||
|
||||
export const defaultGrammarResolverErrorProvider: IGrammarResolverErrorMessageProvider =
|
||||
{
|
||||
buildRuleNotFoundError(
|
||||
topLevelRule: Rule,
|
||||
undefinedRule: NonTerminal
|
||||
): string {
|
||||
const msg =
|
||||
"Invalid grammar, reference to a rule which is not defined: ->" +
|
||||
undefinedRule.nonTerminalName +
|
||||
"<-\n" +
|
||||
"inside top level rule: ->" +
|
||||
topLevelRule.name +
|
||||
"<-"
|
||||
return msg
|
||||
}
|
||||
}
|
||||
|
||||
export const defaultGrammarValidatorErrorProvider: IGrammarValidatorErrorMessageProvider =
|
||||
{
|
||||
buildDuplicateFoundError(
|
||||
topLevelRule: Rule,
|
||||
duplicateProds: IProductionWithOccurrence[]
|
||||
): string {
|
||||
function getExtraProductionArgument(
|
||||
prod: IProductionWithOccurrence
|
||||
): string {
|
||||
if (prod instanceof Terminal) {
|
||||
return prod.terminalType.name
|
||||
} else if (prod instanceof NonTerminal) {
|
||||
return prod.nonTerminalName
|
||||
} else {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
const topLevelName = topLevelRule.name
|
||||
const duplicateProd = first(duplicateProds)!
|
||||
const index = duplicateProd.idx
|
||||
const dslName = getProductionDslName(duplicateProd)
|
||||
const extraArgument = getExtraProductionArgument(duplicateProd)
|
||||
|
||||
const hasExplicitIndex = index > 0
|
||||
let msg = `->${dslName}${hasExplicitIndex ? index : ""}<- ${
|
||||
extraArgument ? `with argument: ->${extraArgument}<-` : ""
|
||||
}
|
||||
appears more than once (${
|
||||
duplicateProds.length
|
||||
} times) in the top level rule: ->${topLevelName}<-.
|
||||
For further details see: https://chevrotain.io/docs/FAQ.html#NUMERICAL_SUFFIXES
|
||||
`
|
||||
|
||||
// white space trimming time! better to trim afterwards as it allows to use WELL formatted multi line template strings...
|
||||
msg = msg.replace(/[ \t]+/g, " ")
|
||||
msg = msg.replace(/\s\s+/g, "\n")
|
||||
|
||||
return msg
|
||||
},
|
||||
|
||||
buildNamespaceConflictError(rule: Rule): string {
|
||||
const errMsg =
|
||||
`Namespace conflict found in grammar.\n` +
|
||||
`The grammar has both a Terminal(Token) and a Non-Terminal(Rule) named: <${rule.name}>.\n` +
|
||||
`To resolve this make sure each Terminal and Non-Terminal names are unique\n` +
|
||||
`This is easy to accomplish by using the convention that Terminal names start with an uppercase letter\n` +
|
||||
`and Non-Terminal names start with a lower case letter.`
|
||||
|
||||
return errMsg
|
||||
},
|
||||
|
||||
buildAlternationPrefixAmbiguityError(options: {
|
||||
topLevelRule: Rule
|
||||
prefixPath: TokenType[]
|
||||
ambiguityIndices: number[]
|
||||
alternation: Alternation
|
||||
}): string {
|
||||
const pathMsg = map(options.prefixPath, (currTok) =>
|
||||
tokenLabel(currTok)
|
||||
).join(", ")
|
||||
const occurrence =
|
||||
options.alternation.idx === 0 ? "" : options.alternation.idx
|
||||
const errMsg =
|
||||
`Ambiguous alternatives: <${options.ambiguityIndices.join(
|
||||
" ,"
|
||||
)}> due to common lookahead prefix\n` +
|
||||
`in <OR${occurrence}> inside <${options.topLevelRule.name}> Rule,\n` +
|
||||
`<${pathMsg}> may appears as a prefix path in all these alternatives.\n` +
|
||||
`See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#COMMON_PREFIX\n` +
|
||||
`For Further details.`
|
||||
|
||||
return errMsg
|
||||
},
|
||||
|
||||
buildAlternationAmbiguityError(options: {
|
||||
topLevelRule: Rule
|
||||
prefixPath: TokenType[]
|
||||
ambiguityIndices: number[]
|
||||
alternation: Alternation
|
||||
}): string {
|
||||
const pathMsg = map(options.prefixPath, (currtok) =>
|
||||
tokenLabel(currtok)
|
||||
).join(", ")
|
||||
const occurrence =
|
||||
options.alternation.idx === 0 ? "" : options.alternation.idx
|
||||
let currMessage =
|
||||
`Ambiguous Alternatives Detected: <${options.ambiguityIndices.join(
|
||||
" ,"
|
||||
)}> in <OR${occurrence}>` +
|
||||
` inside <${options.topLevelRule.name}> Rule,\n` +
|
||||
`<${pathMsg}> may appears as a prefix path in all these alternatives.\n`
|
||||
|
||||
currMessage =
|
||||
currMessage +
|
||||
`See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#AMBIGUOUS_ALTERNATIVES\n` +
|
||||
`For Further details.`
|
||||
return currMessage
|
||||
},
|
||||
|
||||
buildEmptyRepetitionError(options: {
|
||||
topLevelRule: Rule
|
||||
repetition: IProductionWithOccurrence
|
||||
}): string {
|
||||
let dslName = getProductionDslName(options.repetition)
|
||||
if (options.repetition.idx !== 0) {
|
||||
dslName += options.repetition.idx
|
||||
}
|
||||
|
||||
const errMsg =
|
||||
`The repetition <${dslName}> within Rule <${options.topLevelRule.name}> can never consume any tokens.\n` +
|
||||
`This could lead to an infinite loop.`
|
||||
|
||||
return errMsg
|
||||
},
|
||||
|
||||
// TODO: remove - `errors_public` from nyc.config.js exclude
|
||||
// once this method is fully removed from this file
|
||||
buildTokenNameError(options: {
|
||||
tokenType: TokenType
|
||||
expectedPattern: RegExp
|
||||
}): string {
|
||||
/* istanbul ignore next */
|
||||
return "deprecated"
|
||||
},
|
||||
|
||||
buildEmptyAlternationError(options: {
|
||||
topLevelRule: Rule
|
||||
alternation: Alternation
|
||||
emptyChoiceIdx: number
|
||||
}): string {
|
||||
const errMsg =
|
||||
`Ambiguous empty alternative: <${options.emptyChoiceIdx + 1}>` +
|
||||
` in <OR${options.alternation.idx}> inside <${options.topLevelRule.name}> Rule.\n` +
|
||||
`Only the last alternative may be an empty alternative.`
|
||||
|
||||
return errMsg
|
||||
},
|
||||
|
||||
buildTooManyAlternativesError(options: {
|
||||
topLevelRule: Rule
|
||||
alternation: Alternation
|
||||
}): string {
|
||||
const errMsg =
|
||||
`An Alternation cannot have more than 256 alternatives:\n` +
|
||||
`<OR${options.alternation.idx}> inside <${
|
||||
options.topLevelRule.name
|
||||
}> Rule.\n has ${
|
||||
options.alternation.definition.length + 1
|
||||
} alternatives.`
|
||||
|
||||
return errMsg
|
||||
},
|
||||
|
||||
buildLeftRecursionError(options: {
|
||||
topLevelRule: Rule
|
||||
leftRecursionPath: Rule[]
|
||||
}): string {
|
||||
const ruleName = options.topLevelRule.name
|
||||
const pathNames = map(
|
||||
options.leftRecursionPath,
|
||||
(currRule) => currRule.name
|
||||
)
|
||||
const leftRecursivePath = `${ruleName} --> ${pathNames
|
||||
.concat([ruleName])
|
||||
.join(" --> ")}`
|
||||
const errMsg =
|
||||
`Left Recursion found in grammar.\n` +
|
||||
`rule: <${ruleName}> can be invoked from itself (directly or indirectly)\n` +
|
||||
`without consuming any Tokens. The grammar path that causes this is: \n ${leftRecursivePath}\n` +
|
||||
` To fix this refactor your grammar to remove the left recursion.\n` +
|
||||
`see: https://en.wikipedia.org/wiki/LL_parser#Left_factoring.`
|
||||
|
||||
return errMsg
|
||||
},
|
||||
|
||||
// TODO: remove - `errors_public` from nyc.config.js exclude
|
||||
// once this method is fully removed from this file
|
||||
buildInvalidRuleNameError(options: {
|
||||
topLevelRule: Rule
|
||||
expectedPattern: RegExp
|
||||
}): string {
|
||||
/* istanbul ignore next */
|
||||
return "deprecated"
|
||||
},
|
||||
|
||||
buildDuplicateRuleNameError(options: {
|
||||
topLevelRule: Rule | string
|
||||
grammarName: string
|
||||
}): string {
|
||||
let ruleName
|
||||
if (options.topLevelRule instanceof Rule) {
|
||||
ruleName = options.topLevelRule.name
|
||||
} else {
|
||||
ruleName = options.topLevelRule
|
||||
}
|
||||
|
||||
const errMsg = `Duplicate definition, rule: ->${ruleName}<- is already defined in the grammar: ->${options.grammarName}<-`
|
||||
|
||||
return errMsg
|
||||
}
|
||||
}
|
||||
74
_node_modules/chevrotain/src/parse/exceptions_public.ts
generated
Normal file
74
_node_modules/chevrotain/src/parse/exceptions_public.ts
generated
Normal file
@@ -0,0 +1,74 @@
|
||||
import includes from "lodash/includes"
|
||||
import {
|
||||
IToken,
|
||||
IRecognitionException,
|
||||
IRecognizerContext
|
||||
} from "@chevrotain/types"
|
||||
|
||||
const MISMATCHED_TOKEN_EXCEPTION = "MismatchedTokenException"
|
||||
const NO_VIABLE_ALT_EXCEPTION = "NoViableAltException"
|
||||
const EARLY_EXIT_EXCEPTION = "EarlyExitException"
|
||||
const NOT_ALL_INPUT_PARSED_EXCEPTION = "NotAllInputParsedException"
|
||||
|
||||
const RECOGNITION_EXCEPTION_NAMES = [
|
||||
MISMATCHED_TOKEN_EXCEPTION,
|
||||
NO_VIABLE_ALT_EXCEPTION,
|
||||
EARLY_EXIT_EXCEPTION,
|
||||
NOT_ALL_INPUT_PARSED_EXCEPTION
|
||||
]
|
||||
|
||||
Object.freeze(RECOGNITION_EXCEPTION_NAMES)
|
||||
|
||||
// hacks to bypass no support for custom Errors in javascript/typescript
|
||||
export function isRecognitionException(error: Error) {
|
||||
// can't do instanceof on hacked custom js exceptions
|
||||
return includes(RECOGNITION_EXCEPTION_NAMES, error.name)
|
||||
}
|
||||
|
||||
abstract class RecognitionException
|
||||
extends Error
|
||||
implements IRecognitionException
|
||||
{
|
||||
context: IRecognizerContext
|
||||
resyncedTokens: IToken[] = []
|
||||
|
||||
protected constructor(message: string, public token: IToken) {
|
||||
super(message)
|
||||
|
||||
// fix prototype chain when typescript target is ES5
|
||||
Object.setPrototypeOf(this, new.target.prototype)
|
||||
|
||||
/* istanbul ignore next - V8 workaround to remove constructor from stacktrace when typescript target is ES5 */
|
||||
if (Error.captureStackTrace) {
|
||||
Error.captureStackTrace(this, this.constructor)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class MismatchedTokenException extends RecognitionException {
|
||||
constructor(message: string, token: IToken, public previousToken: IToken) {
|
||||
super(message, token)
|
||||
this.name = MISMATCHED_TOKEN_EXCEPTION
|
||||
}
|
||||
}
|
||||
|
||||
export class NoViableAltException extends RecognitionException {
|
||||
constructor(message: string, token: IToken, public previousToken: IToken) {
|
||||
super(message, token)
|
||||
this.name = NO_VIABLE_ALT_EXCEPTION
|
||||
}
|
||||
}
|
||||
|
||||
export class NotAllInputParsedException extends RecognitionException {
|
||||
constructor(message: string, token: IToken) {
|
||||
super(message, token)
|
||||
this.name = NOT_ALL_INPUT_PARSED_EXCEPTION
|
||||
}
|
||||
}
|
||||
|
||||
export class EarlyExitException extends RecognitionException {
|
||||
constructor(message: string, token: IToken, public previousToken: IToken) {
|
||||
super(message, token)
|
||||
this.name = EARLY_EXIT_EXCEPTION
|
||||
}
|
||||
}
|
||||
708
_node_modules/chevrotain/src/parse/grammar/checks.ts
generated
Normal file
708
_node_modules/chevrotain/src/parse/grammar/checks.ts
generated
Normal file
@@ -0,0 +1,708 @@
|
||||
import first from "lodash/first"
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import drop from "lodash/drop"
|
||||
import flatten from "lodash/flatten"
|
||||
import filter from "lodash/filter"
|
||||
import reject from "lodash/reject"
|
||||
import difference from "lodash/difference"
|
||||
import map from "lodash/map"
|
||||
import forEach from "lodash/forEach"
|
||||
import groupBy from "lodash/groupBy"
|
||||
import reduce from "lodash/reduce"
|
||||
import pickBy from "lodash/pickBy"
|
||||
import values from "lodash/values"
|
||||
import includes from "lodash/includes"
|
||||
import flatMap from "lodash/flatMap"
|
||||
import clone from "lodash/clone"
|
||||
import {
|
||||
IParserAmbiguousAlternativesDefinitionError,
|
||||
IParserDuplicatesDefinitionError,
|
||||
IParserEmptyAlternativeDefinitionError,
|
||||
ParserDefinitionErrorType
|
||||
} from "../parser/parser"
|
||||
import { getProductionDslName, isOptionalProd } from "@chevrotain/gast"
|
||||
import {
|
||||
Alternative,
|
||||
containsPath,
|
||||
getLookaheadPathsForOptionalProd,
|
||||
getLookaheadPathsForOr,
|
||||
getProdType,
|
||||
isStrictPrefixOfPath
|
||||
} from "./lookahead"
|
||||
import { nextPossibleTokensAfter } from "./interpreter"
|
||||
import {
|
||||
Alternation,
|
||||
Alternative as AlternativeGAST,
|
||||
NonTerminal,
|
||||
Option,
|
||||
Repetition,
|
||||
RepetitionMandatory,
|
||||
RepetitionMandatoryWithSeparator,
|
||||
RepetitionWithSeparator,
|
||||
Terminal
|
||||
} from "@chevrotain/gast"
|
||||
import { GAstVisitor } from "@chevrotain/gast"
|
||||
import {
|
||||
ILookaheadStrategy,
|
||||
IProduction,
|
||||
IProductionWithOccurrence,
|
||||
TokenType,
|
||||
Rule
|
||||
} from "@chevrotain/types"
|
||||
import {
|
||||
IGrammarValidatorErrorMessageProvider,
|
||||
IParserDefinitionError
|
||||
} from "./types"
|
||||
import dropRight from "lodash/dropRight"
|
||||
import compact from "lodash/compact"
|
||||
import { tokenStructuredMatcher } from "../../scan/tokens"
|
||||
|
||||
export function validateLookahead(options: {
|
||||
lookaheadStrategy: ILookaheadStrategy
|
||||
rules: Rule[]
|
||||
tokenTypes: TokenType[]
|
||||
grammarName: string
|
||||
}): IParserDefinitionError[] {
|
||||
const lookaheadValidationErrorMessages = options.lookaheadStrategy.validate({
|
||||
rules: options.rules,
|
||||
tokenTypes: options.tokenTypes,
|
||||
grammarName: options.grammarName
|
||||
})
|
||||
return map(lookaheadValidationErrorMessages, (errorMessage) => ({
|
||||
type: ParserDefinitionErrorType.CUSTOM_LOOKAHEAD_VALIDATION,
|
||||
...errorMessage
|
||||
}))
|
||||
}
|
||||
|
||||
export function validateGrammar(
|
||||
topLevels: Rule[],
|
||||
tokenTypes: TokenType[],
|
||||
errMsgProvider: IGrammarValidatorErrorMessageProvider,
|
||||
grammarName: string
|
||||
): IParserDefinitionError[] {
|
||||
const duplicateErrors: IParserDefinitionError[] = flatMap(
|
||||
topLevels,
|
||||
(currTopLevel) => validateDuplicateProductions(currTopLevel, errMsgProvider)
|
||||
)
|
||||
|
||||
const termsNamespaceConflictErrors = checkTerminalAndNoneTerminalsNameSpace(
|
||||
topLevels,
|
||||
tokenTypes,
|
||||
errMsgProvider
|
||||
)
|
||||
|
||||
const tooManyAltsErrors = flatMap(topLevels, (curRule) =>
|
||||
validateTooManyAlts(curRule, errMsgProvider)
|
||||
)
|
||||
|
||||
const duplicateRulesError = flatMap(topLevels, (curRule) =>
|
||||
validateRuleDoesNotAlreadyExist(
|
||||
curRule,
|
||||
topLevels,
|
||||
grammarName,
|
||||
errMsgProvider
|
||||
)
|
||||
)
|
||||
|
||||
return duplicateErrors.concat(
|
||||
termsNamespaceConflictErrors,
|
||||
tooManyAltsErrors,
|
||||
duplicateRulesError
|
||||
)
|
||||
}
|
||||
|
||||
function validateDuplicateProductions(
|
||||
topLevelRule: Rule,
|
||||
errMsgProvider: IGrammarValidatorErrorMessageProvider
|
||||
): IParserDuplicatesDefinitionError[] {
|
||||
const collectorVisitor = new OccurrenceValidationCollector()
|
||||
topLevelRule.accept(collectorVisitor)
|
||||
const allRuleProductions = collectorVisitor.allProductions
|
||||
|
||||
const productionGroups = groupBy(
|
||||
allRuleProductions,
|
||||
identifyProductionForDuplicates
|
||||
)
|
||||
|
||||
const duplicates: any = pickBy(productionGroups, (currGroup) => {
|
||||
return currGroup.length > 1
|
||||
})
|
||||
|
||||
const errors = map(values(duplicates), (currDuplicates: any) => {
|
||||
const firstProd: any = first(currDuplicates)
|
||||
const msg = errMsgProvider.buildDuplicateFoundError(
|
||||
topLevelRule,
|
||||
currDuplicates
|
||||
)
|
||||
const dslName = getProductionDslName(firstProd)
|
||||
const defError: IParserDuplicatesDefinitionError = {
|
||||
message: msg,
|
||||
type: ParserDefinitionErrorType.DUPLICATE_PRODUCTIONS,
|
||||
ruleName: topLevelRule.name,
|
||||
dslName: dslName,
|
||||
occurrence: firstProd.idx
|
||||
}
|
||||
|
||||
const param = getExtraProductionArgument(firstProd)
|
||||
if (param) {
|
||||
defError.parameter = param
|
||||
}
|
||||
|
||||
return defError
|
||||
})
|
||||
return errors
|
||||
}
|
||||
|
||||
export function identifyProductionForDuplicates(
|
||||
prod: IProductionWithOccurrence
|
||||
): string {
|
||||
return `${getProductionDslName(prod)}_#_${
|
||||
prod.idx
|
||||
}_#_${getExtraProductionArgument(prod)}`
|
||||
}
|
||||
|
||||
function getExtraProductionArgument(prod: IProductionWithOccurrence): string {
|
||||
if (prod instanceof Terminal) {
|
||||
return prod.terminalType.name
|
||||
} else if (prod instanceof NonTerminal) {
|
||||
return prod.nonTerminalName
|
||||
} else {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
export class OccurrenceValidationCollector extends GAstVisitor {
|
||||
public allProductions: IProductionWithOccurrence[] = []
|
||||
|
||||
public visitNonTerminal(subrule: NonTerminal): void {
|
||||
this.allProductions.push(subrule)
|
||||
}
|
||||
|
||||
public visitOption(option: Option): void {
|
||||
this.allProductions.push(option)
|
||||
}
|
||||
|
||||
public visitRepetitionWithSeparator(manySep: RepetitionWithSeparator): void {
|
||||
this.allProductions.push(manySep)
|
||||
}
|
||||
|
||||
public visitRepetitionMandatory(atLeastOne: RepetitionMandatory): void {
|
||||
this.allProductions.push(atLeastOne)
|
||||
}
|
||||
|
||||
public visitRepetitionMandatoryWithSeparator(
|
||||
atLeastOneSep: RepetitionMandatoryWithSeparator
|
||||
): void {
|
||||
this.allProductions.push(atLeastOneSep)
|
||||
}
|
||||
|
||||
public visitRepetition(many: Repetition): void {
|
||||
this.allProductions.push(many)
|
||||
}
|
||||
|
||||
public visitAlternation(or: Alternation): void {
|
||||
this.allProductions.push(or)
|
||||
}
|
||||
|
||||
public visitTerminal(terminal: Terminal): void {
|
||||
this.allProductions.push(terminal)
|
||||
}
|
||||
}
|
||||
|
||||
export function validateRuleDoesNotAlreadyExist(
|
||||
rule: Rule,
|
||||
allRules: Rule[],
|
||||
className: string,
|
||||
errMsgProvider: IGrammarValidatorErrorMessageProvider
|
||||
): IParserDefinitionError[] {
|
||||
const errors = []
|
||||
const occurrences = reduce(
|
||||
allRules,
|
||||
(result, curRule) => {
|
||||
if (curRule.name === rule.name) {
|
||||
return result + 1
|
||||
}
|
||||
return result
|
||||
},
|
||||
0
|
||||
)
|
||||
if (occurrences > 1) {
|
||||
const errMsg = errMsgProvider.buildDuplicateRuleNameError({
|
||||
topLevelRule: rule,
|
||||
grammarName: className
|
||||
})
|
||||
errors.push({
|
||||
message: errMsg,
|
||||
type: ParserDefinitionErrorType.DUPLICATE_RULE_NAME,
|
||||
ruleName: rule.name
|
||||
})
|
||||
}
|
||||
|
||||
return errors
|
||||
}
|
||||
|
||||
// TODO: is there anyway to get only the rule names of rules inherited from the super grammars?
|
||||
// This is not part of the IGrammarErrorProvider because the validation cannot be performed on
|
||||
// The grammar structure, only at runtime.
|
||||
export function validateRuleIsOverridden(
|
||||
ruleName: string,
|
||||
definedRulesNames: string[],
|
||||
className: string
|
||||
): IParserDefinitionError[] {
|
||||
const errors = []
|
||||
let errMsg
|
||||
|
||||
if (!includes(definedRulesNames, ruleName)) {
|
||||
errMsg =
|
||||
`Invalid rule override, rule: ->${ruleName}<- cannot be overridden in the grammar: ->${className}<-` +
|
||||
`as it is not defined in any of the super grammars `
|
||||
errors.push({
|
||||
message: errMsg,
|
||||
type: ParserDefinitionErrorType.INVALID_RULE_OVERRIDE,
|
||||
ruleName: ruleName
|
||||
})
|
||||
}
|
||||
|
||||
return errors
|
||||
}
|
||||
|
||||
/**
 * Detects direct or indirect left recursion reachable from `topRule`.
 * Recursively follows every NonTerminal that can be matched FIRST in
 * `currRule` (via getFirstNoneTerminal); if `topRule` itself is reachable
 * this way a LEFT_RECURSION error is produced.
 *
 * @param topRule - the rule whose left recursion is being checked.
 * @param currRule - the rule currently being inspected (initially == topRule).
 * @param errMsgProvider - builds the human readable error message.
 * @param path - chain of rules traversed so far, used in the error message
 *               and to avoid revisiting rules.
 */
export function validateNoLeftRecursion(
  topRule: Rule,
  currRule: Rule,
  errMsgProvider: IGrammarValidatorErrorMessageProvider,
  path: Rule[] = []
): IParserDefinitionError[] {
  const errors: IParserDefinitionError[] = []
  // every rule that may be matched first (before consuming any token)
  const nextNonTerminals = getFirstNoneTerminal(currRule.definition)
  if (isEmpty(nextNonTerminals)) {
    return []
  } else {
    const ruleName = topRule.name
    // reaching topRule again without consuming input == left recursion
    const foundLeftRecursion = includes(nextNonTerminals, topRule)
    if (foundLeftRecursion) {
      errors.push({
        message: errMsgProvider.buildLeftRecursionError({
          topLevelRule: topRule,
          leftRecursionPath: path
        }),
        type: ParserDefinitionErrorType.LEFT_RECURSION,
        ruleName: ruleName
      })
    }

    // we are only looking for cyclic paths leading back to the specific topRule
    // other cyclic paths are ignored, we still need this difference to avoid infinite loops...
    const validNextSteps = difference(nextNonTerminals, path.concat([topRule]))
    const errorsFromNextSteps = flatMap(validNextSteps, (currRefRule) => {
      // extend the path with a copy so sibling branches are unaffected
      const newPath = clone(path)
      newPath.push(currRefRule)
      return validateNoLeftRecursion(
        topRule,
        currRefRule,
        errMsgProvider,
        newPath
      )
    })

    return errors.concat(errorsFromNextSteps)
  }
}
|
||||
|
||||
/**
 * Computes the set of Rules (NonTerminal targets) that may be matched FIRST
 * by a sequence of productions, i.e. before any token is consumed.
 * Recurses into wrapper productions (Option/Repetitions/Alternative) and into
 * every branch of an Alternation. When the first production is optional the
 * remainder of the sequence is also inspected.
 */
export function getFirstNoneTerminal(definition: IProduction[]): Rule[] {
  let result: Rule[] = []
  if (isEmpty(definition)) {
    return result
  }
  const firstProd = first(definition)

  /* istanbul ignore else */
  if (firstProd instanceof NonTerminal) {
    // a rule reference is itself a "first" rule
    result.push(firstProd.referencedRule)
  } else if (
    firstProd instanceof AlternativeGAST ||
    firstProd instanceof Option ||
    firstProd instanceof RepetitionMandatory ||
    firstProd instanceof RepetitionMandatoryWithSeparator ||
    firstProd instanceof RepetitionWithSeparator ||
    firstProd instanceof Repetition
  ) {
    // wrapper production: recurse into its inner definition
    result = result.concat(
      getFirstNoneTerminal(<IProduction[]>firstProd.definition)
    )
  } else if (firstProd instanceof Alternation) {
    // each sub definition in alternation is a FLAT
    result = flatten(
      map(firstProd.definition, (currSubDef) =>
        getFirstNoneTerminal((<AlternativeGAST>currSubDef).definition)
      )
    )
  } else if (firstProd instanceof Terminal) {
    // nothing to see, move along
  } else {
    throw Error("non exhaustive match")
  }

  // if the first production may match nothing, the following productions
  // can also be "first" — continue with the rest of the sequence
  const isFirstOptional = isOptionalProd(firstProd)
  const hasMore = definition.length > 1
  if (isFirstOptional && hasMore) {
    const rest = drop(definition)
    return result.concat(getFirstNoneTerminal(rest))
  } else {
    return result
  }
}
|
||||
|
||||
/** GAST visitor that collects every Alternation (OR) node inside a rule. */
class OrCollector extends GAstVisitor {
  public alternations: Alternation[] = []

  public visitAlternation(node: Alternation): void {
    this.alternations.push(node)
  }
}
|
||||
|
||||
/**
 * Checks every Alternation in a rule for alternatives (other than the last
 * one) that cannot consume any token. Such an alternative would shadow all
 * alternatives after it, so a NONE_LAST_EMPTY_ALT error is produced.
 *
 * @param topLevelRule - the rule whose alternations are inspected.
 * @param errMsgProvider - builds the human readable error messages.
 */
export function validateEmptyOrAlternative(
  topLevelRule: Rule,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserEmptyAlternativeDefinitionError[] {
  const orCollector = new OrCollector()
  topLevelRule.accept(orCollector)
  const ors = orCollector.alternations

  const errors = flatMap<Alternation, IParserEmptyAlternativeDefinitionError>(
    ors,
    (currOr) => {
      // an empty LAST alternative is legal (acts as the default), so skip it
      const exceptLast = dropRight(currOr.definition)
      return flatMap(exceptLast, (currAlternative, currAltIdx) => {
        // tokens this alternative could possibly start with (lookahead of 1)
        const possibleFirstInAlt = nextPossibleTokensAfter(
          [currAlternative],
          [],
          tokenStructuredMatcher,
          1
        )
        if (isEmpty(possibleFirstInAlt)) {
          // no possible first token -> the alternative matches nothing
          return [
            {
              message: errMsgProvider.buildEmptyAlternationError({
                topLevelRule: topLevelRule,
                alternation: currOr,
                emptyChoiceIdx: currAltIdx
              }),
              type: ParserDefinitionErrorType.NONE_LAST_EMPTY_ALT,
              ruleName: topLevelRule.name,
              occurrence: currOr.idx,
              // 1-based index for human readable reporting
              alternative: currAltIdx + 1
            }
          ]
        } else {
          return []
        }
      })
    }
  )

  return errors
}
|
||||
|
||||
/**
 * For every Alternation in a rule (unless it opted out via
 * `ignoreAmbiguities`), computes the lookahead paths of its alternatives and
 * reports both identical-lookahead ambiguities and prefix ambiguities.
 *
 * @param topLevelRule - the rule whose alternations are inspected.
 * @param globalMaxLookahead - fallback lookahead depth when an alternation
 *        does not define its own `maxLookahead`.
 * @param errMsgProvider - builds the human readable error messages.
 */
export function validateAmbiguousAlternationAlternatives(
  topLevelRule: Rule,
  globalMaxLookahead: number,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserAmbiguousAlternativesDefinitionError[] {
  const orCollector = new OrCollector()
  topLevelRule.accept(orCollector)
  let ors = orCollector.alternations

  // New Handling of ignoring ambiguities
  // - https://github.com/chevrotain/chevrotain/issues/869
  ors = reject(ors, (currOr) => currOr.ignoreAmbiguities === true)

  const errors = flatMap(ors, (currOr: Alternation) => {
    const currOccurrence = currOr.idx
    // an alternation's own maxLookahead (if set) wins over the global one
    const actualMaxLookahead = currOr.maxLookahead || globalMaxLookahead
    const alternatives = getLookaheadPathsForOr(
      currOccurrence,
      topLevelRule,
      actualMaxLookahead,
      currOr
    )
    // alternatives whose lookahead paths are identical
    const altsAmbiguityErrors = checkAlternativesAmbiguities(
      alternatives,
      currOr,
      topLevelRule,
      errMsgProvider
    )
    // alternatives shadowed by a strict prefix in an earlier alternative
    const altsPrefixAmbiguityErrors = checkPrefixAlternativesAmbiguities(
      alternatives,
      currOr,
      topLevelRule,
      errMsgProvider
    )

    return altsAmbiguityErrors.concat(altsPrefixAmbiguityErrors)
  })

  return errors
}
|
||||
|
||||
export class RepetitionCollector extends GAstVisitor {
|
||||
public allProductions: (IProductionWithOccurrence & {
|
||||
maxLookahead?: number
|
||||
})[] = []
|
||||
|
||||
public visitRepetitionWithSeparator(manySep: RepetitionWithSeparator): void {
|
||||
this.allProductions.push(manySep)
|
||||
}
|
||||
|
||||
public visitRepetitionMandatory(atLeastOne: RepetitionMandatory): void {
|
||||
this.allProductions.push(atLeastOne)
|
||||
}
|
||||
|
||||
public visitRepetitionMandatoryWithSeparator(
|
||||
atLeastOneSep: RepetitionMandatoryWithSeparator
|
||||
): void {
|
||||
this.allProductions.push(atLeastOneSep)
|
||||
}
|
||||
|
||||
public visitRepetition(many: Repetition): void {
|
||||
this.allProductions.push(many)
|
||||
}
|
||||
}
|
||||
|
||||
export function validateTooManyAlts(
|
||||
topLevelRule: Rule,
|
||||
errMsgProvider: IGrammarValidatorErrorMessageProvider
|
||||
): IParserDefinitionError[] {
|
||||
const orCollector = new OrCollector()
|
||||
topLevelRule.accept(orCollector)
|
||||
const ors = orCollector.alternations
|
||||
|
||||
const errors = flatMap(ors, (currOr) => {
|
||||
if (currOr.definition.length > 255) {
|
||||
return [
|
||||
{
|
||||
message: errMsgProvider.buildTooManyAlternativesError({
|
||||
topLevelRule: topLevelRule,
|
||||
alternation: currOr
|
||||
}),
|
||||
type: ParserDefinitionErrorType.TOO_MANY_ALTS,
|
||||
ruleName: topLevelRule.name,
|
||||
occurrence: currOr.idx
|
||||
}
|
||||
]
|
||||
} else {
|
||||
return []
|
||||
}
|
||||
})
|
||||
|
||||
return errors
|
||||
}
|
||||
|
||||
/**
 * Verifies that every repetition in the grammar can consume at least one
 * token; a repetition whose contents can only match the empty sequence would
 * make the parser loop forever, so a NO_NON_EMPTY_LOOKAHEAD error is produced.
 *
 * @param topLevelRules - all top level rules of the grammar.
 * @param maxLookahead - fallback lookahead depth when a repetition does not
 *        define its own `maxLookahead`.
 * @param errMsgProvider - builds the human readable error message.
 */
export function validateSomeNonEmptyLookaheadPath(
  topLevelRules: Rule[],
  maxLookahead: number,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserDefinitionError[] {
  const errors: IParserDefinitionError[] = []
  forEach(topLevelRules, (currTopRule) => {
    // gather all repetition productions within the current rule
    const collectorVisitor = new RepetitionCollector()
    currTopRule.accept(collectorVisitor)
    const allRuleProductions = collectorVisitor.allProductions
    forEach(allRuleProductions, (currProd) => {
      const prodType = getProdType(currProd)
      // a repetition's own maxLookahead (if set) wins over the global one
      const actualMaxLookahead = currProd.maxLookahead || maxLookahead
      const currOccurrence = currProd.idx
      const paths = getLookaheadPathsForOptionalProd(
        currOccurrence,
        currTopRule,
        prodType,
        actualMaxLookahead
      )
      // paths[0] holds the lookahead paths INSIDE the repetition
      const pathsInsideProduction = paths[0]
      if (isEmpty(flatten(pathsInsideProduction))) {
        // no token can ever be consumed inside the repetition
        const errMsg = errMsgProvider.buildEmptyRepetitionError({
          topLevelRule: currTopRule,
          repetition: currProd
        })
        errors.push({
          message: errMsg,
          type: ParserDefinitionErrorType.NO_NON_EMPTY_LOOKAHEAD,
          ruleName: currTopRule.name
        })
      }
    })
  })

  return errors
}
|
||||
|
||||
/**
 * Describes one detected ambiguity:
 * `path` is a token sequence, `alts` the indices of the alternatives in
 * which that sequence appears (matching the descriptors accumulated in
 * checkAlternativesAmbiguities).
 */
export interface IAmbiguityDescriptor {
  // indices of the alternatives sharing the ambiguous path
  alts: number[]
  // the ambiguous lookahead token sequence
  path: TokenType[]
}
|
||||
|
||||
/**
 * Finds lookahead paths that appear in more than one alternative of an
 * alternation (identical-lookahead ambiguity) and builds an AMBIGUOUS_ALTS
 * error per distinct ambiguous path. Alternatives flagged with
 * `ignoreAmbiguities` are skipped on both sides of the comparison.
 *
 * @param alternatives - the lookahead paths of each alternative.
 * @param alternation - the alternation being checked.
 * @param rule - the rule containing the alternation.
 * @param errMsgProvider - builds the human readable error messages.
 */
function checkAlternativesAmbiguities(
  alternatives: Alternative[],
  alternation: Alternation,
  rule: Rule,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserAmbiguousAlternativesDefinitionError[] {
  // paths already reported, to avoid duplicate descriptors for the same path
  const foundAmbiguousPaths: Alternative = []
  const identicalAmbiguities = reduce(
    alternatives,
    (result, currAlt, currAltIdx) => {
      // ignore (skip) ambiguities with this alternative
      if (alternation.definition[currAltIdx].ignoreAmbiguities === true) {
        return result
      }

      forEach(currAlt, (currPath) => {
        const altsCurrPathAppearsIn = [currAltIdx]
        // find every OTHER alternative that contains the same path
        forEach(alternatives, (currOtherAlt, currOtherAltIdx) => {
          if (
            currAltIdx !== currOtherAltIdx &&
            containsPath(currOtherAlt, currPath) &&
            // ignore (skip) ambiguities with this "other" alternative
            alternation.definition[currOtherAltIdx].ignoreAmbiguities !== true
          ) {
            altsCurrPathAppearsIn.push(currOtherAltIdx)
          }
        })

        // record the path once if it is shared by more than one alternative
        if (
          altsCurrPathAppearsIn.length > 1 &&
          !containsPath(foundAmbiguousPaths, currPath)
        ) {
          foundAmbiguousPaths.push(currPath)
          result.push({
            alts: altsCurrPathAppearsIn,
            path: currPath
          })
        }
      })
      return result
    },
    [] as { alts: number[]; path: TokenType[] }[]
  )

  const currErrors = map(identicalAmbiguities, (currAmbDescriptor) => {
    // 1-based indices for human readable reporting
    const ambgIndices = map(
      currAmbDescriptor.alts,
      (currAltIdx) => currAltIdx + 1
    )

    const currMessage = errMsgProvider.buildAlternationAmbiguityError({
      topLevelRule: rule,
      alternation: alternation,
      ambiguityIndices: ambgIndices,
      prefixPath: currAmbDescriptor.path
    })

    return {
      message: currMessage,
      type: ParserDefinitionErrorType.AMBIGUOUS_ALTS,
      ruleName: rule.name,
      occurrence: alternation.idx,
      alternatives: currAmbDescriptor.alts
    }
  })

  return currErrors
}
|
||||
|
||||
/**
 * Finds prefix ambiguities in an alternation: a lookahead path of a lower
 * index (higher priority) alternative that is a strict prefix of a later
 * alternative's path, so the later alternative could never be chosen.
 * Produces one AMBIGUOUS_PREFIX_ALTS error per such pair. Alternatives
 * flagged with `ignoreAmbiguities` are skipped on both sides.
 *
 * @param alternatives - the lookahead paths of each alternative.
 * @param alternation - the alternation being checked.
 * @param rule - the rule containing the alternation.
 * @param errMsgProvider - builds the human readable error messages.
 */
export function checkPrefixAlternativesAmbiguities(
  alternatives: Alternative[],
  alternation: Alternation,
  rule: Rule,
  errMsgProvider: IGrammarValidatorErrorMessageProvider
): IParserAmbiguousAlternativesDefinitionError[] {
  // flatten: pair every path with the index of the alternative it came from
  const pathsAndIndices = reduce(
    alternatives,
    (result, currAlt, idx) => {
      const currPathsAndIdx = map(currAlt, (currPath) => {
        return { idx: idx, path: currPath }
      })
      return result.concat(currPathsAndIdx)
    },
    [] as { idx: number; path: TokenType[] }[]
  )

  const errors = compact(
    flatMap(pathsAndIndices, (currPathAndIdx) => {
      const alternativeGast = alternation.definition[currPathAndIdx.idx]
      // ignore (skip) ambiguities with this alternative
      if (alternativeGast.ignoreAmbiguities === true) {
        return []
      }
      const targetIdx = currPathAndIdx.idx
      const targetPath = currPathAndIdx.path

      const prefixAmbiguitiesPathsAndIndices = filter(
        pathsAndIndices,
        (searchPathAndIdx) => {
          // prefix ambiguity can only be created from lower idx (higher priority) path
          return (
            // ignore (skip) ambiguities with this "other" alternative
            alternation.definition[searchPathAndIdx.idx].ignoreAmbiguities !==
              true &&
            searchPathAndIdx.idx < targetIdx &&
            // checking for strict prefix because identical lookaheads
            // will be detected using a different validation.
            isStrictPrefixOfPath(searchPathAndIdx.path, targetPath)
          )
        }
      )

      const currPathPrefixErrors = map(
        prefixAmbiguitiesPathsAndIndices,
        (currAmbPathAndIdx): IParserAmbiguousAlternativesDefinitionError => {
          // 1-based indices for human readable reporting
          const ambgIndices = [currAmbPathAndIdx.idx + 1, targetIdx + 1]
          // occurrence 0 is rendered as an empty string in the message
          const occurrence = alternation.idx === 0 ? "" : alternation.idx

          const message = errMsgProvider.buildAlternationPrefixAmbiguityError({
            topLevelRule: rule,
            alternation: alternation,
            ambiguityIndices: ambgIndices,
            prefixPath: currAmbPathAndIdx.path
          })
          return {
            message: message,
            type: ParserDefinitionErrorType.AMBIGUOUS_PREFIX_ALTS,
            ruleName: rule.name,
            occurrence: occurrence,
            alternatives: ambgIndices
          }
        }
      )

      return currPathPrefixErrors
    })
  )

  return errors
}
|
||||
|
||||
function checkTerminalAndNoneTerminalsNameSpace(
|
||||
topLevels: Rule[],
|
||||
tokenTypes: TokenType[],
|
||||
errMsgProvider: IGrammarValidatorErrorMessageProvider
|
||||
): IParserDefinitionError[] {
|
||||
const errors: IParserDefinitionError[] = []
|
||||
|
||||
const tokenNames = map(tokenTypes, (currToken) => currToken.name)
|
||||
|
||||
forEach(topLevels, (currRule) => {
|
||||
const currRuleName = currRule.name
|
||||
if (includes(tokenNames, currRuleName)) {
|
||||
const errMsg = errMsgProvider.buildNamespaceConflictError(currRule)
|
||||
|
||||
errors.push({
|
||||
message: errMsg,
|
||||
type: ParserDefinitionErrorType.CONFLICT_TOKENS_RULES_NAMESPACE,
|
||||
ruleName: currRuleName
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
return errors
|
||||
}
|
||||
71
_node_modules/chevrotain/src/parse/grammar/first.ts
generated
Normal file
71
_node_modules/chevrotain/src/parse/grammar/first.ts
generated
Normal file
@@ -0,0 +1,71 @@
|
||||
import flatten from "lodash/flatten"
|
||||
import uniq from "lodash/uniq"
|
||||
import map from "lodash/map"
|
||||
import { NonTerminal, Terminal } from "@chevrotain/gast"
|
||||
import {
|
||||
isBranchingProd,
|
||||
isOptionalProd,
|
||||
isSequenceProd
|
||||
} from "@chevrotain/gast"
|
||||
import { IProduction, TokenType } from "@chevrotain/types"
|
||||
|
||||
export function first(prod: IProduction): TokenType[] {
|
||||
/* istanbul ignore else */
|
||||
if (prod instanceof NonTerminal) {
|
||||
// this could in theory cause infinite loops if
|
||||
// (1) prod A refs prod B.
|
||||
// (2) prod B refs prod A
|
||||
// (3) AB can match the empty set
|
||||
// in other words a cycle where everything is optional so the first will keep
|
||||
// looking ahead for the next optional part and will never exit
|
||||
// currently there is no safeguard for this unique edge case because
|
||||
// (1) not sure a grammar in which this can happen is useful for anything (productive)
|
||||
return first((<NonTerminal>prod).referencedRule)
|
||||
} else if (prod instanceof Terminal) {
|
||||
return firstForTerminal(<Terminal>prod)
|
||||
} else if (isSequenceProd(prod)) {
|
||||
return firstForSequence(prod)
|
||||
} else if (isBranchingProd(prod)) {
|
||||
return firstForBranching(prod)
|
||||
} else {
|
||||
throw Error("non exhaustive match")
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * FIRST set of a sequence: the union of the FIRST sets of its leading
 * productions, scanning forward as long as each production is optional
 * (an optional production may match nothing, letting the next one be first).
 */
export function firstForSequence(prod: {
  definition: IProduction[]
}): TokenType[] {
  let firstSet: TokenType[] = []
  const seq = prod.definition
  let nextSubProdIdx = 0
  let hasInnerProdsRemaining = seq.length > nextSubProdIdx
  let currSubProd
  // so we enter the loop at least once (if the definition is not empty)
  let isLastInnerProdOptional = true
  // scan a sequence until it's end or until we have found a NONE optional production in it
  while (hasInnerProdsRemaining && isLastInnerProdOptional) {
    currSubProd = seq[nextSubProdIdx]
    isLastInnerProdOptional = isOptionalProd(currSubProd)
    firstSet = firstSet.concat(first(currSubProd))
    nextSubProdIdx = nextSubProdIdx + 1
    hasInnerProdsRemaining = seq.length > nextSubProdIdx
  }

  // the same token may be first in several sub-productions; dedupe
  return uniq(firstSet)
}
|
||||
|
||||
export function firstForBranching(prod: {
|
||||
definition: IProduction[]
|
||||
}): TokenType[] {
|
||||
const allAlternativesFirsts: TokenType[][] = map(
|
||||
prod.definition,
|
||||
(innerProd) => {
|
||||
return first(innerProd)
|
||||
}
|
||||
)
|
||||
return uniq(flatten<TokenType>(allAlternativesFirsts))
|
||||
}
|
||||
|
||||
/** FIRST set of a terminal is simply its own token type. */
export function firstForTerminal(terminal: Terminal): TokenType[] {
  return [terminal.terminalType]
}
|
||||
68
_node_modules/chevrotain/src/parse/grammar/follow.ts
generated
Normal file
68
_node_modules/chevrotain/src/parse/grammar/follow.ts
generated
Normal file
@@ -0,0 +1,68 @@
|
||||
import { RestWalker } from "./rest"
|
||||
import { first } from "./first"
|
||||
import forEach from "lodash/forEach"
|
||||
import assign from "lodash/assign"
|
||||
import { IN } from "../constants"
|
||||
import { Alternative, NonTerminal, Rule, Terminal } from "@chevrotain/gast"
|
||||
import { IProduction, TokenType } from "@chevrotain/types"
|
||||
|
||||
// This ResyncFollowsWalker computes all of the follows required for RESYNC
// (skipping reference production).
export class ResyncFollowsWalker extends RestWalker {
  // maps a follow-key (see buildBetweenProdsFollowPrefix + top rule name)
  // to the token types that may follow that production reference
  public follows: Record<string, TokenType[]> = {}

  constructor(private topProd: Rule) {
    super()
  }

  // walks the top production and returns the accumulated follows map
  startWalking(): Record<string, TokenType[]> {
    this.walk(this.topProd)
    return this.follows
  }

  walkTerminal(
    terminal: Terminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // do nothing! just like in the public sector after 13:00
  }

  walkProdRef(
    refProd: NonTerminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // key identifying "refProd (occurrence idx) inside topProd"
    const followName =
      buildBetweenProdsFollowPrefix(refProd.referencedRule, refProd.idx) +
      this.topProd.name
    // everything that may be parsed after this reference, within topProd
    const fullRest: IProduction[] = currRest.concat(prevRest)
    const restProd = new Alternative({ definition: fullRest })
    const t_in_topProd_follows = first(restProd)
    this.follows[followName] = t_in_topProd_follows
  }
}
|
||||
|
||||
export function computeAllProdsFollows(
|
||||
topProductions: Rule[]
|
||||
): Record<string, TokenType[]> {
|
||||
const reSyncFollows = {}
|
||||
|
||||
forEach(topProductions, (topProd) => {
|
||||
const currRefsFollow = new ResyncFollowsWalker(topProd).startWalking()
|
||||
assign(reSyncFollows, currRefsFollow)
|
||||
})
|
||||
return reSyncFollows
|
||||
}
|
||||
|
||||
export function buildBetweenProdsFollowPrefix(
|
||||
inner: Rule,
|
||||
occurenceInParent: number
|
||||
): string {
|
||||
return inner.name + occurenceInParent + IN
|
||||
}
|
||||
|
||||
export function buildInProdFollowPrefix(terminal: Terminal): string {
|
||||
const terminalName = terminal.terminalType.name
|
||||
return terminalName + terminal.idx + IN
|
||||
}
|
||||
51
_node_modules/chevrotain/src/parse/grammar/gast/gast_resolver_public.ts
generated
Normal file
51
_node_modules/chevrotain/src/parse/grammar/gast/gast_resolver_public.ts
generated
Normal file
@@ -0,0 +1,51 @@
|
||||
import { Rule } from "@chevrotain/gast"
|
||||
import forEach from "lodash/forEach"
|
||||
import defaults from "lodash/defaults"
|
||||
import { resolveGrammar as orgResolveGrammar } from "../resolver"
|
||||
import { validateGrammar as orgValidateGrammar } from "../checks"
|
||||
import {
|
||||
defaultGrammarResolverErrorProvider,
|
||||
defaultGrammarValidatorErrorProvider
|
||||
} from "../../errors_public"
|
||||
import { TokenType } from "@chevrotain/types"
|
||||
import {
|
||||
IGrammarResolverErrorMessageProvider,
|
||||
IGrammarValidatorErrorMessageProvider,
|
||||
IParserDefinitionError
|
||||
} from "../types"
|
||||
|
||||
type ResolveGrammarOpts = {
|
||||
rules: Rule[]
|
||||
errMsgProvider?: IGrammarResolverErrorMessageProvider
|
||||
}
|
||||
export function resolveGrammar(
|
||||
options: ResolveGrammarOpts
|
||||
): IParserDefinitionError[] {
|
||||
const actualOptions: Required<ResolveGrammarOpts> = defaults(options, {
|
||||
errMsgProvider: defaultGrammarResolverErrorProvider
|
||||
})
|
||||
|
||||
const topRulesTable: { [ruleName: string]: Rule } = {}
|
||||
forEach(options.rules, (rule) => {
|
||||
topRulesTable[rule.name] = rule
|
||||
})
|
||||
return orgResolveGrammar(topRulesTable, actualOptions.errMsgProvider)
|
||||
}
|
||||
|
||||
export function validateGrammar(options: {
|
||||
rules: Rule[]
|
||||
tokenTypes: TokenType[]
|
||||
grammarName: string
|
||||
errMsgProvider: IGrammarValidatorErrorMessageProvider
|
||||
}): IParserDefinitionError[] {
|
||||
options = defaults(options, {
|
||||
errMsgProvider: defaultGrammarValidatorErrorProvider
|
||||
})
|
||||
|
||||
return orgValidateGrammar(
|
||||
options.rules,
|
||||
options.tokenTypes,
|
||||
options.errMsgProvider,
|
||||
options.grammarName
|
||||
)
|
||||
}
|
||||
612
_node_modules/chevrotain/src/parse/grammar/interpreter.ts
generated
Normal file
612
_node_modules/chevrotain/src/parse/grammar/interpreter.ts
generated
Normal file
@@ -0,0 +1,612 @@
|
||||
import { RestWalker } from "./rest"
|
||||
import _first from "lodash/first"
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import dropRight from "lodash/dropRight"
|
||||
import drop from "lodash/drop"
|
||||
import last from "lodash/last"
|
||||
import forEach from "lodash/forEach"
|
||||
import clone from "lodash/clone"
|
||||
import { first } from "./first"
|
||||
import { TokenMatcher } from "../parser/parser"
|
||||
import {
|
||||
Alternation,
|
||||
Alternative,
|
||||
NonTerminal,
|
||||
Option,
|
||||
Repetition,
|
||||
RepetitionMandatory,
|
||||
RepetitionMandatoryWithSeparator,
|
||||
RepetitionWithSeparator,
|
||||
Rule,
|
||||
Terminal
|
||||
} from "@chevrotain/gast"
|
||||
import {
|
||||
IGrammarPath,
|
||||
IProduction,
|
||||
ISyntacticContentAssistPath,
|
||||
IToken,
|
||||
ITokenGrammarPath,
|
||||
TokenType
|
||||
} from "@chevrotain/types"
|
||||
|
||||
/**
 * Base walker that follows a concrete grammar path (rule stack + occurrence
 * stack) through the GAST and collects the token types that may appear next.
 * Subclasses decide what "next" means (e.g. after a specific terminal).
 */
export abstract class AbstractNextPossibleTokensWalker extends RestWalker {
  // accumulated result: the token types possible at the end of the path
  protected possibleTokTypes: TokenType[] = []
  protected ruleStack: string[]
  protected occurrenceStack: number[]

  // the next rule reference we expect to descend into while replaying the path
  protected nextProductionName = ""
  protected nextProductionOccurrence = 0
  protected found = false
  protected isAtEndOfPath = false

  constructor(protected topProd: Rule, protected path: IGrammarPath) {
    super()
  }

  startWalking(): TokenType[] {
    this.found = false

    if (this.path.ruleStack[0] !== this.topProd.name) {
      throw Error("The path does not start with the walker's top Rule!")
    }

    // immutable for the win
    this.ruleStack = clone(this.path.ruleStack).reverse() // intelij bug requires assertion
    this.occurrenceStack = clone(this.path.occurrenceStack).reverse() // intelij bug requires assertion

    // already verified that the first production is valid, we now seek the 2nd production
    this.ruleStack.pop()
    this.occurrenceStack.pop()

    this.updateExpectedNext()
    this.walk(this.topProd)

    return this.possibleTokTypes
  }

  walk(
    prod: { definition: IProduction[] },
    prevRest: IProduction[] = []
  ): void {
    // stop scanning once we found the path
    if (!this.found) {
      super.walk(prod, prevRest)
    }
  }

  walkProdRef(
    refProd: NonTerminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // found the next production, need to keep walking in it
    if (
      refProd.referencedRule.name === this.nextProductionName &&
      refProd.idx === this.nextProductionOccurrence
    ) {
      const fullRest = currRest.concat(prevRest)
      this.updateExpectedNext()
      this.walk(refProd.referencedRule, <any>fullRest)
    }
  }

  updateExpectedNext(): void {
    // need to consume the Terminal
    if (isEmpty(this.ruleStack)) {
      // must reset nextProductionXXX to avoid walking down another Top Level production while what we are
      // really seeking is the last Terminal...
      this.nextProductionName = ""
      this.nextProductionOccurrence = 0
      this.isAtEndOfPath = true
    } else {
      this.nextProductionName = this.ruleStack.pop()!
      this.nextProductionOccurrence = this.occurrenceStack.pop()!
    }
  }
}
|
||||
|
||||
/**
 * Walker that computes the token types which may appear immediately AFTER a
 * specific terminal (the last token of the supplied grammar path).
 */
export class NextAfterTokenWalker extends AbstractNextPossibleTokensWalker {
  private nextTerminalName = ""
  private nextTerminalOccurrence = 0

  constructor(topProd: Rule, protected path: ITokenGrammarPath) {
    super(topProd, path)
    this.nextTerminalName = this.path.lastTok.name
    this.nextTerminalOccurrence = this.path.lastTokOccurrence
  }

  walkTerminal(
    terminal: Terminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // only react to the exact terminal (name + occurrence) at the end of the path
    if (
      this.isAtEndOfPath &&
      terminal.terminalType.name === this.nextTerminalName &&
      terminal.idx === this.nextTerminalOccurrence &&
      !this.found
    ) {
      // everything that may be parsed after this terminal, within the top rule
      const fullRest = currRest.concat(prevRest)
      const restProd = new Alternative({ definition: fullRest })
      this.possibleTokTypes = first(restProd)
      this.found = true
    }
  }
}
|
||||
|
||||
// One entry per alternative: the TokenTypes that may appear first in that alternative.
export type AlternativesFirstTokens = TokenType[][]
||||
|
||||
// Result of searching for the first Terminal that follows a repetition production.
export interface IFirstAfterRepetition {
  // the following Terminal's TokenType; undefined when none was found
  token: TokenType | undefined
  // the occurrence (idx) of that Terminal; undefined when none was found
  occurrence: number | undefined
  // true when nothing follows the repetition (it ends its rule); undefined before/without a match
  isEndOfRule: boolean | undefined
}
||||
|
||||
/**
 * This walker only "walks" a single "TOP" level in the Grammar Ast, this means
 * it never "follows" production refs
 */
export class AbstractNextTerminalAfterProductionWalker extends RestWalker {
  // outcome of the walk; populated by subclass walkXXX overrides, all fields
  // remain undefined if the target production/following terminal was not found
  protected result: IFirstAfterRepetition = {
    token: undefined,
    occurrence: undefined,
    isEndOfRule: undefined
  }

  // topRule: the single rule to walk; occurrence: idx of the target production inside it
  constructor(protected topRule: Rule, protected occurrence: number) {
    super()
  }

  // Walks the top rule once and returns the (possibly still empty) result.
  startWalking(): IFirstAfterRepetition {
    this.walk(this.topRule)
    return this.result
  }
}
||||
|
||||
// Finds the first Terminal that may appear immediately after a MANY (Repetition)
// production with the configured occurrence index, within a single top-level rule.
export class NextTerminalAfterManyWalker extends AbstractNextTerminalAfterProductionWalker {
  walkMany(
    manyProd: Repetition,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (manyProd.idx === this.occurrence) {
      // what follows this repetition is the concatenation of current and previous rests
      const firstAfterMany = _first(currRest.concat(prevRest))
      this.result.isEndOfRule = firstAfterMany === undefined
      if (firstAfterMany instanceof Terminal) {
        this.result.token = firstAfterMany.terminalType
        this.result.occurrence = firstAfterMany.idx
      }
    } else {
      // not the repetition we are seeking --> keep walking the rest of the rule
      super.walkMany(manyProd, currRest, prevRest)
    }
  }
}
||||
|
||||
// Finds the first Terminal that may appear immediately after a MANY_SEP
// (RepetitionWithSeparator) production with the configured occurrence index.
export class NextTerminalAfterManySepWalker extends AbstractNextTerminalAfterProductionWalker {
  walkManySep(
    manySepProd: RepetitionWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (manySepProd.idx === this.occurrence) {
      // what follows this repetition is the concatenation of current and previous rests
      const firstAfterManySep = _first(currRest.concat(prevRest))
      this.result.isEndOfRule = firstAfterManySep === undefined
      if (firstAfterManySep instanceof Terminal) {
        this.result.token = firstAfterManySep.terminalType
        this.result.occurrence = firstAfterManySep.idx
      }
    } else {
      // not the repetition we are seeking --> keep walking the rest of the rule
      super.walkManySep(manySepProd, currRest, prevRest)
    }
  }
}
||||
|
||||
// Finds the first Terminal that may appear immediately after an AT_LEAST_ONE
// (RepetitionMandatory) production with the configured occurrence index.
export class NextTerminalAfterAtLeastOneWalker extends AbstractNextTerminalAfterProductionWalker {
  walkAtLeastOne(
    atLeastOneProd: RepetitionMandatory,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (atLeastOneProd.idx === this.occurrence) {
      // what follows this repetition is the concatenation of current and previous rests
      const firstAfterAtLeastOne = _first(currRest.concat(prevRest))
      this.result.isEndOfRule = firstAfterAtLeastOne === undefined
      if (firstAfterAtLeastOne instanceof Terminal) {
        this.result.token = firstAfterAtLeastOne.terminalType
        this.result.occurrence = firstAfterAtLeastOne.idx
      }
    } else {
      // not the repetition we are seeking --> keep walking the rest of the rule
      super.walkAtLeastOne(atLeastOneProd, currRest, prevRest)
    }
  }
}
||||
|
||||
// TODO: reduce code duplication in the AfterWalkers
|
||||
export class NextTerminalAfterAtLeastOneSepWalker extends AbstractNextTerminalAfterProductionWalker {
|
||||
walkAtLeastOneSep(
|
||||
atleastOneSepProd: RepetitionMandatoryWithSeparator,
|
||||
currRest: IProduction[],
|
||||
prevRest: IProduction[]
|
||||
): void {
|
||||
if (atleastOneSepProd.idx === this.occurrence) {
|
||||
const firstAfterfirstAfterAtLeastOneSep = _first(
|
||||
currRest.concat(prevRest)
|
||||
)
|
||||
this.result.isEndOfRule = firstAfterfirstAfterAtLeastOneSep === undefined
|
||||
if (firstAfterfirstAfterAtLeastOneSep instanceof Terminal) {
|
||||
this.result.token = firstAfterfirstAfterAtLeastOneSep.terminalType
|
||||
this.result.occurrence = firstAfterfirstAfterAtLeastOneSep.idx
|
||||
}
|
||||
} else {
|
||||
super.walkAtLeastOneSep(atleastOneSepProd, currRest, prevRest)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A partial lookahead path (sequence of TokenTypes consumed so far) together
// with the grammar definition suffix that remains un-expanded after it.
export interface PartialPathAndSuffixes {
  partialPath: TokenType[]
  suffixDef: IProduction[]
}
||||
|
||||
/**
 * Recursively computes all token paths of length up to `maxLength` that can
 * start a parse of `targetDef`, each paired with the definition suffix that
 * remains after the path (see PartialPathAndSuffixes).
 *
 * @param targetDef - the grammar definition to expand.
 * @param maxLength - maximum number of tokens per partial path.
 * @param currPath - the path prefix accumulated by outer recursive calls.
 */
export function possiblePathsFrom(
  targetDef: IProduction[],
  maxLength: number,
  currPath: TokenType[] = []
): PartialPathAndSuffixes[] {
  // avoid side effects
  currPath = clone(currPath)
  let result: PartialPathAndSuffixes[] = []
  let i = 0

  // TODO: avoid inner funcs
  // the rest of targetDef after position i, with `nextDef` expanded in front of it
  function remainingPathWith(nextDef: IProduction[]) {
    return nextDef.concat(drop(targetDef, i + 1))
  }

  // TODO: avoid inner funcs
  // recurse into `definition` and merge the sub-paths into the accumulated result
  function getAlternativesForProd(definition: IProduction[]) {
    const alternatives = possiblePathsFrom(
      remainingPathWith(definition),
      maxLength,
      currPath
    )
    return result.concat(alternatives)
  }

  /**
   * Mandatory productions will halt the loop as the paths computed from their recursive calls will already contain the
   * following (rest) of the targetDef.
   *
   * For optional productions (Option/Repetition/...) the loop will continue to represent the paths that do not include the
   * the optional production.
   */
  while (currPath.length < maxLength && i < targetDef.length) {
    const prod = targetDef[i]

    /* istanbul ignore else */
    if (prod instanceof Alternative) {
      return getAlternativesForProd(prod.definition)
    } else if (prod instanceof NonTerminal) {
      return getAlternativesForProd(prod.definition)
    } else if (prod instanceof Option) {
      result = getAlternativesForProd(prod.definition)
    } else if (prod instanceof RepetitionMandatory) {
      // (A)+ is expanded to A (A)* — one mandatory iteration then an optional repetition
      const newDef = prod.definition.concat([
        new Repetition({
          definition: prod.definition
        })
      ])
      return getAlternativesForProd(newDef)
    } else if (prod instanceof RepetitionMandatoryWithSeparator) {
      // (A (SEP A)*)+ is expanded to A (SEP A)*
      const newDef = [
        new Alternative({ definition: prod.definition }),
        new Repetition({
          definition: [new Terminal({ terminalType: prod.separator })].concat(
            <any>prod.definition
          )
        })
      ]
      return getAlternativesForProd(newDef)
    } else if (prod instanceof RepetitionWithSeparator) {
      // optional: paths both with and without the repetition are represented
      const newDef = prod.definition.concat([
        new Repetition({
          definition: [new Terminal({ terminalType: prod.separator })].concat(
            <any>prod.definition
          )
        })
      ])
      result = getAlternativesForProd(newDef)
    } else if (prod instanceof Repetition) {
      // optional: paths both with and without the repetition are represented
      const newDef = prod.definition.concat([
        new Repetition({
          definition: prod.definition
        })
      ])
      result = getAlternativesForProd(newDef)
    } else if (prod instanceof Alternation) {
      forEach(prod.definition, (currAlt) => {
        // TODO: this is a limited check for empty alternatives
        // It would prevent a common case of infinite loops during parser initialization.
        // However **in-directly** empty alternatives may still cause issues.
        if (isEmpty(currAlt.definition) === false) {
          result = getAlternativesForProd(currAlt.definition)
        }
      })
      return result
    } else if (prod instanceof Terminal) {
      currPath.push(prod.terminalType)
    } else {
      throw Error("non exhaustive match")
    }

    i++
  }
  // record the path accumulated by this invocation plus whatever of targetDef was not expanded
  result.push({
    partialPath: currPath,
    suffixDef: drop(targetDef, i)
  })

  return result
}
|
||||
|
||||
// A single exploration state on nextPossibleTokensAfter's work stack.
interface IPathToExamine {
  // index into the token vector of the last token matched so far (-1 initially)
  idx: number
  // the remaining grammar definition to walk
  def: IProduction[]
  // names of the rules entered and not yet exited
  ruleStack: string[]
  // occurrence indices parallel to ruleStack
  occurrenceStack: number[]
}
||||
|
||||
/**
 * Interprets the grammar against an existing token vector and collects every
 * TokenType that could legally appear as the NEXT token (syntactic content
 * assist). Implemented as an explicit-stack walk over the grammar: each stack
 * entry pairs a remaining definition with how much of the token vector it has
 * already matched. Sentinel string values (EXIT_NON_TERMINAL / EXIT_ALTERNATIVE)
 * are pushed onto the definition / path stacks to mark scope exits, so the push
 * order below is significant.
 *
 * @param initialDef - grammar definition to start walking from.
 * @param tokenVector - the tokens already consumed by the parser.
 * @param tokMatcher - predicate matching an actual token against a TokenType.
 * @param maxLookAhead - the parser's lookahead depth, used to prune alternatives.
 */
export function nextPossibleTokensAfter(
  initialDef: IProduction[],
  tokenVector: IToken[],
  tokMatcher: TokenMatcher,
  maxLookAhead: number
): ISyntacticContentAssistPath[] {
  const EXIT_NON_TERMINAL: any = "EXIT_NONE_TERMINAL"
  // to avoid creating a new Array each time.
  const EXIT_NON_TERMINAL_ARR = [EXIT_NON_TERMINAL]
  const EXIT_ALTERNATIVE: any = "EXIT_ALTERNATIVE"
  let foundCompletePath = false

  const tokenVectorLength = tokenVector.length
  // alternatives whose progress is at or below this index cannot catch up within maxLookAhead
  const minimalAlternativesIndex = tokenVectorLength - maxLookAhead - 1

  const result: ISyntacticContentAssistPath[] = []

  // DFS work stack of paths still to examine
  const possiblePaths: IPathToExamine[] = []
  possiblePaths.push({
    idx: -1,
    def: initialDef,
    ruleStack: [],
    occurrenceStack: []
  })

  while (!isEmpty(possiblePaths)) {
    const currPath = possiblePaths.pop()!

    // skip alternatives if no more results can be found (assuming deterministic grammar with fixed lookahead)
    if (currPath === EXIT_ALTERNATIVE) {
      if (
        foundCompletePath &&
        last(possiblePaths)!.idx <= minimalAlternativesIndex
      ) {
        // remove irrelevant alternative
        possiblePaths.pop()
      }
      continue
    }

    const currDef = currPath.def
    const currIdx = currPath.idx
    const currRuleStack = currPath.ruleStack
    const currOccurrenceStack = currPath.occurrenceStack

    // For Example: an empty path could exist in a valid grammar in the case of an EMPTY_ALT
    if (isEmpty(currDef)) {
      continue
    }

    const prod = currDef[0]
    /* istanbul ignore else */
    if (prod === EXIT_NON_TERMINAL) {
      // leaving a NonTerminal scope: pop one frame off both stacks
      const nextPath = {
        idx: currIdx,
        def: drop(currDef),
        ruleStack: dropRight(currRuleStack),
        occurrenceStack: dropRight(currOccurrenceStack)
      }
      possiblePaths.push(nextPath)
    } else if (prod instanceof Terminal) {
      /* istanbul ignore else */
      if (currIdx < tokenVectorLength - 1) {
        // there are still real tokens to match against
        const nextIdx = currIdx + 1
        const actualToken = tokenVector[nextIdx]
        if (tokMatcher!(actualToken, prod.terminalType)) {
          const nextPath = {
            idx: nextIdx,
            def: drop(currDef),
            ruleStack: currRuleStack,
            occurrenceStack: currOccurrenceStack
          }
          possiblePaths.push(nextPath)
        }
        // end of the line
      } else if (currIdx === tokenVectorLength - 1) {
        // IGNORE ABOVE ELSE
        // the whole token vector was matched --> this Terminal is a valid suggestion
        result.push({
          nextTokenType: prod.terminalType,
          nextTokenOccurrence: prod.idx,
          ruleStack: currRuleStack,
          occurrenceStack: currOccurrenceStack
        })
        foundCompletePath = true
      } else {
        throw Error("non exhaustive match")
      }
    } else if (prod instanceof NonTerminal) {
      // enter the referenced rule; EXIT_NON_TERMINAL marks where to pop the stacks
      const newRuleStack = clone(currRuleStack)
      newRuleStack.push(prod.nonTerminalName)

      const newOccurrenceStack = clone(currOccurrenceStack)
      newOccurrenceStack.push(prod.idx)

      const nextPath = {
        idx: currIdx,
        def: prod.definition.concat(EXIT_NON_TERMINAL_ARR, drop(currDef)),
        ruleStack: newRuleStack,
        occurrenceStack: newOccurrenceStack
      }
      possiblePaths.push(nextPath)
    } else if (prod instanceof Option) {
      // the order of alternatives is meaningful, FILO (Last path will be traversed first).
      const nextPathWithout = {
        idx: currIdx,
        def: drop(currDef),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWithout)
      // required marker to avoid backtracking paths whose higher priority alternatives already matched
      possiblePaths.push(EXIT_ALTERNATIVE)

      const nextPathWith = {
        idx: currIdx,
        def: prod.definition.concat(drop(currDef)),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWith)
    } else if (prod instanceof RepetitionMandatory) {
      // TODO:(THE NEW operators here take a while...) (convert once?)
      // (A)+ --> A followed by an optional (A)*
      const secondIteration = new Repetition({
        definition: prod.definition,
        idx: prod.idx
      })
      const nextDef = prod.definition.concat([secondIteration], drop(currDef))
      const nextPath = {
        idx: currIdx,
        def: nextDef,
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPath)
    } else if (prod instanceof RepetitionMandatoryWithSeparator) {
      // TODO:(THE NEW operators here take a while...) (convert once?)
      // (A SEP)+ --> A followed by an optional (SEP A)*
      const separatorGast = new Terminal({
        terminalType: prod.separator
      })
      const secondIteration = new Repetition({
        definition: [<any>separatorGast].concat(prod.definition),
        idx: prod.idx
      })
      const nextDef = prod.definition.concat([secondIteration], drop(currDef))
      const nextPath = {
        idx: currIdx,
        def: nextDef,
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPath)
    } else if (prod instanceof RepetitionWithSeparator) {
      // the order of alternatives is meaningful, FILO (Last path will be traversed first).
      const nextPathWithout = {
        idx: currIdx,
        def: drop(currDef),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWithout)
      // required marker to avoid backtracking paths whose higher priority alternatives already matched
      possiblePaths.push(EXIT_ALTERNATIVE)

      const separatorGast = new Terminal({
        terminalType: prod.separator
      })
      const nthRepetition = new Repetition({
        definition: [<any>separatorGast].concat(prod.definition),
        idx: prod.idx
      })
      const nextDef = prod.definition.concat([nthRepetition], drop(currDef))
      const nextPathWith = {
        idx: currIdx,
        def: nextDef,
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWith)
    } else if (prod instanceof Repetition) {
      // the order of alternatives is meaningful, FILO (Last path will be traversed first).
      const nextPathWithout = {
        idx: currIdx,
        def: drop(currDef),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWithout)
      // required marker to avoid backtracking paths whose higher priority alternatives already matched
      possiblePaths.push(EXIT_ALTERNATIVE)

      // TODO: an empty repetition will cause infinite loops here, will the parser detect this in selfAnalysis?
      const nthRepetition = new Repetition({
        definition: prod.definition,
        idx: prod.idx
      })
      const nextDef = prod.definition.concat([nthRepetition], drop(currDef))
      const nextPathWith = {
        idx: currIdx,
        def: nextDef,
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      }
      possiblePaths.push(nextPathWith)
    } else if (prod instanceof Alternation) {
      // the order of alternatives is meaningful, FILO (Last path will be traversed first).
      for (let i = prod.definition.length - 1; i >= 0; i--) {
        const currAlt: any = prod.definition[i]
        const currAltPath = {
          idx: currIdx,
          def: currAlt.definition.concat(drop(currDef)),
          ruleStack: currRuleStack,
          occurrenceStack: currOccurrenceStack
        }
        possiblePaths.push(currAltPath)
        possiblePaths.push(EXIT_ALTERNATIVE)
      }
    } else if (prod instanceof Alternative) {
      possiblePaths.push({
        idx: currIdx,
        def: prod.definition.concat(drop(currDef)),
        ruleStack: currRuleStack,
        occurrenceStack: currOccurrenceStack
      })
    } else if (prod instanceof Rule) {
      // last because we should only encounter at most a single one of these per invocation.
      possiblePaths.push(
        expandTopLevelRule(prod, currIdx, currRuleStack, currOccurrenceStack)
      )
    } else {
      throw Error("non exhaustive match")
    }
  }
  return result
}
|
||||
|
||||
function expandTopLevelRule(
|
||||
topRule: Rule,
|
||||
currIdx: number,
|
||||
currRuleStack: string[],
|
||||
currOccurrenceStack: number[]
|
||||
): IPathToExamine {
|
||||
const newRuleStack = clone(currRuleStack)
|
||||
newRuleStack.push(topRule.name)
|
||||
|
||||
const newCurrOccurrenceStack = clone(currOccurrenceStack)
|
||||
// top rule is always assumed to have been called with occurrence index 1
|
||||
newCurrOccurrenceStack.push(1)
|
||||
|
||||
return {
|
||||
idx: currIdx,
|
||||
def: topRule.definition,
|
||||
ruleStack: newRuleStack,
|
||||
occurrenceStack: newCurrOccurrenceStack
|
||||
}
|
||||
}
|
||||
33
_node_modules/chevrotain/src/parse/grammar/keys.ts
generated
Normal file
33
_node_modules/chevrotain/src/parse/grammar/keys.ts
generated
Normal file
@@ -0,0 +1,33 @@
|
||||
// Lookahead keys are 32Bit integers in the form
// TTTTTTTT-ZZZZZZZZZZZZ-YYYY-XXXXXXXX
// XXXXXXXX -> Occurrence Index bitmap (8 bits).
// YYYY -> DSL Method Type bitmap (4 bits).
// ZZZZZZZZZZZZ -> Rule short Index bitmap (12 bits).
// TTTTTTTT -> alternation alternative index bitmap (8 bits)

export const BITS_FOR_METHOD_TYPE = 4
export const BITS_FOR_OCCURRENCE_IDX = 8
export const BITS_FOR_RULE_IDX = 12
// TODO: validation, this means that there may at most 2^8 --> 256 alternatives for an alternation.
export const BITS_FOR_ALT_IDX = 8

// short string used as part of mapping keys.
// being short improves the performance when composing KEYS for maps out of these
// The bits directly above the occurrence bits (16 possible values) are reserved for the DSL method indices
export const OR_IDX = 1 << BITS_FOR_OCCURRENCE_IDX
export const OPTION_IDX = 2 << BITS_FOR_OCCURRENCE_IDX
export const MANY_IDX = 3 << BITS_FOR_OCCURRENCE_IDX
export const AT_LEAST_ONE_IDX = 4 << BITS_FOR_OCCURRENCE_IDX
export const MANY_SEP_IDX = 5 << BITS_FOR_OCCURRENCE_IDX
export const AT_LEAST_ONE_SEP_IDX = 6 << BITS_FOR_OCCURRENCE_IDX
|
||||
// this actually returns a number, but it is always used as a string (object prop key)
|
||||
export function getKeyForAutomaticLookahead(
|
||||
ruleIdx: number,
|
||||
dslMethodIdx: number,
|
||||
occurrence: number
|
||||
): number {
|
||||
return occurrence | dslMethodIdx | ruleIdx
|
||||
}
|
||||
|
||||
// bit position where the alternation-alternative index bitmap begins (top bits of the 32-bit key)
const BITS_START_FOR_ALT_IDX = 32 - BITS_FOR_ALT_IDX
||||
140
_node_modules/chevrotain/src/parse/grammar/llk_lookahead.ts
generated
Normal file
140
_node_modules/chevrotain/src/parse/grammar/llk_lookahead.ts
generated
Normal file
@@ -0,0 +1,140 @@
|
||||
import {
|
||||
ILookaheadStrategy,
|
||||
ILookaheadValidationError,
|
||||
IOrAlt,
|
||||
Rule,
|
||||
TokenType,
|
||||
OptionalProductionType
|
||||
} from "@chevrotain/types"
|
||||
import flatMap from "lodash/flatMap"
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import { defaultGrammarValidatorErrorProvider } from "../errors_public"
|
||||
import { DEFAULT_PARSER_CONFIG } from "../parser/parser"
|
||||
import {
|
||||
validateAmbiguousAlternationAlternatives,
|
||||
validateEmptyOrAlternative,
|
||||
validateNoLeftRecursion,
|
||||
validateSomeNonEmptyLookaheadPath
|
||||
} from "./checks"
|
||||
import {
|
||||
buildAlternativesLookAheadFunc,
|
||||
buildLookaheadFuncForOptionalProd,
|
||||
buildLookaheadFuncForOr,
|
||||
buildSingleAlternativeLookaheadFunction,
|
||||
getProdType
|
||||
} from "./lookahead"
|
||||
import { IParserDefinitionError } from "./types"
|
||||
|
||||
/**
 * The default LL(k) lookahead strategy: validates the grammar for properties
 * an LL(k) parser requires (no left recursion, no empty/ambiguous alternatives)
 * and builds the lookahead functions used at parse time. Most methods delegate
 * to the free functions in ./checks and ./lookahead.
 */
export class LLkLookaheadStrategy implements ILookaheadStrategy {
  readonly maxLookahead: number

  constructor(options?: { maxLookahead?: number }) {
    // fall back to the parser-wide default when no explicit lookahead depth was given
    this.maxLookahead =
      options?.maxLookahead ?? DEFAULT_PARSER_CONFIG.maxLookahead
  }

  /**
   * Runs all LL(k) grammar validations. The remaining checks are only run when
   * no left recursion was found, as they may not terminate on a left-recursive grammar.
   */
  validate(options: {
    rules: Rule[]
    tokenTypes: TokenType[]
    grammarName: string
  }): ILookaheadValidationError[] {
    const leftRecursionErrors = this.validateNoLeftRecursion(options.rules)

    if (isEmpty(leftRecursionErrors)) {
      const emptyAltErrors = this.validateEmptyOrAlternatives(options.rules)
      const ambiguousAltsErrors = this.validateAmbiguousAlternationAlternatives(
        options.rules,
        this.maxLookahead
      )
      const emptyRepetitionErrors = this.validateSomeNonEmptyLookaheadPath(
        options.rules,
        this.maxLookahead
      )
      const allErrors = [
        ...leftRecursionErrors,
        ...emptyAltErrors,
        ...ambiguousAltsErrors,
        ...emptyRepetitionErrors
      ]
      return allErrors
    }
    return leftRecursionErrors
  }

  // Detects direct/indirect left recursion per top-level rule.
  validateNoLeftRecursion(rules: Rule[]): IParserDefinitionError[] {
    return flatMap(rules, (currTopRule) =>
      validateNoLeftRecursion(
        currTopRule,
        currTopRule,
        defaultGrammarValidatorErrorProvider
      )
    )
  }

  // Detects OR alternatives that can match the empty sequence.
  validateEmptyOrAlternatives(rules: Rule[]): IParserDefinitionError[] {
    return flatMap(rules, (currTopRule) =>
      validateEmptyOrAlternative(
        currTopRule,
        defaultGrammarValidatorErrorProvider
      )
    )
  }

  // Detects OR alternatives that cannot be distinguished within maxLookahead tokens.
  validateAmbiguousAlternationAlternatives(
    rules: Rule[],
    maxLookahead: number
  ): IParserDefinitionError[] {
    return flatMap(rules, (currTopRule) =>
      validateAmbiguousAlternationAlternatives(
        currTopRule,
        maxLookahead,
        defaultGrammarValidatorErrorProvider
      )
    )
  }

  // Detects repetitions whose every lookahead path is empty.
  validateSomeNonEmptyLookaheadPath(
    rules: Rule[],
    maxLookahead: number
  ): IParserDefinitionError[] {
    return validateSomeNonEmptyLookaheadPath(
      rules,
      maxLookahead,
      defaultGrammarValidatorErrorProvider
    )
  }

  // Builds the runtime alternative-choosing function for an OR production.
  buildLookaheadForAlternation(options: {
    prodOccurrence: number
    rule: Rule
    maxLookahead: number
    hasPredicates: boolean
    dynamicTokensEnabled: boolean
  }): (orAlts?: IOrAlt<any>[] | undefined) => number | undefined {
    return buildLookaheadFuncForOr(
      options.prodOccurrence,
      options.rule,
      options.maxLookahead,
      options.hasPredicates,
      options.dynamicTokensEnabled,
      buildAlternativesLookAheadFunc
    )
  }

  // Builds the runtime enter/skip decision function for an optional production.
  buildLookaheadForOptional(options: {
    prodOccurrence: number
    prodType: OptionalProductionType
    rule: Rule
    maxLookahead: number
    dynamicTokensEnabled: boolean
  }): () => boolean {
    return buildLookaheadFuncForOptionalProd(
      options.prodOccurrence,
      options.rule,
      options.maxLookahead,
      options.dynamicTokensEnabled,
      getProdType(options.prodType),
      buildSingleAlternativeLookaheadFunction
    )
  }
}
|
||||
741
_node_modules/chevrotain/src/parse/grammar/lookahead.ts
generated
Normal file
741
_node_modules/chevrotain/src/parse/grammar/lookahead.ts
generated
Normal file
@@ -0,0 +1,741 @@
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import flatten from "lodash/flatten"
|
||||
import every from "lodash/every"
|
||||
import map from "lodash/map"
|
||||
import forEach from "lodash/forEach"
|
||||
import has from "lodash/has"
|
||||
import reduce from "lodash/reduce"
|
||||
import { possiblePathsFrom } from "./interpreter"
|
||||
import { RestWalker } from "./rest"
|
||||
import { Predicate, TokenMatcher } from "../parser/parser"
|
||||
import {
|
||||
tokenStructuredMatcher,
|
||||
tokenStructuredMatcherNoCategories
|
||||
} from "../../scan/tokens"
|
||||
import {
|
||||
Alternation,
|
||||
Alternative as AlternativeGAST,
|
||||
Option,
|
||||
Repetition,
|
||||
RepetitionMandatory,
|
||||
RepetitionMandatoryWithSeparator,
|
||||
RepetitionWithSeparator
|
||||
} from "@chevrotain/gast"
|
||||
import { GAstVisitor } from "@chevrotain/gast"
|
||||
import {
|
||||
IOrAlt,
|
||||
IProduction,
|
||||
IProductionWithOccurrence,
|
||||
LookaheadSequence,
|
||||
LookaheadProductionType,
|
||||
Rule,
|
||||
TokenType,
|
||||
BaseParser
|
||||
} from "@chevrotain/types"
|
||||
|
||||
// The kinds of grammar productions a lookahead computation may target.
export enum PROD_TYPE {
  OPTION,
  REPETITION,
  REPETITION_MANDATORY,
  REPETITION_MANDATORY_WITH_SEPARATOR,
  REPETITION_WITH_SEPARATOR,
  ALTERNATION
}
||||
|
||||
/**
 * Maps a production (either a gast instance or its string type name) to the
 * corresponding PROD_TYPE enum value. Throws for any unrecognized input.
 */
export function getProdType(
  prod: IProduction | LookaheadProductionType
): PROD_TYPE {
  /* istanbul ignore else */
  if (prod instanceof Option || prod === "Option") {
    return PROD_TYPE.OPTION
  } else if (prod instanceof Repetition || prod === "Repetition") {
    return PROD_TYPE.REPETITION
  } else if (
    prod instanceof RepetitionMandatory ||
    prod === "RepetitionMandatory"
  ) {
    return PROD_TYPE.REPETITION_MANDATORY
  } else if (
    prod instanceof RepetitionMandatoryWithSeparator ||
    prod === "RepetitionMandatoryWithSeparator"
  ) {
    return PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR
  } else if (
    prod instanceof RepetitionWithSeparator ||
    prod === "RepetitionWithSeparator"
  ) {
    return PROD_TYPE.REPETITION_WITH_SEPARATOR
  } else if (prod instanceof Alternation || prod === "Alternation") {
    return PROD_TYPE.ALTERNATION
  } else {
    throw Error("non exhaustive match")
  }
}
|
||||
|
||||
export function getLookaheadPaths(options: {
|
||||
occurrence: number
|
||||
rule: Rule
|
||||
prodType: LookaheadProductionType
|
||||
maxLookahead: number
|
||||
}): LookaheadSequence[] {
|
||||
const { occurrence, rule, prodType, maxLookahead } = options
|
||||
const type = getProdType(prodType)
|
||||
if (type === PROD_TYPE.ALTERNATION) {
|
||||
return getLookaheadPathsForOr(occurrence, rule, maxLookahead)
|
||||
} else {
|
||||
return getLookaheadPathsForOptionalProd(
|
||||
occurrence,
|
||||
rule,
|
||||
type,
|
||||
maxLookahead
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the runtime lookahead function for an OR (Alternation) production.
 * Computes the lookahead paths for each alternative, selects the cheaper token
 * matcher when no token categories are in play, and delegates the actual
 * function construction to `laFuncBuilder`.
 */
export function buildLookaheadFuncForOr(
  occurrence: number,
  ruleGrammar: Rule,
  maxLookahead: number,
  hasPredicates: boolean,
  dynamicTokensEnabled: boolean,
  laFuncBuilder: Function
): (orAlts?: IOrAlt<any>[]) => number | undefined {
  const lookAheadPaths = getLookaheadPathsForOr(
    occurrence,
    ruleGrammar,
    maxLookahead
  )

  // faster matcher variant when no TokenType in the paths uses categories
  const tokenMatcher = areTokenCategoriesNotUsed(lookAheadPaths)
    ? tokenStructuredMatcherNoCategories
    : tokenStructuredMatcher

  return laFuncBuilder(
    lookAheadPaths,
    hasPredicates,
    tokenMatcher,
    dynamicTokensEnabled
  )
}
||||
|
||||
/**
 * When dealing with an Optional production (OPTION/MANY/2nd iteration of AT_LEAST_ONE/...) we need to compare
 * the lookahead "inside" the production and the lookahead immediately "after" it in the same top level rule (context free).
 *
 * Example: given a production:
 * ABC(DE)?DF
 *
 * The optional '(DE)?' should only be entered if we see 'DE'. a single Token 'D' is not sufficient to distinguish between the two
 * alternatives.
 *
 * @returns A Lookahead function which will return true IFF the parser should parse the Optional production.
 */
export function buildLookaheadFuncForOptionalProd(
  occurrence: number,
  ruleGrammar: Rule,
  k: number,
  dynamicTokensEnabled: boolean,
  prodType: PROD_TYPE,
  lookaheadBuilder: (
    lookAheadSequence: LookaheadSequence,
    tokenMatcher: TokenMatcher,
    dynamicTokensEnabled: boolean
  ) => () => boolean
): () => boolean {
  const lookAheadPaths = getLookaheadPathsForOptionalProd(
    occurrence,
    ruleGrammar,
    prodType,
    k
  )

  // faster matcher variant when no TokenType in the paths uses categories
  const tokenMatcher = areTokenCategoriesNotUsed(lookAheadPaths)
    ? tokenStructuredMatcherNoCategories
    : tokenStructuredMatcher

  // only the "inside" paths (index 0) are needed by the single-alternative builder
  return lookaheadBuilder(lookAheadPaths[0], tokenMatcher, dynamicTokensEnabled)
}
||||
|
||||
// The set of lookahead paths for a single alternative; each inner array is one TokenType sequence.
export type Alternative = TokenType[][]
||||
|
||||
/**
 * Builds the function that, at parse time, picks which alternative of an OR to
 * enter by comparing upcoming tokens (via `this.LA`) against the precomputed
 * lookahead paths. Returns one of three closures:
 *  - predicate-aware variant (when GATEs are present),
 *  - a single-token map-lookup fast path (all paths length 1, no dynamic tokens),
 *  - a generic path-matching variant otherwise.
 */
export function buildAlternativesLookAheadFunc(
  alts: LookaheadSequence[],
  hasPredicates: boolean,
  tokenMatcher: TokenMatcher,
  dynamicTokensEnabled: boolean
): (orAlts: IOrAlt<any>[]) => number | undefined {
  const numOfAlts = alts.length
  const areAllOneTokenLookahead = every(alts, (currAlt) => {
    return every(currAlt, (currPath) => {
      return currPath.length === 1
    })
  })

  // This version takes into account the predicates as well.
  if (hasPredicates) {
    /**
     * @returns {number} - The chosen alternative index
     */
    return function (
      this: BaseParser,
      orAlts: IOrAlt<any>[]
    ): number | undefined {
      // unfortunately the predicates must be extracted every single time
      // as they cannot be cached due to references to parameters(vars) which are no longer valid.
      // note that in the common case of no predicates, no cpu time will be wasted on this (see else block)
      const predicates: (Predicate | undefined)[] = map(
        orAlts,
        (currAlt) => currAlt.GATE
      )

      for (let t = 0; t < numOfAlts; t++) {
        const currAlt = alts[t]
        const currNumOfPaths = currAlt.length

        const currPredicate = predicates[t]
        if (currPredicate !== undefined && currPredicate.call(this) === false) {
          // if the predicate does not match there is no point in checking the paths
          continue
        }
        nextPath: for (let j = 0; j < currNumOfPaths; j++) {
          const currPath = currAlt[j]
          const currPathLength = currPath.length
          for (let i = 0; i < currPathLength; i++) {
            const nextToken = this.LA(i + 1)
            if (tokenMatcher(nextToken, currPath[i]) === false) {
              // mismatch in current path
              // try the next path
              continue nextPath
            }
          }
          // found a full path that matches.
          // this will also work for an empty ALT as the loop will be skipped
          return t
        }
        // none of the paths for the current alternative matched
        // try the next alternative
      }
      // none of the alternatives could be matched
      return undefined
    }
  } else if (areAllOneTokenLookahead && !dynamicTokensEnabled) {
    // optimized (common) case of all the lookaheads paths requiring only
    // a single token lookahead. These Optimizations cannot work if dynamically defined Tokens are used.
    const singleTokenAlts = map(alts, (currAlt) => {
      return flatten(currAlt)
    })

    // map from tokenTypeIdx (and category idx) --> the FIRST alternative that accepts it
    const choiceToAlt = reduce(
      singleTokenAlts,
      (result, currAlt, idx) => {
        forEach(currAlt, (currTokType) => {
          if (!has(result, currTokType.tokenTypeIdx!)) {
            result[currTokType.tokenTypeIdx!] = idx
          }
          forEach(currTokType.categoryMatches!, (currExtendingType) => {
            if (!has(result, currExtendingType)) {
              result[currExtendingType] = idx
            }
          })
        })
        return result
      },
      {} as Record<number, number>
    )

    /**
     * @returns {number} - The chosen alternative index
     */
    return function (this: BaseParser): number {
      const nextToken = this.LA(1)
      return choiceToAlt[nextToken.tokenTypeIdx]
    }
  } else {
    // optimized lookahead without needing to check the predicates at all.
    // this causes code duplication which is intentional to improve performance.
    /**
     * @returns {number} - The chosen alternative index
     */
    return function (this: BaseParser): number | undefined {
      for (let t = 0; t < numOfAlts; t++) {
        const currAlt = alts[t]
        const currNumOfPaths = currAlt.length
        nextPath: for (let j = 0; j < currNumOfPaths; j++) {
          const currPath = currAlt[j]
          const currPathLength = currPath.length
          for (let i = 0; i < currPathLength; i++) {
            const nextToken = this.LA(i + 1)
            if (tokenMatcher(nextToken, currPath[i]) === false) {
              // mismatch in current path
              // try the next path
              continue nextPath
            }
          }
          // found a full path that matches.
          // this will also work for an empty ALT as the loop will be skipped
          return t
        }
        // none of the paths for the current alternative matched
        // try the next alternative
      }
      // none of the alternatives could be matched
      return undefined
    }
  }
}
|
||||
|
||||
/**
 * Builds a lookahead predicate for a single alternative (e.g. for OPTION / MANY /
 * AT_LEAST_ONE style productions): the returned function inspects the upcoming
 * tokens via `this.LA(n)` and reports whether one of the alternative's grammar
 * paths matches the token stream.
 *
 * Two optimized code paths are used when every lookahead path is exactly one
 * token long and dynamically defined tokens are disabled; otherwise a generic
 * multi-token path comparison is returned.
 */
export function buildSingleAlternativeLookaheadFunction(
  alt: LookaheadSequence,
  tokenMatcher: TokenMatcher,
  dynamicTokensEnabled: boolean
): () => boolean {
  // True when every possible path requires only a single token of lookahead.
  const areAllOneTokenLookahead = every(alt, (currPath) => {
    return currPath.length === 1
  })

  const numOfPaths = alt.length

  // optimized (common) case of all the lookaheads paths requiring only
  // a single token lookahead.
  if (areAllOneTokenLookahead && !dynamicTokensEnabled) {
    const singleTokensTypes = flatten(alt)

    if (
      singleTokensTypes.length === 1 &&
      isEmpty((<any>singleTokensTypes[0]).categoryMatches)
    ) {
      // Fastest path: exactly one expected TokenType and no token categories
      // involved, so a direct numeric tokenTypeIdx comparison suffices.
      const expectedTokenType = singleTokensTypes[0]
      const expectedTokenUniqueKey = (<any>expectedTokenType).tokenTypeIdx

      return function (this: BaseParser): boolean {
        return this.LA(1).tokenTypeIdx === expectedTokenUniqueKey
      }
    } else {
      // Several acceptable single tokens (and/or category members): build a
      // lookup table keyed by tokenTypeIdx. Note a plain array is deliberately
      // used as a sparse map here for lookup performance.
      const choiceToAlt = reduce(
        singleTokensTypes,
        (result, currTokType, idx) => {
          result[currTokType.tokenTypeIdx!] = true
          // Tokens extending one of the expected categories must also match.
          forEach(currTokType.categoryMatches!, (currExtendingType) => {
            result[currExtendingType] = true
          })
          return result
        },
        [] as boolean[]
      )

      return function (this: BaseParser): boolean {
        const nextToken = this.LA(1)
        return choiceToAlt[nextToken.tokenTypeIdx] === true
      }
    }
  } else {
    // Generic (slower) case: compare each candidate path token-by-token
    // against the upcoming token stream.
    return function (this: BaseParser): boolean {
      nextPath: for (let j = 0; j < numOfPaths; j++) {
        const currPath = alt[j]
        const currPathLength = currPath.length
        for (let i = 0; i < currPathLength; i++) {
          const nextToken = this.LA(i + 1)
          if (tokenMatcher(nextToken, currPath[i]) === false) {
            // mismatch in current path
            // try the next path
            continue nextPath
          }
        }
        // found a full path that matches.
        return true
      }

      // none of the paths matched
      return false
    }
  }
}
|
||||
|
||||
/**
 * Grammar walker that locates a specific production — identified by its
 * occurrence index and production type — inside the top level rule `topProd`
 * and records the grammar "rest": the productions that may follow it.
 */
class RestDefinitionFinderWalker extends RestWalker {
  // Populated once the target production has been located.
  private restDef: IProduction[]

  constructor(
    private topProd: Rule,
    private targetOccurrence: number,
    private targetProdType: PROD_TYPE
  ) {
    super()
  }

  // Triggers the walk and returns the computed rest definition.
  // NOTE(review): if the target is never found, `restDef` stays unassigned and
  // `undefined` is returned despite the declared return type — confirm callers
  // tolerate this.
  startWalking(): IProduction[] {
    this.walk(this.topProd)
    return this.restDef
  }

  // Records the rest (what follows `node` in the current definition plus what
  // follows the enclosing production) when `node` is the target.
  // Returns true so the caller can stop descending into the matched node.
  private checkIsTarget(
    node: IProductionWithOccurrence,
    expectedProdType: PROD_TYPE,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): boolean {
    if (
      node.idx === this.targetOccurrence &&
      this.targetProdType === expectedProdType
    ) {
      this.restDef = currRest.concat(prevRest)
      return true
    }
    // performance optimization, do not iterate over the entire Grammar ast after we have found the target
    return false
  }

  walkOption(
    optionProd: Option,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (!this.checkIsTarget(optionProd, PROD_TYPE.OPTION, currRest, prevRest)) {
      super.walkOption(optionProd, currRest, prevRest)
    }
  }

  walkAtLeastOne(
    atLeastOneProd: RepetitionMandatory,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (
      !this.checkIsTarget(
        atLeastOneProd,
        PROD_TYPE.REPETITION_MANDATORY,
        currRest,
        prevRest
      )
    ) {
      // NOTE(review): delegates to super.walkOption (not super.walkAtLeastOne),
      // so nested rest computations exclude the repetition continuation — the
      // same pattern is used by all the walk* overrides below; confirm this is
      // intentional.
      super.walkOption(atLeastOneProd, currRest, prevRest)
    }
  }

  walkAtLeastOneSep(
    atLeastOneSepProd: RepetitionMandatoryWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (
      !this.checkIsTarget(
        atLeastOneSepProd,
        PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR,
        currRest,
        prevRest
      )
    ) {
      super.walkOption(atLeastOneSepProd, currRest, prevRest)
    }
  }

  walkMany(
    manyProd: Repetition,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (
      !this.checkIsTarget(manyProd, PROD_TYPE.REPETITION, currRest, prevRest)
    ) {
      super.walkOption(manyProd, currRest, prevRest)
    }
  }

  walkManySep(
    manySepProd: RepetitionWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    if (
      !this.checkIsTarget(
        manySepProd,
        PROD_TYPE.REPETITION_WITH_SEPARATOR,
        currRest,
        prevRest
      )
    ) {
      super.walkOption(manySepProd, currRest, prevRest)
    }
  }
}
|
||||
|
||||
/**
|
||||
* Returns the definition of a target production in a top level level rule.
|
||||
*/
|
||||
class InsideDefinitionFinderVisitor extends GAstVisitor {
|
||||
public result: IProduction[] = []
|
||||
|
||||
constructor(
|
||||
private targetOccurrence: number,
|
||||
private targetProdType: PROD_TYPE,
|
||||
private targetRef?: any
|
||||
) {
|
||||
super()
|
||||
}
|
||||
|
||||
private checkIsTarget(
|
||||
node: { definition: IProduction[] } & IProductionWithOccurrence,
|
||||
expectedProdName: PROD_TYPE
|
||||
): void {
|
||||
if (
|
||||
node.idx === this.targetOccurrence &&
|
||||
this.targetProdType === expectedProdName &&
|
||||
(this.targetRef === undefined || node === this.targetRef)
|
||||
) {
|
||||
this.result = node.definition
|
||||
}
|
||||
}
|
||||
|
||||
public visitOption(node: Option): void {
|
||||
this.checkIsTarget(node, PROD_TYPE.OPTION)
|
||||
}
|
||||
|
||||
public visitRepetition(node: Repetition): void {
|
||||
this.checkIsTarget(node, PROD_TYPE.REPETITION)
|
||||
}
|
||||
|
||||
public visitRepetitionMandatory(node: RepetitionMandatory): void {
|
||||
this.checkIsTarget(node, PROD_TYPE.REPETITION_MANDATORY)
|
||||
}
|
||||
|
||||
public visitRepetitionMandatoryWithSeparator(
|
||||
node: RepetitionMandatoryWithSeparator
|
||||
): void {
|
||||
this.checkIsTarget(node, PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR)
|
||||
}
|
||||
|
||||
public visitRepetitionWithSeparator(node: RepetitionWithSeparator): void {
|
||||
this.checkIsTarget(node, PROD_TYPE.REPETITION_WITH_SEPARATOR)
|
||||
}
|
||||
|
||||
public visitAlternation(node: Alternation): void {
|
||||
this.checkIsTarget(node, PROD_TYPE.ALTERNATION)
|
||||
}
|
||||
}
|
||||
|
||||
function initializeArrayOfArrays(size: number): any[][] {
|
||||
const result = new Array(size)
|
||||
for (let i = 0; i < size; i++) {
|
||||
result[i] = []
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
/**
|
||||
* A sort of hash function between a Path in the grammar and a string.
|
||||
* Note that this returns multiple "hashes" to support the scenario of token categories.
|
||||
* - A single path with categories may match multiple **actual** paths.
|
||||
*/
|
||||
function pathToHashKeys(path: TokenType[]): string[] {
|
||||
let keys = [""]
|
||||
for (let i = 0; i < path.length; i++) {
|
||||
const tokType = path[i]
|
||||
const longerKeys = []
|
||||
for (let j = 0; j < keys.length; j++) {
|
||||
const currShorterKey = keys[j]
|
||||
longerKeys.push(currShorterKey + "_" + tokType.tokenTypeIdx)
|
||||
for (let t = 0; t < tokType.categoryMatches!.length; t++) {
|
||||
const categoriesKeySuffix = "_" + tokType.categoryMatches![t]
|
||||
longerKeys.push(currShorterKey + categoriesKeySuffix)
|
||||
}
|
||||
}
|
||||
keys = longerKeys
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
/**
|
||||
* Imperative style due to being called from a hot spot
|
||||
*/
|
||||
function isUniquePrefixHash(
|
||||
altKnownPathsKeys: Record<string, boolean>[],
|
||||
searchPathKeys: string[],
|
||||
idx: number
|
||||
): boolean {
|
||||
for (
|
||||
let currAltIdx = 0;
|
||||
currAltIdx < altKnownPathsKeys.length;
|
||||
currAltIdx++
|
||||
) {
|
||||
// We only want to test vs the other alternatives
|
||||
if (currAltIdx === idx) {
|
||||
continue
|
||||
}
|
||||
const otherAltKnownPathsKeys = altKnownPathsKeys[currAltIdx]
|
||||
for (let searchIdx = 0; searchIdx < searchPathKeys.length; searchIdx++) {
|
||||
const searchKey = searchPathKeys[searchIdx]
|
||||
if (otherAltKnownPathsKeys[searchKey] === true) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
// None of the SearchPathKeys were found in any of the other alternatives
|
||||
return true
|
||||
}
|
||||
|
||||
/**
 * Computes, for each alternative definition, the minimal set of token paths
 * (up to length `k`) needed to distinguish it from the other alternatives.
 *
 * Paths are expanded one token at a time; a path stops growing as soon as it is
 * unique among the alternatives, has no grammar suffix left to expand, or has
 * reached the maximum lookahead depth `k`.
 */
export function lookAheadSequenceFromAlternatives(
  altsDefs: IProduction[],
  k: number
): LookaheadSequence[] {
  // Seed: all length-1 partial paths (plus their remaining grammar suffix)
  // for each alternative.
  const partialAlts = map(altsDefs, (currAlt) =>
    possiblePathsFrom([currAlt], 1)
  )
  const finalResult = initializeArrayOfArrays(partialAlts.length)
  // Per-alternative dictionary of hash keys for every known partial path,
  // used for fast uniqueness checks against the other alternatives.
  const altsHashes = map(partialAlts, (currAltPaths) => {
    const dict: { [key: string]: boolean } = {}
    forEach(currAltPaths, (item) => {
      const keys = pathToHashKeys(item.partialPath)
      forEach(keys, (currKey) => {
        dict[currKey] = true
      })
    })
    return dict
  })
  let newData = partialAlts

  // maxLookahead loop
  for (let pathLength = 1; pathLength <= k; pathLength++) {
    const currDataset = newData
    newData = initializeArrayOfArrays(currDataset.length)

    // alternatives loop
    for (let altIdx = 0; altIdx < currDataset.length; altIdx++) {
      const currAltPathsAndSuffixes = currDataset[altIdx]
      // paths in current alternative loop
      for (
        let currPathIdx = 0;
        currPathIdx < currAltPathsAndSuffixes.length;
        currPathIdx++
      ) {
        const currPathPrefix = currAltPathsAndSuffixes[currPathIdx].partialPath
        const suffixDef = currAltPathsAndSuffixes[currPathIdx].suffixDef
        const prefixKeys = pathToHashKeys(currPathPrefix)
        const isUnique = isUniquePrefixHash(altsHashes, prefixKeys, altIdx)
        // End of the line for this path: unique, exhausted, or at max depth.
        if (isUnique || isEmpty(suffixDef) || currPathPrefix.length === k) {
          const currAltResult = finalResult[altIdx]
          // TODO: Can we implement a containsPath using Maps/Dictionaries?
          if (containsPath(currAltResult, currPathPrefix) === false) {
            currAltResult.push(currPathPrefix)
            // Update all new keys for the current path.
            for (let j = 0; j < prefixKeys.length; j++) {
              const currKey = prefixKeys[j]
              altsHashes[altIdx][currKey] = true
            }
          }
        }
        // Expand longer paths
        else {
          const newPartialPathsAndSuffixes = possiblePathsFrom(
            suffixDef,
            pathLength + 1,
            currPathPrefix
          )
          newData[altIdx] = newData[altIdx].concat(newPartialPathsAndSuffixes)

          // Update keys for new known paths
          forEach(newPartialPathsAndSuffixes, (item) => {
            const prefixKeys = pathToHashKeys(item.partialPath)
            forEach(prefixKeys, (key) => {
              altsHashes[altIdx][key] = true
            })
          })
        }
      }
    }
  }

  return finalResult
}
|
||||
|
||||
export function getLookaheadPathsForOr(
|
||||
occurrence: number,
|
||||
ruleGrammar: Rule,
|
||||
k: number,
|
||||
orProd?: Alternation
|
||||
): LookaheadSequence[] {
|
||||
const visitor = new InsideDefinitionFinderVisitor(
|
||||
occurrence,
|
||||
PROD_TYPE.ALTERNATION,
|
||||
orProd
|
||||
)
|
||||
ruleGrammar.accept(visitor)
|
||||
return lookAheadSequenceFromAlternatives(visitor.result, k)
|
||||
}
|
||||
|
||||
export function getLookaheadPathsForOptionalProd(
|
||||
occurrence: number,
|
||||
ruleGrammar: Rule,
|
||||
prodType: PROD_TYPE,
|
||||
k: number
|
||||
): LookaheadSequence[] {
|
||||
const insideDefVisitor = new InsideDefinitionFinderVisitor(
|
||||
occurrence,
|
||||
prodType
|
||||
)
|
||||
ruleGrammar.accept(insideDefVisitor)
|
||||
const insideDef = insideDefVisitor.result
|
||||
|
||||
const afterDefWalker = new RestDefinitionFinderWalker(
|
||||
ruleGrammar,
|
||||
occurrence,
|
||||
prodType
|
||||
)
|
||||
const afterDef = afterDefWalker.startWalking()
|
||||
|
||||
const insideFlat = new AlternativeGAST({ definition: insideDef })
|
||||
const afterFlat = new AlternativeGAST({ definition: afterDef })
|
||||
|
||||
return lookAheadSequenceFromAlternatives([insideFlat, afterFlat], k)
|
||||
}
|
||||
|
||||
export function containsPath(
|
||||
alternative: Alternative,
|
||||
searchPath: TokenType[]
|
||||
): boolean {
|
||||
compareOtherPath: for (let i = 0; i < alternative.length; i++) {
|
||||
const otherPath = alternative[i]
|
||||
if (otherPath.length !== searchPath.length) {
|
||||
continue
|
||||
}
|
||||
for (let j = 0; j < otherPath.length; j++) {
|
||||
const searchTok = searchPath[j]
|
||||
const otherTok = otherPath[j]
|
||||
|
||||
const matchingTokens =
|
||||
searchTok === otherTok ||
|
||||
otherTok.categoryMatchesMap![searchTok.tokenTypeIdx!] !== undefined
|
||||
if (matchingTokens === false) {
|
||||
continue compareOtherPath
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
export function isStrictPrefixOfPath(
|
||||
prefix: TokenType[],
|
||||
other: TokenType[]
|
||||
): boolean {
|
||||
return (
|
||||
prefix.length < other.length &&
|
||||
every(prefix, (tokType, idx) => {
|
||||
const otherTokType = other[idx]
|
||||
return (
|
||||
tokType === otherTokType ||
|
||||
otherTokType.categoryMatchesMap![tokType.tokenTypeIdx!]
|
||||
)
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
export function areTokenCategoriesNotUsed(
|
||||
lookAheadPaths: LookaheadSequence[]
|
||||
): boolean {
|
||||
return every(lookAheadPaths, (singleAltPaths) =>
|
||||
every(singleAltPaths, (singlePath) =>
|
||||
every(singlePath, (token) => isEmpty(token.categoryMatches!))
|
||||
)
|
||||
)
|
||||
}
|
||||
59
_node_modules/chevrotain/src/parse/grammar/resolver.ts
generated
Normal file
59
_node_modules/chevrotain/src/parse/grammar/resolver.ts
generated
Normal file
@@ -0,0 +1,59 @@
|
||||
import {
|
||||
IParserUnresolvedRefDefinitionError,
|
||||
ParserDefinitionErrorType
|
||||
} from "../parser/parser"
|
||||
import forEach from "lodash/forEach"
|
||||
import values from "lodash/values"
|
||||
import { NonTerminal, Rule } from "@chevrotain/gast"
|
||||
import { GAstVisitor } from "@chevrotain/gast"
|
||||
import {
|
||||
IGrammarResolverErrorMessageProvider,
|
||||
IParserDefinitionError
|
||||
} from "./types"
|
||||
|
||||
export function resolveGrammar(
|
||||
topLevels: Record<string, Rule>,
|
||||
errMsgProvider: IGrammarResolverErrorMessageProvider
|
||||
): IParserDefinitionError[] {
|
||||
const refResolver = new GastRefResolverVisitor(topLevels, errMsgProvider)
|
||||
refResolver.resolveRefs()
|
||||
return refResolver.errors
|
||||
}
|
||||
|
||||
/**
 * GAST visitor that connects every NonTerminal node to the Rule it references
 * (by name), collecting an error for each reference that cannot be resolved.
 */
export class GastRefResolverVisitor extends GAstVisitor {
  public errors: IParserUnresolvedRefDefinitionError[] = []
  // The rule currently being visited; provides context for error reporting.
  private currTopLevel: Rule

  constructor(
    private nameToTopRule: Record<string, Rule>,
    private errMsgProvider: IGrammarResolverErrorMessageProvider
  ) {
    super()
  }

  // Visits every top level rule, resolving the NonTerminal references inside it.
  public resolveRefs(): void {
    forEach(values(this.nameToTopRule), (prod) => {
      this.currTopLevel = prod
      prod.accept(this)
    })
  }

  public visitNonTerminal(node: NonTerminal): void {
    const ref = this.nameToTopRule[node.nonTerminalName]

    if (!ref) {
      // No rule with that name exists -> record an unresolved reference error.
      const msg = this.errMsgProvider.buildRuleNotFoundError(
        this.currTopLevel,
        node
      )
      this.errors.push({
        message: msg,
        type: ParserDefinitionErrorType.UNRESOLVED_SUBRULE_REF,
        ruleName: this.currTopLevel.name,
        unresolvedRefName: node.nonTerminalName
      })
    } else {
      // Wire up the reference so later phases can traverse into the rule.
      node.referencedRule = ref
    }
  }
}
|
||||
164
_node_modules/chevrotain/src/parse/grammar/rest.ts
generated
Normal file
164
_node_modules/chevrotain/src/parse/grammar/rest.ts
generated
Normal file
@@ -0,0 +1,164 @@
|
||||
import drop from "lodash/drop"
|
||||
import forEach from "lodash/forEach"
|
||||
import {
|
||||
Alternation,
|
||||
Alternative,
|
||||
NonTerminal,
|
||||
Option,
|
||||
Repetition,
|
||||
RepetitionMandatory,
|
||||
RepetitionMandatoryWithSeparator,
|
||||
RepetitionWithSeparator,
|
||||
Terminal
|
||||
} from "@chevrotain/gast"
|
||||
import { IProduction } from "@chevrotain/types"
|
||||
|
||||
/**
 * A Grammar Walker that computes the "remaining" grammar "after" a production
 * in the grammar: for every sub-production it visits, it supplies both what
 * follows it inside the current definition (`currRest`) and what follows the
 * enclosing production (`prevRest`). Subclasses override the walkXYZ hooks
 * they care about.
 */
export abstract class RestWalker {
  // Dispatches each sub-production of `prod` to the matching walkXYZ hook,
  // computing currRest as everything after it in this definition.
  walk(prod: { definition: IProduction[] }, prevRest: any[] = []): void {
    forEach(prod.definition, (subProd: IProduction, index) => {
      const currRest = drop(prod.definition, index + 1)
      /* istanbul ignore else */
      if (subProd instanceof NonTerminal) {
        this.walkProdRef(subProd, currRest, prevRest)
      } else if (subProd instanceof Terminal) {
        this.walkTerminal(subProd, currRest, prevRest)
      } else if (subProd instanceof Alternative) {
        this.walkFlat(subProd, currRest, prevRest)
      } else if (subProd instanceof Option) {
        this.walkOption(subProd, currRest, prevRest)
      } else if (subProd instanceof RepetitionMandatory) {
        this.walkAtLeastOne(subProd, currRest, prevRest)
      } else if (subProd instanceof RepetitionMandatoryWithSeparator) {
        this.walkAtLeastOneSep(subProd, currRest, prevRest)
      } else if (subProd instanceof RepetitionWithSeparator) {
        this.walkManySep(subProd, currRest, prevRest)
      } else if (subProd instanceof Repetition) {
        this.walkMany(subProd, currRest, prevRest)
      } else if (subProd instanceof Alternation) {
        this.walkOr(subProd, currRest, prevRest)
      } else {
        // Unknown production type: the instanceof chain above is expected to
        // be exhaustive over the GAST node classes.
        throw Error("non exhaustive match")
      }
    })
  }

  // Default hooks are no-ops; subclasses override only the cases they need.
  walkTerminal(
    terminal: Terminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {}

  walkProdRef(
    refProd: NonTerminal,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {}

  walkFlat(
    flatProd: Alternative,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABCDEF => after the D the rest is EF
    const fullOrRest = currRest.concat(prevRest)
    this.walk(flatProd, <any>fullOrRest)
  }

  walkOption(
    optionProd: Option,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC(DE)?F => after the (DE)? the rest is F
    const fullOrRest = currRest.concat(prevRest)
    this.walk(optionProd, <any>fullOrRest)
  }

  walkAtLeastOne(
    atLeastOneProd: RepetitionMandatory,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC(DE)+F => after the (DE)+ the rest is (DE)?F
    // (the repetition may iterate again, so an optional copy of its body
    // precedes the remaining rest)
    const fullAtLeastOneRest: IProduction[] = [
      new Option({ definition: atLeastOneProd.definition })
    ].concat(<any>currRest, <any>prevRest)
    this.walk(atLeastOneProd, fullAtLeastOneRest)
  }

  walkAtLeastOneSep(
    atLeastOneSepProd: RepetitionMandatoryWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC DE(,DE)* F => after the (,DE)+ the rest is (,DE)?F
    const fullAtLeastOneSepRest = restForRepetitionWithSeparator(
      atLeastOneSepProd,
      currRest,
      prevRest
    )
    this.walk(atLeastOneSepProd, fullAtLeastOneSepRest)
  }

  walkMany(
    manyProd: Repetition,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC(DE)*F => after the (DE)* the rest is (DE)?F
    const fullManyRest: IProduction[] = [
      new Option({ definition: manyProd.definition })
    ].concat(<any>currRest, <any>prevRest)
    this.walk(manyProd, fullManyRest)
  }

  walkManySep(
    manySepProd: RepetitionWithSeparator,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC (DE(,DE)*)? F => after the (,DE)* the rest is (,DE)?F
    const fullManySepRest = restForRepetitionWithSeparator(
      manySepProd,
      currRest,
      prevRest
    )
    this.walk(manySepProd, fullManySepRest)
  }

  walkOr(
    orProd: Alternation,
    currRest: IProduction[],
    prevRest: IProduction[]
  ): void {
    // ABC(D|E|F)G => when finding the (D|E|F) the rest is G
    const fullOrRest = currRest.concat(prevRest)
    // walk all different alternatives
    forEach(orProd.definition, (alt) => {
      // wrapping each alternative in a single definition wrapper
      // to avoid errors in computing the rest of that alternative in the invocation to computeInProdFollows
      // (otherwise for OR([alt1,alt2]) alt2 will be considered in 'rest' of alt1
      const prodWrapper = new Alternative({ definition: [alt] })
      this.walk(prodWrapper, <any>fullOrRest)
    })
  }
}
|
||||
|
||||
function restForRepetitionWithSeparator(
|
||||
repSepProd: RepetitionWithSeparator,
|
||||
currRest: IProduction[],
|
||||
prevRest: IProduction[]
|
||||
) {
|
||||
const repSepRest = [
|
||||
new Option({
|
||||
definition: [
|
||||
new Terminal({ terminalType: repSepProd.separator }) as IProduction
|
||||
].concat(repSepProd.definition)
|
||||
}) as IProduction
|
||||
]
|
||||
const fullRepSepRest: IProduction[] = repSepRest.concat(currRest, prevRest)
|
||||
return fullRepSepRest
|
||||
}
|
||||
94
_node_modules/chevrotain/src/parse/grammar/types.ts
generated
Normal file
94
_node_modules/chevrotain/src/parse/grammar/types.ts
generated
Normal file
@@ -0,0 +1,94 @@
|
||||
import {
|
||||
Alternation,
|
||||
IProductionWithOccurrence,
|
||||
NonTerminal,
|
||||
Rule,
|
||||
TokenType
|
||||
} from "@chevrotain/types"
|
||||
|
||||
/**
 * A single problem detected in a parser's grammar definition
 * (during reference resolution or grammar validation).
 */
export interface IParserDefinitionError {
  // Human readable description of the problem.
  message: string
  type: ParserDefinitionErrorType
  // The name of the top level rule the error relates to, when applicable.
  ruleName?: string
}

// Categories of parser definition errors.
// NOTE(review): declared ambient (`declare enum`) here — the runtime values
// live in the parser module's enum of the same name; confirm both stay in sync.
export declare enum ParserDefinitionErrorType {
  INVALID_RULE_NAME = 0,
  DUPLICATE_RULE_NAME = 1,
  INVALID_RULE_OVERRIDE = 2,
  DUPLICATE_PRODUCTIONS = 3,
  UNRESOLVED_SUBRULE_REF = 4,
  LEFT_RECURSION = 5,
  NONE_LAST_EMPTY_ALT = 6,
  AMBIGUOUS_ALTS = 7,
  CONFLICT_TOKENS_RULES_NAMESPACE = 8,
  INVALID_TOKEN_NAME = 9,
  NO_NON_EMPTY_LOOKAHEAD = 10,
  AMBIGUOUS_PREFIX_ALTS = 11,
  TOO_MANY_ALTS = 12,
  CUSTOM_LOOKAHEAD_VALIDATION = 13
}
|
||||
|
||||
/**
 * Builds the error message texts produced while validating a grammar.
 * Implementations return only the message string; the error objects themselves
 * are assembled by the validation logic.
 */
export interface IGrammarValidatorErrorMessageProvider {
  buildDuplicateFoundError(
    topLevelRule: Rule,
    duplicateProds: IProductionWithOccurrence[]
  ): string
  buildNamespaceConflictError(topLevelRule: Rule): string
  buildAlternationPrefixAmbiguityError(options: {
    topLevelRule: Rule
    prefixPath: TokenType[]
    ambiguityIndices: number[]
    alternation: Alternation
  }): string
  buildAlternationAmbiguityError(options: {
    topLevelRule: Rule
    prefixPath: TokenType[]
    ambiguityIndices: number[]
    alternation: Alternation
  }): string
  buildEmptyRepetitionError(options: {
    topLevelRule: Rule
    repetition: IProductionWithOccurrence
  }): string
  /**
   * @deprecated - There are no longer constraints on Token names.
   * This method will be removed from the interface in future versions.
   * Providing it will currently have no impact on the runtime.
   */
  buildTokenNameError(options: {
    tokenType: TokenType
    expectedPattern: RegExp
  }): any

  buildEmptyAlternationError(options: {
    topLevelRule: Rule
    alternation: Alternation
    emptyChoiceIdx: number
  }): any
  buildTooManyAlternativesError(options: {
    topLevelRule: Rule
    alternation: Alternation
  }): string
  buildLeftRecursionError(options: {
    topLevelRule: Rule
    leftRecursionPath: Rule[]
  }): string
  /**
   * @deprecated - There are no longer constraints on Rule names.
   * This method will be removed from the interface in future versions.
   * Providing it will currently have no impact on the runtime.
   */
  buildInvalidRuleNameError(options: {
    topLevelRule: Rule
    expectedPattern: RegExp
  }): string
  buildDuplicateRuleNameError(options: {
    topLevelRule: Rule | string
    grammarName: string
  }): string
}

/**
 * Builds the error message texts produced while resolving grammar references
 * (e.g. a SUBRULE referencing a rule that does not exist).
 */
export interface IGrammarResolverErrorMessageProvider {
  buildRuleNotFoundError(topLevelRule: Rule, undefinedRule: NonTerminal): string
}
|
||||
323
_node_modules/chevrotain/src/parse/parser/parser.ts
generated
Normal file
323
_node_modules/chevrotain/src/parse/parser/parser.ts
generated
Normal file
@@ -0,0 +1,323 @@
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import map from "lodash/map"
|
||||
import forEach from "lodash/forEach"
|
||||
import values from "lodash/values"
|
||||
import has from "lodash/has"
|
||||
import clone from "lodash/clone"
|
||||
import { toFastProperties } from "@chevrotain/utils"
|
||||
import { computeAllProdsFollows } from "../grammar/follow"
|
||||
import { createTokenInstance, EOF } from "../../scan/tokens_public"
|
||||
import {
|
||||
defaultGrammarValidatorErrorProvider,
|
||||
defaultParserErrorProvider
|
||||
} from "../errors_public"
|
||||
import {
|
||||
resolveGrammar,
|
||||
validateGrammar
|
||||
} from "../grammar/gast/gast_resolver_public"
|
||||
import {
|
||||
CstNode,
|
||||
IParserConfig,
|
||||
IRecognitionException,
|
||||
IRuleConfig,
|
||||
IToken,
|
||||
TokenType,
|
||||
TokenVocabulary
|
||||
} from "@chevrotain/types"
|
||||
import { Recoverable } from "./traits/recoverable"
|
||||
import { LooksAhead } from "./traits/looksahead"
|
||||
import { TreeBuilder } from "./traits/tree_builder"
|
||||
import { LexerAdapter } from "./traits/lexer_adapter"
|
||||
import { RecognizerApi } from "./traits/recognizer_api"
|
||||
import { RecognizerEngine } from "./traits/recognizer_engine"
|
||||
|
||||
import { ErrorHandler } from "./traits/error_handler"
|
||||
import { MixedInParser } from "./traits/parser_traits"
|
||||
import { ContentAssist } from "./traits/context_assist"
|
||||
import { GastRecorder } from "./traits/gast_recorder"
|
||||
import { PerformanceTracer } from "./traits/perf_tracer"
|
||||
import { applyMixins } from "./utils/apply_mixins"
|
||||
import { IParserDefinitionError } from "../grammar/types"
|
||||
import { Rule } from "@chevrotain/gast"
|
||||
import { IParserConfigInternal, ParserMethodInternal } from "./types"
|
||||
import { validateLookahead } from "../grammar/checks"
|
||||
|
||||
// Sentinel token used when lookahead runs past the end of the input:
// an EOF token with empty image and NaN position information.
export const END_OF_FILE = createTokenInstance(
  EOF,
  "",
  NaN,
  NaN,
  NaN,
  NaN,
  NaN,
  NaN
)
// Frozen so the shared sentinel can never be mutated by consumers.
Object.freeze(END_OF_FILE)

// Signature of functions that decide whether a concrete token matches a TokenType.
export type TokenMatcher = (token: IToken, tokType: TokenType) => boolean

/**
 * Default values for every parser configuration option
 * (except `lookaheadStrategy`, which has no static default).
 */
export const DEFAULT_PARSER_CONFIG: Required<
  Omit<IParserConfigInternal, "lookaheadStrategy">
> = Object.freeze({
  recoveryEnabled: false,
  maxLookahead: 3,
  dynamicTokensEnabled: false,
  outputCst: true,
  errorMessageProvider: defaultParserErrorProvider,
  nodeLocationTracking: "none",
  traceInitPerf: false,
  skipValidations: false
})

/**
 * Default values for per-rule configuration.
 */
export const DEFAULT_RULE_CONFIG: Required<IRuleConfig<any>> = Object.freeze({
  recoveryValueFunc: () => undefined,
  resyncEnabled: true
})
|
||||
|
||||
// Categories of parser definition errors.
// NOTE(review): mirrored by the ambient `declare enum` of the same name in
// grammar/types.ts — the numeric values must stay in sync between the two.
export enum ParserDefinitionErrorType {
  INVALID_RULE_NAME = 0,
  DUPLICATE_RULE_NAME = 1,
  INVALID_RULE_OVERRIDE = 2,
  DUPLICATE_PRODUCTIONS = 3,
  UNRESOLVED_SUBRULE_REF = 4,
  LEFT_RECURSION = 5,
  NONE_LAST_EMPTY_ALT = 6,
  AMBIGUOUS_ALTS = 7,
  CONFLICT_TOKENS_RULES_NAMESPACE = 8,
  INVALID_TOKEN_NAME = 9,
  NO_NON_EMPTY_LOOKAHEAD = 10,
  AMBIGUOUS_PREFIX_ALTS = 11,
  TOO_MANY_ALTS = 12,
  CUSTOM_LOOKAHEAD_VALIDATION = 13
}
|
||||
|
||||
// Error describing duplicate productions (same DSL method + occurrence)
// within a single rule.
export interface IParserDuplicatesDefinitionError
  extends IParserDefinitionError {
  dslName: string
  occurrence: number
  parameter?: string
}

// Error describing an empty alternative that is not the last alternative
// of an Alternation.
export interface IParserEmptyAlternativeDefinitionError
  extends IParserDefinitionError {
  occurrence: number
  alternative: number
}

// Error describing alternatives that cannot be disambiguated by lookahead.
export interface IParserAmbiguousAlternativesDefinitionError
  extends IParserDefinitionError {
  occurrence: number | string
  alternatives: number[]
}

// Error describing a reference to a rule name that does not exist.
export interface IParserUnresolvedRefDefinitionError
  extends IParserDefinitionError {
  unresolvedRefName: string
}

// Snapshot of the parser's mutable state.
// NOTE(review): presumably saved/restored for error recovery or backtracking —
// confirm against the traits that use it.
export interface IParserState {
  errors: IRecognitionException[]
  lexerState: any
  RULE_STACK: number[]
  CST_STACK: CstNode[]
}

// A gate/guard function controlling whether an alternative may be taken.
export type Predicate = () => boolean
|
||||
|
||||
export function EMPTY_ALT(): () => undefined
|
||||
export function EMPTY_ALT<T>(value: T): () => T
|
||||
export function EMPTY_ALT(value: any = undefined) {
|
||||
return function () {
|
||||
return value
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Base Parser class. Its capabilities are assembled from the trait classes
 * mixed in via `applyMixins` (each trait is initialized by the matching
 * `init*` call in the constructor below). End users subclass `CstParser`
 * or `EmbeddedActionsParser` rather than this class directly.
 */
export class Parser {
  // Set this flag to true if you don't want the Parser to throw error when problems in it's definition are detected.
  // (normally during the parser's constructor).
  // This is a design time flag, it will not affect the runtime error handling of the parser, just design time errors,
  // for example: duplicate rule names, referencing an unresolved subrule, ect...
  // This flag should not be enabled during normal usage, it is used in special situations, for example when
  // needing to display the parser definition errors in some GUI(online playground).
  static DEFER_DEFINITION_ERRORS_HANDLING: boolean = false

  /**
   * @deprecated use the **instance** method with the same name instead
   */
  static performSelfAnalysis(parserInstance: Parser): void {
    throw Error(
      "The **static** `performSelfAnalysis` method has been deprecated." +
        "\t\nUse the **instance** method with the same name instead."
    )
  }

  /**
   * Records the grammar (building the GAST), resolves rule references,
   * validates the grammar, and pre-computes lookahead (and optionally
   * error-recovery "follow") information.
   *
   * Must be invoked at the end of a Parser subclass constructor, after all
   * rules have been defined (the lexer adapter rejects input otherwise).
   * Throws when definition errors were found, unless
   * `DEFER_DEFINITION_ERRORS_HANDLING` is enabled.
   */
  public performSelfAnalysis(this: MixedInParser): void {
    this.TRACE_INIT("performSelfAnalysis", () => {
      let defErrorsMsgs

      this.selfAnalysisDone = true
      const className = this.className

      this.TRACE_INIT("toFastProps", () => {
        // Without this voodoo magic the parser would be x3-x4 slower
        // It seems it is better to invoke `toFastProperties` **before**
        // Any manipulations of the `this` object done during the recording phase.
        toFastProperties(this)
      })

      this.TRACE_INIT("Grammar Recording", () => {
        try {
          this.enableRecording()
          // Building the GAST
          forEach(this.definedRulesNames, (currRuleName) => {
            // each defined rule exists as a wrapped method on the parser instance
            const wrappedRule = (this as any)[
              currRuleName
            ] as ParserMethodInternal<unknown[], unknown>
            const originalGrammarAction = wrappedRule["originalGrammarAction"]
            let recordedRuleGast!: Rule
            this.TRACE_INIT(`${currRuleName} Rule`, () => {
              recordedRuleGast = this.topLevelRuleRecord(
                currRuleName,
                originalGrammarAction
              )
            })
            this.gastProductionsCache[currRuleName] = recordedRuleGast
          })
        } finally {
          // always leave recording mode, even if recording a rule threw
          this.disableRecording()
        }
      })

      let resolverErrors: IParserDefinitionError[] = []
      this.TRACE_INIT("Grammar Resolving", () => {
        resolverErrors = resolveGrammar({
          rules: values(this.gastProductionsCache)
        })
        this.definitionErrors = this.definitionErrors.concat(resolverErrors)
      })

      this.TRACE_INIT("Grammar Validations", () => {
        // only perform additional grammar validations IFF no resolving errors have occurred.
        // as unresolved grammar may lead to unhandled runtime exceptions in the follow up validations.
        if (isEmpty(resolverErrors) && this.skipValidations === false) {
          const validationErrors = validateGrammar({
            rules: values(this.gastProductionsCache),
            tokenTypes: values(this.tokensMap),
            errMsgProvider: defaultGrammarValidatorErrorProvider,
            grammarName: className
          })
          const lookaheadValidationErrors = validateLookahead({
            lookaheadStrategy: this.lookaheadStrategy,
            rules: values(this.gastProductionsCache),
            tokenTypes: values(this.tokensMap),
            grammarName: className
          })
          this.definitionErrors = this.definitionErrors.concat(
            validationErrors,
            lookaheadValidationErrors
          )
        }
      })

      // this analysis may fail if the grammar is not perfectly valid
      if (isEmpty(this.definitionErrors)) {
        // The results of these computations are not needed unless error recovery is enabled.
        if (this.recoveryEnabled) {
          this.TRACE_INIT("computeAllProdsFollows", () => {
            const allFollows = computeAllProdsFollows(
              values(this.gastProductionsCache)
            )
            this.resyncFollows = allFollows
          })
        }

        this.TRACE_INIT("ComputeLookaheadFunctions", () => {
          // the strategy's initialize hook is optional
          this.lookaheadStrategy.initialize?.({
            rules: values(this.gastProductionsCache)
          })
          this.preComputeLookaheadFunctions(values(this.gastProductionsCache))
        })
      }

      if (
        !Parser.DEFER_DEFINITION_ERRORS_HANDLING &&
        !isEmpty(this.definitionErrors)
      ) {
        defErrorsMsgs = map(
          this.definitionErrors,
          (defError) => defError.message
        )
        throw new Error(
          `Parser Definition Errors detected:\n ${defErrorsMsgs.join(
            "\n-------------------------------\n"
          )}`
        )
      }
    })
  }

  // accumulated design-time (grammar definition) errors
  definitionErrors: IParserDefinitionError[] = []
  // guards against using the parser before performSelfAnalysis was invoked
  selfAnalysisDone = false
  protected skipValidations: boolean

  /**
   * Initializes every mixed-in trait; the actual parsing capabilities are
   * provided by those traits (see `applyMixins` below this class).
   */
  constructor(tokenVocabulary: TokenVocabulary, config: IParserConfig) {
    const that: MixedInParser = this as any
    that.initErrorHandler(config)
    that.initLexerAdapter()
    that.initLooksAhead(config)
    that.initRecognizerEngine(tokenVocabulary, config)
    that.initRecoverable(config)
    that.initTreeBuilder(config)
    that.initContentAssist()
    that.initGastRecorder(config)
    that.initPerformanceTracer(config)

    if (has(config, "ignoredIssues")) {
      throw new Error(
        "The <ignoredIssues> IParserConfig property has been deprecated.\n\t" +
          "Please use the <IGNORE_AMBIGUITIES> flag on the relevant DSL method instead.\n\t" +
          "See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#IGNORING_AMBIGUITIES\n\t" +
          "For further details."
      )
    }

    this.skipValidations = has(config, "skipValidations")
      ? (config.skipValidations as boolean) // casting assumes the end user passing the correct type
      : DEFAULT_PARSER_CONFIG.skipValidations
  }
}
|
||||
|
||||
// Compose the Parser class from its trait classes. Each trait listed here is
// initialized by the corresponding `init*` call in the Parser constructor above.
applyMixins(Parser, [
  Recoverable,
  LooksAhead,
  TreeBuilder,
  LexerAdapter,
  RecognizerEngine,
  RecognizerApi,
  ErrorHandler,
  ContentAssist,
  GastRecorder,
  PerformanceTracer
])
|
||||
|
||||
export class CstParser extends Parser {
|
||||
constructor(
|
||||
tokenVocabulary: TokenVocabulary,
|
||||
config: IParserConfigInternal = DEFAULT_PARSER_CONFIG
|
||||
) {
|
||||
const configClone = clone(config)
|
||||
configClone.outputCst = true
|
||||
super(tokenVocabulary, configClone)
|
||||
}
|
||||
}
|
||||
|
||||
export class EmbeddedActionsParser extends Parser {
|
||||
constructor(
|
||||
tokenVocabulary: TokenVocabulary,
|
||||
config: IParserConfigInternal = DEFAULT_PARSER_CONFIG
|
||||
) {
|
||||
const configClone = clone(config)
|
||||
configClone.outputCst = false
|
||||
super(tokenVocabulary, configClone)
|
||||
}
|
||||
}
|
||||
52
_node_modules/chevrotain/src/parse/parser/traits/context_assist.ts
generated
Normal file
52
_node_modules/chevrotain/src/parse/parser/traits/context_assist.ts
generated
Normal file
@@ -0,0 +1,52 @@
|
||||
import {
|
||||
ISyntacticContentAssistPath,
|
||||
IToken,
|
||||
ITokenGrammarPath,
|
||||
TokenType
|
||||
} from "@chevrotain/types"
|
||||
import {
|
||||
NextAfterTokenWalker,
|
||||
nextPossibleTokensAfter
|
||||
} from "../../grammar/interpreter"
|
||||
import first from "lodash/first"
|
||||
import isUndefined from "lodash/isUndefined"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
|
||||
export class ContentAssist {
|
||||
initContentAssist() {}
|
||||
|
||||
public computeContentAssist(
|
||||
this: MixedInParser,
|
||||
startRuleName: string,
|
||||
precedingInput: IToken[]
|
||||
): ISyntacticContentAssistPath[] {
|
||||
const startRuleGast = this.gastProductionsCache[startRuleName]
|
||||
|
||||
if (isUndefined(startRuleGast)) {
|
||||
throw Error(`Rule ->${startRuleName}<- does not exist in this grammar.`)
|
||||
}
|
||||
|
||||
return nextPossibleTokensAfter(
|
||||
[startRuleGast],
|
||||
precedingInput,
|
||||
this.tokenMatcher,
|
||||
this.maxLookahead
|
||||
)
|
||||
}
|
||||
|
||||
// TODO: should this be a member method or a utility? it does not have any state or usage of 'this'...
|
||||
// TODO: should this be more explicitly part of the public API?
|
||||
public getNextPossibleTokenTypes(
|
||||
this: MixedInParser,
|
||||
grammarPath: ITokenGrammarPath
|
||||
): TokenType[] {
|
||||
const topRuleName = first(grammarPath.ruleStack)!
|
||||
const gastProductions = this.getGAstProductions()
|
||||
const topProduction = gastProductions[topRuleName]
|
||||
const nextPossibleTokenTypes = new NextAfterTokenWalker(
|
||||
topProduction,
|
||||
grammarPath
|
||||
).startWalking()
|
||||
return nextPossibleTokenTypes
|
||||
}
|
||||
}
|
||||
123
_node_modules/chevrotain/src/parse/parser/traits/error_handler.ts
generated
Normal file
123
_node_modules/chevrotain/src/parse/parser/traits/error_handler.ts
generated
Normal file
@@ -0,0 +1,123 @@
|
||||
import {
|
||||
IParserConfig,
|
||||
IParserErrorMessageProvider,
|
||||
IRecognitionException
|
||||
} from "@chevrotain/types"
|
||||
import {
|
||||
EarlyExitException,
|
||||
isRecognitionException,
|
||||
NoViableAltException
|
||||
} from "../../exceptions_public"
|
||||
import has from "lodash/has"
|
||||
import clone from "lodash/clone"
|
||||
import {
|
||||
getLookaheadPathsForOptionalProd,
|
||||
getLookaheadPathsForOr,
|
||||
PROD_TYPE
|
||||
} from "../../grammar/lookahead"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
import { DEFAULT_PARSER_CONFIG } from "../parser"
|
||||
|
||||
/**
|
||||
* Trait responsible for runtime parsing errors.
|
||||
*/
|
||||
export class ErrorHandler {
|
||||
_errors: IRecognitionException[]
|
||||
errorMessageProvider: IParserErrorMessageProvider
|
||||
|
||||
initErrorHandler(config: IParserConfig) {
|
||||
this._errors = []
|
||||
this.errorMessageProvider = has(config, "errorMessageProvider")
|
||||
? (config.errorMessageProvider as IParserErrorMessageProvider) // assumes end user provides the correct config value/type
|
||||
: DEFAULT_PARSER_CONFIG.errorMessageProvider
|
||||
}
|
||||
|
||||
SAVE_ERROR(
|
||||
this: MixedInParser,
|
||||
error: IRecognitionException
|
||||
): IRecognitionException {
|
||||
if (isRecognitionException(error)) {
|
||||
error.context = {
|
||||
ruleStack: this.getHumanReadableRuleStack(),
|
||||
ruleOccurrenceStack: clone(this.RULE_OCCURRENCE_STACK)
|
||||
}
|
||||
this._errors.push(error)
|
||||
return error
|
||||
} else {
|
||||
throw Error("Trying to save an Error which is not a RecognitionException")
|
||||
}
|
||||
}
|
||||
|
||||
get errors(): IRecognitionException[] {
|
||||
return clone(this._errors)
|
||||
}
|
||||
|
||||
set errors(newErrors: IRecognitionException[]) {
|
||||
this._errors = newErrors
|
||||
}
|
||||
|
||||
// TODO: consider caching the error message computed information
|
||||
raiseEarlyExitException(
|
||||
this: MixedInParser,
|
||||
occurrence: number,
|
||||
prodType: PROD_TYPE,
|
||||
userDefinedErrMsg: string | undefined
|
||||
): never {
|
||||
const ruleName = this.getCurrRuleFullName()
|
||||
const ruleGrammar = this.getGAstProductions()[ruleName]
|
||||
const lookAheadPathsPerAlternative = getLookaheadPathsForOptionalProd(
|
||||
occurrence,
|
||||
ruleGrammar,
|
||||
prodType,
|
||||
this.maxLookahead
|
||||
)
|
||||
const insideProdPaths = lookAheadPathsPerAlternative[0]
|
||||
const actualTokens = []
|
||||
for (let i = 1; i <= this.maxLookahead; i++) {
|
||||
actualTokens.push(this.LA(i))
|
||||
}
|
||||
const msg = this.errorMessageProvider.buildEarlyExitMessage({
|
||||
expectedIterationPaths: insideProdPaths,
|
||||
actual: actualTokens,
|
||||
previous: this.LA(0),
|
||||
customUserDescription: userDefinedErrMsg,
|
||||
ruleName: ruleName
|
||||
})
|
||||
|
||||
throw this.SAVE_ERROR(new EarlyExitException(msg, this.LA(1), this.LA(0)))
|
||||
}
|
||||
|
||||
// TODO: consider caching the error message computed information
|
||||
raiseNoAltException(
|
||||
this: MixedInParser,
|
||||
occurrence: number,
|
||||
errMsgTypes: string | undefined
|
||||
): never {
|
||||
const ruleName = this.getCurrRuleFullName()
|
||||
const ruleGrammar = this.getGAstProductions()[ruleName]
|
||||
// TODO: getLookaheadPathsForOr can be slow for large enough maxLookahead and certain grammars, consider caching ?
|
||||
const lookAheadPathsPerAlternative = getLookaheadPathsForOr(
|
||||
occurrence,
|
||||
ruleGrammar,
|
||||
this.maxLookahead
|
||||
)
|
||||
|
||||
const actualTokens = []
|
||||
for (let i = 1; i <= this.maxLookahead; i++) {
|
||||
actualTokens.push(this.LA(i))
|
||||
}
|
||||
const previousToken = this.LA(0)
|
||||
|
||||
const errMsg = this.errorMessageProvider.buildNoViableAltMessage({
|
||||
expectedPathsPerAlt: lookAheadPathsPerAlternative,
|
||||
actual: actualTokens,
|
||||
previous: previousToken,
|
||||
customUserDescription: errMsgTypes,
|
||||
ruleName: this.getCurrRuleFullName()
|
||||
})
|
||||
|
||||
throw this.SAVE_ERROR(
|
||||
new NoViableAltException(errMsg, this.LA(1), previousToken)
|
||||
)
|
||||
}
|
||||
}
|
||||
446
_node_modules/chevrotain/src/parse/parser/traits/gast_recorder.ts
generated
Normal file
446
_node_modules/chevrotain/src/parse/parser/traits/gast_recorder.ts
generated
Normal file
@@ -0,0 +1,446 @@
|
||||
import {
|
||||
AtLeastOneSepMethodOpts,
|
||||
ConsumeMethodOpts,
|
||||
CstNode,
|
||||
DSLMethodOpts,
|
||||
DSLMethodOptsWithErr,
|
||||
GrammarAction,
|
||||
IOrAlt,
|
||||
IParserConfig,
|
||||
IProduction,
|
||||
IToken,
|
||||
ManySepMethodOpts,
|
||||
OrMethodOpts,
|
||||
SubruleMethodOpts,
|
||||
TokenType
|
||||
} from "@chevrotain/types"
|
||||
import peek from "lodash/last"
|
||||
import isArray from "lodash/isArray"
|
||||
import some from "lodash/some"
|
||||
import forEach from "lodash/forEach"
|
||||
import isFunction from "lodash/isFunction"
|
||||
import has from "lodash/has"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
import {
|
||||
Alternation,
|
||||
Alternative,
|
||||
NonTerminal,
|
||||
Option,
|
||||
Repetition,
|
||||
RepetitionMandatory,
|
||||
RepetitionMandatoryWithSeparator,
|
||||
RepetitionWithSeparator,
|
||||
Rule,
|
||||
Terminal
|
||||
} from "@chevrotain/gast"
|
||||
import { Lexer } from "../../../scan/lexer_public"
|
||||
import { augmentTokenTypes, hasShortKeyProperty } from "../../../scan/tokens"
|
||||
import { createToken, createTokenInstance } from "../../../scan/tokens_public"
|
||||
import { END_OF_FILE } from "../parser"
|
||||
import { BITS_FOR_OCCURRENCE_IDX } from "../../grammar/keys"
|
||||
import { ParserMethodInternal } from "../types"
|
||||
|
||||
// A production with an optionally-present `definition` array, so the recorder
// can push children into whichever production is on top of the recording stack.
type ProdWithDef = IProduction & { definition?: IProduction[] }
// Frozen singleton placeholder returned by most recorded DSL methods.
const RECORDING_NULL_OBJECT = {
  description: "This Object indicates the Parser is during Recording Phase"
}
Object.freeze(RECORDING_NULL_OBJECT)

// Flag passed to recordProd for productions that carry a SEP (separator) option.
const HANDLE_SEPARATOR = true
// Largest occurrence idx a DSL method may use, derived from the number of
// bits reserved for the occurrence idx in the automatic-lookahead keys.
const MAX_METHOD_IDX = Math.pow(2, BITS_FOR_OCCURRENCE_IDX) - 1

// A token type that can never actually be matched (Lexer.NA pattern);
// used only to construct the recording-phase placeholder token below.
const RFT = createToken({ name: "RECORDING_PHASE_TOKEN", pattern: Lexer.NA })
augmentTokenTypes([RFT])
// Frozen singleton token returned by recorded CONSUME calls.
const RECORDING_PHASE_TOKEN = createTokenInstance(
  RFT,
  "This IToken indicates the Parser is in Recording Phase\n\t" +
    "" +
    "See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details",
  // Using "-1" instead of NaN (as in EOF) because an actual number is less likely to
  // cause errors if the output of LA or CONSUME would be (incorrectly) used during the recording phase.
  -1,
  -1,
  -1,
  -1,
  -1,
  -1
)
Object.freeze(RECORDING_PHASE_TOKEN)

// Singleton CstNode returned by recorded SUBRULE calls when `outputCst` is enabled.
const RECORDING_PHASE_CSTNODE: CstNode = {
  name:
    "This CSTNode indicates the Parser is in Recording Phase\n\t" +
    "See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details",
  children: {}
}
|
||||
|
||||
/**
 * This trait handles the creation of the GAST structure for Chevrotain Grammars.
 *
 * While recording is enabled, the parsing DSL methods (CONSUME/SUBRULE/OR/...)
 * are replaced by *Record variants that build GAST nodes instead of parsing.
 */
export class GastRecorder {
  // stack of productions currently being recorded; children are pushed
  // into the `definition` of the production at the top of this stack
  recordingProdStack: ProdWithDef[]
  RECORDING_PHASE: boolean

  initGastRecorder(this: MixedInParser, config: IParserConfig): void {
    this.recordingProdStack = []
    this.RECORDING_PHASE = false
  }

  enableRecording(this: MixedInParser): void {
    this.RECORDING_PHASE = true

    this.TRACE_INIT("Enable Recording", () => {
      /**
       * Warning Dark Voodoo Magic upcoming!
       * We are "replacing" the public parsing DSL methods API
       * With **new** alternative implementations on the Parser **instance**
       *
       * So far this is the only way I've found to avoid performance regressions during parsing time.
       * - Approx 30% performance regression was measured on Chrome 75 Canary when attempting to replace the "internal"
       *   implementations directly instead.
       */
      // occurrence indices 0..9 map to method names CONSUME, CONSUME1 ... CONSUME9
      for (let i = 0; i < 10; i++) {
        const idx = i > 0 ? i : ""
        this[`CONSUME${idx}` as "CONSUME"] = function (arg1, arg2) {
          return this.consumeInternalRecord(arg1, i, arg2)
        }
        this[`SUBRULE${idx}` as "SUBRULE"] = function (arg1, arg2) {
          return this.subruleInternalRecord(arg1, i, arg2) as any
        }
        this[`OPTION${idx}` as "OPTION"] = function (arg1) {
          return this.optionInternalRecord(arg1, i)
        }
        this[`OR${idx}` as "OR"] = function (arg1) {
          return this.orInternalRecord(arg1, i)
        }
        this[`MANY${idx}` as "MANY"] = function (arg1) {
          this.manyInternalRecord(i, arg1)
        }
        this[`MANY_SEP${idx}` as "MANY_SEP"] = function (arg1) {
          this.manySepFirstInternalRecord(i, arg1)
        }
        this[`AT_LEAST_ONE${idx}` as "AT_LEAST_ONE"] = function (arg1) {
          this.atLeastOneInternalRecord(i, arg1)
        }
        this[`AT_LEAST_ONE_SEP${idx}` as "AT_LEAST_ONE_SEP"] = function (arg1) {
          this.atLeastOneSepFirstInternalRecord(i, arg1)
        }
      }

      // DSL methods with the idx(suffix) as an argument
      this[`consume`] = function (idx, arg1, arg2) {
        return this.consumeInternalRecord(arg1, idx, arg2)
      }
      this[`subrule`] = function (idx, arg1, arg2) {
        return this.subruleInternalRecord(arg1, idx, arg2) as any
      }
      this[`option`] = function (idx, arg1) {
        return this.optionInternalRecord(arg1, idx)
      }
      this[`or`] = function (idx, arg1) {
        return this.orInternalRecord(arg1, idx)
      }
      this[`many`] = function (idx, arg1) {
        this.manyInternalRecord(idx, arg1)
      }
      this[`atLeastOne`] = function (idx, arg1) {
        this.atLeastOneInternalRecord(idx, arg1)
      }

      // ACTION / BACKTRACK / LA also need recording-safe stand-ins
      this.ACTION = this.ACTION_RECORD
      this.BACKTRACK = this.BACKTRACK_RECORD
      this.LA = this.LA_RECORD
    })
  }

  disableRecording(this: MixedInParser) {
    this.RECORDING_PHASE = false
    // By deleting these **instance** properties, any future invocation
    // will be deferred to the original methods on the **prototype** object
    // This seems to get rid of any incorrect optimizations that V8 may
    // do during the recording phase.
    this.TRACE_INIT("Deleting Recording methods", () => {
      const that: any = this

      for (let i = 0; i < 10; i++) {
        const idx = i > 0 ? i : ""
        delete that[`CONSUME${idx}`]
        delete that[`SUBRULE${idx}`]
        delete that[`OPTION${idx}`]
        delete that[`OR${idx}`]
        delete that[`MANY${idx}`]
        delete that[`MANY_SEP${idx}`]
        delete that[`AT_LEAST_ONE${idx}`]
        delete that[`AT_LEAST_ONE_SEP${idx}`]
      }

      delete that[`consume`]
      delete that[`subrule`]
      delete that[`option`]
      delete that[`or`]
      delete that[`many`]
      delete that[`atLeastOne`]

      delete that.ACTION
      delete that.BACKTRACK
      delete that.LA
    })
  }

  // Parser methods are called inside an ACTION?
  // Maybe try/catch/finally on ACTIONS while disabling the recorders state changes?
  // @ts-expect-error -- noop place holder
  ACTION_RECORD<T>(this: MixedInParser, impl: () => T): T {
    // NO-OP during recording
  }

  // Executing backtracking logic will break our recording logic assumptions
  BACKTRACK_RECORD<T>(
    grammarRule: (...args: any[]) => T,
    args?: any[]
  ): () => boolean {
    return () => true
  }

  // LA is part of the official API and may be used for custom lookahead logic
  // by end users who may forget to wrap it in ACTION or inside a GATE
  LA_RECORD(howMuch: number): IToken {
    // We cannot use the RECORD_PHASE_TOKEN here because someone may depend
    // On LA return EOF at the end of the input so an infinite loop may occur.
    return END_OF_FILE
  }

  /**
   * Records one top-level rule by invoking its original grammar action
   * while the recording DSL replacements are active. Errors thrown during
   * recording are annotated with a pointer to the relevant docs.
   */
  topLevelRuleRecord(name: string, def: Function): Rule {
    try {
      const newTopLevelRule = new Rule({ definition: [], name: name })
      newTopLevelRule.name = name
      this.recordingProdStack.push(newTopLevelRule)
      def.call(this)
      this.recordingProdStack.pop()
      return newTopLevelRule
    } catch (originalError) {
      // errors the recorder raised itself are already descriptive
      if (originalError.KNOWN_RECORDER_ERROR !== true) {
        try {
          originalError.message =
            originalError.message +
            '\n\t This error was thrown during the "grammar recording phase" For more info see:\n\t' +
            "https://chevrotain.io/docs/guide/internals.html#grammar-recording"
        } catch (mutabilityError) {
          // We may not be able to modify the original error object
          throw originalError
        }
      }
      throw originalError
    }
  }

  // Implementation of parsing DSL
  optionInternalRecord<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
    occurrence: number
  ): OUT {
    return recordProd.call(this, Option, actionORMethodDef, occurrence)
  }

  atLeastOneInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    recordProd.call(this, RepetitionMandatory, actionORMethodDef, occurrence)
  }

  atLeastOneSepFirstInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    recordProd.call(
      this,
      RepetitionMandatoryWithSeparator,
      options,
      occurrence,
      HANDLE_SEPARATOR
    )
  }

  manyInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    recordProd.call(this, Repetition, actionORMethodDef, occurrence)
  }

  manySepFirstInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    options: ManySepMethodOpts<OUT>
  ): void {
    recordProd.call(
      this,
      RepetitionWithSeparator,
      options,
      occurrence,
      HANDLE_SEPARATOR
    )
  }

  orInternalRecord<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>,
    occurrence: number
  ): T {
    return recordOrProd.call(this, altsOrOpts, occurrence)
  }

  /**
   * Records a SUBRULE call as a NonTerminal GAST node.
   * Validates the argument is actually a wrapped parser rule method.
   */
  subruleInternalRecord<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    occurrence: number,
    options?: SubruleMethodOpts<ARGS>
  ): R | CstNode {
    assertMethodIdxIsValid(occurrence)
    if (!ruleToCall || has(ruleToCall, "ruleName") === false) {
      const error: any = new Error(
        `<SUBRULE${getIdxSuffix(occurrence)}> argument is invalid` +
          ` expecting a Parser method reference but got: <${JSON.stringify(
            ruleToCall
          )}>` +
          `\n inside top level rule: <${
            (<Rule>this.recordingProdStack[0]).name
          }>`
      )
      error.KNOWN_RECORDER_ERROR = true
      throw error
    }

    const prevProd: any = peek(this.recordingProdStack)
    const ruleName = ruleToCall.ruleName
    const newNoneTerminal = new NonTerminal({
      idx: occurrence,
      nonTerminalName: ruleName,
      label: options?.LABEL,
      // The resolving of the `referencedRule` property will be done once all the Rule's GASTs have been created
      referencedRule: undefined
    })
    prevProd.definition.push(newNoneTerminal)

    return this.outputCst ? RECORDING_PHASE_CSTNODE : <any>RECORDING_NULL_OBJECT
  }

  /**
   * Records a CONSUME call as a Terminal GAST node.
   * Validates the argument is actually a TokenType.
   */
  consumeInternalRecord(
    this: MixedInParser,
    tokType: TokenType,
    occurrence: number,
    options?: ConsumeMethodOpts
  ): IToken {
    assertMethodIdxIsValid(occurrence)
    if (!hasShortKeyProperty(tokType)) {
      const error: any = new Error(
        `<CONSUME${getIdxSuffix(occurrence)}> argument is invalid` +
          ` expecting a TokenType reference but got: <${JSON.stringify(
            tokType
          )}>` +
          `\n inside top level rule: <${
            (<Rule>this.recordingProdStack[0]).name
          }>`
      )
      error.KNOWN_RECORDER_ERROR = true
      throw error
    }
    const prevProd: any = peek(this.recordingProdStack)
    const newNoneTerminal = new Terminal({
      idx: occurrence,
      terminalType: tokType,
      label: options?.LABEL
    })
    prevProd.definition.push(newNoneTerminal)

    return RECORDING_PHASE_TOKEN
  }
}
|
||||
|
||||
function recordProd(
|
||||
prodConstructor: any,
|
||||
mainProdArg: any,
|
||||
occurrence: number,
|
||||
handleSep: boolean = false
|
||||
): any {
|
||||
assertMethodIdxIsValid(occurrence)
|
||||
const prevProd: any = peek(this.recordingProdStack)
|
||||
const grammarAction = isFunction(mainProdArg) ? mainProdArg : mainProdArg.DEF
|
||||
|
||||
const newProd = new prodConstructor({ definition: [], idx: occurrence })
|
||||
if (handleSep) {
|
||||
newProd.separator = mainProdArg.SEP
|
||||
}
|
||||
if (has(mainProdArg, "MAX_LOOKAHEAD")) {
|
||||
newProd.maxLookahead = mainProdArg.MAX_LOOKAHEAD
|
||||
}
|
||||
|
||||
this.recordingProdStack.push(newProd)
|
||||
grammarAction.call(this)
|
||||
prevProd.definition.push(newProd)
|
||||
this.recordingProdStack.pop()
|
||||
|
||||
return RECORDING_NULL_OBJECT
|
||||
}
|
||||
|
||||
function recordOrProd(mainProdArg: any, occurrence: number): any {
|
||||
assertMethodIdxIsValid(occurrence)
|
||||
const prevProd: any = peek(this.recordingProdStack)
|
||||
// Only an array of alternatives
|
||||
const hasOptions = isArray(mainProdArg) === false
|
||||
const alts: IOrAlt<unknown>[] =
|
||||
hasOptions === false ? mainProdArg : mainProdArg.DEF
|
||||
|
||||
const newOrProd = new Alternation({
|
||||
definition: [],
|
||||
idx: occurrence,
|
||||
ignoreAmbiguities: hasOptions && mainProdArg.IGNORE_AMBIGUITIES === true
|
||||
})
|
||||
if (has(mainProdArg, "MAX_LOOKAHEAD")) {
|
||||
newOrProd.maxLookahead = mainProdArg.MAX_LOOKAHEAD
|
||||
}
|
||||
|
||||
const hasPredicates = some(alts, (currAlt: any) => isFunction(currAlt.GATE))
|
||||
newOrProd.hasPredicates = hasPredicates
|
||||
|
||||
prevProd.definition.push(newOrProd)
|
||||
|
||||
forEach(alts, (currAlt) => {
|
||||
const currAltFlat = new Alternative({ definition: [] })
|
||||
newOrProd.definition.push(currAltFlat)
|
||||
if (has(currAlt, "IGNORE_AMBIGUITIES")) {
|
||||
currAltFlat.ignoreAmbiguities = currAlt.IGNORE_AMBIGUITIES as boolean // assumes end user provides the correct config value/type
|
||||
}
|
||||
// **implicit** ignoreAmbiguities due to usage of gate
|
||||
else if (has(currAlt, "GATE")) {
|
||||
currAltFlat.ignoreAmbiguities = true
|
||||
}
|
||||
this.recordingProdStack.push(currAltFlat)
|
||||
currAlt.ALT.call(this)
|
||||
this.recordingProdStack.pop()
|
||||
})
|
||||
return RECORDING_NULL_OBJECT
|
||||
}
|
||||
|
||||
function getIdxSuffix(idx: number): string {
|
||||
return idx === 0 ? "" : `${idx}`
|
||||
}
|
||||
|
||||
function assertMethodIdxIsValid(idx: number): void {
|
||||
if (idx < 0 || idx > MAX_METHOD_IDX) {
|
||||
const error: any = new Error(
|
||||
// The stack trace will contain all the needed details
|
||||
`Invalid DSL Method idx value: <${idx}>\n\t` +
|
||||
`Idx value must be a none negative value smaller than ${
|
||||
MAX_METHOD_IDX + 1
|
||||
}`
|
||||
)
|
||||
error.KNOWN_RECORDER_ERROR = true
|
||||
throw error
|
||||
}
|
||||
}
|
||||
86
_node_modules/chevrotain/src/parse/parser/traits/lexer_adapter.ts
generated
Normal file
86
_node_modules/chevrotain/src/parse/parser/traits/lexer_adapter.ts
generated
Normal file
@@ -0,0 +1,86 @@
|
||||
import { END_OF_FILE } from "../parser"
|
||||
import { IToken } from "@chevrotain/types"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
|
||||
/**
|
||||
* Trait responsible abstracting over the interaction with Lexer output (Token vector).
|
||||
*
|
||||
* This could be generalized to support other kinds of lexers, e.g.
|
||||
* - Just in Time Lexing / Lexer-Less parsing.
|
||||
* - Streaming Lexer.
|
||||
*/
|
||||
export class LexerAdapter {
|
||||
tokVector: IToken[]
|
||||
tokVectorLength: number
|
||||
currIdx: number
|
||||
|
||||
initLexerAdapter() {
|
||||
this.tokVector = []
|
||||
this.tokVectorLength = 0
|
||||
this.currIdx = -1
|
||||
}
|
||||
|
||||
set input(newInput: IToken[]) {
|
||||
// @ts-ignore - `this parameter` not supported in setters/getters
|
||||
// - https://www.typescriptlang.org/docs/handbook/functions.html#this-parameters
|
||||
if (this.selfAnalysisDone !== true) {
|
||||
throw Error(
|
||||
`Missing <performSelfAnalysis> invocation at the end of the Parser's constructor.`
|
||||
)
|
||||
}
|
||||
// @ts-ignore - `this parameter` not supported in setters/getters
|
||||
// - https://www.typescriptlang.org/docs/handbook/functions.html#this-parameters
|
||||
this.reset()
|
||||
this.tokVector = newInput
|
||||
this.tokVectorLength = newInput.length
|
||||
}
|
||||
|
||||
get input(): IToken[] {
|
||||
return this.tokVector
|
||||
}
|
||||
|
||||
// skips a token and returns the next token
|
||||
SKIP_TOKEN(this: MixedInParser): IToken {
|
||||
if (this.currIdx <= this.tokVector.length - 2) {
|
||||
this.consumeToken()
|
||||
return this.LA(1)
|
||||
} else {
|
||||
return END_OF_FILE
|
||||
}
|
||||
}
|
||||
|
||||
// Lexer (accessing Token vector) related methods which can be overridden to implement lazy lexers
|
||||
// or lexers dependent on parser context.
|
||||
LA(this: MixedInParser, howMuch: number): IToken {
|
||||
const soughtIdx = this.currIdx + howMuch
|
||||
if (soughtIdx < 0 || this.tokVectorLength <= soughtIdx) {
|
||||
return END_OF_FILE
|
||||
} else {
|
||||
return this.tokVector[soughtIdx]
|
||||
}
|
||||
}
|
||||
|
||||
consumeToken(this: MixedInParser) {
|
||||
this.currIdx++
|
||||
}
|
||||
|
||||
exportLexerState(this: MixedInParser): number {
|
||||
return this.currIdx
|
||||
}
|
||||
|
||||
importLexerState(this: MixedInParser, newState: number) {
|
||||
this.currIdx = newState
|
||||
}
|
||||
|
||||
resetLexerState(this: MixedInParser): void {
|
||||
this.currIdx = -1
|
||||
}
|
||||
|
||||
moveToTerminatedState(this: MixedInParser): void {
|
||||
this.currIdx = this.tokVector.length - 1
|
||||
}
|
||||
|
||||
getLexerPosition(this: MixedInParser): number {
|
||||
return this.exportLexerState()
|
||||
}
|
||||
}
|
||||
270
_node_modules/chevrotain/src/parse/parser/traits/looksahead.ts
generated
Normal file
270
_node_modules/chevrotain/src/parse/parser/traits/looksahead.ts
generated
Normal file
@@ -0,0 +1,270 @@
|
||||
import forEach from "lodash/forEach"
|
||||
import has from "lodash/has"
|
||||
import { DEFAULT_PARSER_CONFIG } from "../parser"
|
||||
import {
|
||||
ILookaheadStrategy,
|
||||
IParserConfig,
|
||||
OptionalProductionType
|
||||
} from "@chevrotain/types"
|
||||
import {
|
||||
AT_LEAST_ONE_IDX,
|
||||
AT_LEAST_ONE_SEP_IDX,
|
||||
getKeyForAutomaticLookahead,
|
||||
MANY_IDX,
|
||||
MANY_SEP_IDX,
|
||||
OPTION_IDX,
|
||||
OR_IDX
|
||||
} from "../../grammar/keys"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
import {
|
||||
Alternation,
|
||||
GAstVisitor,
|
||||
Option,
|
||||
Repetition,
|
||||
RepetitionMandatory,
|
||||
RepetitionMandatoryWithSeparator,
|
||||
RepetitionWithSeparator,
|
||||
Rule
|
||||
} from "@chevrotain/gast"
|
||||
import { getProductionDslName } from "@chevrotain/gast"
|
||||
import { LLkLookaheadStrategy } from "../../grammar/llk_lookahead"
|
||||
|
||||
/**
|
||||
* Trait responsible for the lookahead related utilities and optimizations.
|
||||
*/
|
||||
export class LooksAhead {
|
||||
maxLookahead: number
|
||||
lookAheadFuncsCache: any
|
||||
dynamicTokensEnabled: boolean
|
||||
lookaheadStrategy: ILookaheadStrategy
|
||||
|
||||
initLooksAhead(config: IParserConfig) {
|
||||
this.dynamicTokensEnabled = has(config, "dynamicTokensEnabled")
|
||||
? (config.dynamicTokensEnabled as boolean) // assumes end user provides the correct config value/type
|
||||
: DEFAULT_PARSER_CONFIG.dynamicTokensEnabled
|
||||
|
||||
this.maxLookahead = has(config, "maxLookahead")
|
||||
? (config.maxLookahead as number) // assumes end user provides the correct config value/type
|
||||
: DEFAULT_PARSER_CONFIG.maxLookahead
|
||||
|
||||
this.lookaheadStrategy = has(config, "lookaheadStrategy")
|
||||
? (config.lookaheadStrategy as ILookaheadStrategy) // assumes end user provides the correct config value/type
|
||||
: new LLkLookaheadStrategy({ maxLookahead: this.maxLookahead })
|
||||
|
||||
this.lookAheadFuncsCache = new Map()
|
||||
}
|
||||
|
||||
preComputeLookaheadFunctions(this: MixedInParser, rules: Rule[]): void {
|
||||
forEach(rules, (currRule) => {
|
||||
this.TRACE_INIT(`${currRule.name} Rule Lookahead`, () => {
|
||||
const {
|
||||
alternation,
|
||||
repetition,
|
||||
option,
|
||||
repetitionMandatory,
|
||||
repetitionMandatoryWithSeparator,
|
||||
repetitionWithSeparator
|
||||
} = collectMethods(currRule)
|
||||
|
||||
forEach(alternation, (currProd) => {
|
||||
const prodIdx = currProd.idx === 0 ? "" : currProd.idx
|
||||
this.TRACE_INIT(`${getProductionDslName(currProd)}${prodIdx}`, () => {
|
||||
const laFunc = this.lookaheadStrategy.buildLookaheadForAlternation({
|
||||
prodOccurrence: currProd.idx,
|
||||
rule: currRule,
|
||||
maxLookahead: currProd.maxLookahead || this.maxLookahead,
|
||||
hasPredicates: currProd.hasPredicates,
|
||||
dynamicTokensEnabled: this.dynamicTokensEnabled
|
||||
})
|
||||
|
||||
const key = getKeyForAutomaticLookahead(
|
||||
this.fullRuleNameToShort[currRule.name],
|
||||
OR_IDX,
|
||||
currProd.idx
|
||||
)
|
||||
this.setLaFuncCache(key, laFunc)
|
||||
})
|
||||
})
|
||||
|
||||
forEach(repetition, (currProd) => {
|
||||
this.computeLookaheadFunc(
|
||||
currRule,
|
||||
currProd.idx,
|
||||
MANY_IDX,
|
||||
"Repetition",
|
||||
currProd.maxLookahead,
|
||||
getProductionDslName(currProd)
|
||||
)
|
||||
})
|
||||
|
||||
forEach(option, (currProd) => {
|
||||
this.computeLookaheadFunc(
|
||||
currRule,
|
||||
currProd.idx,
|
||||
OPTION_IDX,
|
||||
"Option",
|
||||
currProd.maxLookahead,
|
||||
getProductionDslName(currProd)
|
||||
)
|
||||
})
|
||||
|
||||
forEach(repetitionMandatory, (currProd) => {
|
||||
this.computeLookaheadFunc(
|
||||
currRule,
|
||||
currProd.idx,
|
||||
AT_LEAST_ONE_IDX,
|
||||
"RepetitionMandatory",
|
||||
currProd.maxLookahead,
|
||||
getProductionDslName(currProd)
|
||||
)
|
||||
})
|
||||
|
||||
forEach(repetitionMandatoryWithSeparator, (currProd) => {
|
||||
this.computeLookaheadFunc(
|
||||
currRule,
|
||||
currProd.idx,
|
||||
AT_LEAST_ONE_SEP_IDX,
|
||||
"RepetitionMandatoryWithSeparator",
|
||||
currProd.maxLookahead,
|
||||
getProductionDslName(currProd)
|
||||
)
|
||||
})
|
||||
|
||||
forEach(repetitionWithSeparator, (currProd) => {
|
||||
this.computeLookaheadFunc(
|
||||
currRule,
|
||||
currProd.idx,
|
||||
MANY_SEP_IDX,
|
||||
"RepetitionWithSeparator",
|
||||
currProd.maxLookahead,
|
||||
getProductionDslName(currProd)
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
computeLookaheadFunc(
|
||||
this: MixedInParser,
|
||||
rule: Rule,
|
||||
prodOccurrence: number,
|
||||
prodKey: number,
|
||||
prodType: OptionalProductionType,
|
||||
prodMaxLookahead: number | undefined,
|
||||
dslMethodName: string
|
||||
): void {
|
||||
this.TRACE_INIT(
|
||||
`${dslMethodName}${prodOccurrence === 0 ? "" : prodOccurrence}`,
|
||||
() => {
|
||||
const laFunc = this.lookaheadStrategy.buildLookaheadForOptional({
|
||||
prodOccurrence,
|
||||
rule,
|
||||
maxLookahead: prodMaxLookahead || this.maxLookahead,
|
||||
dynamicTokensEnabled: this.dynamicTokensEnabled,
|
||||
prodType
|
||||
})
|
||||
const key = getKeyForAutomaticLookahead(
|
||||
this.fullRuleNameToShort[rule.name],
|
||||
prodKey,
|
||||
prodOccurrence
|
||||
)
|
||||
this.setLaFuncCache(key, laFunc)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
// this actually returns a number, but it is always used as a string (object prop key)
|
||||
getKeyForAutomaticLookahead(
|
||||
this: MixedInParser,
|
||||
dslMethodIdx: number,
|
||||
occurrence: number
|
||||
): number {
|
||||
const currRuleShortName: any = this.getLastExplicitRuleShortName()
|
||||
return getKeyForAutomaticLookahead(
|
||||
currRuleShortName,
|
||||
dslMethodIdx,
|
||||
occurrence
|
||||
)
|
||||
}
|
||||
|
||||
getLaFuncFromCache(this: MixedInParser, key: number): Function {
|
||||
return this.lookAheadFuncsCache.get(key)
|
||||
}
|
||||
|
||||
/* istanbul ignore next */
|
||||
setLaFuncCache(this: MixedInParser, key: number, value: Function): void {
|
||||
this.lookAheadFuncsCache.set(key, value)
|
||||
}
|
||||
}
|
||||
|
||||
class DslMethodsCollectorVisitor extends GAstVisitor {
|
||||
public dslMethods: {
|
||||
option: Option[]
|
||||
alternation: Alternation[]
|
||||
repetition: Repetition[]
|
||||
repetitionWithSeparator: RepetitionWithSeparator[]
|
||||
repetitionMandatory: RepetitionMandatory[]
|
||||
repetitionMandatoryWithSeparator: RepetitionMandatoryWithSeparator[]
|
||||
} = {
|
||||
option: [],
|
||||
alternation: [],
|
||||
repetition: [],
|
||||
repetitionWithSeparator: [],
|
||||
repetitionMandatory: [],
|
||||
repetitionMandatoryWithSeparator: []
|
||||
}
|
||||
|
||||
reset() {
|
||||
this.dslMethods = {
|
||||
option: [],
|
||||
alternation: [],
|
||||
repetition: [],
|
||||
repetitionWithSeparator: [],
|
||||
repetitionMandatory: [],
|
||||
repetitionMandatoryWithSeparator: []
|
||||
}
|
||||
}
|
||||
|
||||
public visitOption(option: Option): void {
|
||||
this.dslMethods.option.push(option)
|
||||
}
|
||||
|
||||
public visitRepetitionWithSeparator(manySep: RepetitionWithSeparator): void {
|
||||
this.dslMethods.repetitionWithSeparator.push(manySep)
|
||||
}
|
||||
|
||||
public visitRepetitionMandatory(atLeastOne: RepetitionMandatory): void {
|
||||
this.dslMethods.repetitionMandatory.push(atLeastOne)
|
||||
}
|
||||
|
||||
public visitRepetitionMandatoryWithSeparator(
|
||||
atLeastOneSep: RepetitionMandatoryWithSeparator
|
||||
): void {
|
||||
this.dslMethods.repetitionMandatoryWithSeparator.push(atLeastOneSep)
|
||||
}
|
||||
|
||||
public visitRepetition(many: Repetition): void {
|
||||
this.dslMethods.repetition.push(many)
|
||||
}
|
||||
|
||||
public visitAlternation(or: Alternation): void {
|
||||
this.dslMethods.alternation.push(or)
|
||||
}
|
||||
}
|
||||
|
||||
const collectorVisitor = new DslMethodsCollectorVisitor()
|
||||
export function collectMethods(rule: Rule): {
|
||||
option: Option[]
|
||||
alternation: Alternation[]
|
||||
repetition: Repetition[]
|
||||
repetitionWithSeparator: RepetitionWithSeparator[]
|
||||
repetitionMandatory: RepetitionMandatory[]
|
||||
repetitionMandatoryWithSeparator: RepetitionMandatoryWithSeparator[]
|
||||
} {
|
||||
collectorVisitor.reset()
|
||||
rule.accept(collectorVisitor)
|
||||
const dslMethods = collectorVisitor.dslMethods
|
||||
// avoid uncleaned references
|
||||
collectorVisitor.reset()
|
||||
return <any>dslMethods
|
||||
}
|
||||
58
_node_modules/chevrotain/src/parse/parser/traits/parser_traits.ts
generated
Normal file
58
_node_modules/chevrotain/src/parse/parser/traits/parser_traits.ts
generated
Normal file
@@ -0,0 +1,58 @@
|
||||
import { ErrorHandler } from "./error_handler"
|
||||
import { LexerAdapter } from "./lexer_adapter"
|
||||
import { LooksAhead } from "./looksahead"
|
||||
import { RecognizerApi } from "./recognizer_api"
|
||||
import { RecognizerEngine } from "./recognizer_engine"
|
||||
import { Recoverable } from "./recoverable"
|
||||
import { TreeBuilder } from "./tree_builder"
|
||||
import {
|
||||
Parser as ParserConstructorImpel,
|
||||
CstParser as CstParserConstructorImpel,
|
||||
EmbeddedActionsParser as EmbeddedActionsParserConstructorImpl
|
||||
} from "../parser"
|
||||
import * as defs from "@chevrotain/types"
|
||||
import { ContentAssist } from "./context_assist"
|
||||
import { GastRecorder } from "./gast_recorder"
|
||||
import { PerformanceTracer } from "./perf_tracer"
|
||||
|
||||
/**
|
||||
* This Type combines all the Parser traits.
|
||||
* It is used in all traits in the "this type assertion"
|
||||
* - https://github.com/Microsoft/TypeScript/wiki/What%27s-new-in-TypeScript#specifying-the-type-of-this-for-functions
|
||||
* This enables strong Type Checks inside trait methods that invoke methods from other traits.
|
||||
* This pattern is very similar to "self types" in Scala.
|
||||
* - https://docs.scala-lang.org/tour/self-types.html
|
||||
*/
|
||||
export type MixedInParser = ParserConstructorImpel &
|
||||
ErrorHandler &
|
||||
LexerAdapter &
|
||||
LooksAhead &
|
||||
RecognizerApi &
|
||||
RecognizerEngine &
|
||||
Recoverable &
|
||||
TreeBuilder &
|
||||
ContentAssist &
|
||||
GastRecorder &
|
||||
PerformanceTracer
|
||||
|
||||
interface MixedInCstParserConstructor {
|
||||
new (
|
||||
tokenVocabulary: defs.TokenVocabulary,
|
||||
config?: defs.IParserConfig
|
||||
): defs.CstParser
|
||||
}
|
||||
|
||||
export const CstParser: MixedInCstParserConstructor = <any>(
|
||||
CstParserConstructorImpel
|
||||
)
|
||||
|
||||
interface MixedInEmbeddedActionsParserConstructor {
|
||||
new (
|
||||
tokenVocabulary: defs.TokenVocabulary,
|
||||
config?: defs.IParserConfig
|
||||
): defs.EmbeddedActionsParser
|
||||
}
|
||||
|
||||
export const EmbeddedActionsParser: MixedInEmbeddedActionsParserConstructor = <
|
||||
any
|
||||
>EmbeddedActionsParserConstructorImpl
|
||||
54
_node_modules/chevrotain/src/parse/parser/traits/perf_tracer.ts
generated
Normal file
54
_node_modules/chevrotain/src/parse/parser/traits/perf_tracer.ts
generated
Normal file
@@ -0,0 +1,54 @@
|
||||
import { IParserConfig } from "@chevrotain/types"
|
||||
import has from "lodash/has"
|
||||
import { timer } from "@chevrotain/utils"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
import { DEFAULT_PARSER_CONFIG } from "../parser"
|
||||
|
||||
/**
|
||||
* Trait responsible for runtime parsing errors.
|
||||
*/
|
||||
export class PerformanceTracer {
|
||||
traceInitPerf: boolean | number
|
||||
traceInitMaxIdent: number
|
||||
traceInitIndent: number
|
||||
|
||||
initPerformanceTracer(config: IParserConfig) {
|
||||
if (has(config, "traceInitPerf")) {
|
||||
const userTraceInitPerf = config.traceInitPerf
|
||||
const traceIsNumber = typeof userTraceInitPerf === "number"
|
||||
this.traceInitMaxIdent = traceIsNumber
|
||||
? <number>userTraceInitPerf
|
||||
: Infinity
|
||||
this.traceInitPerf = traceIsNumber
|
||||
? userTraceInitPerf > 0
|
||||
: (userTraceInitPerf as boolean) // assumes end user provides the correct config value/type
|
||||
} else {
|
||||
this.traceInitMaxIdent = 0
|
||||
this.traceInitPerf = DEFAULT_PARSER_CONFIG.traceInitPerf
|
||||
}
|
||||
|
||||
this.traceInitIndent = -1
|
||||
}
|
||||
|
||||
TRACE_INIT<T>(this: MixedInParser, phaseDesc: string, phaseImpl: () => T): T {
|
||||
// No need to optimize this using NOOP pattern because
|
||||
// It is not called in a hot spot...
|
||||
if (this.traceInitPerf === true) {
|
||||
this.traceInitIndent++
|
||||
const indent = new Array(this.traceInitIndent + 1).join("\t")
|
||||
if (this.traceInitIndent < this.traceInitMaxIdent) {
|
||||
console.log(`${indent}--> <${phaseDesc}>`)
|
||||
}
|
||||
const { time, value } = timer(phaseImpl)
|
||||
/* istanbul ignore next - Difficult to reproduce specific performance behavior (>10ms) in tests */
|
||||
const traceMethod = time > 10 ? console.warn : console.log
|
||||
if (this.traceInitIndent < this.traceInitMaxIdent) {
|
||||
traceMethod(`${indent}<-- <${phaseDesc}> time: ${time}ms`)
|
||||
}
|
||||
this.traceInitIndent--
|
||||
return value
|
||||
} else {
|
||||
return phaseImpl()
|
||||
}
|
||||
}
|
||||
}
|
||||
720
_node_modules/chevrotain/src/parse/parser/traits/recognizer_api.ts
generated
Normal file
720
_node_modules/chevrotain/src/parse/parser/traits/recognizer_api.ts
generated
Normal file
@@ -0,0 +1,720 @@
|
||||
import {
|
||||
AtLeastOneSepMethodOpts,
|
||||
ConsumeMethodOpts,
|
||||
DSLMethodOpts,
|
||||
DSLMethodOptsWithErr,
|
||||
GrammarAction,
|
||||
IOrAlt,
|
||||
IRuleConfig,
|
||||
ISerializedGast,
|
||||
IToken,
|
||||
ManySepMethodOpts,
|
||||
OrMethodOpts,
|
||||
SubruleMethodOpts,
|
||||
TokenType
|
||||
} from "@chevrotain/types"
|
||||
import values from "lodash/values"
|
||||
import includes from "lodash/includes"
|
||||
import { isRecognitionException } from "../../exceptions_public"
|
||||
import { DEFAULT_RULE_CONFIG, ParserDefinitionErrorType } from "../parser"
|
||||
import { defaultGrammarValidatorErrorProvider } from "../../errors_public"
|
||||
import { validateRuleIsOverridden } from "../../grammar/checks"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
import { Rule, serializeGrammar } from "@chevrotain/gast"
|
||||
import { IParserDefinitionError } from "../../grammar/types"
|
||||
import { ParserMethodInternal } from "../types"
|
||||
|
||||
/**
|
||||
* This trait is responsible for implementing the public API
|
||||
* for defining Chevrotain parsers, i.e:
|
||||
* - CONSUME
|
||||
* - RULE
|
||||
* - OPTION
|
||||
* - ...
|
||||
*/
|
||||
export class RecognizerApi {
|
||||
ACTION<T>(this: MixedInParser, impl: () => T): T {
|
||||
return impl.call(this)
|
||||
}
|
||||
|
||||
consume(
|
||||
this: MixedInParser,
|
||||
idx: number,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, idx, options)
|
||||
}
|
||||
|
||||
subrule<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
idx: number,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, idx, options)
|
||||
}
|
||||
|
||||
option<OUT>(
|
||||
this: MixedInParser,
|
||||
idx: number,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, idx)
|
||||
}
|
||||
|
||||
or(
|
||||
this: MixedInParser,
|
||||
idx: number,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<any>
|
||||
): any {
|
||||
return this.orInternal(altsOrOpts, idx)
|
||||
}
|
||||
|
||||
many(
|
||||
this: MixedInParser,
|
||||
idx: number,
|
||||
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
|
||||
): void {
|
||||
return this.manyInternal(idx, actionORMethodDef)
|
||||
}
|
||||
|
||||
atLeastOne(
|
||||
this: MixedInParser,
|
||||
idx: number,
|
||||
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
|
||||
): void {
|
||||
return this.atLeastOneInternal(idx, actionORMethodDef)
|
||||
}
|
||||
|
||||
CONSUME(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 0, options)
|
||||
}
|
||||
|
||||
CONSUME1(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 1, options)
|
||||
}
|
||||
|
||||
CONSUME2(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 2, options)
|
||||
}
|
||||
|
||||
CONSUME3(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 3, options)
|
||||
}
|
||||
|
||||
CONSUME4(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 4, options)
|
||||
}
|
||||
|
||||
CONSUME5(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 5, options)
|
||||
}
|
||||
|
||||
CONSUME6(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 6, options)
|
||||
}
|
||||
|
||||
CONSUME7(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 7, options)
|
||||
}
|
||||
|
||||
CONSUME8(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 8, options)
|
||||
}
|
||||
|
||||
CONSUME9(
|
||||
this: MixedInParser,
|
||||
tokType: TokenType,
|
||||
options?: ConsumeMethodOpts
|
||||
): IToken {
|
||||
return this.consumeInternal(tokType, 9, options)
|
||||
}
|
||||
|
||||
SUBRULE<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 0, options)
|
||||
}
|
||||
|
||||
SUBRULE1<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 1, options)
|
||||
}
|
||||
|
||||
SUBRULE2<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 2, options)
|
||||
}
|
||||
|
||||
SUBRULE3<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 3, options)
|
||||
}
|
||||
|
||||
SUBRULE4<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 4, options)
|
||||
}
|
||||
|
||||
SUBRULE5<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 5, options)
|
||||
}
|
||||
|
||||
SUBRULE6<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 6, options)
|
||||
}
|
||||
|
||||
SUBRULE7<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 7, options)
|
||||
}
|
||||
|
||||
SUBRULE8<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 8, options)
|
||||
}
|
||||
|
||||
SUBRULE9<ARGS extends unknown[], R>(
|
||||
this: MixedInParser,
|
||||
ruleToCall: ParserMethodInternal<ARGS, R>,
|
||||
options?: SubruleMethodOpts<ARGS>
|
||||
): R {
|
||||
return this.subruleInternal(ruleToCall, 9, options)
|
||||
}
|
||||
|
||||
OPTION<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 0)
|
||||
}
|
||||
|
||||
OPTION1<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 1)
|
||||
}
|
||||
|
||||
OPTION2<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 2)
|
||||
}
|
||||
|
||||
OPTION3<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 3)
|
||||
}
|
||||
|
||||
OPTION4<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 4)
|
||||
}
|
||||
|
||||
OPTION5<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 5)
|
||||
}
|
||||
|
||||
OPTION6<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 6)
|
||||
}
|
||||
|
||||
OPTION7<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 7)
|
||||
}
|
||||
|
||||
OPTION8<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 8)
|
||||
}
|
||||
|
||||
OPTION9<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): OUT | undefined {
|
||||
return this.optionInternal(actionORMethodDef, 9)
|
||||
}
|
||||
|
||||
OR<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 0)
|
||||
}
|
||||
|
||||
OR1<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 1)
|
||||
}
|
||||
|
||||
OR2<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 2)
|
||||
}
|
||||
|
||||
OR3<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 3)
|
||||
}
|
||||
|
||||
OR4<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 4)
|
||||
}
|
||||
|
||||
OR5<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 5)
|
||||
}
|
||||
|
||||
OR6<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 6)
|
||||
}
|
||||
|
||||
OR7<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 7)
|
||||
}
|
||||
|
||||
OR8<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 8)
|
||||
}
|
||||
|
||||
OR9<T>(
|
||||
this: MixedInParser,
|
||||
altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>
|
||||
): T {
|
||||
return this.orInternal(altsOrOpts, 9)
|
||||
}
|
||||
|
||||
MANY<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(0, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY1<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(1, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY2<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(2, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY3<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(3, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY4<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(4, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY5<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(5, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY6<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(6, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY7<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(7, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY8<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(8, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY9<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
|
||||
): void {
|
||||
this.manyInternal(9, actionORMethodDef)
|
||||
}
|
||||
|
||||
MANY_SEP<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(0, options)
|
||||
}
|
||||
|
||||
MANY_SEP1<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(1, options)
|
||||
}
|
||||
|
||||
MANY_SEP2<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(2, options)
|
||||
}
|
||||
|
||||
MANY_SEP3<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(3, options)
|
||||
}
|
||||
|
||||
MANY_SEP4<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(4, options)
|
||||
}
|
||||
|
||||
MANY_SEP5<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(5, options)
|
||||
}
|
||||
|
||||
MANY_SEP6<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(6, options)
|
||||
}
|
||||
|
||||
MANY_SEP7<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(7, options)
|
||||
}
|
||||
|
||||
MANY_SEP8<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(8, options)
|
||||
}
|
||||
|
||||
MANY_SEP9<OUT>(this: MixedInParser, options: ManySepMethodOpts<OUT>): void {
|
||||
this.manySepFirstInternal(9, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(0, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE1<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
return this.atLeastOneInternal(1, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE2<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(2, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE3<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(3, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE4<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(4, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE5<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(5, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE6<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(6, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE7<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(7, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE8<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(8, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE9<OUT>(
|
||||
this: MixedInParser,
|
||||
actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
|
||||
): void {
|
||||
this.atLeastOneInternal(9, actionORMethodDef)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(0, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP1<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(1, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP2<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(2, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP3<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(3, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP4<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(4, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP5<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(5, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP6<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(6, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP7<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(7, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP8<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(8, options)
|
||||
}
|
||||
|
||||
AT_LEAST_ONE_SEP9<OUT>(
|
||||
this: MixedInParser,
|
||||
options: AtLeastOneSepMethodOpts<OUT>
|
||||
): void {
|
||||
this.atLeastOneSepFirstInternal(9, options)
|
||||
}
|
||||
|
||||
RULE<T>(
|
||||
this: MixedInParser,
|
||||
name: string,
|
||||
implementation: (...implArgs: any[]) => T,
|
||||
config: IRuleConfig<T> = DEFAULT_RULE_CONFIG
|
||||
): (idxInCallingRule?: number, ...args: any[]) => T | any {
|
||||
if (includes(this.definedRulesNames, name)) {
|
||||
const errMsg =
|
||||
defaultGrammarValidatorErrorProvider.buildDuplicateRuleNameError({
|
||||
topLevelRule: name,
|
||||
grammarName: this.className
|
||||
})
|
||||
|
||||
const error = {
|
||||
message: errMsg,
|
||||
type: ParserDefinitionErrorType.DUPLICATE_RULE_NAME,
|
||||
ruleName: name
|
||||
}
|
||||
this.definitionErrors.push(error)
|
||||
}
|
||||
|
||||
this.definedRulesNames.push(name)
|
||||
|
||||
const ruleImplementation = this.defineRule(name, implementation, config)
|
||||
;(this as any)[name] = ruleImplementation
|
||||
return ruleImplementation
|
||||
}
|
||||
|
||||
/**
 * Re-defines an existing grammar rule (typically one inherited from a parent
 * grammar/parser class) with a new implementation.
 *
 * Validation errors (e.g. overriding a rule that was never defined) are
 * accumulated into `definitionErrors` rather than thrown.
 *
 * @param name - name of the rule being overridden.
 * @param impl - the replacement grammar action.
 * @param config - optional per-rule configuration.
 * @returns the wrapped, directly invocable overriding rule.
 */
OVERRIDE_RULE<T>(
  this: MixedInParser,
  name: string,
  impl: (...implArgs: any[]) => T,
  config: IRuleConfig<T> = DEFAULT_RULE_CONFIG
): (idxInCallingRule?: number, ...args: any[]) => T {
  // Overriding is only legal for names that already exist on this grammar.
  const overrideErrors: IParserDefinitionError[] = validateRuleIsOverridden(
    name,
    this.definedRulesNames,
    this.className
  )
  this.definitionErrors = this.definitionErrors.concat(overrideErrors)

  const overridingRule = this.defineRule(name, impl, config)
  // Replace the previous implementation on the parser instance.
  ;(this as any)[name] = overridingRule
  return overridingRule
}
|
||||
|
||||
/**
 * Builds a backtracking predicate from a grammar rule.
 *
 * The returned function speculatively parses `grammarRule` and reports
 * whether it would succeed, restoring the parser's full recognition state
 * (lexer position, errors, rule stack) regardless of the outcome.
 *
 * @param grammarRule - the rule to attempt speculatively.
 * @param args - optional arguments forwarded to the rule.
 * @returns a predicate: `true` if the rule parses cleanly, `false` on a
 *          recognition error; non-recognition errors are rethrown.
 */
BACKTRACK<T>(
  this: MixedInParser,
  grammarRule: (...args: any[]) => T,
  args?: any[]
): () => boolean {
  // Deliberately a regular function (not an arrow) so `this` is bound
  // dynamically to whichever parser instance invokes the predicate.
  return function () {
    // Mark that we are backtracking and snapshot the current state.
    this.isBackTrackingStack.push(1)
    const originalState = this.saveRecogState()
    try {
      grammarRule.apply(this, args)
      // No exception thrown -> the rule would parse successfully here.
      return true
    } catch (e) {
      if (!isRecognitionException(e)) {
        // Unexpected (non-parser) errors must never be swallowed.
        throw e
      }
      return false
    } finally {
      // Backtracking must leave no trace: restore state and pop the marker.
      this.reloadRecogState(originalState)
      this.isBackTrackingStack.pop()
    }
  }
}
|
||||
|
||||
// GAST export APIs
|
||||
/**
 * Returns this grammar's GAST (Grammar AST) productions, keyed by rule name.
 * Note: this is the parser's internal cache object itself, not a copy.
 */
public getGAstProductions(this: MixedInParser): Record<string, Rule> {
  return this.gastProductionsCache
}
|
||||
|
||||
/**
 * Returns the grammar productions in a serialized (plain-data) form,
 * suitable for JSON transport or diagram generation.
 */
public getSerializedGastProductions(this: MixedInParser): ISerializedGast[] {
  return serializeGrammar(values(this.gastProductionsCache))
}
|
||||
}
|
||||
860
_node_modules/chevrotain/src/parse/parser/traits/recognizer_engine.ts
generated
Normal file
860
_node_modules/chevrotain/src/parse/parser/traits/recognizer_engine.ts
generated
Normal file
@@ -0,0 +1,860 @@
|
||||
import {
|
||||
AtLeastOneSepMethodOpts,
|
||||
ConsumeMethodOpts,
|
||||
DSLMethodOpts,
|
||||
DSLMethodOptsWithErr,
|
||||
GrammarAction,
|
||||
IOrAlt,
|
||||
IParserConfig,
|
||||
IRuleConfig,
|
||||
IToken,
|
||||
ManySepMethodOpts,
|
||||
OrMethodOpts,
|
||||
ParserMethod,
|
||||
SubruleMethodOpts,
|
||||
TokenType,
|
||||
TokenTypeDictionary,
|
||||
TokenVocabulary
|
||||
} from "@chevrotain/types"
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import isArray from "lodash/isArray"
|
||||
import flatten from "lodash/flatten"
|
||||
import every from "lodash/every"
|
||||
import uniq from "lodash/uniq"
|
||||
import isObject from "lodash/isObject"
|
||||
import has from "lodash/has"
|
||||
import values from "lodash/values"
|
||||
import reduce from "lodash/reduce"
|
||||
import clone from "lodash/clone"
|
||||
import {
|
||||
AT_LEAST_ONE_IDX,
|
||||
AT_LEAST_ONE_SEP_IDX,
|
||||
BITS_FOR_METHOD_TYPE,
|
||||
BITS_FOR_OCCURRENCE_IDX,
|
||||
MANY_IDX,
|
||||
MANY_SEP_IDX,
|
||||
OPTION_IDX,
|
||||
OR_IDX
|
||||
} from "../../grammar/keys"
|
||||
import {
|
||||
isRecognitionException,
|
||||
MismatchedTokenException,
|
||||
NotAllInputParsedException
|
||||
} from "../../exceptions_public"
|
||||
import { PROD_TYPE } from "../../grammar/lookahead"
|
||||
import {
|
||||
AbstractNextTerminalAfterProductionWalker,
|
||||
NextTerminalAfterAtLeastOneSepWalker,
|
||||
NextTerminalAfterAtLeastOneWalker,
|
||||
NextTerminalAfterManySepWalker,
|
||||
NextTerminalAfterManyWalker
|
||||
} from "../../grammar/interpreter"
|
||||
import { DEFAULT_RULE_CONFIG, IParserState, TokenMatcher } from "../parser"
|
||||
import { IN_RULE_RECOVERY_EXCEPTION } from "./recoverable"
|
||||
import { EOF } from "../../../scan/tokens_public"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
import {
|
||||
augmentTokenTypes,
|
||||
isTokenType,
|
||||
tokenStructuredMatcher,
|
||||
tokenStructuredMatcherNoCategories
|
||||
} from "../../../scan/tokens"
|
||||
import { Rule } from "@chevrotain/gast"
|
||||
import { ParserMethodInternal } from "../types"
|
||||
|
||||
/**
 * This trait is responsible for the runtime parsing engine
 * Used by the official API (recognizer_api.ts)
 */
export class RecognizerEngine {
  // NOTE(review): BACKTRACK pushes the number 1 onto this stack although it is
  // typed boolean[] — only emptiness is ever checked (see isBackTracking);
  // confirm against upstream whether the type is intentional.
  isBackTrackingStack: boolean[]
  className: string
  // Stack of numeric rule "short names" for the rules currently being invoked.
  RULE_STACK: number[]
  // Occurrence index (SUBRULE1/2/...) of each entry on RULE_STACK.
  RULE_OCCURRENCE_STACK: number[]
  definedRulesNames: string[]
  // All TokenTypes of this grammar, keyed by token name (always includes EOF).
  tokensMap: { [fqn: string]: TokenType }
  gastProductionsCache: Record<string, Rule>
  shortRuleNameToFull: Record<string, string>
  fullRuleNameToShort: Record<string, number>
  // The shortName Index must be coded "after" the first 8bits to enable building unique lookahead keys
  ruleShortNameIdx: number
  tokenMatcher: TokenMatcher
  subruleIdx: number

  /**
   * Initializes all engine state (stacks, lookup tables, token map, matcher),
   * validates the token vocabulary argument, and rejects configurations from
   * removed pre-v4.0 / pre-v6.0 APIs with explicit error messages.
   */
  initRecognizerEngine(
    tokenVocabulary: TokenVocabulary,
    config: IParserConfig
  ) {
    this.className = this.constructor.name
    // TODO: would using an ES6 Map or plain object be faster (CST building scenario)
    this.shortRuleNameToFull = {}
    this.fullRuleNameToShort = {}
    this.ruleShortNameIdx = 256
    this.tokenMatcher = tokenStructuredMatcherNoCategories
    this.subruleIdx = 0

    this.definedRulesNames = []
    this.tokensMap = {}
    this.isBackTrackingStack = []
    this.RULE_STACK = []
    this.RULE_OCCURRENCE_STACK = []
    this.gastProductionsCache = {}

    if (has(config, "serializedGrammar")) {
      throw Error(
        "The Parser's configuration can no longer contain a <serializedGrammar> property.\n" +
          "\tSee: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_6-0-0\n" +
          "\tFor Further details."
      )
    }

    if (isArray(tokenVocabulary)) {
      // This only checks for Token vocabularies provided as arrays.
      // That is good enough because the main objective is to detect users of pre-V4.0 APIs
      // rather than all edge cases of empty Token vocabularies.
      if (isEmpty(tokenVocabulary as any[])) {
        throw Error(
          "A Token Vocabulary cannot be empty.\n" +
            "\tNote that the first argument for the parser constructor\n" +
            "\tis no longer a Token vector (since v4.0)."
        )
      }

      // An IToken (lexed token instance) has a numeric startOffset; a
      // TokenType does not — this heuristically detects a pre-v4.0 call.
      if (typeof (tokenVocabulary as any[])[0].startOffset === "number") {
        throw Error(
          "The Parser constructor no longer accepts a token vector as the first argument.\n" +
            "\tSee: https://chevrotain.io/docs/changes/BREAKING_CHANGES.html#_4-0-0\n" +
            "\tFor Further details."
        )
      }
    }

    // Build tokensMap from one of the three supported vocabulary shapes:
    // an array of TokenTypes, a multi-mode lexer definition, or a dictionary.
    if (isArray(tokenVocabulary)) {
      this.tokensMap = reduce(
        tokenVocabulary,
        (acc, tokType: TokenType) => {
          acc[tokType.name] = tokType
          return acc
        },
        {} as { [tokenName: string]: TokenType }
      )
    } else if (
      has(tokenVocabulary, "modes") &&
      every(flatten(values((<any>tokenVocabulary).modes)), isTokenType)
    ) {
      const allTokenTypes = flatten(values((<any>tokenVocabulary).modes))
      const uniqueTokens = uniq(allTokenTypes)
      this.tokensMap = <any>reduce(
        uniqueTokens,
        (acc, tokType: TokenType) => {
          acc[tokType.name] = tokType
          return acc
        },
        {} as { [tokenName: string]: TokenType }
      )
    } else if (isObject(tokenVocabulary)) {
      this.tokensMap = clone(tokenVocabulary as TokenTypeDictionary)
    } else {
      throw new Error(
        "<tokensDictionary> argument must be An Array of Token constructors," +
          " A dictionary of Token constructors or an IMultiModeLexerDefinition"
      )
    }

    // always add EOF to the tokenNames -> constructors map. it is useful to assure all the input has been
    // parsed with a clear error message ("expecting EOF but found ...")
    this.tokensMap["EOF"] = EOF

    const allTokenTypes = has(tokenVocabulary, "modes")
      ? flatten(values((<any>tokenVocabulary).modes))
      : values(tokenVocabulary)
    const noTokenCategoriesUsed = every(allTokenTypes, (tokenConstructor) =>
      isEmpty(tokenConstructor.categoryMatches)
    )

    // Pick the cheaper matcher when no token categories are used anywhere.
    this.tokenMatcher = noTokenCategoriesUsed
      ? tokenStructuredMatcherNoCategories
      : tokenStructuredMatcher

    // Because ES2015+ syntax should be supported for creating Token classes
    // We cannot assume that the Token classes were created using the "extendToken" utilities
    // Therefore we must augment the Token classes both on Lexer initialization and on Parser initialization
    augmentTokenTypes(values(this.tokensMap))
  }

  /**
   * Wraps a grammar rule implementation with invocation-state management,
   * error handling (resync recovery) and — when `outputCst` is enabled —
   * CST construction. Assigns the rule a numeric "short name" used for
   * fast lookahead-key construction.
   * @throws if called after `performSelfAnalysis` has already run.
   */
  defineRule<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleName: string,
    impl: (...args: ARGS) => R,
    config: IRuleConfig<R>
  ): ParserMethodInternal<ARGS, R> {
    if (this.selfAnalysisDone) {
      throw Error(
        `Grammar rule <${ruleName}> may not be defined after the 'performSelfAnalysis' method has been called'\n` +
          `Make sure that all grammar rule definitions are done before 'performSelfAnalysis' is called.`
      )
    }
    const resyncEnabled: boolean = has(config, "resyncEnabled")
      ? (config.resyncEnabled as boolean) // assumes end user provides the correct config value/type
      : DEFAULT_RULE_CONFIG.resyncEnabled
    const recoveryValueFunc = has(config, "recoveryValueFunc")
      ? (config.recoveryValueFunc as () => R) // assumes end user provides the correct config value/type
      : DEFAULT_RULE_CONFIG.recoveryValueFunc

    // performance optimization: Use small integers as keys for the longer human readable "full" rule names.
    // this greatly improves Map access time (as much as 8% for some performance benchmarks).
    const shortName =
      this.ruleShortNameIdx << (BITS_FOR_METHOD_TYPE + BITS_FOR_OCCURRENCE_IDX)

    this.ruleShortNameIdx++
    this.shortRuleNameToFull[shortName] = ruleName
    this.fullRuleNameToShort[ruleName] = shortName

    let invokeRuleWithTry: ParserMethod<ARGS, R>

    // Micro optimization, only check the condition **once** on rule definition
    // instead of **every single** rule invocation.
    if (this.outputCst === true) {
      invokeRuleWithTry = function invokeRuleWithTry(
        this: MixedInParser,
        ...args: ARGS
      ): R {
        try {
          this.ruleInvocationStateUpdate(shortName, ruleName, this.subruleIdx)
          impl.apply(this, args)
          // The CST node built during `impl` is the rule's result.
          const cst = this.CST_STACK[this.CST_STACK.length - 1]
          this.cstPostRule(cst)
          return cst as unknown as R
        } catch (e) {
          return this.invokeRuleCatch(e, resyncEnabled, recoveryValueFunc) as R
        } finally {
          this.ruleFinallyStateUpdate()
        }
      }
    } else {
      // NOTE(review): the inner function name "invokeRuleWithTryCst" appears
      // swapped with the CST branch above — confirm against upstream; it only
      // affects stack traces, not behavior.
      invokeRuleWithTry = function invokeRuleWithTryCst(
        this: MixedInParser,
        ...args: ARGS
      ): R {
        try {
          this.ruleInvocationStateUpdate(shortName, ruleName, this.subruleIdx)
          return impl.apply(this, args)
        } catch (e) {
          return this.invokeRuleCatch(e, resyncEnabled, recoveryValueFunc) as R
        } finally {
          this.ruleFinallyStateUpdate()
        }
      }
    }

    // Attach metadata used elsewhere (e.g. SUBRULE needs ruleName).
    const wrappedGrammarRule: ParserMethodInternal<ARGS, R> = Object.assign(
      invokeRuleWithTry as any,
      { ruleName, originalGrammarAction: impl }
    )

    return wrappedGrammarRule
  }

  /**
   * Central error handler for a rule invocation.
   * Recognition exceptions may trigger resync recovery (yielding a partial
   * CST or the rule's recovery value); any other error type is rethrown.
   */
  invokeRuleCatch(
    this: MixedInParser,
    e: Error,
    resyncEnabledConfig: boolean,
    recoveryValueFunc: Function
  ): unknown {
    const isFirstInvokedRule = this.RULE_STACK.length === 1
    // note the reSync is always enabled for the first rule invocation, because we must always be able to
    // reSync with EOF and just output some INVALID ParseTree
    // during backtracking reSync recovery is disabled, otherwise we can't be certain the backtracking
    // path is really the most valid one
    const reSyncEnabled =
      resyncEnabledConfig && !this.isBackTracking() && this.recoveryEnabled

    if (isRecognitionException(e)) {
      const recogError: any = e
      if (reSyncEnabled) {
        const reSyncTokType = this.findReSyncTokenType()
        if (this.isInCurrentRuleReSyncSet(reSyncTokType)) {
          recogError.resyncedTokens = this.reSyncTo(reSyncTokType)
          if (this.outputCst) {
            const partialCstResult: any =
              this.CST_STACK[this.CST_STACK.length - 1]
            partialCstResult.recoveredNode = true
            return partialCstResult
          } else {
            return recoveryValueFunc(e)
          }
        } else {
          if (this.outputCst) {
            // Attach the partial CST so an enclosing rule can still use it.
            const partialCstResult: any =
              this.CST_STACK[this.CST_STACK.length - 1]
            partialCstResult.recoveredNode = true
            recogError.partialCstResult = partialCstResult
          }
          // to be handled Further up the call stack
          throw recogError
        }
      } else if (isFirstInvokedRule) {
        // otherwise a Redundant input error will be created as well and we cannot guarantee that this is indeed the case
        this.moveToTerminatedState()
        // the parser should never throw one of its own errors outside its flow.
        // even if error recovery is disabled
        return recoveryValueFunc(e)
      } else {
        // to be recovered Further up the call stack
        throw recogError
      }
    } else {
      // some other Error type which we don't know how to handle (for example a built in JavaScript Error)
      throw e
    }
  }

  // Implementation of parsing DSL
  /**
   * Runtime implementation of OPTION: computes the lookahead key for this
   * call site and delegates to {@link optionInternalLogic}.
   */
  optionInternal<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
    occurrence: number
  ): OUT | undefined {
    const key = this.getKeyForAutomaticLookahead(OPTION_IDX, occurrence)
    return this.optionInternalLogic(actionORMethodDef, occurrence, key)
  }

  /**
   * Executes the optional production when the (possibly GATE-augmented)
   * lookahead predicate matches; otherwise returns undefined.
   */
  optionInternalLogic<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
    occurrence: number,
    key: number
  ): OUT | undefined {
    let lookAheadFunc = this.getLaFuncFromCache(key)
    let action: GrammarAction<OUT>
    if (typeof actionORMethodDef !== "function") {
      action = actionORMethodDef.DEF
      const predicate = actionORMethodDef.GATE
      // predicate present
      if (predicate !== undefined) {
        // The GATE must pass in addition to the automatic lookahead.
        const orgLookaheadFunction = lookAheadFunc
        lookAheadFunc = () => {
          return predicate.call(this) && orgLookaheadFunction.call(this)
        }
      }
    } else {
      action = actionORMethodDef
    }

    if (lookAheadFunc.call(this) === true) {
      return action.call(this)
    }
    return undefined
  }

  /**
   * Runtime implementation of AT_LEAST_ONE: computes the lookahead key and
   * delegates to {@link atLeastOneInternalLogic}.
   */
  atLeastOneInternal<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    const laKey = this.getKeyForAutomaticLookahead(
      AT_LEAST_ONE_IDX,
      prodOccurrence
    )
    return this.atLeastOneInternalLogic(
      prodOccurrence,
      actionORMethodDef,
      laKey
    )
  }

  /**
   * Repeats `action` one-or-more times while the lookahead matches,
   * raising an early-exit error if not even one iteration is possible.
   * Iteration also stops when a repetition makes no progress ("stuck").
   */
  atLeastOneInternalLogic<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>,
    key: number
  ): void {
    let lookAheadFunc = this.getLaFuncFromCache(key)
    let action
    if (typeof actionORMethodDef !== "function") {
      action = actionORMethodDef.DEF
      const predicate = actionORMethodDef.GATE
      // predicate present
      if (predicate !== undefined) {
        const orgLookaheadFunction = lookAheadFunc
        lookAheadFunc = () => {
          return predicate.call(this) && orgLookaheadFunction.call(this)
        }
      }
    } else {
      action = actionORMethodDef
    }

    if ((<Function>lookAheadFunc).call(this) === true) {
      let notStuck = this.doSingleRepetition(action)
      while (
        (<Function>lookAheadFunc).call(this) === true &&
        notStuck === true
      ) {
        notStuck = this.doSingleRepetition(action)
      }
    } else {
      // Mandatory repetition failed on the very first iteration.
      throw this.raiseEarlyExitException(
        prodOccurrence,
        PROD_TYPE.REPETITION_MANDATORY,
        (<DSLMethodOptsWithErr<OUT>>actionORMethodDef).ERR_MSG
      )
    }

    // note that while it may seem that this can cause an error because by using a recursive call to
    // AT_LEAST_ONE we change the grammar to AT_LEAST_TWO, AT_LEAST_THREE ... , the possible recursive call
    // from the tryInRepetitionRecovery(...) will only happen IFF there really are TWO/THREE/.... items.

    // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
    this.attemptInRepetitionRecovery(
      this.atLeastOneInternal,
      [prodOccurrence, actionORMethodDef],
      <any>lookAheadFunc,
      AT_LEAST_ONE_IDX,
      prodOccurrence,
      NextTerminalAfterAtLeastOneWalker
    )
  }

  /**
   * Runtime implementation of AT_LEAST_ONE_SEP: computes the lookahead key
   * and delegates to {@link atLeastOneSepFirstInternalLogic}.
   */
  atLeastOneSepFirstInternal<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    const laKey = this.getKeyForAutomaticLookahead(
      AT_LEAST_ONE_SEP_IDX,
      prodOccurrence
    )
    this.atLeastOneSepFirstInternalLogic(prodOccurrence, options, laKey)
  }

  /**
   * One-or-more repetition with a separator token: parses the mandatory
   * first iteration, then "SEP action" pairs while the separator is next,
   * raising an early-exit error if the first iteration cannot start.
   */
  atLeastOneSepFirstInternalLogic<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    options: AtLeastOneSepMethodOpts<OUT>,
    key: number
  ): void {
    const action = options.DEF
    const separator = options.SEP

    const firstIterationLookaheadFunc = this.getLaFuncFromCache(key)

    // 1st iteration
    if (firstIterationLookaheadFunc.call(this) === true) {
      ;(<GrammarAction<OUT>>action).call(this)

      // TODO: Optimization can move this function construction into "attemptInRepetitionRecovery"
      // because it is only needed in error recovery scenarios.
      const separatorLookAheadFunc = () => {
        return this.tokenMatcher(this.LA(1), separator)
      }

      // 2nd..nth iterations
      while (this.tokenMatcher(this.LA(1), separator) === true) {
        // note that this CONSUME will never enter recovery because
        // the separatorLookAheadFunc checks that the separator really does exist.
        this.CONSUME(separator)
        // No need for checking infinite loop here due to consuming the separator.
        ;(<GrammarAction<OUT>>action).call(this)
      }

      // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
      this.attemptInRepetitionRecovery(
        this.repetitionSepSecondInternal,
        [
          prodOccurrence,
          separator,
          separatorLookAheadFunc,
          action,
          NextTerminalAfterAtLeastOneSepWalker
        ],
        separatorLookAheadFunc,
        AT_LEAST_ONE_SEP_IDX,
        prodOccurrence,
        NextTerminalAfterAtLeastOneSepWalker
      )
    } else {
      throw this.raiseEarlyExitException(
        prodOccurrence,
        PROD_TYPE.REPETITION_MANDATORY_WITH_SEPARATOR,
        options.ERR_MSG
      )
    }
  }

  /**
   * Runtime implementation of MANY: computes the lookahead key and delegates
   * to {@link manyInternalLogic}.
   */
  manyInternal<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    const laKey = this.getKeyForAutomaticLookahead(MANY_IDX, prodOccurrence)
    return this.manyInternalLogic(prodOccurrence, actionORMethodDef, laKey)
  }

  /**
   * Zero-or-more repetition: repeats `action` while the (possibly
   * GATE-augmented) lookahead matches and each iteration makes progress.
   */
  manyInternalLogic<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
    key: number
  ) {
    let lookaheadFunction = this.getLaFuncFromCache(key)
    let action
    if (typeof actionORMethodDef !== "function") {
      action = actionORMethodDef.DEF
      const predicate = actionORMethodDef.GATE
      // predicate present
      if (predicate !== undefined) {
        const orgLookaheadFunction = lookaheadFunction
        lookaheadFunction = () => {
          return predicate.call(this) && orgLookaheadFunction.call(this)
        }
      }
    } else {
      action = actionORMethodDef
    }

    let notStuck = true
    while (lookaheadFunction.call(this) === true && notStuck === true) {
      notStuck = this.doSingleRepetition(action)
    }

    // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
    this.attemptInRepetitionRecovery(
      this.manyInternal,
      [prodOccurrence, actionORMethodDef],
      <any>lookaheadFunction,
      MANY_IDX,
      prodOccurrence,
      NextTerminalAfterManyWalker,
      // The notStuck parameter is only relevant when "attemptInRepetitionRecovery"
      // is invoked from manyInternal, in the MANY_SEP case and AT_LEAST_ONE[_SEP]
      // An infinite loop cannot occur as:
      // - Either the lookahead is guaranteed to consume something (Single Token Separator)
      // - AT_LEAST_ONE by definition is guaranteed to consume something (or error out).
      notStuck
    )
  }

  /**
   * Runtime implementation of MANY_SEP: computes the lookahead key and
   * delegates to {@link manySepFirstInternalLogic}.
   */
  manySepFirstInternal<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    options: ManySepMethodOpts<OUT>
  ): void {
    const laKey = this.getKeyForAutomaticLookahead(MANY_SEP_IDX, prodOccurrence)
    this.manySepFirstInternalLogic(prodOccurrence, options, laKey)
  }

  /**
   * Zero-or-more repetition with a separator: optionally parses the first
   * iteration, then "SEP action" pairs while the separator token is next.
   */
  manySepFirstInternalLogic<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    options: ManySepMethodOpts<OUT>,
    key: number
  ): void {
    const action = options.DEF
    const separator = options.SEP
    const firstIterationLaFunc = this.getLaFuncFromCache(key)

    // 1st iteration
    if (firstIterationLaFunc.call(this) === true) {
      action.call(this)

      const separatorLookAheadFunc = () => {
        return this.tokenMatcher(this.LA(1), separator)
      }
      // 2nd..nth iterations
      while (this.tokenMatcher(this.LA(1), separator) === true) {
        // note that this CONSUME will never enter recovery because
        // the separatorLookAheadFunc checks that the separator really does exist.
        this.CONSUME(separator)
        // No need for checking infinite loop here due to consuming the separator.
        action.call(this)
      }

      // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
      this.attemptInRepetitionRecovery(
        this.repetitionSepSecondInternal,
        [
          prodOccurrence,
          separator,
          separatorLookAheadFunc,
          action,
          NextTerminalAfterManySepWalker
        ],
        separatorLookAheadFunc,
        MANY_SEP_IDX,
        prodOccurrence,
        NextTerminalAfterManySepWalker
      )
    }
  }

  /**
   * Continues a separated repetition after error recovery: consumes further
   * "SEP action" pairs, then re-arms repetition recovery for this call site.
   */
  repetitionSepSecondInternal<OUT>(
    this: MixedInParser,
    prodOccurrence: number,
    separator: TokenType,
    separatorLookAheadFunc: () => boolean,
    action: GrammarAction<OUT>,
    nextTerminalAfterWalker: typeof AbstractNextTerminalAfterProductionWalker
  ): void {
    while (separatorLookAheadFunc()) {
      // note that this CONSUME will never enter recovery because
      // the separatorLookAheadFunc checks that the separator really does exist.
      this.CONSUME(separator)
      action.call(this)
    }

    // we can only arrive to this function after an error
    // has occurred (hence the name 'second') so the following
    // IF will always be entered, its possible to remove it...
    // however it is kept to avoid confusion and be consistent.
    // Performance optimization: "attemptInRepetitionRecovery" will be defined as NOOP unless recovery is enabled
    /* istanbul ignore else */
    this.attemptInRepetitionRecovery(
      this.repetitionSepSecondInternal,
      [
        prodOccurrence,
        separator,
        separatorLookAheadFunc,
        action,
        nextTerminalAfterWalker
      ],
      separatorLookAheadFunc,
      AT_LEAST_ONE_SEP_IDX,
      prodOccurrence,
      nextTerminalAfterWalker
    )
  }

  /**
   * Runs one repetition iteration and reports whether the lexer position
   * advanced — `false` signals a potential infinite (empty-match) loop.
   */
  doSingleRepetition(this: MixedInParser, action: Function): any {
    const beforeIteration = this.getLexerPosition()
    action.call(this)
    const afterIteration = this.getLexerPosition()

    // This boolean will indicate if this repetition progressed
    // or if we are "stuck" (potential infinite loop in the repetition).
    return afterIteration > beforeIteration
  }

  /**
   * Runtime implementation of OR: picks an alternative via the cached
   * lookahead function and executes it, raising a no-viable-alternative
   * error when none matches.
   */
  orInternal<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>,
    occurrence: number
  ): T {
    const laKey = this.getKeyForAutomaticLookahead(OR_IDX, occurrence)
    const alts = isArray(altsOrOpts) ? altsOrOpts : altsOrOpts.DEF

    const laFunc = this.getLaFuncFromCache(laKey)
    const altIdxToTake = laFunc.call(this, alts)
    if (altIdxToTake !== undefined) {
      const chosenAlternative: any = alts[altIdxToTake]
      return chosenAlternative.ALT.call(this)
    }
    // No alternative matched; raiseNoAltException throws.
    this.raiseNoAltException(
      occurrence,
      (altsOrOpts as OrMethodOpts<unknown>).ERR_MSG
    )
  }

  /**
   * Pops the rule/occurrence stacks after a rule finishes and — when the
   * outermost rule ends with unconsumed input — records a
   * NotAllInputParsedException.
   */
  ruleFinallyStateUpdate(this: MixedInParser): void {
    this.RULE_STACK.pop()
    this.RULE_OCCURRENCE_STACK.pop()

    // NOOP when cst is disabled
    this.cstFinallyStateUpdate()

    if (this.RULE_STACK.length === 0 && this.isAtEndOfInput() === false) {
      const firstRedundantTok = this.LA(1)
      const errMsg = this.errorMessageProvider.buildNotAllInputParsedMessage({
        firstRedundant: firstRedundantTok,
        ruleName: this.getCurrRuleFullName()
      })
      this.SAVE_ERROR(new NotAllInputParsedException(errMsg, firstRedundantTok))
    }
  }

  /**
   * Runtime implementation of SUBRULE: invokes the target rule with the
   * caller's occurrence index, attaches the result to the CST (using the
   * LABEL override when given), and routes errors through
   * {@link subruleInternalError}.
   */
  subruleInternal<ARGS extends unknown[], R>(
    this: MixedInParser,
    ruleToCall: ParserMethodInternal<ARGS, R>,
    idx: number,
    options?: SubruleMethodOpts<ARGS>
  ): R {
    let ruleResult
    try {
      const args = options !== undefined ? options.ARGS : undefined
      // Record which SUBRULE occurrence is invoking, for lookahead keys.
      this.subruleIdx = idx
      ruleResult = ruleToCall.apply(this, args)
      this.cstPostNonTerminal(
        ruleResult,
        options !== undefined && options.LABEL !== undefined
          ? options.LABEL
          : ruleToCall.ruleName
      )
      return ruleResult
    } catch (e) {
      throw this.subruleInternalError(e, options, ruleToCall.ruleName)
    }
  }

  /**
   * On a recognition error carrying a partial CST, attaches that partial
   * result to the enclosing CST node before rethrowing the error.
   */
  subruleInternalError(
    this: MixedInParser,
    e: any,
    options: SubruleMethodOpts<unknown[]> | undefined,
    ruleName: string
  ): void {
    if (isRecognitionException(e) && e.partialCstResult !== undefined) {
      this.cstPostNonTerminal(
        e.partialCstResult,
        options !== undefined && options.LABEL !== undefined
          ? options.LABEL
          : ruleName
      )

      delete e.partialCstResult
    }
    throw e
  }

  /**
   * Runtime implementation of CONSUME: consumes the next token when it
   * matches `tokType`, otherwise raises a mismatch error (possibly
   * recovering via single-token insertion/deletion), and records the
   * consumed token on the CST.
   */
  consumeInternal(
    this: MixedInParser,
    tokType: TokenType,
    idx: number,
    options: ConsumeMethodOpts | undefined
  ): IToken {
    let consumedToken!: IToken
    try {
      const nextToken = this.LA(1)
      if (this.tokenMatcher(nextToken, tokType) === true) {
        this.consumeToken()
        consumedToken = nextToken
      } else {
        this.consumeInternalError(tokType, nextToken, options)
      }
    } catch (eFromConsumption) {
      consumedToken = this.consumeInternalRecovery(
        tokType,
        idx,
        eFromConsumption
      )
    }

    this.cstPostTerminal(
      options !== undefined && options.LABEL !== undefined
        ? options.LABEL
        : tokType.name,
      consumedToken
    )
    return consumedToken
  }

  /**
   * Builds (honoring an ERR_MSG override) and throws a
   * MismatchedTokenException, after saving it to the parser's errors.
   */
  consumeInternalError(
    this: MixedInParser,
    tokType: TokenType,
    nextToken: IToken,
    options: ConsumeMethodOpts | undefined
  ): void {
    let msg
    const previousToken = this.LA(0)
    if (options !== undefined && options.ERR_MSG) {
      msg = options.ERR_MSG
    } else {
      msg = this.errorMessageProvider.buildMismatchTokenMessage({
        expected: tokType,
        actual: nextToken,
        previous: previousToken,
        ruleName: this.getCurrRuleFullName()
      })
    }
    throw this.SAVE_ERROR(
      new MismatchedTokenException(msg, nextToken, previousToken)
    )
  }

  /**
   * Attempts in-rule recovery for a failed CONSUME; rethrows the original
   * consumption error when recovery is disabled, backtracking, or fails
   * (which lets resync recovery take over further up the stack).
   */
  consumeInternalRecovery(
    this: MixedInParser,
    tokType: TokenType,
    idx: number,
    eFromConsumption: Error
  ): IToken {
    // no recovery allowed during backtracking, otherwise backtracking may recover invalid syntax and accept it
    // but the original syntax could have been parsed successfully without any backtracking + recovery
    if (
      this.recoveryEnabled &&
      // TODO: more robust checking of the exception type. Perhaps Typescript extending expressions?
      eFromConsumption.name === "MismatchedTokenException" &&
      !this.isBackTracking()
    ) {
      const follows = this.getFollowsForInRuleRecovery(<any>tokType, idx)
      try {
        return this.tryInRuleRecovery(<any>tokType, follows)
      } catch (eFromInRuleRecovery) {
        if (eFromInRuleRecovery.name === IN_RULE_RECOVERY_EXCEPTION) {
          // failed in RuleRecovery.
          // throw the original error in order to trigger reSync error recovery
          throw eFromConsumption
        } else {
          throw eFromInRuleRecovery
        }
      }
    } else {
      throw eFromConsumption
    }
  }

  /**
   * Snapshots the recognition state used by BACKTRACK.
   * NOTE(review): CST_STACK is stored by reference and is not restored by
   * reloadRecogState — presumably CST cleanup happens elsewhere; confirm.
   */
  saveRecogState(this: MixedInParser): IParserState {
    // errors is a getter which will clone the errors array
    const savedErrors = this.errors
    const savedRuleStack = clone(this.RULE_STACK)
    return {
      errors: savedErrors,
      lexerState: this.exportLexerState(),
      RULE_STACK: savedRuleStack,
      CST_STACK: this.CST_STACK
    }
  }

  /**
   * Restores errors, lexer position and the rule stack from a snapshot
   * produced by {@link saveRecogState}.
   */
  reloadRecogState(this: MixedInParser, newState: IParserState) {
    this.errors = newState.errors
    this.importLexerState(newState.lexerState)
    this.RULE_STACK = newState.RULE_STACK
  }

  /**
   * Pushes a rule invocation onto the rule/occurrence stacks and notifies
   * the CST builder (NOOP when CST output is disabled).
   */
  ruleInvocationStateUpdate(
    this: MixedInParser,
    shortName: number,
    fullName: string,
    idxInCallingRule: number
  ): void {
    this.RULE_OCCURRENCE_STACK.push(idxInCallingRule)
    this.RULE_STACK.push(shortName)
    // NOOP when cst is disabled
    this.cstInvocationStateUpdate(fullName)
  }

  /** True while at least one BACKTRACK predicate is executing. */
  isBackTracking(this: MixedInParser): boolean {
    return this.isBackTrackingStack.length !== 0
  }

  /** Full (human-readable) name of the rule currently being parsed. */
  getCurrRuleFullName(this: MixedInParser): string {
    const shortName = this.getLastExplicitRuleShortName()
    return this.shortRuleNameToFull[shortName]
  }

  /** Maps a numeric rule short name back to its full rule name. */
  shortRuleNameToFullName(this: MixedInParser, shortName: number) {
    return this.shortRuleNameToFull[shortName]
  }

  /** True when the next token is EOF, i.e. all input has been consumed. */
  public isAtEndOfInput(this: MixedInParser): boolean {
    return this.tokenMatcher(this.LA(1), EOF)
  }

  /**
   * Resets all per-parse mutable state so the same parser instance can be
   * reused for a new input.
   */
  public reset(this: MixedInParser): void {
    this.resetLexerState()
    this.subruleIdx = 0
    this.isBackTrackingStack = []
    this.errors = []
    this.RULE_STACK = []
    // TODO: extract a specific reset for TreeBuilder trait
    this.CST_STACK = []
    this.RULE_OCCURRENCE_STACK = []
  }
}
|
||||
471
_node_modules/chevrotain/src/parse/parser/traits/recoverable.ts
generated
Normal file
471
_node_modules/chevrotain/src/parse/parser/traits/recoverable.ts
generated
Normal file
@@ -0,0 +1,471 @@
|
||||
import {
|
||||
createTokenInstance,
|
||||
EOF,
|
||||
tokenMatcher
|
||||
} from "../../../scan/tokens_public"
|
||||
import {
|
||||
AbstractNextTerminalAfterProductionWalker,
|
||||
IFirstAfterRepetition
|
||||
} from "../../grammar/interpreter"
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import dropRight from "lodash/dropRight"
|
||||
import flatten from "lodash/flatten"
|
||||
import map from "lodash/map"
|
||||
import find from "lodash/find"
|
||||
import has from "lodash/has"
|
||||
import includes from "lodash/includes"
|
||||
import clone from "lodash/clone"
|
||||
import {
|
||||
IParserConfig,
|
||||
IToken,
|
||||
ITokenGrammarPath,
|
||||
TokenType
|
||||
} from "@chevrotain/types"
|
||||
import { MismatchedTokenException } from "../../exceptions_public"
|
||||
import { IN } from "../../constants"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
import { DEFAULT_PARSER_CONFIG } from "../parser"
|
||||
|
||||
// Sentinel follow-key used for the top level rule invocation - in that
// case the only token that may follow is EOF (see getFollowSetFromFollowKey).
export const EOF_FOLLOW_KEY: any = {}

// Identifies the follow set of one specific rule invocation:
// the invoked rule's name, its occurrence index inside the calling rule,
// and the calling rule's name.
export interface IFollowKey {
  ruleName: string
  idxInCallingRule: number
  inRule: string
}
|
||||
|
||||
export const IN_RULE_RECOVERY_EXCEPTION = "InRuleRecoveryException"
|
||||
|
||||
export class InRuleRecoveryException extends Error {
|
||||
constructor(message: string) {
|
||||
super(message)
|
||||
this.name = IN_RULE_RECOVERY_EXCEPTION
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This trait is responsible for the error recovery and fault tolerant logic
|
||||
*/
|
||||
/**
 * This trait is responsible for the error recovery and fault tolerant logic.
 * It implements two strategies: "in rule" recovery (single token
 * insertion/deletion) and "re-sync" recovery (skipping tokens until a
 * follow-set member is found).
 */
export class Recoverable {
  // Whether fault tolerant recovery was enabled via the parser config.
  recoveryEnabled: boolean
  // Cache: lookahead key -> first terminal following a repetition production.
  firstAfterRepMap: Record<string, IFirstAfterRepetition>
  // Pre-computed re-sync follow sets keyed by "<rule><idx><IN><callingRule>".
  resyncFollows: Record<string, TokenType[]>

  // Initializes the trait's state from the parser configuration.
  initRecoverable(config: IParserConfig) {
    this.firstAfterRepMap = {}
    this.resyncFollows = {}

    this.recoveryEnabled = has(config, "recoveryEnabled")
      ? (config.recoveryEnabled as boolean) // assumes end user provides the correct config value/type
      : DEFAULT_PARSER_CONFIG.recoveryEnabled

    // performance optimization, NOOP will be inlined which
    // effectively means that this optional feature does not exist
    // when not used.
    if (this.recoveryEnabled) {
      this.attemptInRepetitionRecovery = attemptInRepetitionRecovery
    }
  }

  // Creates the virtual token used for single token insertion recovery.
  // It carries no position information (all NaN) and is flagged so
  // consumers can distinguish it from real input tokens.
  public getTokenToInsert(tokType: TokenType): IToken {
    const tokToInsert = createTokenInstance(
      tokType,
      "",
      NaN,
      NaN,
      NaN,
      NaN,
      NaN,
      NaN
    )
    tokToInsert.isInsertedInRecovery = true
    return tokToInsert
  }

  // Extension point: subclasses may veto insertion of specific token types.
  public canTokenTypeBeInsertedInRecovery(tokType: TokenType): boolean {
    return true
  }

  // Extension point: subclasses may veto deletion of specific token types.
  public canTokenTypeBeDeletedInRecovery(tokType: TokenType): boolean {
    return true
  }

  // Re-sync recovery inside a repetition: skips tokens until either the
  // repetition can be exited (expectedTokType found), another iteration can
  // start (lookAheadFunc matches), or a rule-level re-sync point is passed.
  tryInRepetitionRecovery(
    this: MixedInParser,
    grammarRule: Function,
    grammarRuleArgs: any[],
    lookAheadFunc: () => boolean,
    expectedTokType: TokenType
  ): void {
    // TODO: can the resyncTokenType be cached?
    const reSyncTokType = this.findReSyncTokenType()
    const savedLexerState = this.exportLexerState()
    const resyncedTokens: IToken[] = []
    let passedResyncPoint = false

    const nextTokenWithoutResync = this.LA(1)
    let currToken = this.LA(1)

    // Records the MismatchedTokenException that would have been thrown had
    // we not recovered preemptively.
    const generateErrorMessage = () => {
      const previousToken = this.LA(0)
      // we are preemptively re-syncing before an error has been detected, therefor we must reproduce
      // the error that would have been thrown
      const msg = this.errorMessageProvider.buildMismatchTokenMessage({
        expected: expectedTokType,
        actual: nextTokenWithoutResync,
        previous: previousToken,
        ruleName: this.getCurrRuleFullName()
      })
      const error = new MismatchedTokenException(
        msg,
        nextTokenWithoutResync,
        this.LA(0)
      )
      // the first token here will be the original cause of the error, this is not part of the resyncedTokens property.
      error.resyncedTokens = dropRight(resyncedTokens)
      this.SAVE_ERROR(error)
    }

    while (!passedResyncPoint) {
      // re-synced to a point where we can safely exit the repetition/
      if (this.tokenMatcher(currToken, expectedTokType)) {
        generateErrorMessage()
        return // must return here to avoid reverting the inputIdx
      } else if (lookAheadFunc.call(this)) {
        // we skipped enough tokens so we can resync right back into another iteration of the repetition grammar rule
        generateErrorMessage()
        // recursive invocation in other to support multiple re-syncs in the same top level repetition grammar rule
        grammarRule.apply(this, grammarRuleArgs)
        return // must return here to avoid reverting the inputIdx
      } else if (this.tokenMatcher(currToken, reSyncTokType)) {
        passedResyncPoint = true
      } else {
        currToken = this.SKIP_TOKEN()
        this.addToResyncTokens(currToken, resyncedTokens)
      }
    }

    // we were unable to find a CLOSER point to resync inside the Repetition, reset the state.
    // The parsing exception we were trying to prevent will happen in the NEXT parsing step. it may be handled by
    // "between rules" resync recovery later in the flow.
    this.importLexerState(savedLexerState)
  }

  // Decides whether in-repetition re-sync recovery should be attempted at
  // the current position; returns false when cheaper recovery applies or
  // when recovery is unsafe (backtracking / stuck repetition).
  shouldInRepetitionRecoveryBeTried(
    this: MixedInParser,
    expectTokAfterLastMatch: TokenType,
    nextTokIdx: number,
    notStuck: boolean | undefined
  ): boolean {
    // Edge case of arriving from a MANY repetition which is stuck
    // Attempting recovery in this case could cause an infinite loop
    if (notStuck === false) {
      return false
    }

    // no need to recover, next token is what we expect...
    if (this.tokenMatcher(this.LA(1), expectTokAfterLastMatch)) {
      return false
    }

    // error recovery is disabled during backtracking as it can make the parser ignore a valid grammar path
    // and prefer some backtracking path that includes recovered errors.
    if (this.isBackTracking()) {
      return false
    }

    // if we can perform inRule recovery (single token insertion or deletion) we always prefer that recovery algorithm
    // because if it works, it makes the least amount of changes to the input stream (greedy algorithm)
    //noinspection RedundantIfStatementJS
    if (
      this.canPerformInRuleRecovery(
        expectTokAfterLastMatch,
        this.getFollowsForInRuleRecovery(expectTokAfterLastMatch, nextTokIdx)
      )
    ) {
      return false
    }

    return true
  }

  // Error Recovery functionality
  // Computes the token types that may legally follow the mismatched token
  // at the current grammar position.
  getFollowsForInRuleRecovery(
    this: MixedInParser,
    tokType: TokenType,
    tokIdxInRule: number
  ): TokenType[] {
    const grammarPath = this.getCurrentGrammarPath(tokType, tokIdxInRule)
    const follows = this.getNextPossibleTokenTypes(grammarPath)
    return follows
  }

  // Attempts single token insertion, then single token deletion; throws
  // InRuleRecoveryException when neither is possible.
  tryInRuleRecovery(
    this: MixedInParser,
    expectedTokType: TokenType,
    follows: TokenType[]
  ): IToken {
    if (this.canRecoverWithSingleTokenInsertion(expectedTokType, follows)) {
      const tokToInsert = this.getTokenToInsert(expectedTokType)
      return tokToInsert
    }

    if (this.canRecoverWithSingleTokenDeletion(expectedTokType)) {
      const nextTok = this.SKIP_TOKEN()
      this.consumeToken()
      return nextTok
    }

    throw new InRuleRecoveryException("sad sad panda")
  }

  // True when either single token insertion or deletion can recover here.
  canPerformInRuleRecovery(
    this: MixedInParser,
    expectedToken: TokenType,
    follows: TokenType[]
  ): boolean {
    return (
      this.canRecoverWithSingleTokenInsertion(expectedToken, follows) ||
      this.canRecoverWithSingleTokenDeletion(expectedToken)
    )
  }

  // Single token insertion is possible when the mismatched token is a legal
  // follower of the (virtually inserted) expected token.
  canRecoverWithSingleTokenInsertion(
    this: MixedInParser,
    expectedTokType: TokenType,
    follows: TokenType[]
  ): boolean {
    if (!this.canTokenTypeBeInsertedInRecovery(expectedTokType)) {
      return false
    }

    // must know the possible following tokens to perform single token insertion
    if (isEmpty(follows)) {
      return false
    }

    const mismatchedTok = this.LA(1)
    const isMisMatchedTokInFollows =
      find(follows, (possibleFollowsTokType: TokenType) => {
        return this.tokenMatcher(mismatchedTok, possibleFollowsTokType)
      }) !== undefined

    return isMisMatchedTokInFollows
  }

  // Single token deletion is possible when the token AFTER the mismatched
  // one is the expected token.
  canRecoverWithSingleTokenDeletion(
    this: MixedInParser,
    expectedTokType: TokenType
  ): boolean {
    if (!this.canTokenTypeBeDeletedInRecovery(expectedTokType)) {
      return false
    }

    const isNextTokenWhatIsExpected = this.tokenMatcher(
      this.LA(2),
      expectedTokType
    )
    return isNextTokenWhatIsExpected
  }

  // True when the given token type belongs to the current rule's re-sync set.
  isInCurrentRuleReSyncSet(
    this: MixedInParser,
    tokenTypeIdx: TokenType
  ): boolean {
    const followKey = this.getCurrFollowKey()
    const currentRuleReSyncSet = this.getFollowSetFromFollowKey(followKey)
    return includes(currentRuleReSyncSet, tokenTypeIdx)
  }

  // Scans ahead for the first token matching any re-sync candidate.
  findReSyncTokenType(this: MixedInParser): TokenType {
    const allPossibleReSyncTokTypes = this.flattenFollowSet()
    // this loop will always terminate as EOF is always in the follow stack and also always (virtually) in the input
    let nextToken = this.LA(1)
    let k = 2
    while (true) {
      const foundMatch = find(allPossibleReSyncTokTypes, (resyncTokType) => {
        const canMatch = tokenMatcher(nextToken, resyncTokType)
        return canMatch
      })
      if (foundMatch !== undefined) {
        return foundMatch
      }
      nextToken = this.LA(k)
      k++
    }
  }

  // Builds the follow-key describing the current (innermost) rule invocation.
  getCurrFollowKey(this: MixedInParser): IFollowKey {
    // the length is at least one as we always add the ruleName to the stack before invoking the rule.
    if (this.RULE_STACK.length === 1) {
      return EOF_FOLLOW_KEY
    }
    const currRuleShortName = this.getLastExplicitRuleShortName()
    const currRuleIdx = this.getLastExplicitRuleOccurrenceIndex()
    const prevRuleShortName = this.getPreviousExplicitRuleShortName()

    return {
      ruleName: this.shortRuleNameToFullName(currRuleShortName),
      idxInCallingRule: currRuleIdx,
      inRule: this.shortRuleNameToFullName(prevRuleShortName)
    }
  }

  // Builds a follow-key for every frame of the current rule stack; the
  // bottom-most frame always maps to EOF_FOLLOW_KEY.
  buildFullFollowKeyStack(this: MixedInParser): IFollowKey[] {
    const explicitRuleStack = this.RULE_STACK
    const explicitOccurrenceStack = this.RULE_OCCURRENCE_STACK

    return map(explicitRuleStack, (ruleName, idx) => {
      if (idx === 0) {
        return EOF_FOLLOW_KEY
      }
      return {
        ruleName: this.shortRuleNameToFullName(ruleName),
        idxInCallingRule: explicitOccurrenceStack[idx],
        inRule: this.shortRuleNameToFullName(explicitRuleStack[idx - 1])
      }
    })
  }

  // Union of the follow sets of every rule invocation on the stack.
  flattenFollowSet(this: MixedInParser): TokenType[] {
    const followStack = map(this.buildFullFollowKeyStack(), (currKey) => {
      return this.getFollowSetFromFollowKey(currKey)
    })
    return <any>flatten(followStack)
  }

  // Resolves a follow-key to its pre-computed follow set.
  getFollowSetFromFollowKey(
    this: MixedInParser,
    followKey: IFollowKey
  ): TokenType[] {
    if (followKey === EOF_FOLLOW_KEY) {
      return [EOF]
    }

    const followName =
      followKey.ruleName + followKey.idxInCallingRule + IN + followKey.inRule

    return this.resyncFollows[followName]
  }

  // It does not make any sense to include a virtual EOF token in the list of resynced tokens
  // as EOF does not really exist and thus does not contain any useful information (line/column numbers)
  addToResyncTokens(
    this: MixedInParser,
    token: IToken,
    resyncTokens: IToken[]
  ): IToken[] {
    if (!this.tokenMatcher(token, EOF)) {
      resyncTokens.push(token)
    }
    return resyncTokens
  }

  // Skips tokens until one matching tokType is found; returns the skipped
  // tokens (excluding the final, matching one).
  reSyncTo(this: MixedInParser, tokType: TokenType): IToken[] {
    const resyncedTokens: IToken[] = []
    let nextTok = this.LA(1)
    while (this.tokenMatcher(nextTok, tokType) === false) {
      nextTok = this.SKIP_TOKEN()
      this.addToResyncTokens(nextTok, resyncedTokens)
    }
    // the last token is not part of the error.
    return dropRight(resyncedTokens)
  }

  // Placeholder overwritten in initRecoverable when recovery is enabled.
  attemptInRepetitionRecovery(
    this: MixedInParser,
    prodFunc: Function,
    args: any[],
    lookaheadFunc: () => boolean,
    dslMethodIdx: number,
    prodOccurrence: number,
    nextToksWalker: typeof AbstractNextTerminalAfterProductionWalker,
    notStuck?: boolean
  ): void {
    // by default this is a NO-OP
    // The actual implementation is with the function(not method) below
  }

  // Captures the current grammar position (rule/occurrence stacks plus the
  // last matched token) for follow-set computation.
  getCurrentGrammarPath(
    this: MixedInParser,
    tokType: TokenType,
    tokIdxInRule: number
  ): ITokenGrammarPath {
    const pathRuleStack: string[] = this.getHumanReadableRuleStack()
    const pathOccurrenceStack: number[] = clone(this.RULE_OCCURRENCE_STACK)
    const grammarPath: any = {
      ruleStack: pathRuleStack,
      occurrenceStack: pathOccurrenceStack,
      lastTok: tokType,
      lastTokOccurrence: tokIdxInRule
    }

    return grammarPath
  }

  // The rule stack with short names resolved to full rule names.
  getHumanReadableRuleStack(this: MixedInParser): string[] {
    return map(this.RULE_STACK, (currShortName) =>
      this.shortRuleNameToFullName(currShortName)
    )
  }
}
|
||||
|
||||
// Real implementation of in-repetition recovery; assigned over the NOOP
// method on Recoverable when `recoveryEnabled` is true (see initRecoverable).
// Determines the first terminal expected AFTER the repetition (cached per
// lookahead key) and, when recovery looks worthwhile, delegates to
// tryInRepetitionRecovery.
export function attemptInRepetitionRecovery(
  this: MixedInParser,
  prodFunc: Function,
  args: any[],
  lookaheadFunc: () => boolean,
  dslMethodIdx: number,
  prodOccurrence: number,
  nextToksWalker: typeof AbstractNextTerminalAfterProductionWalker,
  notStuck?: boolean
): void {
  const key = this.getKeyForAutomaticLookahead(dslMethodIdx, prodOccurrence)
  let firstAfterRepInfo = this.firstAfterRepMap[key]
  // Lazily compute (and cache) the first terminal after this repetition.
  if (firstAfterRepInfo === undefined) {
    const currRuleName = this.getCurrRuleFullName()
    const ruleGrammar = this.getGAstProductions()[currRuleName]
    const walker: AbstractNextTerminalAfterProductionWalker =
      new nextToksWalker(ruleGrammar, prodOccurrence)
    firstAfterRepInfo = walker.startWalking()
    this.firstAfterRepMap[key] = firstAfterRepInfo
  }

  let expectTokAfterLastMatch = firstAfterRepInfo.token
  let nextTokIdx = firstAfterRepInfo.occurrence
  const isEndOfRule = firstAfterRepInfo.isEndOfRule

  // special edge case of a TOP most repetition after which the input should END.
  // this will force an attempt for inRule recovery in that scenario.
  if (
    this.RULE_STACK.length === 1 &&
    isEndOfRule &&
    expectTokAfterLastMatch === undefined
  ) {
    expectTokAfterLastMatch = EOF
    nextTokIdx = 1
  }

  // We don't have anything to re-sync to...
  // this condition was extracted from `shouldInRepetitionRecoveryBeTried` to act as a type-guard
  if (expectTokAfterLastMatch === undefined || nextTokIdx === undefined) {
    return
  }

  if (
    this.shouldInRepetitionRecoveryBeTried(
      expectTokAfterLastMatch,
      nextTokIdx,
      notStuck
    )
  ) {
    // TODO: performance optimization: instead of passing the original args here, we modify
    // the args param (or create a new one) and make sure the lookahead func is explicitly provided
    // to avoid searching the cache for it once more.
    this.tryInRepetitionRecovery(
      prodFunc,
      args,
      lookaheadFunc,
      expectTokAfterLastMatch
    )
  }
}
|
||||
278
_node_modules/chevrotain/src/parse/parser/traits/tree_builder.ts
generated
Normal file
278
_node_modules/chevrotain/src/parse/parser/traits/tree_builder.ts
generated
Normal file
@@ -0,0 +1,278 @@
|
||||
import {
|
||||
addNoneTerminalToCst,
|
||||
addTerminalToCst,
|
||||
setNodeLocationFull,
|
||||
setNodeLocationOnlyOffset
|
||||
} from "../../cst/cst"
|
||||
import noop from "lodash/noop"
|
||||
import has from "lodash/has"
|
||||
import keys from "lodash/keys"
|
||||
import isUndefined from "lodash/isUndefined"
|
||||
import {
|
||||
createBaseSemanticVisitorConstructor,
|
||||
createBaseVisitorConstructorWithDefaults
|
||||
} from "../../cst/cst_visitor"
|
||||
import {
|
||||
CstNode,
|
||||
CstNodeLocation,
|
||||
ICstVisitor,
|
||||
IParserConfig,
|
||||
IToken,
|
||||
nodeLocationTrackingOptions
|
||||
} from "@chevrotain/types"
|
||||
import { MixedInParser } from "./parser_traits"
|
||||
import { DEFAULT_PARSER_CONFIG } from "../parser"
|
||||
|
||||
/**
|
||||
* This trait is responsible for the CST building logic.
|
||||
*/
|
||||
/**
 * This trait is responsible for the CST building logic.
 * The concrete behavior of several methods is selected once in
 * `initTreeBuilder` (based on `outputCst`, `nodeLocationTracking` and
 * `recoveryEnabled`) by assigning either a real implementation or `noop`.
 */
export class TreeBuilder {
  // Whether a CST is produced at all (internal config flag).
  outputCst: boolean
  // Stack of CST nodes for the currently active rule invocations.
  CST_STACK: CstNode[]
  // Lazily created visitor base classes (see the getBase* methods).
  baseCstVisitorConstructor: Function
  baseCstVisitorWithDefaultsConstructor: Function

  // dynamically assigned Methods
  setNodeLocationFromNode: (
    nodeLocation: CstNodeLocation,
    locationInformation: CstNodeLocation
  ) => void
  setNodeLocationFromToken: (
    nodeLocation: CstNodeLocation,
    locationInformation: CstNodeLocation
  ) => void
  cstPostRule: (this: MixedInParser, ruleCstNode: CstNode) => void

  setInitialNodeLocation: (cstNode: CstNode) => void
  nodeLocationTracking: nodeLocationTrackingOptions

  // Selects the concrete CST/location-tracking strategy from the config.
  initTreeBuilder(this: MixedInParser, config: IParserConfig) {
    this.CST_STACK = []

    // outputCst is no longer exposed/defined in the pubic API
    this.outputCst = (config as any).outputCst

    this.nodeLocationTracking = has(config, "nodeLocationTracking")
      ? (config.nodeLocationTracking as nodeLocationTrackingOptions) // assumes end user provides the correct config value/type
      : DEFAULT_PARSER_CONFIG.nodeLocationTracking

    if (!this.outputCst) {
      // CST output disabled: every CST hook becomes a NOOP.
      this.cstInvocationStateUpdate = noop
      this.cstFinallyStateUpdate = noop
      this.cstPostTerminal = noop
      this.cstPostNonTerminal = noop
      this.cstPostRule = noop
    } else {
      if (/full/i.test(this.nodeLocationTracking)) {
        if (this.recoveryEnabled) {
          // With recovery enabled locations are updated incrementally per
          // token/node instead of once per rule.
          this.setNodeLocationFromToken = setNodeLocationFull
          this.setNodeLocationFromNode = setNodeLocationFull
          this.cstPostRule = noop
          this.setInitialNodeLocation = this.setInitialNodeLocationFullRecovery
        } else {
          this.setNodeLocationFromToken = noop
          this.setNodeLocationFromNode = noop
          this.cstPostRule = this.cstPostRuleFull
          this.setInitialNodeLocation = this.setInitialNodeLocationFullRegular
        }
      } else if (/onlyOffset/i.test(this.nodeLocationTracking)) {
        if (this.recoveryEnabled) {
          this.setNodeLocationFromToken = <any>setNodeLocationOnlyOffset
          this.setNodeLocationFromNode = <any>setNodeLocationOnlyOffset
          this.cstPostRule = noop
          this.setInitialNodeLocation =
            this.setInitialNodeLocationOnlyOffsetRecovery
        } else {
          this.setNodeLocationFromToken = noop
          this.setNodeLocationFromNode = noop
          this.cstPostRule = this.cstPostRuleOnlyOffset
          this.setInitialNodeLocation =
            this.setInitialNodeLocationOnlyOffsetRegular
        }
      } else if (/none/i.test(this.nodeLocationTracking)) {
        this.setNodeLocationFromToken = noop
        this.setNodeLocationFromNode = noop
        this.cstPostRule = noop
        this.setInitialNodeLocation = noop
      } else {
        throw Error(
          `Invalid <nodeLocationTracking> config option: "${config.nodeLocationTracking}"`
        )
      }
    }
  }

  // "onlyOffset" + recovery: start with NaN offsets, filled in incrementally.
  setInitialNodeLocationOnlyOffsetRecovery(
    this: MixedInParser,
    cstNode: any
  ): void {
    cstNode.location = {
      startOffset: NaN,
      endOffset: NaN
    }
  }

  setInitialNodeLocationOnlyOffsetRegular(
    this: MixedInParser,
    cstNode: any
  ): void {
    cstNode.location = {
      // without error recovery the starting Location of a new CstNode is guaranteed
      // To be the next Token's startOffset (for valid inputs).
      // For invalid inputs there won't be any CSTOutput so this potential
      // inaccuracy does not matter
      startOffset: this.LA(1).startOffset,
      endOffset: NaN
    }
  }

  // "full" + recovery: start with NaN for all fields, filled in incrementally.
  setInitialNodeLocationFullRecovery(this: MixedInParser, cstNode: any): void {
    cstNode.location = {
      startOffset: NaN,
      startLine: NaN,
      startColumn: NaN,
      endOffset: NaN,
      endLine: NaN,
      endColumn: NaN
    }
  }

  /**
   * @see setInitialNodeLocationOnlyOffsetRegular for explanation why this works
   * (the next token's start position is a valid start location without recovery).
   * @param cstNode
   */
  setInitialNodeLocationFullRegular(this: MixedInParser, cstNode: any): void {
    const nextToken = this.LA(1)
    cstNode.location = {
      startOffset: nextToken.startOffset,
      startLine: nextToken.startLine,
      startColumn: nextToken.startColumn,
      endOffset: NaN,
      endLine: NaN,
      endColumn: NaN
    }
  }

  // Pushes a fresh CST node for the rule being entered.
  cstInvocationStateUpdate(this: MixedInParser, fullRuleName: string): void {
    const cstNode: CstNode = {
      name: fullRuleName,
      children: Object.create(null)
    }

    this.setInitialNodeLocation(cstNode)
    this.CST_STACK.push(cstNode)
  }

  // Pops the CST node when the rule invocation ends.
  cstFinallyStateUpdate(this: MixedInParser): void {
    this.CST_STACK.pop()
  }

  // Finalizes a rule's full location info (endOffset/endLine/endColumn).
  cstPostRuleFull(this: MixedInParser, ruleCstNode: CstNode): void {
    // casts to `required<CstNodeLocation>` are safe because `cstPostRuleFull` should only be invoked when full location is enabled
    const prevToken = this.LA(0) as Required<CstNodeLocation>
    const loc = ruleCstNode.location as Required<CstNodeLocation>

    // If this condition is true it means we consumed at least one Token
    // In this CstNode.
    if (loc.startOffset <= prevToken.startOffset === true) {
      loc.endOffset = prevToken.endOffset
      loc.endLine = prevToken.endLine
      loc.endColumn = prevToken.endColumn
    }
    // "empty" CstNode edge case
    else {
      loc.startOffset = NaN
      loc.startLine = NaN
      loc.startColumn = NaN
    }
  }

  // Finalizes a rule's offset-only location info.
  cstPostRuleOnlyOffset(this: MixedInParser, ruleCstNode: CstNode): void {
    const prevToken = this.LA(0)
    // `location' is not null because `cstPostRuleOnlyOffset` will only be invoked when location tracking is enabled.
    const loc = ruleCstNode.location!

    // If this condition is true it means we consumed at least one Token
    // In this CstNode.
    if (loc.startOffset <= prevToken.startOffset === true) {
      loc.endOffset = prevToken.endOffset
    }
    // "empty" CstNode edge case
    else {
      loc.startOffset = NaN
    }
  }

  // Attaches a consumed token to the current CST node under `key`.
  cstPostTerminal(
    this: MixedInParser,
    key: string,
    consumedToken: IToken
  ): void {
    const rootCst = this.CST_STACK[this.CST_STACK.length - 1]
    addTerminalToCst(rootCst, consumedToken, key)
    // This is only used when **both** error recovery and CST Output are enabled.
    this.setNodeLocationFromToken(rootCst.location!, <any>consumedToken)
  }

  // Attaches a sub-rule's finished CST node to its parent node.
  cstPostNonTerminal(
    this: MixedInParser,
    ruleCstResult: CstNode,
    ruleName: string
  ): void {
    const preCstNode = this.CST_STACK[this.CST_STACK.length - 1]
    addNoneTerminalToCst(preCstNode, ruleName, ruleCstResult)
    // This is only used when **both** error recovery and CST Output are enabled.
    this.setNodeLocationFromNode(preCstNode.location!, ruleCstResult.location!)
  }

  // Lazily creates (and caches) the semantic visitor base class for this
  // parser's grammar.
  getBaseCstVisitorConstructor<IN = any, OUT = any>(
    this: MixedInParser
  ): {
    new (...args: any[]): ICstVisitor<IN, OUT>
  } {
    if (isUndefined(this.baseCstVisitorConstructor)) {
      const newBaseCstVisitorConstructor = createBaseSemanticVisitorConstructor(
        this.className,
        keys(this.gastProductionsCache)
      )
      this.baseCstVisitorConstructor = newBaseCstVisitorConstructor
      return newBaseCstVisitorConstructor
    }

    return <any>this.baseCstVisitorConstructor
  }

  // Same as getBaseCstVisitorConstructor but with default (pass-through)
  // implementations for all visit methods.
  getBaseCstVisitorConstructorWithDefaults<IN = any, OUT = any>(
    this: MixedInParser
  ): {
    new (...args: any[]): ICstVisitor<IN, OUT>
  } {
    if (isUndefined(this.baseCstVisitorWithDefaultsConstructor)) {
      const newConstructor = createBaseVisitorConstructorWithDefaults(
        this.className,
        keys(this.gastProductionsCache),
        this.getBaseCstVisitorConstructor()
      )
      this.baseCstVisitorWithDefaultsConstructor = newConstructor
      return newConstructor
    }

    return <any>this.baseCstVisitorWithDefaultsConstructor
  }

  // Short name of the innermost rule on the stack.
  getLastExplicitRuleShortName(this: MixedInParser): number {
    const ruleStack = this.RULE_STACK
    return ruleStack[ruleStack.length - 1]
  }

  // Short name of the rule that invoked the innermost rule.
  getPreviousExplicitRuleShortName(this: MixedInParser): number {
    const ruleStack = this.RULE_STACK
    return ruleStack[ruleStack.length - 2]
  }

  // Occurrence index of the innermost rule invocation.
  getLastExplicitRuleOccurrenceIndex(this: MixedInParser): number {
    const occurrenceStack = this.RULE_OCCURRENCE_STACK
    return occurrenceStack[occurrenceStack.length - 1]
  }
}
|
||||
16
_node_modules/chevrotain/src/parse/parser/types.ts
generated
Normal file
16
_node_modules/chevrotain/src/parse/parser/types.ts
generated
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* Helper common type definitions
|
||||
* Particularly useful when expending the public API
|
||||
* to include additional **internal** properties.
|
||||
*/
|
||||
import { IParserConfig, ParserMethod } from "@chevrotain/types"
|
||||
|
||||
// A ParserMethod augmented with the internal bookkeeping properties the
// rule-definition APIs attach to each generated rule function.
export type ParserMethodInternal<ARGS extends unknown[], R> = ParserMethod<
  ARGS,
  R
> & {
  ruleName: string
  originalGrammarAction: Function
}

// IParserConfig extended with the no-longer-public `outputCst` flag.
export type IParserConfigInternal = IParserConfig & { outputCst: boolean }
|
||||
28
_node_modules/chevrotain/src/parse/parser/utils/apply_mixins.ts
generated
Normal file
28
_node_modules/chevrotain/src/parse/parser/utils/apply_mixins.ts
generated
Normal file
@@ -0,0 +1,28 @@
|
||||
export function applyMixins(derivedCtor: any, baseCtors: any[]) {
|
||||
baseCtors.forEach((baseCtor) => {
|
||||
const baseProto = baseCtor.prototype
|
||||
Object.getOwnPropertyNames(baseProto).forEach((propName) => {
|
||||
if (propName === "constructor") {
|
||||
return
|
||||
}
|
||||
|
||||
const basePropDescriptor = Object.getOwnPropertyDescriptor(
|
||||
baseProto,
|
||||
propName
|
||||
)
|
||||
// Handle Accessors
|
||||
if (
|
||||
basePropDescriptor &&
|
||||
(basePropDescriptor.get || basePropDescriptor.set)
|
||||
) {
|
||||
Object.defineProperty(
|
||||
derivedCtor.prototype,
|
||||
propName,
|
||||
basePropDescriptor
|
||||
)
|
||||
} else {
|
||||
derivedCtor.prototype[propName] = baseCtor.prototype[propName]
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
1169
_node_modules/chevrotain/src/scan/lexer.ts
generated
Normal file
1169
_node_modules/chevrotain/src/scan/lexer.ts
generated
Normal file
File diff suppressed because it is too large
Load Diff
21
_node_modules/chevrotain/src/scan/lexer_errors_public.ts
generated
Normal file
21
_node_modules/chevrotain/src/scan/lexer_errors_public.ts
generated
Normal file
@@ -0,0 +1,21 @@
|
||||
import { ILexerErrorMessageProvider, IToken } from "@chevrotain/types"
|
||||
|
||||
export const defaultLexerErrorProvider: ILexerErrorMessageProvider = {
|
||||
buildUnableToPopLexerModeMessage(token: IToken): string {
|
||||
return `Unable to pop Lexer Mode after encountering Token ->${token.image}<- The Mode Stack is empty`
|
||||
},
|
||||
|
||||
buildUnexpectedCharactersMessage(
|
||||
fullText: string,
|
||||
startOffset: number,
|
||||
length: number,
|
||||
line?: number,
|
||||
column?: number
|
||||
): string {
|
||||
return (
|
||||
`unexpected character: ->${fullText.charAt(
|
||||
startOffset
|
||||
)}<- at offset: ${startOffset},` + ` skipped ${length} characters.`
|
||||
)
|
||||
}
|
||||
}
|
||||
969
_node_modules/chevrotain/src/scan/lexer_public.ts
generated
Normal file
969
_node_modules/chevrotain/src/scan/lexer_public.ts
generated
Normal file
@@ -0,0 +1,969 @@
|
||||
import {
|
||||
analyzeTokenTypes,
|
||||
charCodeToOptimizedIndex,
|
||||
cloneEmptyGroups,
|
||||
DEFAULT_MODE,
|
||||
IAnalyzeResult,
|
||||
IPatternConfig,
|
||||
LineTerminatorOptimizedTester,
|
||||
performRuntimeChecks,
|
||||
performWarningRuntimeChecks,
|
||||
SUPPORT_STICKY,
|
||||
validatePatterns
|
||||
} from "./lexer"
|
||||
import noop from "lodash/noop"
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import isArray from "lodash/isArray"
|
||||
import last from "lodash/last"
|
||||
import reject from "lodash/reject"
|
||||
import map from "lodash/map"
|
||||
import forEach from "lodash/forEach"
|
||||
import keys from "lodash/keys"
|
||||
import isUndefined from "lodash/isUndefined"
|
||||
import identity from "lodash/identity"
|
||||
import assign from "lodash/assign"
|
||||
import reduce from "lodash/reduce"
|
||||
import clone from "lodash/clone"
|
||||
import { PRINT_WARNING, timer, toFastProperties } from "@chevrotain/utils"
|
||||
import { augmentTokenTypes } from "./tokens"
|
||||
import {
|
||||
CustomPatternMatcherFunc,
|
||||
CustomPatternMatcherReturn,
|
||||
ILexerConfig,
|
||||
ILexerDefinitionError,
|
||||
ILexingError,
|
||||
IMultiModeLexerDefinition,
|
||||
IToken,
|
||||
TokenType
|
||||
} from "@chevrotain/types"
|
||||
import { defaultLexerErrorProvider } from "./lexer_errors_public"
|
||||
import { clearRegExpParserCache } from "./reg_exp_parser"
|
||||
|
||||
// The complete output of a Lexer.tokenize call: the matched tokens, any
// token groups, and the lexing errors encountered.
export interface ILexingResult {
  tokens: IToken[]
  groups: { [groupName: string]: IToken[] }
  errors: ILexingError[]
}

// Categories of errors that can be detected when validating a lexer
// definition (at Lexer construction time).
export enum LexerDefinitionErrorType {
  MISSING_PATTERN,
  INVALID_PATTERN,
  EOI_ANCHOR_FOUND,
  UNSUPPORTED_FLAGS_FOUND,
  DUPLICATE_PATTERNS_FOUND,
  INVALID_GROUP_TYPE_FOUND,
  PUSH_MODE_DOES_NOT_EXIST,
  MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE,
  MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY,
  MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST,
  LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED,
  SOI_ANCHOR_FOUND,
  EMPTY_MATCH_PATTERN,
  NO_LINE_BREAKS_FLAGS,
  UNREACHABLE_PATTERN,
  IDENTIFY_TERMINATOR,
  CUSTOM_LINE_BREAK,
  MULTI_MODE_LEXER_LONGER_ALT_NOT_IN_CURRENT_MODE
}

// Minimal shape of a custom token pattern: anything exposing an
// `exec` matcher function.
export interface IRegExpExec {
  exec: CustomPatternMatcherFunc
}

// Default value for every ILexerConfig option; user-supplied config is
// merged over these in the Lexer constructor.
const DEFAULT_LEXER_CONFIG: Required<ILexerConfig> = {
  deferDefinitionErrorsHandling: false,
  positionTracking: "full",
  lineTerminatorsPattern: /\n|\r\n?/g,
  lineTerminatorCharacters: ["\n", "\r"],
  ensureOptimizations: false,
  safeMode: false,
  errorMessageProvider: defaultLexerErrorProvider,
  traceInitPerf: false,
  skipValidations: false,
  recoveryEnabled: true
}

// Shallow-freeze the defaults so merging cannot mutate them.
Object.freeze(DEFAULT_LEXER_CONFIG)
|
||||
|
||||
export class Lexer {
|
||||
public static SKIPPED =
|
||||
"This marks a skipped Token pattern, this means each token identified by it will" +
|
||||
"be consumed and then thrown into oblivion, this can be used to for example to completely ignore whitespace."
|
||||
|
||||
public static NA = /NOT_APPLICABLE/
|
||||
public lexerDefinitionErrors: ILexerDefinitionError[] = []
|
||||
public lexerDefinitionWarning: ILexerDefinitionError[] = []
|
||||
|
||||
protected patternIdxToConfig: Record<string, IPatternConfig[]> = {}
|
||||
protected charCodeToPatternIdxToConfig: {
|
||||
[modeName: string]: { [charCode: number]: IPatternConfig[] }
|
||||
} = {}
|
||||
|
||||
protected modes: string[] = []
|
||||
protected defaultMode!: string
|
||||
protected emptyGroups: { [groupName: string]: IToken } = {}
|
||||
|
||||
private config: Required<ILexerConfig>
|
||||
private trackStartLines: boolean = true
|
||||
private trackEndLines: boolean = true
|
||||
private hasCustom: boolean = false
|
||||
private canModeBeOptimized: Record<string, boolean> = {}
|
||||
|
||||
private traceInitPerf!: boolean | number
|
||||
private traceInitMaxIdent!: number
|
||||
private traceInitIndent: number
|
||||
|
||||
constructor(
|
||||
protected lexerDefinition: TokenType[] | IMultiModeLexerDefinition,
|
||||
config: ILexerConfig = DEFAULT_LEXER_CONFIG
|
||||
) {
|
||||
if (typeof config === "boolean") {
|
||||
throw Error(
|
||||
"The second argument to the Lexer constructor is now an ILexerConfig Object.\n" +
|
||||
"a boolean 2nd argument is no longer supported"
|
||||
)
|
||||
}
|
||||
|
||||
// todo: defaults func?
|
||||
this.config = assign({}, DEFAULT_LEXER_CONFIG, config) as any
|
||||
|
||||
const traceInitVal = this.config.traceInitPerf
|
||||
if (traceInitVal === true) {
|
||||
this.traceInitMaxIdent = Infinity
|
||||
this.traceInitPerf = true
|
||||
} else if (typeof traceInitVal === "number") {
|
||||
this.traceInitMaxIdent = traceInitVal
|
||||
this.traceInitPerf = true
|
||||
}
|
||||
this.traceInitIndent = -1
|
||||
|
||||
this.TRACE_INIT("Lexer Constructor", () => {
|
||||
let actualDefinition!: IMultiModeLexerDefinition
|
||||
let hasOnlySingleMode = true
|
||||
this.TRACE_INIT("Lexer Config handling", () => {
|
||||
if (
|
||||
this.config.lineTerminatorsPattern ===
|
||||
DEFAULT_LEXER_CONFIG.lineTerminatorsPattern
|
||||
) {
|
||||
// optimized built-in implementation for the defaults definition of lineTerminators
|
||||
this.config.lineTerminatorsPattern = LineTerminatorOptimizedTester
|
||||
} else {
|
||||
if (
|
||||
this.config.lineTerminatorCharacters ===
|
||||
DEFAULT_LEXER_CONFIG.lineTerminatorCharacters
|
||||
) {
|
||||
throw Error(
|
||||
"Error: Missing <lineTerminatorCharacters> property on the Lexer config.\n" +
|
||||
"\tFor details See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#MISSING_LINE_TERM_CHARS"
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if (config.safeMode && config.ensureOptimizations) {
|
||||
throw Error(
|
||||
'"safeMode" and "ensureOptimizations" flags are mutually exclusive.'
|
||||
)
|
||||
}
|
||||
|
||||
this.trackStartLines = /full|onlyStart/i.test(
|
||||
this.config.positionTracking
|
||||
)
|
||||
this.trackEndLines = /full/i.test(this.config.positionTracking)
|
||||
|
||||
// Convert SingleModeLexerDefinition into a IMultiModeLexerDefinition.
|
||||
if (isArray(lexerDefinition)) {
|
||||
actualDefinition = {
|
||||
modes: { defaultMode: clone(lexerDefinition) },
|
||||
defaultMode: DEFAULT_MODE
|
||||
}
|
||||
} else {
|
||||
// no conversion needed, input should already be a IMultiModeLexerDefinition
|
||||
hasOnlySingleMode = false
|
||||
actualDefinition = clone(<IMultiModeLexerDefinition>lexerDefinition)
|
||||
}
|
||||
})
|
||||
|
||||
if (this.config.skipValidations === false) {
|
||||
this.TRACE_INIT("performRuntimeChecks", () => {
|
||||
this.lexerDefinitionErrors = this.lexerDefinitionErrors.concat(
|
||||
performRuntimeChecks(
|
||||
actualDefinition,
|
||||
this.trackStartLines,
|
||||
this.config.lineTerminatorCharacters
|
||||
)
|
||||
)
|
||||
})
|
||||
|
||||
this.TRACE_INIT("performWarningRuntimeChecks", () => {
|
||||
this.lexerDefinitionWarning = this.lexerDefinitionWarning.concat(
|
||||
performWarningRuntimeChecks(
|
||||
actualDefinition,
|
||||
this.trackStartLines,
|
||||
this.config.lineTerminatorCharacters
|
||||
)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// for extra robustness to avoid throwing an none informative error message
|
||||
actualDefinition.modes = actualDefinition.modes
|
||||
? actualDefinition.modes
|
||||
: {}
|
||||
|
||||
// an error of undefined TokenTypes will be detected in "performRuntimeChecks" above.
|
||||
// this transformation is to increase robustness in the case of partially invalid lexer definition.
|
||||
forEach(actualDefinition.modes, (currModeValue, currModeName) => {
|
||||
actualDefinition.modes[currModeName] = reject<TokenType>(
|
||||
currModeValue,
|
||||
(currTokType) => isUndefined(currTokType)
|
||||
)
|
||||
})
|
||||
|
||||
const allModeNames = keys(actualDefinition.modes)
|
||||
|
||||
forEach(
|
||||
actualDefinition.modes,
|
||||
(currModDef: TokenType[], currModName) => {
|
||||
this.TRACE_INIT(`Mode: <${currModName}> processing`, () => {
|
||||
this.modes.push(currModName)
|
||||
|
||||
if (this.config.skipValidations === false) {
|
||||
this.TRACE_INIT(`validatePatterns`, () => {
|
||||
this.lexerDefinitionErrors = this.lexerDefinitionErrors.concat(
|
||||
validatePatterns(currModDef, allModeNames)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// If definition errors were encountered, the analysis phase may fail unexpectedly/
|
||||
// Considering a lexer with definition errors may never be used, there is no point
|
||||
// to performing the analysis anyhow...
|
||||
if (isEmpty(this.lexerDefinitionErrors)) {
|
||||
augmentTokenTypes(currModDef)
|
||||
|
||||
let currAnalyzeResult!: IAnalyzeResult
|
||||
this.TRACE_INIT(`analyzeTokenTypes`, () => {
|
||||
currAnalyzeResult = analyzeTokenTypes(currModDef, {
|
||||
lineTerminatorCharacters:
|
||||
this.config.lineTerminatorCharacters,
|
||||
positionTracking: config.positionTracking,
|
||||
ensureOptimizations: config.ensureOptimizations,
|
||||
safeMode: config.safeMode,
|
||||
tracer: this.TRACE_INIT
|
||||
})
|
||||
})
|
||||
|
||||
this.patternIdxToConfig[currModName] =
|
||||
currAnalyzeResult.patternIdxToConfig
|
||||
|
||||
this.charCodeToPatternIdxToConfig[currModName] =
|
||||
currAnalyzeResult.charCodeToPatternIdxToConfig
|
||||
|
||||
this.emptyGroups = assign(
|
||||
{},
|
||||
this.emptyGroups,
|
||||
currAnalyzeResult.emptyGroups
|
||||
) as any
|
||||
|
||||
this.hasCustom = currAnalyzeResult.hasCustom || this.hasCustom
|
||||
|
||||
this.canModeBeOptimized[currModName] =
|
||||
currAnalyzeResult.canBeOptimized
|
||||
}
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
this.defaultMode = actualDefinition.defaultMode
|
||||
|
||||
if (
|
||||
!isEmpty(this.lexerDefinitionErrors) &&
|
||||
!this.config.deferDefinitionErrorsHandling
|
||||
) {
|
||||
const allErrMessages = map(this.lexerDefinitionErrors, (error) => {
|
||||
return error.message
|
||||
})
|
||||
const allErrMessagesString = allErrMessages.join(
|
||||
"-----------------------\n"
|
||||
)
|
||||
throw new Error(
|
||||
"Errors detected in definition of Lexer:\n" + allErrMessagesString
|
||||
)
|
||||
}
|
||||
|
||||
// Only print warning if there are no errors, This will avoid pl
|
||||
forEach(this.lexerDefinitionWarning, (warningDescriptor) => {
|
||||
PRINT_WARNING(warningDescriptor.message)
|
||||
})
|
||||
|
||||
this.TRACE_INIT("Choosing sub-methods implementations", () => {
|
||||
// Choose the relevant internal implementations for this specific parser.
|
||||
// These implementations should be in-lined by the JavaScript engine
|
||||
// to provide optimal performance in each scenario.
|
||||
if (SUPPORT_STICKY) {
|
||||
this.chopInput = <any>identity
|
||||
this.match = this.matchWithTest
|
||||
} else {
|
||||
this.updateLastIndex = noop
|
||||
this.match = this.matchWithExec
|
||||
}
|
||||
|
||||
if (hasOnlySingleMode) {
|
||||
this.handleModes = noop
|
||||
}
|
||||
|
||||
if (this.trackStartLines === false) {
|
||||
this.computeNewColumn = identity
|
||||
}
|
||||
|
||||
if (this.trackEndLines === false) {
|
||||
this.updateTokenEndLineColumnLocation = noop
|
||||
}
|
||||
|
||||
if (/full/i.test(this.config.positionTracking)) {
|
||||
this.createTokenInstance = this.createFullToken
|
||||
} else if (/onlyStart/i.test(this.config.positionTracking)) {
|
||||
this.createTokenInstance = this.createStartOnlyToken
|
||||
} else if (/onlyOffset/i.test(this.config.positionTracking)) {
|
||||
this.createTokenInstance = this.createOffsetOnlyToken
|
||||
} else {
|
||||
throw Error(
|
||||
`Invalid <positionTracking> config option: "${this.config.positionTracking}"`
|
||||
)
|
||||
}
|
||||
|
||||
if (this.hasCustom) {
|
||||
this.addToken = this.addTokenUsingPush
|
||||
this.handlePayload = this.handlePayloadWithCustom
|
||||
} else {
|
||||
this.addToken = this.addTokenUsingMemberAccess
|
||||
this.handlePayload = this.handlePayloadNoCustom
|
||||
}
|
||||
})
|
||||
|
||||
this.TRACE_INIT("Failed Optimization Warnings", () => {
|
||||
const unOptimizedModes = reduce(
|
||||
this.canModeBeOptimized,
|
||||
(cannotBeOptimized, canBeOptimized, modeName) => {
|
||||
if (canBeOptimized === false) {
|
||||
cannotBeOptimized.push(modeName)
|
||||
}
|
||||
return cannotBeOptimized
|
||||
},
|
||||
[] as string[]
|
||||
)
|
||||
|
||||
if (config.ensureOptimizations && !isEmpty(unOptimizedModes)) {
|
||||
throw Error(
|
||||
`Lexer Modes: < ${unOptimizedModes.join(
|
||||
", "
|
||||
)} > cannot be optimized.\n` +
|
||||
'\t Disable the "ensureOptimizations" lexer config flag to silently ignore this and run the lexer in an un-optimized mode.\n' +
|
||||
"\t Or inspect the console log for details on how to resolve these issues."
|
||||
)
|
||||
}
|
||||
})
|
||||
|
||||
this.TRACE_INIT("clearRegExpParserCache", () => {
|
||||
clearRegExpParserCache()
|
||||
})
|
||||
|
||||
this.TRACE_INIT("toFastProperties", () => {
|
||||
toFastProperties(this)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
public tokenize(
|
||||
text: string,
|
||||
initialMode: string = this.defaultMode
|
||||
): ILexingResult {
|
||||
if (!isEmpty(this.lexerDefinitionErrors)) {
|
||||
const allErrMessages = map(this.lexerDefinitionErrors, (error) => {
|
||||
return error.message
|
||||
})
|
||||
const allErrMessagesString = allErrMessages.join(
|
||||
"-----------------------\n"
|
||||
)
|
||||
throw new Error(
|
||||
"Unable to Tokenize because Errors detected in definition of Lexer:\n" +
|
||||
allErrMessagesString
|
||||
)
|
||||
}
|
||||
|
||||
return this.tokenizeInternal(text, initialMode)
|
||||
}
|
||||
|
||||
// There is quite a bit of duplication between this and "tokenizeInternalLazy"
|
||||
// This is intentional due to performance considerations.
|
||||
// this method also used quite a bit of `!` none null assertions because it is too optimized
|
||||
// for `tsc` to always understand it is "safe"
|
||||
private tokenizeInternal(text: string, initialMode: string): ILexingResult {
|
||||
let i,
|
||||
j,
|
||||
k,
|
||||
matchAltImage,
|
||||
longerAlt,
|
||||
matchedImage: string | null,
|
||||
payload,
|
||||
altPayload,
|
||||
imageLength,
|
||||
group,
|
||||
tokType,
|
||||
newToken: IToken,
|
||||
errLength,
|
||||
droppedChar,
|
||||
msg,
|
||||
match
|
||||
const orgText = text
|
||||
const orgLength = orgText.length
|
||||
let offset = 0
|
||||
let matchedTokensIndex = 0
|
||||
// initializing the tokensArray to the "guessed" size.
|
||||
// guessing too little will still reduce the number of array re-sizes on pushes.
|
||||
// guessing too large (Tested by guessing x4 too large) may cost a bit more of memory
|
||||
// but would still have a faster runtime by avoiding (All but one) array resizing.
|
||||
const guessedNumberOfTokens = this.hasCustom
|
||||
? 0 // will break custom token pattern APIs the matchedTokens array will contain undefined elements.
|
||||
: Math.floor(text.length / 10)
|
||||
const matchedTokens = new Array(guessedNumberOfTokens)
|
||||
const errors: ILexingError[] = []
|
||||
let line = this.trackStartLines ? 1 : undefined
|
||||
let column = this.trackStartLines ? 1 : undefined
|
||||
const groups: any = cloneEmptyGroups(this.emptyGroups)
|
||||
const trackLines = this.trackStartLines
|
||||
const lineTerminatorPattern = this.config.lineTerminatorsPattern
|
||||
|
||||
let currModePatternsLength = 0
|
||||
let patternIdxToConfig: IPatternConfig[] = []
|
||||
let currCharCodeToPatternIdxToConfig: {
|
||||
[charCode: number]: IPatternConfig[]
|
||||
} = []
|
||||
|
||||
const modeStack: string[] = []
|
||||
|
||||
const emptyArray: IPatternConfig[] = []
|
||||
Object.freeze(emptyArray)
|
||||
let getPossiblePatterns!: (charCode: number) => IPatternConfig[]
|
||||
|
||||
function getPossiblePatternsSlow() {
|
||||
return patternIdxToConfig
|
||||
}
|
||||
|
||||
function getPossiblePatternsOptimized(charCode: number): IPatternConfig[] {
|
||||
const optimizedCharIdx = charCodeToOptimizedIndex(charCode)
|
||||
const possiblePatterns =
|
||||
currCharCodeToPatternIdxToConfig[optimizedCharIdx]
|
||||
if (possiblePatterns === undefined) {
|
||||
return emptyArray
|
||||
} else {
|
||||
return possiblePatterns
|
||||
}
|
||||
}
|
||||
|
||||
const pop_mode = (popToken: IToken) => {
|
||||
// TODO: perhaps avoid this error in the edge case there is no more input?
|
||||
if (
|
||||
modeStack.length === 1 &&
|
||||
// if we have both a POP_MODE and a PUSH_MODE this is in-fact a "transition"
|
||||
// So no error should occur.
|
||||
popToken.tokenType.PUSH_MODE === undefined
|
||||
) {
|
||||
// if we try to pop the last mode there lexer will no longer have ANY mode.
|
||||
// thus the pop is ignored, an error will be created and the lexer will continue parsing in the previous mode.
|
||||
const msg =
|
||||
this.config.errorMessageProvider.buildUnableToPopLexerModeMessage(
|
||||
popToken
|
||||
)
|
||||
|
||||
errors.push({
|
||||
offset: popToken.startOffset,
|
||||
line: popToken.startLine,
|
||||
column: popToken.startColumn,
|
||||
length: popToken.image.length,
|
||||
message: msg
|
||||
})
|
||||
} else {
|
||||
modeStack.pop()
|
||||
const newMode = last(modeStack)!
|
||||
patternIdxToConfig = this.patternIdxToConfig[newMode]
|
||||
currCharCodeToPatternIdxToConfig =
|
||||
this.charCodeToPatternIdxToConfig[newMode]
|
||||
currModePatternsLength = patternIdxToConfig.length
|
||||
const modeCanBeOptimized =
|
||||
this.canModeBeOptimized[newMode] && this.config.safeMode === false
|
||||
|
||||
if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
|
||||
getPossiblePatterns = getPossiblePatternsOptimized
|
||||
} else {
|
||||
getPossiblePatterns = getPossiblePatternsSlow
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function push_mode(this: Lexer, newMode: string) {
|
||||
modeStack.push(newMode)
|
||||
currCharCodeToPatternIdxToConfig =
|
||||
this.charCodeToPatternIdxToConfig[newMode]
|
||||
|
||||
patternIdxToConfig = this.patternIdxToConfig[newMode]
|
||||
currModePatternsLength = patternIdxToConfig.length
|
||||
|
||||
currModePatternsLength = patternIdxToConfig.length
|
||||
const modeCanBeOptimized =
|
||||
this.canModeBeOptimized[newMode] && this.config.safeMode === false
|
||||
|
||||
if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
|
||||
getPossiblePatterns = getPossiblePatternsOptimized
|
||||
} else {
|
||||
getPossiblePatterns = getPossiblePatternsSlow
|
||||
}
|
||||
}
|
||||
|
||||
// this pattern seems to avoid a V8 de-optimization, although that de-optimization does not
|
||||
// seem to matter performance wise.
|
||||
push_mode.call(this, initialMode)
|
||||
|
||||
let currConfig!: IPatternConfig
|
||||
|
||||
const recoveryEnabled = this.config.recoveryEnabled
|
||||
|
||||
while (offset < orgLength) {
|
||||
matchedImage = null
|
||||
|
||||
const nextCharCode = orgText.charCodeAt(offset)
|
||||
const chosenPatternIdxToConfig = getPossiblePatterns(nextCharCode)
|
||||
const chosenPatternsLength = chosenPatternIdxToConfig.length
|
||||
|
||||
for (i = 0; i < chosenPatternsLength; i++) {
|
||||
currConfig = chosenPatternIdxToConfig[i]
|
||||
const currPattern = currConfig.pattern
|
||||
payload = null
|
||||
|
||||
// manually in-lined because > 600 chars won't be in-lined in V8
|
||||
const singleCharCode = currConfig.short
|
||||
if (singleCharCode !== false) {
|
||||
if (nextCharCode === singleCharCode) {
|
||||
// single character string
|
||||
matchedImage = currPattern as string
|
||||
}
|
||||
} else if (currConfig.isCustom === true) {
|
||||
match = (currPattern as IRegExpExec).exec(
|
||||
orgText,
|
||||
offset,
|
||||
matchedTokens,
|
||||
groups
|
||||
)
|
||||
if (match !== null) {
|
||||
matchedImage = match[0]
|
||||
if ((match as CustomPatternMatcherReturn).payload !== undefined) {
|
||||
payload = (match as CustomPatternMatcherReturn).payload
|
||||
}
|
||||
} else {
|
||||
matchedImage = null
|
||||
}
|
||||
} else {
|
||||
this.updateLastIndex(currPattern as RegExp, offset)
|
||||
matchedImage = this.match(currPattern as RegExp, text, offset)
|
||||
}
|
||||
|
||||
if (matchedImage !== null) {
|
||||
// even though this pattern matched we must try a another longer alternative.
|
||||
// this can be used to prioritize keywords over identifiers
|
||||
longerAlt = currConfig.longerAlt
|
||||
if (longerAlt !== undefined) {
|
||||
// TODO: micro optimize, avoid extra prop access
|
||||
// by saving/linking longerAlt on the original config?
|
||||
const longerAltLength = longerAlt.length
|
||||
for (k = 0; k < longerAltLength; k++) {
|
||||
const longerAltConfig = patternIdxToConfig[longerAlt[k]]
|
||||
const longerAltPattern = longerAltConfig.pattern
|
||||
altPayload = null
|
||||
|
||||
// single Char can never be a longer alt so no need to test it.
|
||||
// manually in-lined because > 600 chars won't be in-lined in V8
|
||||
if (longerAltConfig.isCustom === true) {
|
||||
match = (longerAltPattern as IRegExpExec).exec(
|
||||
orgText,
|
||||
offset,
|
||||
matchedTokens,
|
||||
groups
|
||||
)
|
||||
if (match !== null) {
|
||||
matchAltImage = match[0]
|
||||
if (
|
||||
(match as CustomPatternMatcherReturn).payload !== undefined
|
||||
) {
|
||||
altPayload = (match as CustomPatternMatcherReturn).payload
|
||||
}
|
||||
} else {
|
||||
matchAltImage = null
|
||||
}
|
||||
} else {
|
||||
this.updateLastIndex(longerAltPattern as RegExp, offset)
|
||||
matchAltImage = this.match(
|
||||
longerAltPattern as RegExp,
|
||||
text,
|
||||
offset
|
||||
)
|
||||
}
|
||||
|
||||
if (matchAltImage && matchAltImage.length > matchedImage.length) {
|
||||
matchedImage = matchAltImage
|
||||
payload = altPayload
|
||||
currConfig = longerAltConfig
|
||||
// Exit the loop early after matching one of the longer alternatives
|
||||
// The first matched alternative takes precedence
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// successful match
|
||||
if (matchedImage !== null) {
|
||||
imageLength = matchedImage.length
|
||||
group = currConfig.group
|
||||
if (group !== undefined) {
|
||||
tokType = currConfig.tokenTypeIdx
|
||||
// TODO: "offset + imageLength" and the new column may be computed twice in case of "full" location information inside
|
||||
// createFullToken method
|
||||
newToken = this.createTokenInstance(
|
||||
matchedImage,
|
||||
offset,
|
||||
tokType,
|
||||
currConfig.tokenType,
|
||||
line,
|
||||
column,
|
||||
imageLength
|
||||
)
|
||||
|
||||
this.handlePayload(newToken, payload)
|
||||
|
||||
// TODO: optimize NOOP in case there are no special groups?
|
||||
if (group === false) {
|
||||
matchedTokensIndex = this.addToken(
|
||||
matchedTokens,
|
||||
matchedTokensIndex,
|
||||
newToken
|
||||
)
|
||||
} else {
|
||||
groups[group].push(newToken)
|
||||
}
|
||||
}
|
||||
text = this.chopInput(text, imageLength)
|
||||
offset = offset + imageLength
|
||||
|
||||
// TODO: with newlines the column may be assigned twice
|
||||
column = this.computeNewColumn(column!, imageLength)
|
||||
|
||||
if (trackLines === true && currConfig.canLineTerminator === true) {
|
||||
let numOfLTsInMatch = 0
|
||||
let foundTerminator
|
||||
let lastLTEndOffset: number
|
||||
lineTerminatorPattern.lastIndex = 0
|
||||
do {
|
||||
foundTerminator = lineTerminatorPattern.test(matchedImage)
|
||||
if (foundTerminator === true) {
|
||||
lastLTEndOffset = lineTerminatorPattern.lastIndex - 1
|
||||
numOfLTsInMatch++
|
||||
}
|
||||
} while (foundTerminator === true)
|
||||
|
||||
if (numOfLTsInMatch !== 0) {
|
||||
line = line! + numOfLTsInMatch
|
||||
column = imageLength - lastLTEndOffset!
|
||||
this.updateTokenEndLineColumnLocation(
|
||||
newToken!,
|
||||
group!,
|
||||
lastLTEndOffset!,
|
||||
numOfLTsInMatch,
|
||||
line,
|
||||
column,
|
||||
imageLength
|
||||
)
|
||||
}
|
||||
}
|
||||
// will be NOOP if no modes present
|
||||
this.handleModes(currConfig, pop_mode, push_mode, newToken!)
|
||||
} else {
|
||||
// error recovery, drop characters until we identify a valid token's start point
|
||||
const errorStartOffset = offset
|
||||
const errorLine = line
|
||||
const errorColumn = column
|
||||
let foundResyncPoint = recoveryEnabled === false
|
||||
|
||||
while (foundResyncPoint === false && offset < orgLength) {
|
||||
// Identity Func (when sticky flag is enabled)
|
||||
text = this.chopInput(text, 1)
|
||||
offset++
|
||||
for (j = 0; j < currModePatternsLength; j++) {
|
||||
const currConfig = patternIdxToConfig[j]
|
||||
const currPattern = currConfig.pattern
|
||||
|
||||
// manually in-lined because > 600 chars won't be in-lined in V8
|
||||
const singleCharCode = currConfig.short
|
||||
if (singleCharCode !== false) {
|
||||
if (orgText.charCodeAt(offset) === singleCharCode) {
|
||||
// single character string
|
||||
foundResyncPoint = true
|
||||
}
|
||||
} else if (currConfig.isCustom === true) {
|
||||
foundResyncPoint =
|
||||
(currPattern as IRegExpExec).exec(
|
||||
orgText,
|
||||
offset,
|
||||
matchedTokens,
|
||||
groups
|
||||
) !== null
|
||||
} else {
|
||||
this.updateLastIndex(currPattern as RegExp, offset)
|
||||
foundResyncPoint = (currPattern as RegExp).exec(text) !== null
|
||||
}
|
||||
|
||||
if (foundResyncPoint === true) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
errLength = offset - errorStartOffset
|
||||
// at this point we either re-synced or reached the end of the input text
|
||||
msg = this.config.errorMessageProvider.buildUnexpectedCharactersMessage(
|
||||
orgText,
|
||||
errorStartOffset,
|
||||
errLength,
|
||||
errorLine,
|
||||
errorColumn
|
||||
)
|
||||
errors.push({
|
||||
offset: errorStartOffset,
|
||||
line: errorLine,
|
||||
column: errorColumn,
|
||||
length: errLength,
|
||||
message: msg
|
||||
})
|
||||
|
||||
if (recoveryEnabled === false) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if we do have custom patterns which push directly into the
|
||||
// TODO: custom tokens should not push directly??
|
||||
if (!this.hasCustom) {
|
||||
// if we guessed a too large size for the tokens array this will shrink it to the right size.
|
||||
matchedTokens.length = matchedTokensIndex
|
||||
}
|
||||
|
||||
return {
|
||||
tokens: matchedTokens,
|
||||
groups: groups,
|
||||
errors: errors
|
||||
}
|
||||
}
|
||||
|
||||
private handleModes(
|
||||
config: IPatternConfig,
|
||||
pop_mode: (tok: IToken) => void,
|
||||
push_mode: (this: Lexer, pushMode: string) => void,
|
||||
newToken: IToken
|
||||
) {
|
||||
if (config.pop === true) {
|
||||
// need to save the PUSH_MODE property as if the mode is popped
|
||||
// patternIdxToPopMode is updated to reflect the new mode after popping the stack
|
||||
const pushMode = config.push
|
||||
pop_mode(newToken)
|
||||
if (pushMode !== undefined) {
|
||||
push_mode.call(this, pushMode)
|
||||
}
|
||||
} else if (config.push !== undefined) {
|
||||
push_mode.call(this, config.push)
|
||||
}
|
||||
}
|
||||
|
||||
private chopInput(text: string, length: number): string {
|
||||
return text.substring(length)
|
||||
}
|
||||
|
||||
private updateLastIndex(regExp: RegExp, newLastIndex: number): void {
|
||||
regExp.lastIndex = newLastIndex
|
||||
}
|
||||
|
||||
// TODO: decrease this under 600 characters? inspect stripping comments option in TSC compiler
|
||||
private updateTokenEndLineColumnLocation(
|
||||
newToken: IToken,
|
||||
group: string | false,
|
||||
lastLTIdx: number,
|
||||
numOfLTsInMatch: number,
|
||||
line: number,
|
||||
column: number,
|
||||
imageLength: number
|
||||
): void {
|
||||
let lastCharIsLT, fixForEndingInLT
|
||||
if (group !== undefined) {
|
||||
// a none skipped multi line Token, need to update endLine/endColumn
|
||||
lastCharIsLT = lastLTIdx === imageLength - 1
|
||||
fixForEndingInLT = lastCharIsLT ? -1 : 0
|
||||
if (!(numOfLTsInMatch === 1 && lastCharIsLT === true)) {
|
||||
// if a token ends in a LT that last LT only affects the line numbering of following Tokens
|
||||
newToken.endLine = line + fixForEndingInLT
|
||||
// the last LT in a token does not affect the endColumn either as the [columnStart ... columnEnd)
|
||||
// inclusive to exclusive range.
|
||||
newToken.endColumn = column - 1 + -fixForEndingInLT
|
||||
}
|
||||
// else single LT in the last character of a token, no need to modify the endLine/EndColumn
|
||||
}
|
||||
}
|
||||
|
||||
private computeNewColumn(oldColumn: number, imageLength: number) {
|
||||
return oldColumn + imageLength
|
||||
}
|
||||
|
||||
// Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
|
||||
/* istanbul ignore next - place holder */
|
||||
private createTokenInstance!: (...args: any[]) => IToken
|
||||
|
||||
private createOffsetOnlyToken(
|
||||
image: string,
|
||||
startOffset: number,
|
||||
tokenTypeIdx: number,
|
||||
tokenType: TokenType
|
||||
) {
|
||||
return {
|
||||
image,
|
||||
startOffset,
|
||||
tokenTypeIdx,
|
||||
tokenType
|
||||
}
|
||||
}
|
||||
|
||||
private createStartOnlyToken(
|
||||
image: string,
|
||||
startOffset: number,
|
||||
tokenTypeIdx: number,
|
||||
tokenType: TokenType,
|
||||
startLine: number,
|
||||
startColumn: number
|
||||
) {
|
||||
return {
|
||||
image,
|
||||
startOffset,
|
||||
startLine,
|
||||
startColumn,
|
||||
tokenTypeIdx,
|
||||
tokenType
|
||||
}
|
||||
}
|
||||
|
||||
private createFullToken(
|
||||
image: string,
|
||||
startOffset: number,
|
||||
tokenTypeIdx: number,
|
||||
tokenType: TokenType,
|
||||
startLine: number,
|
||||
startColumn: number,
|
||||
imageLength: number
|
||||
): IToken {
|
||||
return {
|
||||
image,
|
||||
startOffset,
|
||||
endOffset: startOffset + imageLength - 1,
|
||||
startLine,
|
||||
endLine: startLine,
|
||||
startColumn,
|
||||
endColumn: startColumn + imageLength - 1,
|
||||
tokenTypeIdx,
|
||||
tokenType
|
||||
}
|
||||
}
|
||||
|
||||
// Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
|
||||
/* istanbul ignore next - place holder */
|
||||
private addToken!: (
|
||||
tokenVector: IToken[],
|
||||
index: number,
|
||||
tokenToAdd: IToken
|
||||
) => number
|
||||
|
||||
private addTokenUsingPush(
|
||||
tokenVector: IToken[],
|
||||
index: number,
|
||||
tokenToAdd: IToken
|
||||
): number {
|
||||
tokenVector.push(tokenToAdd)
|
||||
return index
|
||||
}
|
||||
|
||||
private addTokenUsingMemberAccess(
|
||||
tokenVector: IToken[],
|
||||
index: number,
|
||||
tokenToAdd: IToken
|
||||
): number {
|
||||
tokenVector[index] = tokenToAdd
|
||||
index++
|
||||
return index
|
||||
}
|
||||
|
||||
// Place holder, will be replaced by the correct variant according to the hasCustom flag option at runtime.
|
||||
private handlePayload: (token: IToken, payload: any) => void
|
||||
|
||||
private handlePayloadNoCustom(token: IToken, payload: any): void {}
|
||||
|
||||
private handlePayloadWithCustom(token: IToken, payload: any): void {
|
||||
if (payload !== null) {
|
||||
token.payload = payload
|
||||
}
|
||||
}
|
||||
|
||||
// place holder to be replaced with chosen alternative at runtime
|
||||
private match!: (
|
||||
pattern: RegExp,
|
||||
text: string,
|
||||
offset: number
|
||||
) => string | null
|
||||
|
||||
private matchWithTest(
|
||||
pattern: RegExp,
|
||||
text: string,
|
||||
offset: number
|
||||
): string | null {
|
||||
const found = pattern.test(text)
|
||||
if (found === true) {
|
||||
return text.substring(offset, pattern.lastIndex)
|
||||
}
|
||||
return null
|
||||
}
|
||||
|
||||
private matchWithExec(pattern: RegExp, text: string): string | null {
|
||||
const regExpArray = pattern.exec(text)
|
||||
return regExpArray !== null ? regExpArray[0] : null
|
||||
}
|
||||
|
||||
// Duplicated from the parser's perf trace trait to allow future extraction
|
||||
// of the lexer to a separate package.
|
||||
TRACE_INIT = <T>(phaseDesc: string, phaseImpl: () => T): T => {
|
||||
// No need to optimize this using NOOP pattern because
|
||||
// It is not called in a hot spot...
|
||||
if (this.traceInitPerf === true) {
|
||||
this.traceInitIndent++
|
||||
const indent = new Array(this.traceInitIndent + 1).join("\t")
|
||||
if (this.traceInitIndent < this.traceInitMaxIdent) {
|
||||
console.log(`${indent}--> <${phaseDesc}>`)
|
||||
}
|
||||
const { time, value } = timer(phaseImpl)
|
||||
/* istanbul ignore next - Difficult to reproduce specific performance behavior (>10ms) in tests */
|
||||
const traceMethod = time > 10 ? console.warn : console.log
|
||||
if (this.traceInitIndent < this.traceInitMaxIdent) {
|
||||
traceMethod(`${indent}<-- <${phaseDesc}> time: ${time}ms`)
|
||||
}
|
||||
this.traceInitIndent--
|
||||
return value
|
||||
} else {
|
||||
return phaseImpl()
|
||||
}
|
||||
}
|
||||
}
|
||||
320
_node_modules/chevrotain/src/scan/reg_exp.ts
generated
Normal file
320
_node_modules/chevrotain/src/scan/reg_exp.ts
generated
Normal file
@@ -0,0 +1,320 @@
|
||||
import {
|
||||
Alternative,
|
||||
Atom,
|
||||
BaseRegExpVisitor,
|
||||
Character,
|
||||
Disjunction,
|
||||
Group,
|
||||
Set,
|
||||
Term,
|
||||
VERSION
|
||||
} from "regexp-to-ast"
|
||||
import isArray from "lodash/isArray"
|
||||
import every from "lodash/every"
|
||||
import forEach from "lodash/forEach"
|
||||
import find from "lodash/find"
|
||||
import values from "lodash/values"
|
||||
import includes from "lodash/includes"
|
||||
import { PRINT_ERROR, PRINT_WARNING } from "@chevrotain/utils"
|
||||
import { ASTNode, getRegExpAst } from "./reg_exp_parser"
|
||||
import { charCodeToOptimizedIndex, minOptimizationVal } from "./lexer"
|
||||
|
||||
const complementErrorMessage =
|
||||
"Complement Sets are not supported for first char optimization"
|
||||
export const failedOptimizationPrefixMsg =
|
||||
'Unable to use "first char" lexer optimizations:\n'
|
||||
|
||||
export function getOptimizedStartCodesIndices(
|
||||
regExp: RegExp,
|
||||
ensureOptimizations = false
|
||||
): number[] {
|
||||
try {
|
||||
const ast = getRegExpAst(regExp)
|
||||
const firstChars = firstCharOptimizedIndices(
|
||||
ast.value,
|
||||
{},
|
||||
ast.flags.ignoreCase
|
||||
)
|
||||
return firstChars
|
||||
} catch (e) {
|
||||
/* istanbul ignore next */
|
||||
// Testing this relies on the regexp-to-ast library having a bug... */
|
||||
// TODO: only the else branch needs to be ignored, try to fix with newer prettier / tsc
|
||||
if (e.message === complementErrorMessage) {
|
||||
if (ensureOptimizations) {
|
||||
PRINT_WARNING(
|
||||
`${failedOptimizationPrefixMsg}` +
|
||||
`\tUnable to optimize: < ${regExp.toString()} >\n` +
|
||||
"\tComplement Sets cannot be automatically optimized.\n" +
|
||||
"\tThis will disable the lexer's first char optimizations.\n" +
|
||||
"\tSee: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#COMPLEMENT for details."
|
||||
)
|
||||
}
|
||||
} else {
|
||||
let msgSuffix = ""
|
||||
if (ensureOptimizations) {
|
||||
msgSuffix =
|
||||
"\n\tThis will disable the lexer's first char optimizations.\n" +
|
||||
"\tSee: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#REGEXP_PARSING for details."
|
||||
}
|
||||
PRINT_ERROR(
|
||||
`${failedOptimizationPrefixMsg}\n` +
|
||||
`\tFailed parsing: < ${regExp.toString()} >\n` +
|
||||
`\tUsing the regexp-to-ast library version: ${VERSION}\n` +
|
||||
"\tPlease open an issue at: https://github.com/bd82/regexp-to-ast/issues" +
|
||||
msgSuffix
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return []
|
||||
}
|
||||
|
||||
/**
 * Recursively collects the optimized ("bucketed") char-code indices that can
 * appear as the FIRST character of a match of `ast`.
 *
 * @param ast - regexp AST node (a Disjunction or an Alternative).
 * @param result - accumulator used as a set (optimized index -> same index),
 *   shared across the recursion and mutated in place.
 * @param ignoreCase - whether the regexp has the `i` flag; when set, both
 *   upper and lower case variants of each code are added.
 * @returns the accumulated optimized indices as an array.
 * @throws Error(complementErrorMessage) when a complement Set is encountered.
 */
export function firstCharOptimizedIndices(
  ast: ASTNode,
  result: { [charCode: number]: number },
  ignoreCase: boolean
): number[] {
  switch (ast.type) {
    case "Disjunction":
      // any alternative may start the match — union of all their start codes
      for (let i = 0; i < ast.value.length; i++) {
        firstCharOptimizedIndices(ast.value[i], result, ignoreCase)
      }
      break
    case "Alternative":
      const terms = ast.value
      for (let i = 0; i < terms.length; i++) {
        const term = terms[i]

        // skip terms that cannot effect the first char results
        // (all cases below deliberately fall through to `continue`)
        switch (term.type) {
          case "EndAnchor":
          // A group back reference cannot affect potential starting char.
          // because if a back reference is the first production than automatically
          // the group being referenced has had to come BEFORE so its codes have already been added
          case "GroupBackReference":
          // assertions do not affect potential starting codes
          case "Lookahead":
          case "NegativeLookahead":
          case "StartAnchor":
          case "WordBoundary":
          case "NonWordBoundary":
            continue
        }

        const atom = term
        switch (atom.type) {
          case "Character":
            addOptimizedIdxToResult(atom.value, result, ignoreCase)
            break
          case "Set":
            if (atom.complement === true) {
              // caught by getOptimizedStartCodesIndices (matched by message)
              throw Error(complementErrorMessage)
            }
            forEach(atom.value, (code) => {
              if (typeof code === "number") {
                addOptimizedIdxToResult(code, result, ignoreCase)
              } else {
                // range
                const range = code as any
                // cannot use the large-bucket shortcut when ignoreCase is set:
                // each code's case variants must be added individually
                if (ignoreCase === true) {
                  for (
                    let rangeCode = range.from;
                    rangeCode <= range.to;
                    rangeCode++
                  ) {
                    addOptimizedIdxToResult(rangeCode, result, ignoreCase)
                  }
                }
                // Optimization (2 orders of magnitude less work for very large ranges)
                else {
                  // handle unoptimized values
                  for (
                    let rangeCode = range.from;
                    rangeCode <= range.to && rangeCode < minOptimizationVal;
                    rangeCode++
                  ) {
                    addOptimizedIdxToResult(rangeCode, result, ignoreCase)
                  }

                  // Less common charCode where we optimize for faster init time, by using larger "buckets"
                  if (range.to >= minOptimizationVal) {
                    const minUnOptVal =
                      range.from >= minOptimizationVal
                        ? range.from
                        : minOptimizationVal
                    const maxUnOptVal = range.to
                    const minOptIdx = charCodeToOptimizedIndex(minUnOptVal)
                    const maxOptIdx = charCodeToOptimizedIndex(maxUnOptVal)

                    for (
                      let currOptIdx = minOptIdx;
                      currOptIdx <= maxOptIdx;
                      currOptIdx++
                    ) {
                      result[currOptIdx] = currOptIdx
                    }
                  }
                }
              }
            })
            break
          case "Group":
            firstCharOptimizedIndices(atom.value, result, ignoreCase)
            break
          /* istanbul ignore next */
          default:
            throw Error("Non Exhaustive Match")
        }

        // reached a mandatory production, no more **start** codes can be found on this alternative
        const isOptionalQuantifier =
          atom.quantifier !== undefined && atom.quantifier.atLeast === 0
        if (
          // A group may be optional due to empty contents /(?:)/
          // or if everything inside it is optional /((a)?)/
          (atom.type === "Group" && isWholeOptional(atom) === false) ||
          // If this term is not a group it may only be optional if it has an optional quantifier
          (atom.type !== "Group" && isOptionalQuantifier === false)
        ) {
          break
        }
      }
      break
    /* istanbul ignore next */
    default:
      throw Error("non exhaustive match!")
  }

  // console.log(Object.keys(result).length)
  return values(result)
}
|
||||
|
||||
function addOptimizedIdxToResult(
|
||||
code: number,
|
||||
result: { [charCode: number]: number },
|
||||
ignoreCase: boolean
|
||||
) {
|
||||
const optimizedCharIdx = charCodeToOptimizedIndex(code)
|
||||
result[optimizedCharIdx] = optimizedCharIdx
|
||||
|
||||
if (ignoreCase === true) {
|
||||
handleIgnoreCase(code, result)
|
||||
}
|
||||
}
|
||||
|
||||
function handleIgnoreCase(
|
||||
code: number,
|
||||
result: { [charCode: number]: number }
|
||||
) {
|
||||
const char = String.fromCharCode(code)
|
||||
const upperChar = char.toUpperCase()
|
||||
/* istanbul ignore else */
|
||||
if (upperChar !== char) {
|
||||
const optimizedCharIdx = charCodeToOptimizedIndex(upperChar.charCodeAt(0))
|
||||
result[optimizedCharIdx] = optimizedCharIdx
|
||||
} else {
|
||||
const lowerChar = char.toLowerCase()
|
||||
if (lowerChar !== char) {
|
||||
const optimizedCharIdx = charCodeToOptimizedIndex(lowerChar.charCodeAt(0))
|
||||
result[optimizedCharIdx] = optimizedCharIdx
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function findCode(setNode: Set, targetCharCodes: number[]) {
|
||||
return find(setNode.value, (codeOrRange) => {
|
||||
if (typeof codeOrRange === "number") {
|
||||
return includes(targetCharCodes, codeOrRange)
|
||||
} else {
|
||||
// range
|
||||
const range = <any>codeOrRange
|
||||
return (
|
||||
find(
|
||||
targetCharCodes,
|
||||
(targetCode) => range.from <= targetCode && targetCode <= range.to
|
||||
) !== undefined
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
function isWholeOptional(ast: any): boolean {
|
||||
const quantifier = (ast as Atom).quantifier
|
||||
if (quantifier && quantifier.atLeast === 0) {
|
||||
return true
|
||||
}
|
||||
|
||||
if (!ast.value) {
|
||||
return false
|
||||
}
|
||||
|
||||
return isArray(ast.value)
|
||||
? every(ast.value, isWholeOptional)
|
||||
: isWholeOptional(ast.value)
|
||||
}
|
||||
|
||||
class CharCodeFinder extends BaseRegExpVisitor {
|
||||
found: boolean = false
|
||||
|
||||
constructor(private targetCharCodes: number[]) {
|
||||
super()
|
||||
}
|
||||
|
||||
visitChildren(node: ASTNode) {
|
||||
// No need to keep looking...
|
||||
if (this.found === true) {
|
||||
return
|
||||
}
|
||||
|
||||
// switch lookaheads as they do not actually consume any characters thus
|
||||
// finding a charCode at lookahead context does not mean that regexp can actually contain it in a match.
|
||||
switch (node.type) {
|
||||
case "Lookahead":
|
||||
this.visitLookahead(node)
|
||||
return
|
||||
case "NegativeLookahead":
|
||||
this.visitNegativeLookahead(node)
|
||||
return
|
||||
}
|
||||
|
||||
super.visitChildren(node)
|
||||
}
|
||||
|
||||
visitCharacter(node: Character) {
|
||||
if (includes(this.targetCharCodes, node.value)) {
|
||||
this.found = true
|
||||
}
|
||||
}
|
||||
|
||||
visitSet(node: Set) {
|
||||
if (node.complement) {
|
||||
if (findCode(node, this.targetCharCodes) === undefined) {
|
||||
this.found = true
|
||||
}
|
||||
} else {
|
||||
if (findCode(node, this.targetCharCodes) !== undefined) {
|
||||
this.found = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function canMatchCharCode(
|
||||
charCodes: number[],
|
||||
pattern: RegExp | string
|
||||
) {
|
||||
if (pattern instanceof RegExp) {
|
||||
const ast = getRegExpAst(pattern)
|
||||
const charCodeFinder = new CharCodeFinder(charCodes)
|
||||
charCodeFinder.visit(ast)
|
||||
return charCodeFinder.found
|
||||
} else {
|
||||
return (
|
||||
find(<any>pattern, (char) => {
|
||||
return includes(charCodes, (<string>char).charCodeAt(0))
|
||||
}) !== undefined
|
||||
)
|
||||
}
|
||||
}
|
||||
34
_node_modules/chevrotain/src/scan/reg_exp_parser.ts
generated
Normal file
34
_node_modules/chevrotain/src/scan/reg_exp_parser.ts
generated
Normal file
@@ -0,0 +1,34 @@
|
||||
import {
|
||||
Alternative,
|
||||
Assertion,
|
||||
Atom,
|
||||
Disjunction,
|
||||
RegExpParser,
|
||||
RegExpPattern
|
||||
} from "regexp-to-ast"
|
||||
|
||||
// Cache of parsed regexp ASTs keyed by the regexp's string representation;
// populated by getRegExpAst and reset by clearRegExpParserCache.
let regExpAstCache: { [regex: string]: RegExpPattern } = {}
// Single shared parser instance used for all patterns.
const regExpParser = new RegExpParser()

// this should be moved to regexp-to-ast
export type ASTNode =
  | RegExpPattern
  | Disjunction
  | Alternative
  | Assertion
  | Atom
|
||||
export function getRegExpAst(regExp: RegExp): RegExpPattern {
|
||||
const regExpStr = regExp.toString()
|
||||
if (regExpAstCache.hasOwnProperty(regExpStr)) {
|
||||
return regExpAstCache[regExpStr]
|
||||
} else {
|
||||
const regExpAst = regExpParser.pattern(regExpStr)
|
||||
regExpAstCache[regExpStr] = regExpAst
|
||||
return regExpAst
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Resets the regexp AST cache to an empty state.
 */
export function clearRegExpParserCache() {
  regExpAstCache = {}
}
|
||||
165
_node_modules/chevrotain/src/scan/tokens.ts
generated
Normal file
165
_node_modules/chevrotain/src/scan/tokens.ts
generated
Normal file
@@ -0,0 +1,165 @@
|
||||
import isEmpty from "lodash/isEmpty"
|
||||
import compact from "lodash/compact"
|
||||
import isArray from "lodash/isArray"
|
||||
import flatten from "lodash/flatten"
|
||||
import difference from "lodash/difference"
|
||||
import map from "lodash/map"
|
||||
import forEach from "lodash/forEach"
|
||||
import has from "lodash/has"
|
||||
import includes from "lodash/includes"
|
||||
import clone from "lodash/clone"
|
||||
import { IToken, TokenType } from "@chevrotain/types"
|
||||
|
||||
export function tokenStructuredMatcher(
|
||||
tokInstance: IToken,
|
||||
tokConstructor: TokenType
|
||||
) {
|
||||
const instanceType = tokInstance.tokenTypeIdx
|
||||
if (instanceType === tokConstructor.tokenTypeIdx) {
|
||||
return true
|
||||
} else {
|
||||
return (
|
||||
tokConstructor.isParent === true &&
|
||||
tokConstructor.categoryMatchesMap![instanceType] === true
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Optimized tokenMatcher in case our grammar does not use token categories
|
||||
// Being so tiny it is much more likely to be in-lined and this avoid the function call overhead
|
||||
export function tokenStructuredMatcherNoCategories(
|
||||
token: IToken,
|
||||
tokType: TokenType
|
||||
) {
|
||||
return token.tokenTypeIdx === tokType.tokenTypeIdx
|
||||
}
|
||||
|
||||
// Next free numeric index assigned to newly seen token types
// (incremented by assignTokenDefaultProps).
export let tokenShortNameIdx = 1
// Reverse lookup: a token type's numeric index -> the TokenType object.
export const tokenIdxToClass: { [tokenIdx: number]: TokenType } = {}
|
||||
|
||||
export function augmentTokenTypes(tokenTypes: TokenType[]): void {
|
||||
// collect the parent Token Types as well.
|
||||
const tokenTypesAndParents = expandCategories(tokenTypes)
|
||||
|
||||
// add required tokenType and categoryMatches properties
|
||||
assignTokenDefaultProps(tokenTypesAndParents)
|
||||
|
||||
// fill up the categoryMatches
|
||||
assignCategoriesMapProp(tokenTypesAndParents)
|
||||
assignCategoriesTokensProp(tokenTypesAndParents)
|
||||
|
||||
forEach(tokenTypesAndParents, (tokType) => {
|
||||
tokType.isParent = tokType.categoryMatches!.length > 0
|
||||
})
|
||||
}
|
||||
|
||||
export function expandCategories(tokenTypes: TokenType[]): TokenType[] {
|
||||
let result = clone(tokenTypes)
|
||||
|
||||
let categories = tokenTypes
|
||||
let searching = true
|
||||
while (searching) {
|
||||
categories = compact(
|
||||
flatten(map(categories, (currTokType) => currTokType.CATEGORIES))
|
||||
)
|
||||
|
||||
const newCategories = difference(categories, result)
|
||||
|
||||
result = result.concat(newCategories)
|
||||
|
||||
if (isEmpty(newCategories)) {
|
||||
searching = false
|
||||
} else {
|
||||
categories = newCategories
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
export function assignTokenDefaultProps(tokenTypes: TokenType[]): void {
|
||||
forEach(tokenTypes, (currTokType) => {
|
||||
if (!hasShortKeyProperty(currTokType)) {
|
||||
tokenIdxToClass[tokenShortNameIdx] = currTokType
|
||||
;(<any>currTokType).tokenTypeIdx = tokenShortNameIdx++
|
||||
}
|
||||
|
||||
// CATEGORIES? : TokenType | TokenType[]
|
||||
if (
|
||||
hasCategoriesProperty(currTokType) &&
|
||||
!isArray(currTokType.CATEGORIES)
|
||||
// &&
|
||||
// !isUndefined(currTokType.CATEGORIES.PATTERN)
|
||||
) {
|
||||
currTokType.CATEGORIES = [currTokType.CATEGORIES as unknown as TokenType]
|
||||
}
|
||||
|
||||
if (!hasCategoriesProperty(currTokType)) {
|
||||
currTokType.CATEGORIES = []
|
||||
}
|
||||
|
||||
if (!hasExtendingTokensTypesProperty(currTokType)) {
|
||||
currTokType.categoryMatches = []
|
||||
}
|
||||
|
||||
if (!hasExtendingTokensTypesMapProperty(currTokType)) {
|
||||
currTokType.categoryMatchesMap = {}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
export function assignCategoriesTokensProp(tokenTypes: TokenType[]): void {
|
||||
forEach(tokenTypes, (currTokType) => {
|
||||
// avoid duplications
|
||||
currTokType.categoryMatches = []
|
||||
forEach(currTokType.categoryMatchesMap!, (val, key) => {
|
||||
currTokType.categoryMatches!.push(
|
||||
tokenIdxToClass[key as unknown as number].tokenTypeIdx!
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export function assignCategoriesMapProp(tokenTypes: TokenType[]): void {
|
||||
forEach(tokenTypes, (currTokType) => {
|
||||
singleAssignCategoriesToksMap([], currTokType)
|
||||
})
|
||||
}
|
||||
|
||||
export function singleAssignCategoriesToksMap(
|
||||
path: TokenType[],
|
||||
nextNode: TokenType
|
||||
): void {
|
||||
forEach(path, (pathNode) => {
|
||||
nextNode.categoryMatchesMap![pathNode.tokenTypeIdx!] = true
|
||||
})
|
||||
|
||||
forEach(nextNode.CATEGORIES, (nextCategory) => {
|
||||
const newPath = path.concat(nextNode)
|
||||
// avoids infinite loops due to cyclic categories.
|
||||
if (!includes(newPath, nextCategory)) {
|
||||
singleAssignCategoriesToksMap(newPath, nextCategory)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
export function hasShortKeyProperty(tokType: TokenType): boolean {
|
||||
return has(tokType, "tokenTypeIdx")
|
||||
}
|
||||
|
||||
export function hasCategoriesProperty(tokType: TokenType): boolean {
|
||||
return has(tokType, "CATEGORIES")
|
||||
}
|
||||
|
||||
export function hasExtendingTokensTypesProperty(tokType: TokenType): boolean {
|
||||
return has(tokType, "categoryMatches")
|
||||
}
|
||||
|
||||
export function hasExtendingTokensTypesMapProperty(
|
||||
tokType: TokenType
|
||||
): boolean {
|
||||
return has(tokType, "categoryMatchesMap")
|
||||
}
|
||||
|
||||
export function isTokenType(tokType: TokenType): boolean {
|
||||
return has(tokType, "tokenTypeIdx")
|
||||
}
|
||||
1
_node_modules/chevrotain/src/scan/tokens_constants.ts
generated
Normal file
1
_node_modules/chevrotain/src/scan/tokens_constants.ts
generated
Normal file
@@ -0,0 +1 @@
|
||||
// NOTE(review): 1 matches the initial value of tokenShortNameIdx in tokens.ts,
// presumably because EOF is the first token type ever created — confirm.
export const EOF_TOKEN_TYPE = 1
|
||||
123
_node_modules/chevrotain/src/scan/tokens_public.ts
generated
Normal file
123
_node_modules/chevrotain/src/scan/tokens_public.ts
generated
Normal file
@@ -0,0 +1,123 @@
|
||||
import isString from "lodash/isString"
|
||||
import has from "lodash/has"
|
||||
import isUndefined from "lodash/isUndefined"
|
||||
import { Lexer } from "./lexer_public"
|
||||
import { augmentTokenTypes, tokenStructuredMatcher } from "./tokens"
|
||||
import { IToken, ITokenConfig, TokenType } from "@chevrotain/types"
|
||||
|
||||
export function tokenLabel(tokType: TokenType): string {
|
||||
if (hasTokenLabel(tokType)) {
|
||||
return tokType.LABEL
|
||||
} else {
|
||||
return tokType.name
|
||||
}
|
||||
}
|
||||
|
||||
export function tokenName(tokType: TokenType): string {
|
||||
return tokType.name
|
||||
}
|
||||
|
||||
export function hasTokenLabel(
|
||||
obj: TokenType
|
||||
): obj is TokenType & Pick<Required<TokenType>, "LABEL"> {
|
||||
return isString(obj.LABEL) && obj.LABEL !== ""
|
||||
}
|
||||
|
||||
// Property names of ITokenConfig, kept as constants for the `has(config, ...)`
// checks in createTokenInternal below.
const PARENT = "parent"
const CATEGORIES = "categories"
const LABEL = "label"
const GROUP = "group"
const PUSH_MODE = "push_mode"
const POP_MODE = "pop_mode"
const LONGER_ALT = "longer_alt"
const LINE_BREAKS = "line_breaks"
const START_CHARS_HINT = "start_chars_hint"
|
||||
|
||||
/**
 * Creates a new TokenType from the given configuration
 * (thin public wrapper around createTokenInternal).
 */
export function createToken(config: ITokenConfig): TokenType {
  return createTokenInternal(config)
}
|
||||
|
||||
function createTokenInternal(config: ITokenConfig): TokenType {
|
||||
const pattern = config.pattern
|
||||
|
||||
const tokenType: TokenType = <any>{}
|
||||
tokenType.name = config.name
|
||||
|
||||
if (!isUndefined(pattern)) {
|
||||
tokenType.PATTERN = pattern
|
||||
}
|
||||
|
||||
if (has(config, PARENT)) {
|
||||
throw (
|
||||
"The parent property is no longer supported.\n" +
|
||||
"See: https://github.com/chevrotain/chevrotain/issues/564#issuecomment-349062346 for details."
|
||||
)
|
||||
}
|
||||
|
||||
if (has(config, CATEGORIES)) {
|
||||
// casting to ANY as this will be fixed inside `augmentTokenTypes``
|
||||
tokenType.CATEGORIES = <any>config[CATEGORIES]
|
||||
}
|
||||
|
||||
augmentTokenTypes([tokenType])
|
||||
|
||||
if (has(config, LABEL)) {
|
||||
tokenType.LABEL = config[LABEL]
|
||||
}
|
||||
|
||||
if (has(config, GROUP)) {
|
||||
tokenType.GROUP = config[GROUP]
|
||||
}
|
||||
|
||||
if (has(config, POP_MODE)) {
|
||||
tokenType.POP_MODE = config[POP_MODE]
|
||||
}
|
||||
|
||||
if (has(config, PUSH_MODE)) {
|
||||
tokenType.PUSH_MODE = config[PUSH_MODE]
|
||||
}
|
||||
|
||||
if (has(config, LONGER_ALT)) {
|
||||
tokenType.LONGER_ALT = config[LONGER_ALT]
|
||||
}
|
||||
|
||||
if (has(config, LINE_BREAKS)) {
|
||||
tokenType.LINE_BREAKS = config[LINE_BREAKS]
|
||||
}
|
||||
|
||||
if (has(config, START_CHARS_HINT)) {
|
||||
tokenType.START_CHARS_HINT = config[START_CHARS_HINT]
|
||||
}
|
||||
|
||||
return tokenType
|
||||
}
|
||||
|
||||
// The built-in token type signaling the end of the token stream.
// NOTE(review): pattern Lexer.NA presumably marks it as never matched from the
// input text — confirm against the Lexer docs.
export const EOF = createToken({ name: "EOF", pattern: Lexer.NA })
augmentTokenTypes([EOF])
|
||||
|
||||
export function createTokenInstance(
|
||||
tokType: TokenType,
|
||||
image: string,
|
||||
startOffset: number,
|
||||
endOffset: number,
|
||||
startLine: number,
|
||||
endLine: number,
|
||||
startColumn: number,
|
||||
endColumn: number
|
||||
): IToken {
|
||||
return {
|
||||
image,
|
||||
startOffset,
|
||||
endOffset,
|
||||
startLine,
|
||||
endLine,
|
||||
startColumn,
|
||||
endColumn,
|
||||
tokenTypeIdx: (<any>tokType).tokenTypeIdx,
|
||||
tokenType: tokType
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Public convenience API: whether `token` matches `tokType`, including
 * category-based matches (delegates to `tokenStructuredMatcher`).
 */
export function tokenMatcher(token: IToken, tokType: TokenType): boolean {
  return tokenStructuredMatcher(token, tokType)
}
|
||||
46
_node_modules/chevrotain/src/text/range.ts
generated
Normal file
46
_node_modules/chevrotain/src/text/range.ts
generated
Normal file
@@ -0,0 +1,46 @@
|
||||
// A closed numeric interval [start, end] (see the Range implementation below).
export interface IRange {
  start: number
  end: number

  // true when `num` lies inside [start, end] (inclusive on both ends)
  contains(num: number): boolean

  // true when `other` lies fully inside this range (boundaries may touch)
  containsRange(other: IRange): boolean

  isContainedInRange(other: IRange): boolean

  // like containsRange, but the boundaries must NOT touch
  strictlyContainsRange(other: IRange): boolean

  isStrictlyContainedInRange(other: IRange): boolean
}
|
||||
|
||||
export class Range implements IRange {
|
||||
constructor(public start: number, public end: number) {
|
||||
if (!isValidRange(start, end)) {
|
||||
throw new Error("INVALID RANGE")
|
||||
}
|
||||
}
|
||||
|
||||
contains(num: number): boolean {
|
||||
return this.start <= num && this.end >= num
|
||||
}
|
||||
|
||||
containsRange(other: IRange): boolean {
|
||||
return this.start <= other.start && this.end >= other.end
|
||||
}
|
||||
|
||||
isContainedInRange(other: IRange): boolean {
|
||||
return other.containsRange(this)
|
||||
}
|
||||
|
||||
strictlyContainsRange(other: IRange): boolean {
|
||||
return this.start < other.start && this.end > other.end
|
||||
}
|
||||
|
||||
isStrictlyContainedInRange(other: IRange): boolean {
|
||||
return other.strictlyContainsRange(this)
|
||||
}
|
||||
}
|
||||
|
||||
export function isValidRange(start: number, end: number): boolean {
|
||||
return !(start < 0 || end < start)
|
||||
}
|
||||
4
_node_modules/chevrotain/src/version.ts
generated
Normal file
4
_node_modules/chevrotain/src/version.ts
generated
Normal file
@@ -0,0 +1,4 @@
|
||||
// needs a separate module as this is required inside chevrotain productive code
// and also in the entry point for webpack(api.ts).
// A separate file avoids cyclic dependencies and webpack errors.
// NOTE(review): presumably mirrors the package.json "version" field — keep in
// sync when bumping.
export const VERSION = "10.5.0"
|
||||
Reference in New Issue
Block a user