This commit is contained in:
440
node_modules/micromark/lib/util/create-tokenizer.js
generated
vendored
Normal file
440
node_modules/micromark/lib/util/create-tokenizer.js
generated
vendored
Normal file
@@ -0,0 +1,440 @@
|
||||
'use strict'

// Node core assertions (used for internal sanity checks throughout).
var assert = require('assert')
// Namespaced debug logging (third-party `debug` package).
var createDebug = require('debug')
// Internal micromark helpers (vendored alongside this file).
var assign = require('../constant/assign.js')
var codes = require('../character/codes.js')
var markdownLineEnding = require('../character/markdown-line-ending.js')
var chunkedPush = require('./chunked-push.js')
var chunkedSplice = require('./chunked-splice.js')
var miniflat = require('./miniflat.js')
var resolveAll = require('./resolve-all.js')
var serializeChunks = require('./serialize-chunks.js')
var shallow = require('./shallow.js')
var sliceChunks = require('./slice-chunks.js')
|
||||
// Wrap a CommonJS export so it is always reachable through a `.default`
// property (the shape bundlers use for ES-module default exports).
// Values that are already objects carrying a `default` key pass through
// unchanged; everything else is boxed.
function _interopDefaultLegacy(e) {
  if (e && typeof e === 'object' && 'default' in e) {
    return e
  }

  return {default: e}
}
|
||||
|
||||
// Interop containers: the real modules live behind `['default']`.
var assert__default = /*#__PURE__*/ _interopDefaultLegacy(assert)
var createDebug__default = /*#__PURE__*/ _interopDefaultLegacy(createDebug)

// Debug logger scoped to the `micromark` namespace (from the `debug` package).
var debug = createDebug__default['default']('micromark')
|
||||
|
||||
// Create a tokenizer.
// Tokenizers deal with one type of data (e.g., containers, flow, text).
// The parser is the object dealing with it all.
// `initialize` works like other constructs, except that only its `tokenize`
// function is used, in which case it doesn’t receive an `ok` or `nok`.
// `from` can be given to set the point before the first character, although
// when further lines are indented, they must be set with `defineSkip`.
function createTokenizer(parser, initialize, from) {
  // Current place in the stream; `_index`/`_bufferIndex` (set below) track
  // the current chunk and the offset inside a string chunk.
  var point = from ? shallow(from) : {line: 1, column: 1, offset: 0}
  // Line number -> column to jump to at the start of that line (`defineSkip`).
  var columnStart = {}
  // Constructs with a `resolveAll` handler, each run once at the very end.
  var resolveAllConstructs = []
  // Chunks (strings or numerical character codes) written so far.
  var chunks = []
  // Stack of currently open tokens.
  var stack = []
  // Whether the last code handed to the state machine was consumed.
  var consumed = true

  // Tools used for tokenizing.
  var effects = {
    consume: consume,
    enter: enter,
    exit: exit,
    attempt: constructFactory(onsuccessfulconstruct),
    check: constructFactory(onsuccessfulcheck),
    interrupt: constructFactory(onsuccessfulcheck, {interrupt: true}),
    lazy: constructFactory(onsuccessfulcheck, {lazy: true})
  }

  // State and tools for resolving and serializing.
  var context = {
    previous: codes.eof,
    events: [],
    parser: parser,
    sliceStream: sliceStream,
    sliceSerialize: sliceSerialize,
    now: now,
    defineSkip: skip,
    write: write
  }

  // The state function.
  var state = initialize.tokenize.call(context, effects)

  // Track which character we expect to be consumed, to catch bugs.
  var expectedCode

  if (initialize.resolveAll) {
    resolveAllConstructs.push(initialize)
  }

  // Store where we are in the input stream.
  point._index = 0
  point._bufferIndex = -1

  return context

  // Feed chunks into the tokenizer and run the state machine over them.
  // Returns the events once the last chunk is the EOF code, and an empty
  // array while more input is still expected.
  function write(slice) {
    chunks = chunkedPush(chunks, slice)

    main()

    // Exit if we’re not done, resolve might change stuff.
    if (chunks[chunks.length - 1] !== codes.eof) {
      return []
    }

    addResult(initialize, 0)

    // Otherwise, resolve, and exit.
    context.events = resolveAll(resolveAllConstructs, context.events, context)

    return context.events
  }

  //
  // Tools.
  //

  // Get the string value of a token.
  function sliceSerialize(token) {
    return serializeChunks(sliceStream(token))
  }

  // Get the chunks spanned by a token.
  function sliceStream(token) {
    return sliceChunks(chunks, token)
  }

  // Get a copy of the current point in the stream.
  function now() {
    return shallow(point)
  }

  // Define that the line of `value` starts at its column (used when lines
  // are indented, e.g., in containers).
  function skip(value) {
    columnStart[value.line] = value.column
    accountForPotentialSkip()
    debug('position: define skip: `%j`', point)
  }

  //
  // State management.
  //

  // Main loop (note that `_index` and `_bufferIndex` in `point` are modified by
  // `consume`).
  // Here is where we walk through the chunks, which either include strings of
  // several characters, or numerical character codes.
  // The reason to do this in a loop instead of a call is so the stack can
  // drain.
  function main() {
    var chunkIndex
    var chunk

    while (point._index < chunks.length) {
      chunk = chunks[point._index]

      // If we’re in a buffer chunk, loop through it.
      if (typeof chunk === 'string') {
        chunkIndex = point._index

        if (point._bufferIndex < 0) {
          point._bufferIndex = 0
        }

        // `consume` advances `_bufferIndex` (and possibly `_index`), so this
        // loop naturally steps through the string one character at a time.
        while (
          point._index === chunkIndex &&
          point._bufferIndex < chunk.length
        ) {
          go(chunk.charCodeAt(point._bufferIndex))
        }
      } else {
        go(chunk)
      }
    }
  }

  // Deal with one code: pass it to the current state function and advance
  // the state machine to whatever state it returns.
  function go(code) {
    assert__default['default'].equal(
      consumed,
      true,
      'expected character to be consumed'
    )
    consumed = undefined
    debug('main: passing `%s` to %s', code, state.name)
    expectedCode = code
    state = state(code)
  }

  // Move a character forward: update the position info and the chunk
  // pointers, and mark the expected code as consumed.
  function consume(code) {
    assert__default['default'].equal(
      code,
      expectedCode,
      'expected given code to equal expected code'
    )

    debug('consume: `%s`', code)

    assert__default['default'].equal(
      consumed,
      undefined,
      'expected code to not have been consumed'
    )
    assert__default['default'](
      code === null
        ? !context.events.length ||
            context.events[context.events.length - 1][0] === 'exit'
        : context.events[context.events.length - 1][0] === 'enter',
      'expected last token to be open'
    )

    if (markdownLineEnding(code)) {
      point.line++
      point.column = 1
      // CRLF is a single code but two characters.
      point.offset += code === codes.carriageReturnLineFeed ? 2 : 1
      accountForPotentialSkip()
      debug('position: after eol: `%j`', point)
    } else if (code !== codes.virtualSpace) {
      // Virtual spaces don’t advance the position.
      point.column++
      point.offset++
    }

    // Not in a string chunk.
    if (point._bufferIndex < 0) {
      point._index++
    } else {
      point._bufferIndex++

      // At end of string chunk.
      if (point._bufferIndex === chunks[point._index].length) {
        point._bufferIndex = -1
        point._index++
      }
    }

    // Expose the previous character.
    context.previous = code

    // Mark as consumed.
    consumed = true
  }

  // Start a token.
  function enter(type, fields) {
    var token = fields || {}
    token.type = type
    token.start = now()

    assert__default['default'].equal(
      typeof type,
      'string',
      'expected string type'
    )
    assert__default['default'].notEqual(
      type.length,
      0,
      'expected non-empty string'
    )
    debug('enter: `%s`', type)

    context.events.push(['enter', token, context])

    stack.push(token)

    return token
  }

  // Stop a token.
  function exit(type) {
    assert__default['default'].equal(
      typeof type,
      'string',
      'expected string type'
    )
    assert__default['default'].notEqual(
      type.length,
      0,
      'expected non-empty string'
    )
    assert__default['default'].notEqual(
      stack.length,
      0,
      'cannot close w/o open tokens'
    )

    var token = stack.pop()
    token.end = now()

    assert__default['default'].equal(
      type,
      token.type,
      'expected exit token to match current token'
    )

    assert__default['default'](
      !(
        token.start._index === token.end._index &&
        token.start._bufferIndex === token.end._bufferIndex
      ),
      'expected non-empty token (`' + type + '`)'
    )

    debug('exit: `%s`', token.type)
    context.events.push(['exit', token, context])

    return token
  }

  // Use results.
  function onsuccessfulconstruct(construct, info) {
    addResult(construct, info.from)
  }

  // Discard results.
  function onsuccessfulcheck(construct, info) {
    info.restore()
  }

  // Factory to attempt/check/interrupt.
  function constructFactory(onreturn, fields) {
    return hook

    // Handle either an object mapping codes to constructs, a list of
    // constructs, or a single construct.
    function hook(constructs, returnState, bogusState) {
      var listOfConstructs
      var constructIndex
      var currentConstruct
      var info

      return constructs.tokenize || 'length' in constructs
        ? handleListOfConstructs(miniflat(constructs))
        : handleMapOfConstructs

      // Pick the constructs registered for `code` (plus the catch-all
      // `null` entries, if any) and try them; otherwise go to the bogus
      // state directly.
      function handleMapOfConstructs(code) {
        if (code in constructs || codes.eof in constructs) {
          return handleListOfConstructs(
            constructs.null
              ? /* c8 ignore next */
                miniflat(constructs[code]).concat(miniflat(constructs.null))
              : constructs[code]
          )(code)
        }

        return bogusState(code)
      }

      // Try the constructs in `list` one after another (see `nok`).
      function handleListOfConstructs(list) {
        listOfConstructs = list
        constructIndex = 0
        return handleConstruct(list[constructIndex])
      }

      // Try a single construct.
      function handleConstruct(construct) {
        return start

        function start(code) {
          // To do: no need to store if there is no bogus state, probably?
          // Currently doesn’t work because `inspect` in document does a check
          // w/o a bogus, which doesn’t make sense. But it does seem to help perf
          // by not storing.
          info = store()
          currentConstruct = construct

          if (!construct.partial) {
            context.currentConstruct = construct
          }

          if (
            construct.name &&
            context.parser.constructs.disable.null.indexOf(construct.name) > -1
          ) {
            return nok(code)
          }

          return construct.tokenize.call(
            fields ? assign({}, context, fields) : context,
            effects,
            ok,
            nok
          )(code)
        }
      }

      // The construct succeeded: hand its results to `onreturn` and continue.
      function ok(code) {
        assert__default['default'].equal(code, expectedCode, 'expected code')
        consumed = true
        onreturn(currentConstruct, info)
        return returnState
      }

      // The construct failed: roll back, then try the next construct or
      // fall through to the bogus state.
      function nok(code) {
        assert__default['default'].equal(code, expectedCode, 'expected code')
        consumed = true
        info.restore()

        if (++constructIndex < listOfConstructs.length) {
          return handleConstruct(listOfConstructs[constructIndex])
        }

        return bogusState
      }
    }
  }

  // Register a construct’s resolvers and run `resolve`/`resolveTo` over the
  // events it created (from index `from` onwards).
  function addResult(construct, from) {
    if (construct.resolveAll && resolveAllConstructs.indexOf(construct) < 0) {
      resolveAllConstructs.push(construct)
    }

    if (construct.resolve) {
      chunkedSplice(
        context.events,
        from,
        context.events.length - from,
        construct.resolve(context.events.slice(from), context)
      )
    }

    if (construct.resolveTo) {
      context.events = construct.resolveTo(context.events, context)
    }

    assert__default['default'](
      construct.partial ||
        !context.events.length ||
        context.events[context.events.length - 1][0] === 'exit',
      'expected last token to end'
    )
  }

  // Snapshot the mutable tokenizer state, returning a `restore` function
  // that undoes everything done after this point (used by attempt/check).
  function store() {
    var startPoint = now()
    var startPrevious = context.previous
    var startCurrentConstruct = context.currentConstruct
    var startEventsIndex = context.events.length
    var startStack = Array.from(stack)

    return {restore: restore, from: startEventsIndex}

    function restore() {
      point = startPoint
      context.previous = startPrevious
      context.currentConstruct = startCurrentConstruct
      context.events.length = startEventsIndex
      stack = startStack
      accountForPotentialSkip()
      debug('position: restore: `%j`', point)
    }
  }

  // If a skip is defined for the current line and we’re at its start,
  // jump the column (and offset) to the defined column.
  function accountForPotentialSkip() {
    if (point.line in columnStart && point.column < 2) {
      point.column = columnStart[point.line]
      point.offset += columnStart[point.line] - 1
    }
  }
}
|
||||
|
||||
// Expose the tokenizer factory.
module.exports = createTokenizer
|
||||
Reference in New Issue
Block a user