Files
coopgo/node_modules/micromark/dist/util/create-tokenizer.js
sgauthier 6e64e138e2
All checks were successful
Publish To Prod / deploy_and_publish (push) Successful in 35s
planning
2024-10-14 09:15:30 +02:00

317 lines
8.4 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use strict'
var assign = require('../constant/assign.js')
var markdownLineEnding = require('../character/markdown-line-ending.js')
var chunkedPush = require('./chunked-push.js')
var chunkedSplice = require('./chunked-splice.js')
var miniflat = require('./miniflat.js')
var resolveAll = require('./resolve-all.js')
var serializeChunks = require('./serialize-chunks.js')
var shallow = require('./shallow.js')
var sliceChunks = require('./slice-chunks.js')
// Create a tokenizer.
// Tokenizers deal with one type of data (e.g., containers, flow, text).
// The parser is the object dealing with it all.
// `initialize` works like other constructs, except that only its `tokenize`
// function is used, in which case it doesn't receive an `ok` or `nok`.
// `from` can be given to set the point before the first character, although
// when further lines are indented, they must be set with `defineSkip`.
function createTokenizer(parser, initialize, from) {
  // Current place in the stream (line/column/offset); starts from `from`
  // when resuming, otherwise at the very start of the document.
  var point = from
    ? shallow(from)
    : {
        line: 1,
        column: 1,
        offset: 0
      }
  // Skips set with `defineSkip`: maps a line number to the column to jump to
  // when that line begins (used for indented continuation lines).
  var columnStart = {}
  // Constructs with a `resolveAll` handler, each registered at most once.
  var resolveAllConstructs = []
  // Chunks (strings or numeric character codes) written so far; a trailing
  // `null` chunk marks end of input.
  var chunks = []
  // Tokens currently open (entered but not yet exited).
  var stack = []
  // Side-effectful tools handed to constructs' `tokenize` functions.
  var effects = {
    consume: consume,
    enter: enter,
    exit: exit,
    attempt: constructFactory(onsuccessfulconstruct),
    check: constructFactory(onsuccessfulcheck),
    interrupt: constructFactory(onsuccessfulcheck, {
      interrupt: true
    }),
    lazy: constructFactory(onsuccessfulcheck, {
      lazy: true
    })
  } // State and tools for resolving and serializing.
  var context = {
    previous: null,
    events: [],
    parser: parser,
    sliceStream: sliceStream,
    sliceSerialize: sliceSerialize,
    now: now,
    defineSkip: skip,
    write: write
  } // The current state function of the state machine.
  var state = initialize.tokenize.call(context, effects)
  if (initialize.resolveAll) {
    resolveAllConstructs.push(initialize)
  } // Store where we are in the input stream.
  point._index = 0
  point._bufferIndex = -1
  return context

  // Feed more chunks in and run the state machine over them.
  // Returns the final events once the last chunk written was `null` (EOF);
  // before that, returns an empty array (resolving might still change things).
  function write(slice) {
    chunks = chunkedPush(chunks, slice)
    main() // Exit if we're not done, resolve might change stuff.
    if (chunks[chunks.length - 1] !== null) {
      return []
    }
    addResult(initialize, 0) // Otherwise, resolve, and exit.
    context.events = resolveAll(resolveAllConstructs, context.events, context)
    return context.events
  } //
  // Tools.
  //

  // Get the source text spanned by a token as a single string.
  function sliceSerialize(token) {
    return serializeChunks(sliceStream(token))
  }

  // Get the chunks spanned by a token.
  function sliceStream(token) {
    return sliceChunks(chunks, token)
  }

  // Get a copy of the current point in the stream.
  function now() {
    return shallow(point)
  }

  // Define a skip: when the given line is (re)entered at its start, the
  // point jumps ahead to the given column (see `accountForPotentialSkip`).
  function skip(value) {
    columnStart[value.line] = value.column
    accountForPotentialSkip()
  } //
  // State management.
  //
  // Main loop (note that `_index` and `_bufferIndex` in `point` are modified by
  // `consume`).
  // Here is where we walk through the chunks, which either include strings of
  // several characters, or numerical character codes.
  // The reason to do this in a loop instead of a call is so the stack can
  // drain.
  function main() {
    var chunkIndex
    var chunk
    while (point._index < chunks.length) {
      chunk = chunks[point._index] // If we're in a buffer chunk, loop through it.
      if (typeof chunk === 'string') {
        chunkIndex = point._index
        if (point._bufferIndex < 0) {
          point._bufferIndex = 0
        }
        // Feed one UTF-16 code unit at a time until this chunk is exhausted
        // or `consume` moved us into a different chunk.
        while (
          point._index === chunkIndex &&
          point._bufferIndex < chunk.length
        ) {
          go(chunk.charCodeAt(point._bufferIndex))
        }
      } else {
        go(chunk)
      }
    }
  } // Deal with one code: feed it to the current state, keep the next state.

  function go(code) {
    state = state(code)
  } // Move a character forward, updating the point bookkeeping.

  function consume(code) {
    if (markdownLineEnding(code)) {
      point.line++
      point.column = 1
      // `-3` is the CR+LF code, which spans two characters in the source —
      // see micromark's character-code constants.
      point.offset += code === -3 ? 2 : 1
      accountForPotentialSkip()
    } else if (code !== -1) {
      // `-1` is a virtual space (from tab expansion): no width in the source.
      point.column++
      point.offset++
    } // Not in a string chunk.
    if (point._bufferIndex < 0) {
      point._index++
    } else {
      point._bufferIndex++ // At end of string chunk.
      if (point._bufferIndex === chunks[point._index].length) {
        point._bufferIndex = -1
        point._index++
      }
    } // Expose the previous character.
    context.previous = code // Mark as consumed.
  } // Start a token: push it on the stack and record an `enter` event.

  function enter(type, fields) {
    var token = fields || {}
    token.type = type
    token.start = now()
    context.events.push(['enter', token, context])
    stack.push(token)
    return token
  } // Stop a token: pop it off the stack and record an `exit` event.

  function exit(type) {
    var token = stack.pop()
    token.end = now()
    context.events.push(['exit', token, context])
    return token
  } // Use results (called when an `attempt` succeeds).

  function onsuccessfulconstruct(construct, info) {
    addResult(construct, info.from)
  } // Discard results (called when a `check`/`interrupt`/`lazy` succeeds).

  function onsuccessfulcheck(construct, info) {
    info.restore()
  } // Factory to attempt/check/interrupt.

  function constructFactory(onreturn, fields) {
    return hook // Handle either an object mapping codes to constructs, a list of
    // constructs, or a single construct.

    function hook(constructs, returnState, bogusState) {
      var listOfConstructs
      var constructIndex
      var currentConstruct
      var info
      return constructs.tokenize || 'length' in constructs
        ? handleListOfConstructs(miniflat(constructs))
        : handleMapOfConstructs

      // Look up the constructs registered for this code (plus any under
      // `null`, which apply to every code); fail over to the bogus state
      // when none are registered.
      function handleMapOfConstructs(code) {
        if (code in constructs || null in constructs) {
          return handleListOfConstructs(
            constructs.null
              ? /* c8 ignore next */
                miniflat(constructs[code]).concat(miniflat(constructs.null))
              : constructs[code]
          )(code)
        }
        return bogusState(code)
      }

      // Try the candidates in order, starting with the first.
      function handleListOfConstructs(list) {
        listOfConstructs = list
        constructIndex = 0
        return handleConstruct(list[constructIndex])
      }

      function handleConstruct(construct) {
        return start

        function start(code) {
          // To do: no need to store if there is no bogus state, probably?
          // Currently doesn't work because `inspect` in document does a check
          // w/o a bogus, which doesn't make sense. But it does seem to help
          // perf by not storing.
          info = store()
          currentConstruct = construct
          if (!construct.partial) {
            context.currentConstruct = construct
          }
          // Skip constructs disabled by name through parser options.
          if (
            construct.name &&
            context.parser.constructs.disable.null.indexOf(construct.name) > -1
          ) {
            return nok()
          }
          return construct.tokenize.call(
            fields ? assign({}, context, fields) : context,
            effects,
            ok,
            nok
          )(code)
        }
      }

      // The construct matched: let `onreturn` keep or discard its results.
      function ok(code) {
        onreturn(currentConstruct, info)
        return returnState
      }

      // The construct failed: roll back, try the next candidate, or give up
      // to the bogus state.
      function nok(code) {
        info.restore()
        if (++constructIndex < listOfConstructs.length) {
          return handleConstruct(listOfConstructs[constructIndex])
        }
        return bogusState
      }
    }
  }

  // Merge a successful construct's results into the event stream and
  // register its `resolveAll` handler (once) for the final pass.
  function addResult(construct, from) {
    if (construct.resolveAll && resolveAllConstructs.indexOf(construct) < 0) {
      resolveAllConstructs.push(construct)
    }
    if (construct.resolve) {
      chunkedSplice(
        context.events,
        from,
        context.events.length - from,
        construct.resolve(context.events.slice(from), context)
      )
    }
    if (construct.resolveTo) {
      context.events = construct.resolveTo(context.events, context)
    }
  }

  // Snapshot the mutable tokenizer state so a failed attempt can be undone.
  function store() {
    var startPoint = now()
    var startPrevious = context.previous
    var startCurrentConstruct = context.currentConstruct
    var startEventsIndex = context.events.length
    var startStack = Array.from(stack)
    return {
      restore: restore,
      from: startEventsIndex
    }

    // Roll the tokenizer back to the snapshotted state.
    function restore() {
      point = startPoint
      context.previous = startPrevious
      context.currentConstruct = startCurrentConstruct
      context.events.length = startEventsIndex
      stack = startStack
      accountForPotentialSkip()
    }
  }

  // If a skip is defined for the current line and we are at its start
  // (column < 2), jump the point forward to the skipped-to column.
  function accountForPotentialSkip() {
    if (point.line in columnStart && point.column < 2) {
      point.column = columnStart[point.line]
      point.offset += columnStart[point.line] - 1
    }
  }
}
module.exports = createTokenizer