This commit is contained in:
451
node_modules/parse-entities/index.js
generated
vendored
Normal file
451
node_modules/parse-entities/index.js
generated
vendored
Normal file
@@ -0,0 +1,451 @@
|
||||
'use strict'
|
||||
|
||||
var legacy = require('character-entities-legacy')
|
||||
var invalid = require('character-reference-invalid')
|
||||
var decimal = require('is-decimal')
|
||||
var hexadecimal = require('is-hexadecimal')
|
||||
var alphanumerical = require('is-alphanumerical')
|
||||
var decodeEntity = require('./decode-entity')
|
||||
|
||||
module.exports = parseEntities
|
||||
|
||||
// Cached built-ins.
var own = {}.hasOwnProperty
var fromCharCode = String.fromCharCode
// `Function.prototype` is callable and returns `undefined`, so it serves as a
// shared do-nothing function.
var noop = Function.prototype

// Value used for each option the caller leaves `null` or `undefined`.
var defaults = {
  warning: null,
  reference: null,
  text: null,
  warningContext: null,
  referenceContext: null,
  textContext: null,
  position: {},
  additional: null,
  attribute: false,
  nonTerminated: true
}

// Character codes, in ascending order.
var tab = 9 // '\t'
var lineFeed = 10 // '\n'
var formFeed = 12 // '\f'
var space = 32 // ' '
var numberSign = 35 // '#'
var ampersand = 38 // '&'
var semicolon = 59 // ';'
var lessThan = 60 // '<'
var equalsTo = 61 // '='
var uppercaseX = 88 // 'X'
var lowercaseX = 120 // 'x'
var replacementCharacter = 65533 // '\uFFFD' (replacement character)

// Kinds of character reference.
var name = 'named'
var hexa = 'hexadecimal'
var deci = 'decimal'

// Radix passed to `parseInt` for each numeric kind.
var bases = {}

bases[hexa] = 16
bases[deci] = 10

// Predicate per kind: which character codes may appear in the reference body.
// The first code that fails the predicate marks the end of the reference,
// because the closing semicolon is not strictly required.
var tests = {}

tests[name] = alphanumerical
tests[deci] = decimal
tests[hexa] = hexadecimal

// Warning codes.
var namedNotTerminated = 1
var numericNotTerminated = 2
var namedEmpty = 3
var numericEmpty = 4
var namedUnknown = 5
var numericDisallowed = 6
var numericProhibited = 7

// Human-readable message per warning code.
var messages = {}

messages[namedNotTerminated] =
  'Named character references must be terminated by a semicolon'
messages[namedEmpty] = 'Named character references cannot be empty'
messages[namedUnknown] = 'Named character references must be known'
messages[numericNotTerminated] =
  'Numeric character references must be terminated by a semicolon'
messages[numericEmpty] = 'Numeric character references cannot be empty'
messages[numericDisallowed] =
  'Numeric character references cannot be disallowed'
messages[numericProhibited] =
  'Numeric character references cannot be outside the permissible Unicode range'
|
||||
|
||||
/**
 * Public entry point: normalize `options` into a complete settings object and
 * hand off to `parse`.
 *
 * Every key of `defaults` is copied into the settings; an option that is
 * `null` or `undefined` falls back to its default. Keys not present in
 * `defaults` are ignored.
 *
 * `options.position` may be either a point (`line`, `column`, `offset`) or an
 * object carrying `start` and/or `indent`. In the latter case `indent` is
 * moved to `settings.indent` and `start` becomes the position.
 *
 * @param {string} value - Text that may contain character references.
 * @param {Object} [options] - Configuration; see `defaults` for the keys.
 * @returns {string} Whatever `parse` returns for the normalized settings.
 */
function parseEntities(value, options) {
  var settings = {}
  var option
  var key

  if (!options) {
    options = {}
  }

  for (key in defaults) {
    option = options[key]
    settings[key] =
      option === null || option === undefined ? defaults[key] : option
  }

  if (settings.position.indent || settings.position.start) {
    settings.indent = settings.position.indent || []
    // Fall back to an empty point when only `indent` was supplied; otherwise
    // `parse` would dereference `undefined` when reading `line`/`column`.
    settings.position = settings.position.start || {}
  }

  return parse(value, settings)
}
|
||||
|
||||
/**
 * Decode the character references in `value`.
 *
 * Walks `value` one UTF-16 code unit at a time. Plain text is accumulated in
 * a queue; whenever a character reference is decoded, the queue is flushed
 * and the decoded reference is appended to the result. The optional
 * `settings.text`, `settings.reference` and `settings.warning` handlers are
 * invoked (with their respective `*Context` as `this`) as text runs,
 * references and parse warnings are encountered.
 *
 * @param {string} value - Text to decode.
 * @param {Object} settings - Normalized settings as built by `parseEntities`;
 *   `settings.position` must be an object (it is read for `line`, `column`
 *   and `offset`).
 * @returns {string} `value` with its character references decoded.
 */
// eslint-disable-next-line complexity
function parse(value, settings) {
  var additional = settings.additional
  var nonTerminated = settings.nonTerminated
  var handleText = settings.text
  var handleReference = settings.reference
  var handleWarning = settings.warning
  var textContext = settings.textContext
  var referenceContext = settings.referenceContext
  var warningContext = settings.warningContext
  var pos = settings.position
  var indent = settings.indent || []
  var length = value.length
  var index = 0
  var lines = -1
  var column = pos.column || 1
  var line = pos.line || 1
  var queue = ''
  var result = []
  var entityCharacters
  var namedEntity
  var terminated
  var characters
  var character
  var reference
  var following
  var warning
  var reason
  var output
  var entity
  var begin
  var start
  var type
  var test
  var prev
  var next
  var diff
  var end

  // Only the first character of a string `additional` is significant.
  if (typeof additional === 'string') {
    additional = additional.charCodeAt(0)
  }

  // Cache the current point.
  prev = now()

  // Wrap `handleWarning`.
  warning = handleWarning ? parseError : noop

  // Ensure the algorithm walks over the first character and the end
  // (inclusive): the final iteration reads `NaN` and flushes the queue.
  index--
  length++

  while (++index < length) {
    // If the previous character was a newline.
    if (character === lineFeed) {
      column = indent[lines] || 1
    }

    character = value.charCodeAt(index)

    if (character === ampersand) {
      following = value.charCodeAt(index + 1)

      // The behaviour depends on the identity of the next character.
      if (
        following === tab ||
        following === lineFeed ||
        following === formFeed ||
        following === space ||
        following === ampersand ||
        following === lessThan ||
        // `NaN`: the ampersand is the last character.
        following !== following ||
        (additional && following === additional)
      ) {
        // Not a character reference.
        // No characters are consumed, and nothing is returned.
        // This is not an error, either.
        queue += fromCharCode(character)
        column++

        continue
      }

      start = index + 1
      begin = start
      end = start

      if (following === numberSign) {
        // Numerical entity.
        end = ++begin

        // The behaviour further depends on the next character.
        following = value.charCodeAt(end)

        if (following === uppercaseX || following === lowercaseX) {
          // ASCII hex digits.
          type = hexa
          end = ++begin
        } else {
          // ASCII digits.
          type = deci
        }
      } else {
        // Named entity.
        type = name
      }

      entityCharacters = ''
      entity = ''
      characters = ''
      // Reset per candidate: the branches below that decode nothing leave
      // `reference` untouched, and a value left over from an earlier entity
      // must not be emitted again.
      reference = ''
      test = tests[type]
      end--

      while (++end < length) {
        following = value.charCodeAt(end)

        if (!test(following)) {
          break
        }

        characters += fromCharCode(following)

        // Check if we can match a legacy named reference.
        // If so, we cache that as the last viable named reference.
        // This ensures we do not need to walk backwards later.
        if (type === name && own.call(legacy, characters)) {
          entityCharacters = characters
          entity = legacy[characters]
        }
      }

      terminated = value.charCodeAt(end) === semicolon

      if (terminated) {
        end++

        namedEntity = type === name ? decodeEntity(characters) : false

        if (namedEntity) {
          entityCharacters = characters
          entity = namedEntity
        }
      }

      diff = 1 + end - start

      if (!terminated && !nonTerminated) {
        // Empty: non-terminated references are not decoded in this mode.
      } else if (!characters) {
        // An empty (possible) entity is valid, unless it's numeric (thus an
        // ampersand followed by an octothorp).
        if (type !== name) {
          warning(numericEmpty, diff)
        }
      } else if (type === name) {
        // A terminated reference whose name is not known is invalid.
        if (terminated && !entity) {
          warning(namedUnknown, 1)
        } else {
          // If there's something after an entity name which is not known, cap
          // the reference.
          if (entityCharacters !== characters) {
            end = begin + entityCharacters.length
            diff = 1 + end - begin
            terminated = false
          }

          // If the reference is not terminated, warn.
          if (!terminated) {
            reason = entityCharacters ? namedNotTerminated : namedEmpty

            if (settings.attribute) {
              following = value.charCodeAt(end)

              if (following === equalsTo) {
                warning(reason, diff)
                entity = null
              } else if (alphanumerical(following)) {
                entity = null
              } else {
                warning(reason, diff)
              }
            } else {
              warning(reason, diff)
            }
          }
        }

        reference = entity
      } else {
        if (!terminated) {
          // Non-terminated numeric entities trigger a warning.
          warning(numericNotTerminated, diff)
        }

        // Parse as either hexadecimal or decimal.
        reference = parseInt(characters, bases[type])

        // Trigger a warning when the parsed number is prohibited, and replace
        // with replacement character.
        if (prohibited(reference)) {
          warning(numericProhibited, diff)
          reference = fromCharCode(replacementCharacter)
        } else if (reference in invalid) {
          // Trigger a warning when the parsed number is disallowed, and replace
          // by an alternative.
          warning(numericDisallowed, diff)
          reference = invalid[reference]
        } else {
          // Parse the number.
          output = ''

          // Trigger a warning when the parsed number should not be used.
          if (disallowed(reference)) {
            warning(numericDisallowed, diff)
          }

          // Stringify the number; code points above the BMP become a
          // surrogate pair (high surrogate first).
          if (reference > 0xffff) {
            reference -= 0x10000
            output += fromCharCode(((reference >>> 10) & 0x3ff) | 0xd800)
            reference = 0xdc00 | (reference & 0x3ff)
          }

          reference = output + fromCharCode(reference)
        }
      }

      // Found it!
      // First eat the queued characters as normal text, then eat an entity.
      if (reference) {
        flush()

        prev = now()
        index = end - 1
        column += end - start + 1
        result.push(reference)
        next = now()
        next.offset++

        if (handleReference) {
          handleReference.call(
            referenceContext,
            reference,
            {start: prev, end: next},
            value.slice(start - 1, end)
          )
        }

        prev = next
      } else {
        // If we could not find a reference, queue the checked characters (as
        // normal characters), and move the pointer to their end.
        // This is possible because we can be certain neither newlines nor
        // ampersands are included.
        characters = value.slice(start - 1, end)
        queue += characters
        column += characters.length
        index = end - 1
      }
    } else {
      // Handle anything other than an ampersand, including newlines and EOF.
      if (character === lineFeed) {
        line++
        lines++
        column = 0
      }

      // `character` is `NaN` only on the extra, past-the-end iteration.
      if (character === character) {
        queue += fromCharCode(character)
        column++
      } else {
        flush()
      }
    }
  }

  // Return the reduced nodes.
  return result.join('')

  // Get current position.
  function now() {
    return {
      line: line,
      column: column,
      offset: index + (pos.offset || 0)
    }
  }

  // "Throw" a parse-error: a warning.
  // `offset` is added to both the column and the offset of the current point.
  function parseError(code, offset) {
    var position = now()

    position.column += offset
    position.offset += offset

    handleWarning.call(warningContext, messages[code], position, code)
  }

  // Flush `queue` (normal text).
  // Macro invoked before each entity and at the end of `value`.
  // Does nothing when `queue` is empty.
  function flush() {
    if (queue) {
      result.push(queue)

      if (handleText) {
        handleText.call(textContext, queue, {start: prev, end: now()})
      }

      queue = ''
    }
  }
}
|
||||
|
||||
/**
 * Tell whether `code` cannot be a valid character reference target: either a
 * surrogate (U+D800 through U+DFFF) or a value above U+10FFFF.
 *
 * @param {number} code - Numeric value of the reference.
 * @returns {boolean} `true` when `code` is prohibited.
 */
function prohibited(code) {
  var isSurrogate = code >= 0xd800 && code <= 0xdfff
  var isBeyondUnicode = code > 0x10ffff

  return isSurrogate || isBeyondUnicode
}
|
||||
|
||||
/**
 * Tell whether `code` falls in one of the ranges this module warns about:
 * U+0001-U+0008, U+000B, U+000D-U+001F, U+007F-U+009F, U+FDD0-U+FDEF, or any
 * value whose low 16 bits are 0xFFFE or 0xFFFF.
 *
 * @param {number} code - Numeric value of the reference.
 * @returns {boolean} `true` when `code` is disallowed.
 */
function disallowed(code) {
  var low = code & 0xffff

  if (code >= 0x0001 && code <= 0x0008) {
    return true
  }

  if (code === 0x000b) {
    return true
  }

  if (code >= 0x000d && code <= 0x001f) {
    return true
  }

  if (code >= 0x007f && code <= 0x009f) {
    return true
  }

  if (code >= 0xfdd0 && code <= 0xfdef) {
    return true
  }

  return low === 0xffff || low === 0xfffe
}
|
||||
Reference in New Issue
Block a user