1399 lines
34 KiB
JavaScript
1399 lines
34 KiB
JavaScript
/**
|
|
* @licstart The following is the entire license notice for the
|
|
* JavaScript code in this page
|
|
*
|
|
* Copyright 2022 Mozilla Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
* @licend The above is the entire license notice for the
|
|
* JavaScript code in this page
|
|
*/
|
|
"use strict";
|
|
|
|
Object.defineProperty(exports, "__esModule", {
|
|
value: true
|
|
});
|
|
exports.Parser = exports.Linearization = exports.Lexer = void 0;
|
|
|
|
var _util = require("../shared/util.js");
|
|
|
|
var _primitives = require("./primitives.js");
|
|
|
|
var _core_utils = require("./core_utils.js");
|
|
|
|
var _ascii_85_stream = require("./ascii_85_stream.js");
|
|
|
|
var _ascii_hex_stream = require("./ascii_hex_stream.js");
|
|
|
|
var _ccitt_stream = require("./ccitt_stream.js");
|
|
|
|
var _flate_stream = require("./flate_stream.js");
|
|
|
|
var _jbig2_stream = require("./jbig2_stream.js");
|
|
|
|
var _jpeg_stream = require("./jpeg_stream.js");
|
|
|
|
var _jpx_stream = require("./jpx_stream.js");
|
|
|
|
var _lzw_stream = require("./lzw_stream.js");
|
|
|
|
var _stream = require("./stream.js");
|
|
|
|
var _predictor_stream = require("./predictor_stream.js");
|
|
|
|
var _run_length_stream = require("./run_length_stream.js");
|
|
|
|
// Inline images whose data length is at or above this many bytes are not
// looked up in, or added to, `Parser.imageCache` (see `makeInlineImage`).
const MAX_LENGTH_TO_CACHE = 1000;
|
|
// Inline-image dictionaries whose byte length is at or above this value are
// not checksummed for the inline-image cache key (see `makeInlineImage`).
const MAX_ADLER32_LENGTH = 5552;
|
|
|
|
/**
 * Computes the Adler-32 checksum of a sequence of bytes.
 *
 * The two running sums are reduced modulo 65521 after every block of 5552
 * bytes, so they stay far below 2^53 and the result is exact for inputs of
 * any length (an unreduced sum silently loses precision once it no longer
 * fits in a double's 53-bit mantissa).
 *
 * @param {ArrayLike<number>} bytes - The data; only the low 8 bits of each
 *   element are used.
 * @returns {number} The checksum packed as `(b << 16) | a`, i.e. as a
 *   *signed* 32-bit integer (negative when the high sum is >= 0x8000).
 */
function computeAdler32(bytes) {
  const MOD_ADLER = 65521;
  const BLOCK_LENGTH = 5552;
  const bytesLength = bytes.length;
  let a = 1;
  let b = 0;

  for (let start = 0; start < bytesLength; start += BLOCK_LENGTH) {
    const end = Math.min(start + BLOCK_LENGTH, bytesLength);

    for (let i = start; i < end; ++i) {
      a += bytes[i] & 0xff;
      b += a;
    }

    // Keep both sums small before processing the next block.
    a %= MOD_ADLER;
    b %= MOD_ADLER;
  }

  return (b << 16) | a;
}
|
|
|
|
/**
 * Builds PDF objects (arrays, dictionaries, references, streams and inline
 * images) from the tokens produced by a lexer.
 *
 * The parser keeps a two-token look-ahead in `buf1` (current token) and
 * `buf2` (next token).
 */
class Parser {
  /**
   * @param {Object} params
   * @param {Object} params.lexer - Token source; must provide `getObj()`.
   *   Stream and inline-image parsing additionally use `lexer.stream`,
   *   `nextChar()`, `skipToNextLine()`, `peekObj()`, `knownCommands` and
   *   `beginInlineImagePos`.
   * @param {Object|null} params.xref - Passed to created dictionaries and
   *   used to resolve references in filter arrays.
   * @param {boolean} [params.allowStreams] - When false, a dictionary that
   *   is followed by `stream` is returned as a plain dictionary.
   * @param {boolean} [params.recoveryMode] - When true, an array or
   *   dictionary cut short by EOF is returned instead of throwing.
   */
  constructor({ lexer, xref, allowStreams = false, recoveryMode = false }) {
    this.lexer = lexer;
    this.xref = xref;
    this.allowStreams = allowStreams;
    this.recoveryMode = recoveryMode;
    this.imageCache = Object.create(null);
    this.refill();
  }

  /** Reloads both look-ahead slots from the lexer. */
  refill() {
    this.buf1 = this.lexer.getObj();
    this.buf2 = this.lexer.getObj();
  }

  /**
   * Advances the look-ahead by one token. After an `ID` command the lexer is
   * NOT asked for another token (`buf2` becomes null), because the bytes
   * that follow are raw inline-image data.
   */
  shift() {
    if (this.buf2 instanceof _primitives.Cmd && this.buf2.cmd === "ID") {
      this.buf1 = this.buf2;
      this.buf2 = null;
    } else {
      this.buf1 = this.buf2;
      this.buf2 = this.lexer.getObj();
    }
  }

  /**
   * Like `shift`, but reports lexer errors as `false` instead of throwing.
   *
   * @returns {boolean} True when the shift succeeded.
   * @throws {MissingDataException} Always re-thrown.
   */
  tryShift() {
    try {
      this.shift();
      return true;
    } catch (e) {
      if (e instanceof _core_utils.MissingDataException) {
        throw e;
      }

      return false;
    }
  }

  /**
   * Parses and returns the next object.
   *
   * @param {Object|null} [cipherTransform] - When given, strings are passed
   *   through `decryptString` and streams through `createStream`.
   * @returns {*} The parsed object; tokens that need no further parsing are
   *   returned unchanged.
   * @throws {ParserEOFException} On EOF inside an array or dictionary,
   *   unless `recoveryMode` is set.
   */
  getObj(cipherTransform = null) {
    const buf1 = this.buf1;
    this.shift();

    if (buf1 instanceof _primitives.Cmd) {
      switch (buf1.cmd) {
        case "BI":
          return this.makeInlineImage(cipherTransform);

        case "[": {
          const array = [];

          while (
            !(0, _primitives.isCmd)(this.buf1, "]") &&
            this.buf1 !== _primitives.EOF
          ) {
            array.push(this.getObj(cipherTransform));
          }

          if (this.buf1 === _primitives.EOF) {
            if (this.recoveryMode) {
              return array;
            }

            throw new _core_utils.ParserEOFException(
              "End of file inside array."
            );
          }

          this.shift();
          return array;
        }

        case "<<": {
          const dict = new _primitives.Dict(this.xref);

          while (
            !(0, _primitives.isCmd)(this.buf1, ">>") &&
            this.buf1 !== _primitives.EOF
          ) {
            if (!(this.buf1 instanceof _primitives.Name)) {
              (0, _util.info)(
                "Malformed dictionary: key must be a name object"
              );
              this.shift();
              continue;
            }

            const key = this.buf1.name;
            this.shift();

            if (this.buf1 === _primitives.EOF) {
              break;
            }

            dict.set(key, this.getObj(cipherTransform));
          }

          if (this.buf1 === _primitives.EOF) {
            if (this.recoveryMode) {
              return dict;
            }

            throw new _core_utils.ParserEOFException(
              "End of file inside dictionary."
            );
          }

          // A dictionary directly followed by `stream` is a stream object.
          if ((0, _primitives.isCmd)(this.buf2, "stream")) {
            return this.allowStreams
              ? this.makeStream(dict, cipherTransform)
              : dict;
          }

          this.shift();
          return dict;
        }

        default:
          return buf1;
      }
    }

    if (Number.isInteger(buf1)) {
      // Two integers followed by `R` form an indirect reference.
      if (
        Number.isInteger(this.buf1) &&
        (0, _primitives.isCmd)(this.buf2, "R")
      ) {
        const ref = _primitives.Ref.get(buf1, this.buf1);

        this.shift();
        this.shift();
        return ref;
      }

      return buf1;
    }

    if (typeof buf1 === "string" && cipherTransform) {
      return cipherTransform.decryptString(buf1);
    }

    return buf1;
  }

  /**
   * Scans `stream` for the `EI` marker that ends inline-image data when no
   * filter-specific end-of-data marker can be used.
   *
   * An `EI` followed by whitespace is only accepted when the next bytes look
   * like text and, if `lexer.knownCommands` is available, the next token is
   * not an unknown command.
   *
   * @param {Object} stream - Positioned at the first byte of image data.
   * @returns {number} Length of the image data, in bytes.
   */
  findDefaultInlineStreamEnd(stream) {
    const E = 0x45,
      I = 0x49,
      SPACE = 0x20,
      LF = 0xa,
      CR = 0xd,
      NUL = 0x0;
    const lexer = this.lexer,
      startPos = stream.pos,
      n = 10; // Number of bytes inspected after a candidate "EI".
    let state = 0,
      ch,
      maybeEIPos;

    while ((ch = stream.getByte()) !== -1) {
      if (state === 0) {
        state = ch === E ? 1 : 0;
      } else if (state === 1) {
        state = ch === I ? 2 : 0;
      } else {
        (0, _util.assert)(
          state === 2,
          "findDefaultInlineStreamEnd - invalid state."
        );

        if (ch === SPACE || ch === LF || ch === CR) {
          maybeEIPos = stream.pos;
          const followingBytes = stream.peekBytes(n);

          for (let i = 0, ii = followingBytes.length; i < ii; i++) {
            ch = followingBytes[i];

            // A single NUL byte is tolerated; consecutive ones are not.
            if (ch === NUL && followingBytes[i + 1] !== NUL) {
              continue;
            }

            // Anything else outside LF, CR and 0x20-0x7f means the "EI"
            // was most likely part of the image data.
            if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7f)) {
              state = 0;
              break;
            }
          }

          if (state !== 2) {
            continue;
          }

          if (lexer.knownCommands) {
            const nextObj = lexer.peekObj();

            if (
              nextObj instanceof _primitives.Cmd &&
              !lexer.knownCommands[nextObj.cmd]
            ) {
              state = 0;
            }
          } else {
            (0, _util.warn)(
              "findDefaultInlineStreamEnd - `lexer.knownCommands` is undefined."
            );
          }

          if (state === 2) {
            break;
          }
        } else {
          state = 0;
        }
      }
    }

    if (ch === -1) {
      (0, _util.warn)(
        "findDefaultInlineStreamEnd: " +
          "Reached the end of the stream without finding a valid EI marker"
      );

      if (maybeEIPos) {
        (0, _util.warn)(
          '... trying to recover by using the last "EI" occurrence.'
        );
        stream.skip(-(stream.pos - maybeEIPos));
      }
    }

    // Exclude "EI" plus the byte after it, and also the byte before "EI"
    // when that byte is whitespace.
    let endOffset = 4;
    stream.skip(-endOffset);
    ch = stream.peekByte();
    stream.skip(endOffset);

    if (!(0, _core_utils.isWhiteSpace)(ch)) {
      endOffset--;
    }

    return stream.pos - endOffset - startPos;
  }

  /**
   * Finds the end of inline DCTDecode (JPEG) data by walking the JPEG
   * markers until the EOI marker (0xFFD9). Falls back to
   * `findDefaultInlineStreamEnd` when no EOI marker is found.
   *
   * @param {Object} stream - Positioned at the first byte of image data.
   * @returns {number} Length of the image data, in bytes.
   */
  findDCTDecodeInlineStreamEnd(stream) {
    const startPos = stream.pos;
    let foundEOI = false,
      b,
      markerLength;

    while ((b = stream.getByte()) !== -1) {
      if (b !== 0xff) {
        // Not the start of a marker.
        continue;
      }

      switch (stream.getByte()) {
        case 0x00:
          break;

        case 0xff:
          // Re-read this 0xff as the possible start of the next marker.
          stream.skip(-1);
          break;

        case 0xd9:
          foundEOI = true;
          break;

        case 0xc0:
        case 0xc1:
        case 0xc2:
        case 0xc3:
        case 0xc5:
        case 0xc6:
        case 0xc7:
        case 0xc9:
        case 0xca:
        case 0xcb:
        case 0xcd:
        case 0xce:
        case 0xcf:
        case 0xc4:
        case 0xcc:
        case 0xda:
        case 0xdb:
        case 0xdc:
        case 0xdd:
        case 0xde:
        case 0xdf:
        case 0xe0:
        case 0xe1:
        case 0xe2:
        case 0xe3:
        case 0xe4:
        case 0xe5:
        case 0xe6:
        case 0xe7:
        case 0xe8:
        case 0xe9:
        case 0xea:
        case 0xeb:
        case 0xec:
        case 0xed:
        case 0xee:
        case 0xef:
        case 0xfe:
          // These markers are followed by a 16-bit length that includes the
          // two length bytes themselves.
          markerLength = stream.getUint16();

          if (markerLength > 2) {
            stream.skip(markerLength - 2);
          } else {
            // Implausible length: un-read it and keep scanning.
            stream.skip(-2);
          }

          break;
      }

      if (foundEOI) {
        break;
      }
    }

    const length = stream.pos - startPos;

    if (b === -1) {
      (0, _util.warn)(
        "Inline DCTDecode image stream: " +
          "EOI marker not found, searching for /EI/ instead."
      );
      stream.skip(-length);
      return this.findDefaultInlineStreamEnd(stream);
    }

    this.inlineStreamSkipEI(stream);
    return length;
  }

  /**
   * Finds the end of inline ASCII85Decode data by looking for the `~>`
   * end-of-data marker (whitespace between the two characters is allowed).
   * A `~` followed by whitespace and `EI` is also accepted. Falls back to
   * `findDefaultInlineStreamEnd` when neither is found.
   *
   * @param {Object} stream - Positioned at the first byte of image data.
   * @returns {number} Length of the image data, in bytes.
   */
  findASCII85DecodeInlineStreamEnd(stream) {
    const TILDE = 0x7e,
      GT = 0x3e;
    const startPos = stream.pos;
    let ch;

    while ((ch = stream.getByte()) !== -1) {
      if (ch === TILDE) {
        const tildePos = stream.pos;
        ch = stream.peekByte();

        while ((0, _core_utils.isWhiteSpace)(ch)) {
          stream.skip();
          ch = stream.peekByte();
        }

        if (ch === GT) {
          stream.skip();
          break;
        }

        // Only reached when whitespace was skipped after the tilde.
        if (stream.pos > tildePos) {
          const maybeEI = stream.peekBytes(2);

          if (maybeEI[0] === 0x45 && maybeEI[1] === 0x49) {
            break;
          }
        }
      }
    }

    const length = stream.pos - startPos;

    if (ch === -1) {
      (0, _util.warn)(
        "Inline ASCII85Decode image stream: " +
          "EOD marker not found, searching for /EI/ instead."
      );
      stream.skip(-length);
      return this.findDefaultInlineStreamEnd(stream);
    }

    this.inlineStreamSkipEI(stream);
    return length;
  }

  /**
   * Finds the end of inline ASCIIHexDecode data by looking for the `>`
   * end-of-data marker. Falls back to `findDefaultInlineStreamEnd` when it
   * is not found.
   *
   * @param {Object} stream - Positioned at the first byte of image data.
   * @returns {number} Length of the image data, in bytes.
   */
  findASCIIHexDecodeInlineStreamEnd(stream) {
    const GT = 0x3e;
    const startPos = stream.pos;
    let ch;

    while ((ch = stream.getByte()) !== -1) {
      if (ch === GT) {
        break;
      }
    }

    const length = stream.pos - startPos;

    if (ch === -1) {
      (0, _util.warn)(
        "Inline ASCIIHexDecode image stream: " +
          "EOD marker not found, searching for /EI/ instead."
      );
      stream.skip(-length);
      return this.findDefaultInlineStreamEnd(stream);
    }

    this.inlineStreamSkipEI(stream);
    return length;
  }

  /**
   * Advances `stream` past the next "EI" and the single byte that follows
   * it, or to the end of the stream when there is no "EI".
   *
   * @param {Object} stream
   */
  inlineStreamSkipEI(stream) {
    const E = 0x45,
      I = 0x49;
    let state = 0,
      ch;

    while ((ch = stream.getByte()) !== -1) {
      if (state === 0) {
        state = ch === E ? 1 : 0;
      } else if (state === 1) {
        state = ch === I ? 2 : 0;
      } else if (state === 2) {
        break;
      }
    }
  }

  /**
   * Parses an inline image (`BI <dict> ID <data> EI`). Expects the `BI`
   * command to have been consumed already.
   *
   * Small images are cached in `this.imageCache`, keyed by Adler-32
   * checksums of the image bytes and of the dictionary bytes.
   *
   * @param {Object|null} cipherTransform
   * @returns {Object} The (possibly cached) decoded image stream.
   * @throws {FormatError} When a dictionary key is not a name.
   */
  makeInlineImage(cipherTransform) {
    const lexer = this.lexer;
    const stream = lexer.stream;
    const dict = new _primitives.Dict(this.xref);
    let dictLength;

    while (
      !(0, _primitives.isCmd)(this.buf1, "ID") &&
      this.buf1 !== _primitives.EOF
    ) {
      if (!(this.buf1 instanceof _primitives.Name)) {
        throw new _util.FormatError("Dictionary key must be a name object");
      }

      const key = this.buf1.name;
      this.shift();

      if (this.buf1 === _primitives.EOF) {
        break;
      }

      dict.set(key, this.getObj(cipherTransform));
    }

    // `dictLength` stays undefined when the lexer never recorded a "BI"
    // position; the comparison below is then false and caching is skipped.
    if (lexer.beginInlineImagePos !== -1) {
      dictLength = stream.pos - lexer.beginInlineImagePos;
    }

    // Only the first filter decides how the end of the data is located.
    const filter = dict.get("F", "Filter");
    let filterName;

    if (filter instanceof _primitives.Name) {
      filterName = filter.name;
    } else if (Array.isArray(filter)) {
      const filterZero = this.xref.fetchIfRef(filter[0]);

      if (filterZero instanceof _primitives.Name) {
        filterName = filterZero.name;
      }
    }

    const startPos = stream.pos;
    let length;

    switch (filterName) {
      case "DCT":
      case "DCTDecode":
        length = this.findDCTDecodeInlineStreamEnd(stream);
        break;

      case "A85":
      case "ASCII85Decode":
        length = this.findASCII85DecodeInlineStreamEnd(stream);
        break;

      case "AHx":
      case "ASCIIHexDecode":
        length = this.findASCIIHexDecodeInlineStreamEnd(stream);
        break;

      default:
        length = this.findDefaultInlineStreamEnd(stream);
    }

    let imageStream = stream.makeSubStream(startPos, length, dict);
    let cacheKey;

    if (length < MAX_LENGTH_TO_CACHE && dictLength < MAX_ADLER32_LENGTH) {
      const imageBytes = imageStream.getBytes();
      imageStream.reset();

      // Temporarily rewind to read the raw dictionary bytes.
      const initialStreamPos = stream.pos;
      stream.pos = lexer.beginInlineImagePos;
      const dictBytes = stream.getBytes(dictLength);
      stream.pos = initialStreamPos;

      cacheKey = computeAdler32(imageBytes) + "_" + computeAdler32(dictBytes);
      const cacheEntry = this.imageCache[cacheKey];

      if (cacheEntry !== undefined) {
        this.buf2 = _primitives.Cmd.get("EI");
        this.shift();
        cacheEntry.reset();
        return cacheEntry;
      }
    }

    if (cipherTransform) {
      imageStream = cipherTransform.createStream(imageStream, length);
    }

    imageStream = this.filter(imageStream, dict, length);
    imageStream.dict = dict;

    if (cacheKey !== undefined) {
      imageStream.cacheKey = `inline_${length}_${cacheKey}`;
      this.imageCache[cacheKey] = imageStream;
    }

    // The data scan already consumed "EI"; put the command into the
    // look-ahead so the caller sees it as the next token.
    this.buf2 = _primitives.Cmd.get("EI");
    this.shift();
    return imageStream;
  }

  /**
   * Searches the lexer's stream, starting at `startPos`, for `signature`.
   * On success the stream is left positioned at the start of the match.
   *
   * @param {number} startPos - Position at which the search begins.
   * @param {Uint8Array} signature - Byte sequence to look for.
   * @returns {number} Offset of the match relative to `startPos`, or -1.
   */
  _findStreamLength(startPos, signature) {
    const { stream } = this.lexer;
    stream.pos = startPos;
    const SCAN_BLOCK_LENGTH = 2048;
    const signatureLength = signature.length;

    while (stream.pos < stream.end) {
      const scanBytes = stream.peekBytes(SCAN_BLOCK_LENGTH);
      const scanLength = scanBytes.length - signatureLength;

      if (scanLength <= 0) {
        break;
      }

      let pos = 0;

      while (pos < scanLength) {
        let j = 0;

        while (j < signatureLength && scanBytes[pos + j] === signature[j]) {
          j++;
        }

        if (j >= signatureLength) {
          stream.pos += pos;
          return stream.pos - startPos;
        }

        pos++;
      }

      // Advance by less than a full block so that a signature straddling
      // two blocks is still found.
      stream.pos += scanLength;
    }

    return -1;
  }

  /**
   * Creates the stream object for `dict`; called when the next token is the
   * `stream` command.
   *
   * The `/Length` entry is trusted only if `endstream` follows the data it
   * describes; otherwise the data is scanned for "endstream" (or for the
   * truncated "endstrea" followed by whitespace). A `/Length` that is not a
   * non-negative integer is treated as 0, which normally triggers that scan.
   *
   * @param {Object} dict - The stream dictionary.
   * @param {Object|null} cipherTransform
   * @returns {Object} The stream, wrapped in its decode filters.
   * @throws {FormatError} When no end of the stream can be found.
   */
  makeStream(dict, cipherTransform) {
    const lexer = this.lexer;
    let stream = lexer.stream;
    lexer.skipToNextLine();
    const startPos = stream.pos - 1;
    let length = dict.get("Length");

    // A negative length would move the stream position before `startPos`
    // and could end up as a negative sub-stream length.
    if (!Number.isInteger(length) || length < 0) {
      (0, _util.info)(`Bad length "${length && length.toString()}" in stream.`);
      length = 0;
    }

    stream.pos = startPos + length;
    lexer.nextChar();

    if (this.tryShift() && (0, _primitives.isCmd)(this.buf2, "endstream")) {
      this.shift();
    } else {
      // Bytes of "endstream".
      const ENDSTREAM_SIGNATURE = new Uint8Array([
        0x65, 0x6e, 0x64, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d,
      ]);

      let actualLength = this._findStreamLength(startPos, ENDSTREAM_SIGNATURE);

      if (actualLength < 0) {
        // Also accept the command with up to MAX_TRUNCATION trailing
        // characters missing, provided whitespace follows it.
        const MAX_TRUNCATION = 1;

        for (let i = 1; i <= MAX_TRUNCATION; i++) {
          const end = ENDSTREAM_SIGNATURE.length - i;
          const TRUNCATED_SIGNATURE = ENDSTREAM_SIGNATURE.slice(0, end);

          const maybeLength = this._findStreamLength(
            startPos,
            TRUNCATED_SIGNATURE
          );

          if (maybeLength >= 0) {
            const lastByte = stream.peekBytes(end + 1)[end];

            if (!(0, _core_utils.isWhiteSpace)(lastByte)) {
              break;
            }

            (0, _util.info)(
              `Found "${(0, _util.bytesToString)(TRUNCATED_SIGNATURE)}" when ` +
                "searching for endstream command."
            );
            actualLength = maybeLength;
            break;
          }
        }

        if (actualLength < 0) {
          throw new _util.FormatError("Missing endstream command.");
        }
      }

      length = actualLength;
      lexer.nextChar();
      this.shift();
      this.shift();
    }

    this.shift();
    stream = stream.makeSubStream(startPos, length, dict);

    if (cipherTransform) {
      stream = cipherTransform.createStream(stream, length);
    }

    stream = this.filter(stream, dict, length);
    stream.dict = dict;
    return stream;
  }

  /**
   * Wraps `stream` in the decoder(s) named by the `/Filter` (`/F`) entry of
   * `dict`, using the matching `/DecodeParms` (`/DP`) entries.
   *
   * @param {Object} stream - The raw stream.
   * @param {Object} dict - The stream dictionary.
   * @param {number} length - Length of the raw data; only passed to the
   *   first filter of a chain.
   * @returns {Object} The outermost decoding stream, or `stream` itself
   *   when there is no filter.
   * @throws {FormatError} When an entry of a filter array is not a name.
   */
  filter(stream, dict, length) {
    let filter = dict.get("F", "Filter");
    let params = dict.get("DP", "DecodeParms");

    if (filter instanceof _primitives.Name) {
      if (Array.isArray(params)) {
        (0, _util.warn)(
          "/DecodeParms should not be an Array, when /Filter is a Name."
        );
      }

      return this.makeFilter(stream, filter.name, length, params);
    }

    let maybeLength = length;

    if (Array.isArray(filter)) {
      const filterArray = filter;
      const paramsArray = params;

      for (let i = 0, ii = filterArray.length; i < ii; ++i) {
        filter = this.xref.fetchIfRef(filterArray[i]);

        if (!(filter instanceof _primitives.Name)) {
          throw new _util.FormatError(`Bad filter name "${filter}"`);
        }

        params = null;

        if (Array.isArray(paramsArray) && i in paramsArray) {
          params = this.xref.fetchIfRef(paramsArray[i]);
        }

        stream = this.makeFilter(stream, filter.name, maybeLength, params);
        // The length is only known for the raw data, not for the output of
        // the previous filter.
        maybeLength = null;
      }
    }

    return stream;
  }

  /**
   * Creates the decoding stream for a single filter.
   *
   * @param {Object} stream - The stream to decode.
   * @param {string} name - Filter name, full or abbreviated.
   * @param {number|null} maybeLength - Length of the input, if known.
   * @param {Object|null|undefined} params - Decode parameters, if any.
   * @returns {Object} The decoding stream; `stream` itself for an
   *   unsupported filter; a `NullStream` for empty input or when creating
   *   the decoder throws.
   * @throws {MissingDataException} Always re-thrown.
   */
  makeFilter(stream, name, maybeLength, params) {
    if (maybeLength === 0) {
      (0, _util.warn)(`Empty "${name}" stream.`);
      return new _stream.NullStream();
    }

    const xrefStats = this.xref.stats;

    try {
      switch (name) {
        case "Fl":
        case "FlateDecode":
          xrefStats.addStreamType(_util.StreamType.FLATE);

          if (params) {
            return new _predictor_stream.PredictorStream(
              new _flate_stream.FlateStream(stream, maybeLength),
              maybeLength,
              params
            );
          }

          return new _flate_stream.FlateStream(stream, maybeLength);

        case "LZW":
        case "LZWDecode": {
          xrefStats.addStreamType(_util.StreamType.LZW);
          let earlyChange = 1;

          if (params) {
            if (params.has("EarlyChange")) {
              earlyChange = params.get("EarlyChange");
            }

            return new _predictor_stream.PredictorStream(
              new _lzw_stream.LZWStream(stream, maybeLength, earlyChange),
              maybeLength,
              params
            );
          }

          return new _lzw_stream.LZWStream(stream, maybeLength, earlyChange);
        }

        case "DCT":
        case "DCTDecode":
          xrefStats.addStreamType(_util.StreamType.DCT);
          return new _jpeg_stream.JpegStream(stream, maybeLength, params);

        case "JPX":
        case "JPXDecode":
          xrefStats.addStreamType(_util.StreamType.JPX);
          return new _jpx_stream.JpxStream(stream, maybeLength, params);

        case "A85":
        case "ASCII85Decode":
          xrefStats.addStreamType(_util.StreamType.A85);
          return new _ascii_85_stream.Ascii85Stream(stream, maybeLength);

        case "AHx":
        case "ASCIIHexDecode":
          xrefStats.addStreamType(_util.StreamType.AHX);
          return new _ascii_hex_stream.AsciiHexStream(stream, maybeLength);

        case "CCF":
        case "CCITTFaxDecode":
          xrefStats.addStreamType(_util.StreamType.CCF);
          return new _ccitt_stream.CCITTFaxStream(stream, maybeLength, params);

        case "RL":
        case "RunLengthDecode":
          xrefStats.addStreamType(_util.StreamType.RLX);
          return new _run_length_stream.RunLengthStream(stream, maybeLength);

        case "JBIG2Decode":
          xrefStats.addStreamType(_util.StreamType.JBIG);
          return new _jbig2_stream.Jbig2Stream(stream, maybeLength, params);
      }

      (0, _util.warn)(`Filter "${name}" is not supported.`);
      return stream;
    } catch (ex) {
      if (ex instanceof _core_utils.MissingDataException) {
        throw ex;
      }

      (0, _util.warn)(`Invalid stream: "${ex}"`);
      return new _stream.NullStream();
    }
  }
}
|
|
|
|
exports.Parser = Parser;
|
|
/**
 * Character-class table indexed by byte value.
 *   1 - bytes the lexer skips as whitespace (NUL, TAB, LF, FF, CR, SPACE);
 *   2 - delimiter bytes: % ( ) / < > [ ] { };
 *   0 - every other byte.
 * Any non-zero entry terminates a name or command token.
 */
const specialChars = (() => {
  const WHITESPACE_CODES = [0x00, 0x09, 0x0a, 0x0c, 0x0d, 0x20];
  const DELIMITER_CODES = [
    0x25, 0x28, 0x29, 0x2f, 0x3c, 0x3e, 0x5b, 0x5d, 0x7b, 0x7d,
  ];
  const table = new Array(256).fill(0);

  for (const code of WHITESPACE_CODES) {
    table[code] = 1;
  }

  for (const code of DELIMITER_CODES) {
    table[code] = 2;
  }

  return table;
})();
|
|
|
|
/**
 * Converts the character code of a hexadecimal digit to its numeric value.
 *
 * @param {number} ch - Character code.
 * @returns {number} 0-15 for `0-9`, `A-F` and `a-f`; -1 for anything else.
 */
function toHexDigit(ch) {
  const isDecimal = ch >= 0x30 && ch <= 0x39; // '0'..'9'
  const isUpperHex = ch >= 0x41 && ch <= 0x46; // 'A'..'F'
  const isLowerHex = ch >= 0x61 && ch <= 0x66; // 'a'..'f'

  if (isDecimal) {
    return ch & 0x0f;
  }

  // For both letter ranges the low nibble is 1..6, so adding 9 gives 10..15.
  return isUpperHex || isLowerHex ? (ch & 0x0f) + 9 : -1;
}
|
|
|
|
/**
 * Splits a byte stream into PDF tokens: numbers, literal and hexadecimal
 * strings, names, booleans, `null` and commands.
 *
 * `currentChar` always holds the next byte that has not been tokenized yet
 * (-1 at the end of the stream).
 */
class Lexer {
  /**
   * @param {Object} stream - Byte source; must provide `getByte()`,
   *   `peekByte()` and a readable/writable `pos`.
   * @param {Object|null} [knownCommands] - Optional lookup keyed by command
   *   name. When given, a command token is ended as soon as appending the
   *   next character would turn a known command into an unknown one.
   */
  constructor(stream, knownCommands = null) {
    this.stream = stream;
    this.nextChar();
    this.strBuf = []; // Scratch buffer shared by the string/name readers.
    this.knownCommands = knownCommands;
    this._hexStringNumWarn = 0;
    this.beginInlineImagePos = -1;
  }

  /**
   * Reads the next byte into `currentChar`.
   *
   * @returns {number} The byte that was read.
   */
  nextChar() {
    return (this.currentChar = this.stream.getByte());
  }

  /**
   * @returns {number} The byte after `currentChar`, without consuming it.
   */
  peekChar() {
    return this.stream.peekByte();
  }

  /**
   * Reads a number starting at `currentChar`. Tolerates a doubled leading
   * minus sign, line breaks after the sign, minus signs in the middle of
   * the digits (ignored with a warning) and an exponent part.
   *
   * @returns {number}
   * @throws {FormatError} When no digit is found, except that a lone "."
   *   or a lone "-" followed by whitespace or EOF is read as 0.
   */
  getNumber() {
    let ch = this.currentChar;
    let eNotation = false;
    let divideBy = 0; // 0 while no decimal point has been seen.
    let sign = 0; // 0 while no explicit sign has been seen.

    if (ch === 0x2d) {
      // '-'
      sign = -1;
      ch = this.nextChar();

      if (ch === 0x2d) {
        // Ignore a second minus sign.
        ch = this.nextChar();
      }
    } else if (ch === 0x2b) {
      // '+'
      sign = 1;
      ch = this.nextChar();
    }

    if (ch === 0x0a || ch === 0x0d) {
      // Ignore line breaks between the sign and the digits.
      do {
        ch = this.nextChar();
      } while (ch === 0x0a || ch === 0x0d);
    }

    if (ch === 0x2e) {
      // '.'
      divideBy = 10;
      ch = this.nextChar();
    }

    if (ch < 0x30 || ch > 0x39) {
      if ((0, _core_utils.isWhiteSpace)(ch) || ch === -1) {
        if (divideBy === 10 && sign === 0) {
          (0, _util.warn)(
            "Lexer.getNumber - treating a single decimal point as zero."
          );
          return 0;
        }

        if (divideBy === 0 && sign === -1) {
          (0, _util.warn)(
            "Lexer.getNumber - treating a single minus sign as zero."
          );
          return 0;
        }
      }

      throw new _util.FormatError(
        `Invalid number: ${String.fromCharCode(ch)} (charCode ${ch})`
      );
    }

    sign = sign || 1;
    let baseValue = ch - 0x30;
    let powerValue = 0;
    let powerValueSign = 1;

    while ((ch = this.nextChar()) >= 0) {
      if (ch >= 0x30 && ch <= 0x39) {
        const currentDigit = ch - 0x30;

        if (eNotation) {
          powerValue = powerValue * 10 + currentDigit;
        } else {
          if (divideBy !== 0) {
            divideBy *= 10;
          }

          baseValue = baseValue * 10 + currentDigit;
        }
      } else if (ch === 0x2e) {
        // '.'
        if (divideBy === 0) {
          divideBy = 1;
        } else {
          // A second decimal point ends the number.
          break;
        }
      } else if (ch === 0x2d) {
        // '-': skipped.
        (0, _util.warn)("Badly formatted number: minus sign in the middle");
      } else if (ch === 0x45 || ch === 0x65) {
        // 'E' or 'e': only an exponent if a sign or a digit follows.
        ch = this.peekChar();

        if (ch === 0x2b || ch === 0x2d) {
          powerValueSign = ch === 0x2d ? -1 : 1;
          this.nextChar();
        } else if (ch < 0x30 || ch > 0x39) {
          break;
        }

        eNotation = true;
      } else {
        break;
      }
    }

    if (divideBy !== 0) {
      baseValue /= divideBy;
    }

    if (eNotation) {
      baseValue *= 10 ** (powerValueSign * powerValue);
    }

    return sign * baseValue;
  }

  /**
   * Reads a literal string. `currentChar` must be the opening "(".
   * Handles balanced nested parentheses, backslash escapes (including
   * one- to three-digit octal codes) and escaped line breaks.
   *
   * @returns {string} The string contents, one character per byte.
   */
  getString() {
    let numParen = 1;
    let done = false;
    const strBuf = this.strBuf;
    strBuf.length = 0;
    let ch = this.nextChar();

    while (true) {
      // Set when `ch` already holds the next unprocessed character.
      let charBuffered = false;

      switch (ch | 0) {
        case -1:
          (0, _util.warn)("Unterminated string");
          done = true;
          break;

        case 0x28: // '('
          ++numParen;
          strBuf.push("(");
          break;

        case 0x29: // ')'
          if (--numParen === 0) {
            this.nextChar();
            done = true;
          } else {
            strBuf.push(")");
          }

          break;

        case 0x5c: // '\'
          ch = this.nextChar();

          switch (ch) {
            case -1:
              (0, _util.warn)("Unterminated string");
              done = true;
              break;

            case 0x6e: // 'n'
              strBuf.push("\n");
              break;

            case 0x72: // 'r'
              strBuf.push("\r");
              break;

            case 0x74: // 't'
              strBuf.push("\t");
              break;

            case 0x62: // 'b'
              strBuf.push("\b");
              break;

            case 0x66: // 'f'
              strBuf.push("\f");
              break;

            case 0x5c: // '\'
            case 0x28: // '('
            case 0x29: // ')'
              strBuf.push(String.fromCharCode(ch));
              break;

            case 0x30:
            case 0x31:
            case 0x32:
            case 0x33:
            case 0x34:
            case 0x35:
            case 0x36:
            case 0x37: {
              // Octal escape with one to three digits.
              let x = ch & 0x0f;
              ch = this.nextChar();
              charBuffered = true;

              if (ch >= 0x30 && ch <= 0x37) {
                x = (x << 3) + (ch & 0x0f);
                ch = this.nextChar();

                if (ch >= 0x30 && ch <= 0x37) {
                  charBuffered = false;
                  x = (x << 3) + (ch & 0x0f);
                }
              }

              strBuf.push(String.fromCharCode(x));
              break;
            }

            case 0x0d: // CR: escaped line break, adds nothing.
              if (this.peekChar() === 0x0a) {
                this.nextChar();
              }

              break;

            case 0x0a: // LF: escaped line break, adds nothing.
              break;

            default:
              // Unknown escape: keep the character, drop the backslash.
              strBuf.push(String.fromCharCode(ch));
              break;
          }

          break;

        default:
          strBuf.push(String.fromCharCode(ch));
          break;
      }

      if (done) {
        break;
      }

      if (!charBuffered) {
        ch = this.nextChar();
      }
    }

    return strBuf.join("");
  }

  /**
   * Reads a name. `currentChar` must be the leading "/". `#xx` sequences
   * are decoded; a malformed `#` sequence is kept literally, with a warning
   * when the `#` is followed by a special character or by only one hex
   * digit.
   *
   * @returns {Name}
   */
  getName() {
    let ch, previousCh;
    const strBuf = this.strBuf;
    strBuf.length = 0;

    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
      if (ch === 0x23) {
        // '#'
        ch = this.nextChar();

        if (specialChars[ch]) {
          (0, _util.warn)(
            "Lexer_getName: " +
              "NUMBER SIGN (#) should be followed by a hexadecimal number."
          );
          strBuf.push("#");
          break;
        }

        const x = toHexDigit(ch);

        if (x !== -1) {
          previousCh = ch;
          ch = this.nextChar();
          const x2 = toHexDigit(ch);

          if (x2 === -1) {
            (0, _util.warn)(
              `Lexer_getName: Illegal digit (${String.fromCharCode(ch)}) ` +
                "in hexadecimal number."
            );
            strBuf.push("#", String.fromCharCode(previousCh));

            if (specialChars[ch]) {
              break;
            }

            strBuf.push(String.fromCharCode(ch));
            continue;
          }

          strBuf.push(String.fromCharCode((x << 4) | x2));
        } else {
          strBuf.push("#", String.fromCharCode(ch));
        }
      } else {
        strBuf.push(String.fromCharCode(ch));
      }
    }

    if (strBuf.length > 127) {
      (0, _util.warn)(
        `Name token is longer than allowed by the spec: ${strBuf.length}`
      );
    }

    return _primitives.Name.get(strBuf.join(""));
  }

  /**
   * Warns about an invalid character in a hex string. Only the first few
   * characters are reported individually, followed by one summary warning.
   *
   * @param {number} ch - The invalid character code.
   */
  _hexStringWarn(ch) {
    const MAX_HEX_STRING_NUM_WARN = 5;

    if (this._hexStringNumWarn++ === MAX_HEX_STRING_NUM_WARN) {
      (0, _util.warn)("getHexString - ignoring additional invalid characters.");
      return;
    }

    if (this._hexStringNumWarn > MAX_HEX_STRING_NUM_WARN) {
      return;
    }

    (0, _util.warn)(`getHexString - ignoring invalid character: ${ch}`);
  }

  /**
   * Reads a hexadecimal string. `currentChar` must be the first character
   * after the opening "<". Whitespace is skipped and invalid characters are
   * ignored with a warning.
   *
   * If the number of hex digits is odd, the missing final digit is taken to
   * be 0, as required by the PDF specification ("Hexadecimal Strings").
   *
   * @returns {string} The decoded string, one character per byte.
   */
  getHexString() {
    const strBuf = this.strBuf;
    strBuf.length = 0;
    let ch = this.currentChar;
    let firstDigit = -1; // High nibble awaiting its partner, or -1.
    this._hexStringNumWarn = 0;

    while (true) {
      if (ch < 0) {
        (0, _util.warn)("Unterminated hex string");
        break;
      }

      if (ch === 0x3e) {
        // '>'
        this.nextChar();
        break;
      }

      if (specialChars[ch] !== 1) {
        const digit = toHexDigit(ch);

        if (digit === -1) {
          this._hexStringWarn(ch);
        } else if (firstDigit === -1) {
          firstDigit = digit;
        } else {
          strBuf.push(String.fromCharCode((firstDigit << 4) | digit));
          firstDigit = -1;
        }
      }

      ch = this.nextChar();
    }

    if (firstDigit !== -1) {
      // Odd number of digits: pad the last byte with a zero low nibble.
      strBuf.push(String.fromCharCode(firstDigit << 4));
    }

    return strBuf.join("");
  }

  /**
   * Reads the next token, skipping whitespace and `%` comments.
   *
   * @returns {*} `EOF`, a number, a string, a `Name`, `true`, `false`,
   *   `null`, or a `Cmd` for everything else.
   * @throws {FormatError} On a stray ")" or when a command token grows
   *   beyond 128 characters.
   */
  getObj() {
    let comment = false;
    let ch = this.currentChar;

    while (true) {
      if (ch < 0) {
        return _primitives.EOF;
      }

      if (comment) {
        if (ch === 0x0a || ch === 0x0d) {
          comment = false;
        }
      } else if (ch === 0x25) {
        // '%'
        comment = true;
      } else if (specialChars[ch] !== 1) {
        break;
      }

      ch = this.nextChar();
    }

    switch (ch | 0) {
      case 0x30:
      case 0x31:
      case 0x32:
      case 0x33:
      case 0x34:
      case 0x35:
      case 0x36:
      case 0x37:
      case 0x38:
      case 0x39:
      case 0x2b: // '+'
      case 0x2d: // '-'
      case 0x2e: // '.'
        return this.getNumber();

      case 0x28: // '('
        return this.getString();

      case 0x2f: // '/'
        return this.getName();

      case 0x5b: // '['
        this.nextChar();
        return _primitives.Cmd.get("[");

      case 0x5d: // ']'
        this.nextChar();
        return _primitives.Cmd.get("]");

      case 0x3c: // '<'
        ch = this.nextChar();

        if (ch === 0x3c) {
          this.nextChar();
          return _primitives.Cmd.get("<<");
        }

        return this.getHexString();

      case 0x3e: // '>'
        ch = this.nextChar();

        if (ch === 0x3e) {
          this.nextChar();
          return _primitives.Cmd.get(">>");
        }

        return _primitives.Cmd.get(">");

      case 0x7b: // '{'
        this.nextChar();
        return _primitives.Cmd.get("{");

      case 0x7d: // '}'
        this.nextChar();
        return _primitives.Cmd.get("}");

      case 0x29: // ')'
        // Consume it first so that the lexer can continue after the error.
        this.nextChar();
        throw new _util.FormatError(`Illegal character: ${ch}`);
    }

    // Everything else starts a command (or true/false/null).
    let str = String.fromCharCode(ch);

    if (ch < 0x20 || ch > 0x7f) {
      // A byte outside 0x20-0x7f that is directly followed by a byte inside
      // that range becomes a one-character command of its own.
      const nextCh = this.peekChar();

      if (nextCh >= 0x20 && nextCh <= 0x7f) {
        this.nextChar();
        return _primitives.Cmd.get(str);
      }
    }

    const knownCommands = this.knownCommands;
    let knownCommandFound = knownCommands && knownCommands[str] !== undefined;

    while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
      const possibleCommand = str + String.fromCharCode(ch);

      // Stop at a known command rather than growing into an unknown one.
      if (knownCommandFound && knownCommands[possibleCommand] === undefined) {
        break;
      }

      if (str.length === 128) {
        throw new _util.FormatError(`Command token too long: ${str.length}`);
      }

      str = possibleCommand;
      knownCommandFound = knownCommands && knownCommands[str] !== undefined;
    }

    if (str === "true") {
      return true;
    }

    if (str === "false") {
      return false;
    }

    if (str === "null") {
      return null;
    }

    if (str === "BI") {
      // Remembered so the parser can find the inline-image dictionary bytes.
      this.beginInlineImagePos = this.stream.pos;
    }

    return _primitives.Cmd.get(str);
  }

  /**
   * Returns the next token without consuming it: the stream position,
   * `currentChar` and `beginInlineImagePos` are restored afterwards.
   *
   * @returns {*} The next token, or `undefined` when reading it failed (a
   *   warning is logged in that case).
   * @throws {MissingDataException} Always re-thrown.
   */
  peekObj() {
    const streamPos = this.stream.pos,
      currentChar = this.currentChar,
      beginInlineImagePos = this.beginInlineImagePos;
    let nextObj;

    try {
      nextObj = this.getObj();
    } catch (ex) {
      if (ex instanceof _core_utils.MissingDataException) {
        throw ex;
      }

      (0, _util.warn)(`peekObj: ${ex}`);
    }

    this.stream.pos = streamPos;
    this.currentChar = currentChar;
    this.beginInlineImagePos = beginInlineImagePos;
    return nextObj;
  }

  /**
   * Skips to the first character after the next line break (CR, LF or
   * CR LF), or to the end of the stream when there is none.
   */
  skipToNextLine() {
    let ch = this.currentChar;

    while (ch >= 0) {
      if (ch === 0x0d) {
        ch = this.nextChar();

        if (ch === 0x0a) {
          this.nextChar();
        }

        break;
      } else if (ch === 0x0a) {
        this.nextChar();
        break;
      }

      ch = this.nextChar();
    }
  }
}
|
|
|
|
exports.Lexer = Lexer;
|
|
|
|
/**
 * Reads the linearization parameter dictionary found at the start of a
 * linearized PDF file.
 */
class Linearization {
  /**
   * Parses the first object in `stream` and, if it is a linearization
   * dictionary, validates and returns its parameters.
   *
   * @param {Object} stream - The PDF data; `stream.length` is compared with
   *   the dictionary's `L` entry.
   * @returns {Object|null} The linearization parameters, or `null` when the
   *   data does not start with `<int> <int> obj <dict>` where the dictionary
   *   has a positive numeric `Linearized` entry.
   * @throws {Error} When the dictionary is present but an entry is invalid.
   */
  static create(stream) {
    // Returns the integer stored under `name`; it must be positive, or
    // merely non-negative when `allowZeroValue` is set.
    const getInt = (linDict, name, allowZeroValue = false) => {
      const value = linDict.get(name);
      const minimum = allowZeroValue ? 0 : 1;

      if (!Number.isInteger(value) || value < minimum) {
        throw new Error(
          `The "${name}" parameter in the linearization ` +
            "dictionary is invalid."
        );
      }

      return value;
    };

    // Returns the `H` array, which must hold two or four positive integers.
    const getHints = (linDict) => {
      const hints = linDict.get("H");

      if (
        !Array.isArray(hints) ||
        (hints.length !== 2 && hints.length !== 4)
      ) {
        throw new Error(
          "Hint array in the linearization dictionary is invalid."
        );
      }

      for (let index = 0; index < hints.length; index++) {
        const hint = hints[index];
        const isValid = Number.isInteger(hint) && hint > 0;

        if (!isValid) {
          throw new Error(
            `Hint (${index}) in the linearization dictionary is invalid.`
          );
        }
      }

      return hints;
    };

    const parser = new Parser({
      lexer: new Lexer(stream),
      xref: null,
    });
    const objNum = parser.getObj();
    const genNum = parser.getObj();
    const objCmd = parser.getObj();
    const linDict = parser.getObj();

    const startsWithDictObject =
      Number.isInteger(objNum) &&
      Number.isInteger(genNum) &&
      (0, _primitives.isCmd)(objCmd, "obj") &&
      linDict instanceof _primitives.Dict;

    if (!startsWithDictObject) {
      return null;
    }

    const linearized = linDict.get("Linearized");

    if (!(typeof linearized === "number" && linearized > 0)) {
      return null;
    }

    const length = getInt(linDict, "L");

    if (length !== stream.length) {
      throw new Error(
        'The "L" parameter in the linearization dictionary ' +
          "does not equal the stream length."
      );
    }

    // The entries are validated in this order, so the first invalid one
    // determines the error that is thrown.
    const hints = getHints(linDict);
    const objectNumberFirst = getInt(linDict, "O");
    const endFirst = getInt(linDict, "E");
    const numPages = getInt(linDict, "N");
    const mainXRefEntriesOffset = getInt(linDict, "T");
    const pageFirst = linDict.has("P") ? getInt(linDict, "P", true) : 0;

    return {
      length,
      hints,
      objectNumberFirst,
      endFirst,
      numPages,
      mainXRefEntriesOffset,
      pageFirst,
    };
  }
}
|
|
|
|
exports.Linearization = Linearization; |