yanchengPowerSupply/node_modules/pdfjs-dist/lib/web/pdf_find_controller.js

830 lines
21 KiB
JavaScript

/**
* @licstart The following is the entire license notice for the
* JavaScript code in this page
*
* Copyright 2022 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @licend The above is the entire license notice for the
* JavaScript code in this page
*/
"use strict";
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.PDFFindController = exports.FindState = void 0;
var _ui_utils = require("./ui_utils.js");
var _pdf = require("../pdf");
var _pdf_find_utils = require("./pdf_find_utils.js");
const FindState = {
FOUND: 0,
NOT_FOUND: 1,
WRAPPED: 2,
PENDING: 3
};
exports.FindState = FindState;
const FIND_TIMEOUT = 250;
const MATCH_SCROLL_OFFSET_TOP = -50;
const MATCH_SCROLL_OFFSET_LEFT = -400;
const CHARACTERS_TO_NORMALIZE = {
"\u2010": "-",
"\u2018": "'",
"\u2019": "'",
"\u201A": "'",
"\u201B": "'",
"\u201C": '"',
"\u201D": '"',
"\u201E": '"',
"\u201F": '"',
"\u00BC": "1/4",
"\u00BD": "1/2",
"\u00BE": "3/4"
};
const DIACRITICS_EXCEPTION = new Set([0x3099, 0x309a, 0x094d, 0x09cd, 0x0a4d, 0x0acd, 0x0b4d, 0x0bcd, 0x0c4d, 0x0ccd, 0x0d3b, 0x0d3c, 0x0d4d, 0x0dca, 0x0e3a, 0x0eba, 0x0f84, 0x1039, 0x103a, 0x1714, 0x1734, 0x17d2, 0x1a60, 0x1b44, 0x1baa, 0x1bab, 0x1bf2, 0x1bf3, 0x2d7f, 0xa806, 0xa82c, 0xa8c4, 0xa953, 0xa9c0, 0xaaf6, 0xabed, 0x0c56, 0x0f71, 0x0f72, 0x0f7a, 0x0f7b, 0x0f7c, 0x0f7d, 0x0f80, 0x0f74]);
const DIACRITICS_EXCEPTION_STR = [...DIACRITICS_EXCEPTION.values()].map(x => String.fromCharCode(x)).join("");
const DIACRITICS_REG_EXP = /\p{M}+/gu;
const SPECIAL_CHARS_REG_EXP = /([.*+?^${}()|[\]\\])|(\p{P})|(\s+)|(\p{M})|(\p{L})/gu;
const NOT_DIACRITIC_FROM_END_REG_EXP = /([^\p{M}])\p{M}*$/u;
const NOT_DIACRITIC_FROM_START_REG_EXP = /^\p{M}*([^\p{M}])/u;
const SYLLABLES_REG_EXP = /[\uAC00-\uD7AF\uFA6C\uFACF-\uFAD1\uFAD5-\uFAD7]+/g;
const SYLLABLES_LENGTHS = new Map();
const FIRST_CHAR_SYLLABLES_REG_EXP = "[\\u1100-\\u1112\\ud7a4-\\ud7af\\ud84a\\ud84c\\ud850\\ud854\\ud857\\ud85f]";
let noSyllablesRegExp = null;
let withSyllablesRegExp = null;
function normalize(text) {
const syllablePositions = [];
let m;
while ((m = SYLLABLES_REG_EXP.exec(text)) !== null) {
let {
index
} = m;
for (const char of m[0]) {
let len = SYLLABLES_LENGTHS.get(char);
if (!len) {
len = char.normalize("NFD").length;
SYLLABLES_LENGTHS.set(char, len);
}
syllablePositions.push([len, index++]);
}
}
let normalizationRegex;
if (syllablePositions.length === 0 && noSyllablesRegExp) {
normalizationRegex = noSyllablesRegExp;
} else if (syllablePositions.length > 0 && withSyllablesRegExp) {
normalizationRegex = withSyllablesRegExp;
} else {
const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
const regexp = `([${replace}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(\\n)`;
if (syllablePositions.length === 0) {
normalizationRegex = noSyllablesRegExp = new RegExp(regexp + "|(\\u0000)", "gum");
} else {
normalizationRegex = withSyllablesRegExp = new RegExp(regexp + `|(${FIRST_CHAR_SYLLABLES_REG_EXP})`, "gum");
}
}
const rawDiacriticsPositions = [];
while ((m = DIACRITICS_REG_EXP.exec(text)) !== null) {
rawDiacriticsPositions.push([m[0].length, m.index]);
}
let normalized = text.normalize("NFD");
const positions = [[0, 0]];
let rawDiacriticsIndex = 0;
let syllableIndex = 0;
let shift = 0;
let shiftOrigin = 0;
let eol = 0;
let hasDiacritics = false;
normalized = normalized.replace(normalizationRegex, (match, p1, p2, p3, p4, p5, i) => {
i -= shiftOrigin;
if (p1) {
const replacement = CHARACTERS_TO_NORMALIZE[match];
const jj = replacement.length;
for (let j = 1; j < jj; j++) {
positions.push([i - shift + j, shift - j]);
}
shift -= jj - 1;
return replacement;
}
if (p2) {
const hasTrailingDashEOL = p2.endsWith("\n");
const len = hasTrailingDashEOL ? p2.length - 2 : p2.length;
hasDiacritics = true;
let jj = len;
if (i + eol === rawDiacriticsPositions[rawDiacriticsIndex]?.[1]) {
jj -= rawDiacriticsPositions[rawDiacriticsIndex][0];
++rawDiacriticsIndex;
}
for (let j = 1; j <= jj; j++) {
positions.push([i - 1 - shift + j, shift - j]);
}
shift -= jj;
shiftOrigin += jj;
if (hasTrailingDashEOL) {
i += len - 1;
positions.push([i - shift + 1, 1 + shift]);
shift += 1;
shiftOrigin += 1;
eol += 1;
return p2.slice(0, len);
}
return p2;
}
if (p3) {
positions.push([i - shift + 1, 1 + shift]);
shift += 1;
shiftOrigin += 1;
eol += 1;
return p3.charAt(0);
}
if (p4) {
positions.push([i - shift + 1, shift - 1]);
shift -= 1;
shiftOrigin += 1;
eol += 1;
return " ";
}
if (i + eol === syllablePositions[syllableIndex]?.[1]) {
const newCharLen = syllablePositions[syllableIndex][0] - 1;
++syllableIndex;
for (let j = 1; j <= newCharLen; j++) {
positions.push([i - (shift - j), shift - j]);
}
shift -= newCharLen;
shiftOrigin += newCharLen;
}
return p5;
});
positions.push([normalized.length, shift]);
return [normalized, positions, hasDiacritics];
}
function getOriginalIndex(diffs, pos, len) {
if (!diffs) {
return [pos, len];
}
const start = pos;
const end = pos + len;
let i = (0, _ui_utils.binarySearchFirstItem)(diffs, x => x[0] >= start);
if (diffs[i][0] > start) {
--i;
}
let j = (0, _ui_utils.binarySearchFirstItem)(diffs, x => x[0] >= end, i);
if (diffs[j][0] > end) {
--j;
}
return [start + diffs[i][1], len + diffs[j][1] - diffs[i][1]];
}
class PDFFindController {
constructor({
linkService,
eventBus
}) {
this._linkService = linkService;
this._eventBus = eventBus;
this.#reset();
eventBus._on("find", this.#onFind.bind(this));
eventBus._on("findbarclose", this.#onFindBarClose.bind(this));
}
get highlightMatches() {
return this._highlightMatches;
}
get pageMatches() {
return this._pageMatches;
}
get pageMatchesLength() {
return this._pageMatchesLength;
}
get selected() {
return this._selected;
}
get state() {
return this._state;
}
setDocument(pdfDocument) {
if (this._pdfDocument) {
this.#reset();
}
if (!pdfDocument) {
return;
}
this._pdfDocument = pdfDocument;
this._firstPageCapability.resolve();
}
#onFind(state) {
if (!state) {
return;
}
const pdfDocument = this._pdfDocument;
const {
type
} = state;
if (this._state === null || this.#shouldDirtyMatch(state)) {
this._dirtyMatch = true;
}
this._state = state;
if (type !== "highlightallchange") {
this.#updateUIState(FindState.PENDING);
}
this._firstPageCapability.promise.then(() => {
if (!this._pdfDocument || pdfDocument && this._pdfDocument !== pdfDocument) {
return;
}
this.#extractText();
const findbarClosed = !this._highlightMatches;
const pendingTimeout = !!this._findTimeout;
if (this._findTimeout) {
clearTimeout(this._findTimeout);
this._findTimeout = null;
}
if (!type) {
this._findTimeout = setTimeout(() => {
this.#nextMatch();
this._findTimeout = null;
}, FIND_TIMEOUT);
} else if (this._dirtyMatch) {
this.#nextMatch();
} else if (type === "again") {
this.#nextMatch();
if (findbarClosed && this._state.highlightAll) {
this.#updateAllPages();
}
} else if (type === "highlightallchange") {
if (pendingTimeout) {
this.#nextMatch();
} else {
this._highlightMatches = true;
}
this.#updateAllPages();
} else {
this.#nextMatch();
}
});
}
scrollMatchIntoView({
element = null,
selectedLeft = 0,
pageIndex = -1,
matchIndex = -1
}) {
if (!this._scrollMatches || !element) {
return;
} else if (matchIndex === -1 || matchIndex !== this._selected.matchIdx) {
return;
} else if (pageIndex === -1 || pageIndex !== this._selected.pageIdx) {
return;
}
this._scrollMatches = false;
const spot = {
top: MATCH_SCROLL_OFFSET_TOP,
left: selectedLeft + MATCH_SCROLL_OFFSET_LEFT
};
(0, _ui_utils.scrollIntoView)(element, spot, true);
}
#reset() {
this._highlightMatches = false;
this._scrollMatches = false;
this._pdfDocument = null;
this._pageMatches = [];
this._pageMatchesLength = [];
this._state = null;
this._selected = {
pageIdx: -1,
matchIdx: -1
};
this._offset = {
pageIdx: null,
matchIdx: null,
wrapped: false
};
this._extractTextPromises = [];
this._pageContents = [];
this._pageDiffs = [];
this._hasDiacritics = [];
this._matchesCountTotal = 0;
this._pagesToSearch = null;
this._pendingFindMatches = new Set();
this._resumePageIdx = null;
this._dirtyMatch = false;
clearTimeout(this._findTimeout);
this._findTimeout = null;
this._firstPageCapability = (0, _pdf.createPromiseCapability)();
}
get #query() {
if (this._state.query !== this._rawQuery) {
this._rawQuery = this._state.query;
[this._normalizedQuery] = normalize(this._state.query);
}
return this._normalizedQuery;
}
#shouldDirtyMatch(state) {
if (state.query !== this._state.query) {
return true;
}
switch (state.type) {
case "again":
const pageNumber = this._selected.pageIdx + 1;
const linkService = this._linkService;
if (pageNumber >= 1 && pageNumber <= linkService.pagesCount && pageNumber !== linkService.page && !linkService.isPageVisible(pageNumber)) {
return true;
}
return false;
case "highlightallchange":
return false;
}
return true;
}
#isEntireWord(content, startIdx, length) {
let match = content.slice(0, startIdx).match(NOT_DIACRITIC_FROM_END_REG_EXP);
if (match) {
const first = content.charCodeAt(startIdx);
const limit = match[1].charCodeAt(0);
if ((0, _pdf_find_utils.getCharacterType)(first) === (0, _pdf_find_utils.getCharacterType)(limit)) {
return false;
}
}
match = content.slice(startIdx + length).match(NOT_DIACRITIC_FROM_START_REG_EXP);
if (match) {
const last = content.charCodeAt(startIdx + length - 1);
const limit = match[1].charCodeAt(0);
if ((0, _pdf_find_utils.getCharacterType)(last) === (0, _pdf_find_utils.getCharacterType)(limit)) {
return false;
}
}
return true;
}
#calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
const matches = [],
matchesLength = [];
const diffs = this._pageDiffs[pageIndex];
let match;
while ((match = query.exec(pageContent)) !== null) {
if (entireWord && !this.#isEntireWord(pageContent, match.index, match[0].length)) {
continue;
}
const [matchPos, matchLen] = getOriginalIndex(diffs, match.index, match[0].length);
if (matchLen) {
matches.push(matchPos);
matchesLength.push(matchLen);
}
}
this._pageMatches[pageIndex] = matches;
this._pageMatchesLength[pageIndex] = matchesLength;
}
#convertToRegExpString(query, hasDiacritics) {
const {
matchDiacritics
} = this._state;
let isUnicode = false;
query = query.replace(SPECIAL_CHARS_REG_EXP, (match, p1, p2, p3, p4, p5) => {
if (p1) {
return `[ ]*\\${p1}[ ]*`;
}
if (p2) {
return `[ ]*${p2}[ ]*`;
}
if (p3) {
return "[ ]+";
}
if (matchDiacritics) {
return p4 || p5;
}
if (p4) {
return DIACRITICS_EXCEPTION.has(p4.charCodeAt(0)) ? p4 : "";
}
if (hasDiacritics) {
isUnicode = true;
return `${p5}\\p{M}*`;
}
return p5;
});
const trailingSpaces = "[ ]*";
if (query.endsWith(trailingSpaces)) {
query = query.slice(0, query.length - trailingSpaces.length);
}
if (matchDiacritics) {
if (hasDiacritics) {
isUnicode = true;
query = `${query}(?=[${DIACRITICS_EXCEPTION_STR}]|[^\\p{M}]|$)`;
}
}
return [isUnicode, query];
}
#calculateMatch(pageIndex) {
let query = this.#query;
if (query.length === 0) {
return;
}
const {
caseSensitive,
entireWord,
phraseSearch
} = this._state;
const pageContent = this._pageContents[pageIndex];
const hasDiacritics = this._hasDiacritics[pageIndex];
let isUnicode = false;
if (phraseSearch) {
[isUnicode, query] = this.#convertToRegExpString(query, hasDiacritics);
} else {
const match = query.match(/\S+/g);
if (match) {
query = match.sort().reverse().map(q => {
const [isUnicodePart, queryPart] = this.#convertToRegExpString(q, hasDiacritics);
isUnicode ||= isUnicodePart;
return `(${queryPart})`;
}).join("|");
}
}
const flags = `g${isUnicode ? "u" : ""}${caseSensitive ? "" : "i"}`;
query = new RegExp(query, flags);
this.#calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
if (this._state.highlightAll) {
this.#updatePage(pageIndex);
}
if (this._resumePageIdx === pageIndex) {
this._resumePageIdx = null;
this.#nextPageMatch();
}
const pageMatchesCount = this._pageMatches[pageIndex].length;
if (pageMatchesCount > 0) {
this._matchesCountTotal += pageMatchesCount;
this.#updateUIResultsCount();
}
}
#extractText() {
if (this._extractTextPromises.length > 0) {
return;
}
let promise = Promise.resolve();
for (let i = 0, ii = this._linkService.pagesCount; i < ii; i++) {
const extractTextCapability = (0, _pdf.createPromiseCapability)();
this._extractTextPromises[i] = extractTextCapability.promise;
promise = promise.then(() => {
return this._pdfDocument.getPage(i + 1).then(pdfPage => {
return pdfPage.getTextContent();
}).then(textContent => {
const strBuf = [];
for (const textItem of textContent.items) {
strBuf.push(textItem.str);
if (textItem.hasEOL) {
strBuf.push("\n");
}
}
[this._pageContents[i], this._pageDiffs[i], this._hasDiacritics[i]] = normalize(strBuf.join(""));
extractTextCapability.resolve();
}, reason => {
console.error(`Unable to get text content for page ${i + 1}`, reason);
this._pageContents[i] = "";
this._pageDiffs[i] = null;
this._hasDiacritics[i] = false;
extractTextCapability.resolve();
});
});
}
}
#updatePage(index) {
if (this._scrollMatches && this._selected.pageIdx === index) {
this._linkService.page = index + 1;
}
this._eventBus.dispatch("updatetextlayermatches", {
source: this,
pageIndex: index
});
}
#updateAllPages() {
this._eventBus.dispatch("updatetextlayermatches", {
source: this,
pageIndex: -1
});
}
#nextMatch() {
const previous = this._state.findPrevious;
const currentPageIndex = this._linkService.page - 1;
const numPages = this._linkService.pagesCount;
this._highlightMatches = true;
if (this._dirtyMatch) {
this._dirtyMatch = false;
this._selected.pageIdx = this._selected.matchIdx = -1;
this._offset.pageIdx = currentPageIndex;
this._offset.matchIdx = null;
this._offset.wrapped = false;
this._resumePageIdx = null;
this._pageMatches.length = 0;
this._pageMatchesLength.length = 0;
this._matchesCountTotal = 0;
this.#updateAllPages();
for (let i = 0; i < numPages; i++) {
if (this._pendingFindMatches.has(i)) {
continue;
}
this._pendingFindMatches.add(i);
this._extractTextPromises[i].then(() => {
this._pendingFindMatches.delete(i);
this.#calculateMatch(i);
});
}
}
if (this.#query === "") {
this.#updateUIState(FindState.FOUND);
return;
}
if (this._resumePageIdx) {
return;
}
const offset = this._offset;
this._pagesToSearch = numPages;
if (offset.matchIdx !== null) {
const numPageMatches = this._pageMatches[offset.pageIdx].length;
if (!previous && offset.matchIdx + 1 < numPageMatches || previous && offset.matchIdx > 0) {
offset.matchIdx = previous ? offset.matchIdx - 1 : offset.matchIdx + 1;
this.#updateMatch(true);
return;
}
this.#advanceOffsetPage(previous);
}
this.#nextPageMatch();
}
#matchesReady(matches) {
const offset = this._offset;
const numMatches = matches.length;
const previous = this._state.findPrevious;
if (numMatches) {
offset.matchIdx = previous ? numMatches - 1 : 0;
this.#updateMatch(true);
return true;
}
this.#advanceOffsetPage(previous);
if (offset.wrapped) {
offset.matchIdx = null;
if (this._pagesToSearch < 0) {
this.#updateMatch(false);
return true;
}
}
return false;
}
#nextPageMatch() {
if (this._resumePageIdx !== null) {
console.error("There can only be one pending page.");
}
let matches = null;
do {
const pageIdx = this._offset.pageIdx;
matches = this._pageMatches[pageIdx];
if (!matches) {
this._resumePageIdx = pageIdx;
break;
}
} while (!this.#matchesReady(matches));
}
#advanceOffsetPage(previous) {
const offset = this._offset;
const numPages = this._linkService.pagesCount;
offset.pageIdx = previous ? offset.pageIdx - 1 : offset.pageIdx + 1;
offset.matchIdx = null;
this._pagesToSearch--;
if (offset.pageIdx >= numPages || offset.pageIdx < 0) {
offset.pageIdx = previous ? numPages - 1 : 0;
offset.wrapped = true;
}
}
#updateMatch(found = false) {
let state = FindState.NOT_FOUND;
const wrapped = this._offset.wrapped;
this._offset.wrapped = false;
if (found) {
const previousPage = this._selected.pageIdx;
this._selected.pageIdx = this._offset.pageIdx;
this._selected.matchIdx = this._offset.matchIdx;
state = wrapped ? FindState.WRAPPED : FindState.FOUND;
if (previousPage !== -1 && previousPage !== this._selected.pageIdx) {
this.#updatePage(previousPage);
}
}
this.#updateUIState(state, this._state.findPrevious);
if (this._selected.pageIdx !== -1) {
this._scrollMatches = true;
this.#updatePage(this._selected.pageIdx);
}
}
#onFindBarClose(evt) {
const pdfDocument = this._pdfDocument;
this._firstPageCapability.promise.then(() => {
if (!this._pdfDocument || pdfDocument && this._pdfDocument !== pdfDocument) {
return;
}
if (this._findTimeout) {
clearTimeout(this._findTimeout);
this._findTimeout = null;
}
if (this._resumePageIdx) {
this._resumePageIdx = null;
this._dirtyMatch = true;
}
this.#updateUIState(FindState.FOUND);
this._highlightMatches = false;
this.#updateAllPages();
});
}
#requestMatchesCount() {
const {
pageIdx,
matchIdx
} = this._selected;
let current = 0,
total = this._matchesCountTotal;
if (matchIdx !== -1) {
for (let i = 0; i < pageIdx; i++) {
current += this._pageMatches[i]?.length || 0;
}
current += matchIdx + 1;
}
if (current < 1 || current > total) {
current = total = 0;
}
return {
current,
total
};
}
#updateUIResultsCount() {
this._eventBus.dispatch("updatefindmatchescount", {
source: this,
matchesCount: this.#requestMatchesCount()
});
}
#updateUIState(state, previous = false) {
this._eventBus.dispatch("updatefindcontrolstate", {
source: this,
state,
previous,
matchesCount: this.#requestMatchesCount(),
rawQuery: this._state?.query ?? null
});
}
}
exports.PDFFindController = PDFFindController;