package jp.favorite.pdf.reader.fumiko.pdfbox.util;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Vector;
import java.util.regex.Pattern;
import jp.favorite.pdf.reader.fumiko.pdfbox.cos.COSStream;
import jp.favorite.pdf.reader.fumiko.pdfbox.exceptions.CryptographyException;
import jp.favorite.pdf.reader.fumiko.pdfbox.exceptions.InvalidPasswordException;
import jp.favorite.pdf.reader.fumiko.pdfbox.exceptions.WrappedIOException;
import jp.favorite.pdf.reader.fumiko.pdfbox.pdmodel.PDDocument;
import jp.favorite.pdf.reader.fumiko.pdfbox.pdmodel.PDPage;
import jp.favorite.pdf.reader.fumiko.pdfbox.pdmodel.common.COSObjectable;
import jp.favorite.pdf.reader.fumiko.pdfbox.pdmodel.common.PDRectangle;
import jp.favorite.pdf.reader.fumiko.pdfbox.pdmodel.common.PDStream;
import jp.favorite.pdf.reader.fumiko.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import jp.favorite.pdf.reader.fumiko.pdfbox.pdmodel.interactive.pagenavigation.PDThreadBead;
import jp.favorite.pdf.reader.fumiko.task.parser.IProcessPdf;
import jp.favorite.pdf.reader.fumiko.task.parser.TextData;

/* loaded from: classes.dex */
/**
 * Walks the pages of a {@link PDDocument}, collects the {@link TextPosition}s
 * produced by the stream engine into per-article buckets, and reports the text of
 * each page to an optional {@link IProcessPdf} listener as runs of equal font size.
 *
 * <p>Separator strings (page start/end, article start/end, ...) are written to the
 * {@link Writer} supplied to {@link #writeText(PDDocument, Writer)}; the extracted
 * characters themselves are delivered through the listener in {@link #writePage(int)}.
 *
 * <p>Two system properties, {@code pdftextstripper.indent} and
 * {@code pdftextstripper.drop}, override the default indent and drop thresholds
 * when they parse as floats.
 */
public class PDFTextStripper extends PDFStreamEngine {
    // Sentinel values carried over from the decompiled source; none of them is
    // referenced by the code in this class.
    private static final float ENDOFLASTTEXTX_RESET_VALUE = -1.0f;
    private static final float EXPECTEDSTARTOFNEXTWORDX_RESET_VALUE = -Float.MAX_VALUE;
    private static final float LASTWORDSPACING_RESET_VALUE = -1.0f;
    private static final float MAXHEIGHTFORLINE_RESET_VALUE = -1.0f;
    private static final float MAXYFORLINE_RESET_VALUE = -Float.MAX_VALUE;
    private static final float MINYTOPFORLINE_RESET_VALUE = Float.MAX_VALUE;

    // Locale.ROOT keeps the property prefix stable; the default-locale overload maps
    // 'I' to a dotless 'i' under Turkish locales and would change the property keys.
    private static final String thisClassName =
            PDFTextStripper.class.getSimpleName().toLowerCase(java.util.Locale.ROOT);

    private static final float DEFAULT_INDENT_THRESHOLD =
            readThresholdProperty(thisClassName + ".indent", 2.0f);
    private static final float DEFAULT_DROP_THRESHOLD =
            readThresholdProperty(thisClassName + ".drop", 2.5f);

    /** Regular expressions that a line-leading token must fully match to count as a list marker. */
    private static final String[] LIST_ITEM_EXPRESSIONS = {
        "\\.", "\\d+\\.", "\\[\\d+\\]", "\\d+\\)", "[A-Z]\\.", "[a-z]\\.", "[A-Z]\\)", "[a-z]\\)", "[IVXL]+\\.", "[ivxl]+\\."
    };

    private boolean addMoreFormatting;
    private String articleEnd;
    private String articleStart;
    private float averageCharTolerance;
    /** Per glyph string: x coordinate -> set of y coordinates already seen on the current page. */
    private Map<String, TreeMap<Float, TreeSet<Float>>> characterListMapping;
    protected Vector<List<TextPosition>> charactersByArticle;
    private int currentPageNo;
    protected PDDocument document;
    private float dropThreshold;
    private PDOutlineItem endBookmark;
    private int endBookmarkPageNumber;
    private int endPage;
    private float indentThreshold;
    private List<Pattern> liPatterns;
    private String lineSeparator;
    /** Optional progress/result listener; {@code null} means "extract everything, notify nobody". */
    private IProcessPdf mIProcessPdf;
    protected Writer output;
    protected String outputEncoding;
    private List<PDThreadBead> pageArticles;
    private String pageEnd;
    private String pageSeparator;
    private String pageStart;
    private String paragraphEnd;
    private String paragraphStart;
    private boolean shouldSeparateByBeads;
    private boolean sortByPosition;
    private float spacingTolerance;
    private PDOutlineItem startBookmark;
    private int startBookmarkPageNumber;
    private int startPage;
    private boolean suppressDuplicateOverlappingText;
    protected final String systemLineSeparator;
    private String wordSeparator;

    /** Singleton marker element that {@link #normalize} treats as a word boundary. */
    private static final class WordSeparator extends TextPosition {
        private static final WordSeparator separator = new WordSeparator();

        private WordSeparator() {
        }

        public static final WordSeparator getSeparator() {
            return separator;
        }
    }

    /**
     * Reads a float-valued system property.
     *
     * @param key      full system property name
     * @param fallback value returned when the property is unset, empty or not a float
     * @return the parsed property value, or {@code fallback}
     */
    private static float readThresholdProperty(String key, float fallback) {
        String raw = System.getProperty(key);
        if (raw == null || raw.length() == 0) {
            return fallback;
        }
        try {
            return Float.parseFloat(raw);
        } catch (NumberFormatException ignored) {
            // A malformed override is deliberately not fatal: keep the built-in default.
            return fallback;
        }
    }

    /**
     * Creates a stripper with default separators (system line separator between lines
     * and pages, a single space between words), page range {@code 1..Integer.MAX_VALUE},
     * duplicate suppression and bead separation enabled, and position sorting disabled.
     *
     * @param properties passed unchanged to the {@link PDFStreamEngine} constructor
     * @throws IOException if the superclass constructor fails
     */
    public PDFTextStripper(Properties properties) throws IOException {
        super(properties);
        this.systemLineSeparator = System.getProperty("line.separator");
        this.lineSeparator = this.systemLineSeparator;
        this.pageSeparator = this.systemLineSeparator;
        this.wordSeparator = " ";
        this.paragraphStart = "";
        this.paragraphEnd = "";
        this.pageStart = "";
        this.pageEnd = this.pageSeparator;
        this.articleStart = "";
        this.articleEnd = "";
        this.currentPageNo = 0;
        this.startPage = 1;
        this.endPage = Integer.MAX_VALUE;
        this.startBookmark = null;
        this.startBookmarkPageNumber = -1;
        this.endBookmark = null;
        this.endBookmarkPageNumber = -1;
        this.suppressDuplicateOverlappingText = true;
        this.shouldSeparateByBeads = true;
        this.sortByPosition = false;
        this.addMoreFormatting = false;
        this.indentThreshold = DEFAULT_INDENT_THRESHOLD;
        this.dropThreshold = DEFAULT_DROP_THRESHOLD;
        this.spacingTolerance = 0.5f;
        this.averageCharTolerance = 0.3f;
        this.pageArticles = null;
        this.charactersByArticle = new Vector<>();
        this.characterListMapping = new HashMap<>();
        this.liPatterns = null;
        this.outputEncoding = null;
    }

    /**
     * Resolves the 1-based page number a bookmark points to.
     *
     * @return the 1-based index of the destination page in {@code list}; {@code -1} when the
     *         bookmark has no destination page; {@code 0} when the page is not in {@code list}
     */
    private int getPageNumber(PDOutlineItem pDOutlineItem, List<COSObjectable> list) throws IOException {
        PDPage destination = pDOutlineItem.findDestinationPage(this.document);
        return destination == null ? -1 : list.indexOf(destination) + 1;
    }

    /**
     * Returns the first pattern in {@code list} that matches the whole of {@code str}.
     *
     * @return the matching pattern, or {@code null} when none matches
     */
    protected static final Pattern matchPattern(String str, List<Pattern> list) {
        for (Pattern candidate : list) {
            if (candidate.matcher(str).matches()) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Joins the characters of {@code list} into words, starting a new word at every
     * {@link WordSeparator} element. The two boolean parameters are not used.
     */
    private List<String> normalize(List<TextPosition> list, boolean z, boolean z2) {
        List<String> words = new LinkedList<>();
        StringBuilder word = new StringBuilder();
        for (TextPosition position : list) {
            if (position instanceof WordSeparator) {
                words.add(word.toString());
                word = new StringBuilder();
            } else {
                word.append(position.getCharacter());
            }
        }
        // A trailing word has no closing separator, so flush it explicitly.
        if (word.length() > 0) {
            words.add(word.toString());
        }
        return words;
    }

    /**
     * Tests whether the spans {@code [f - f2, f]} and {@code [f3 - f4, f3]} overlap,
     * or whether {@code f} and {@code f3} lie within 0.1 of each other.
     */
    private boolean overlap(float f, float f2, float f3, float f4) {
        return within(f, f3, 0.1f) || (f3 <= f && f3 >= f - f2) || (f <= f3 && f >= f3 - f4);
    }

    /** Tests whether {@code f2} lies strictly inside {@code (f - f3, f + f3)}. */
    private boolean within(float f, float f2, float f3) {
        return f2 < f + f3 && f2 > f - f3;
    }

    /**
     * Writes the words of a line separated by the word separator.
     *
     * @param list words of the line
     * @param z    when {@code true} the words are emitted last-to-first
     */
    private void writeLine(List<String> list, boolean z) throws IOException {
        int count = list.size();
        for (int emitted = 0; emitted < count; emitted++) {
            if (emitted > 0) {
                writeWordSeparator();
            }
            writeString(list.get(z ? count - 1 - emitted : emitted));
        }
    }

    /** Writes the article-end string to the output. */
    protected void endArticle() throws IOException {
        this.output.write(getArticleEnd());
    }

    /** Hook invoked by {@link #writeText} after all pages were processed; does nothing here. */
    protected void endDocument(PDDocument pDDocument) throws IOException {
    }

    /** Hook invoked by {@link #processPage} after a page was written; does nothing here. */
    protected void endPage(PDPage pDPage) throws IOException {
    }

    /** @return whether {@link #writeText} replaces several separators with the line separator */
    public boolean getAddMoreFormatting() {
        return this.addMoreFormatting;
    }

    /** @return the string written by {@link #endArticle()} */
    public String getArticleEnd() {
        return this.articleEnd;
    }

    /** @return the string written by {@link #startArticle()} */
    public String getArticleStart() {
        return this.articleStart;
    }

    /** @return the stored average-character tolerance (not read elsewhere in this class) */
    public float getAverageCharTolerance() {
        return this.averageCharTolerance;
    }

    /** @return the live per-article character buckets of the current page */
    protected Vector<List<TextPosition>> getCharactersByArticle() {
        return this.charactersByArticle;
    }

    /** @return the 1-based number of the page most recently visited by {@link #processPages} */
    protected int getCurrentPageNo() {
        return this.currentPageNo;
    }

    /** @return the multiplier applied to the line height when detecting a paragraph by vertical drop */
    public float getDropThreshold() {
        return this.dropThreshold;
    }

    /** @return the bookmark limiting the last extracted page, or {@code null} */
    public PDOutlineItem getEndBookmark() {
        return this.endBookmark;
    }

    /** @return the last (1-based, inclusive) page that is extracted */
    public int getEndPage() {
        return this.endPage;
    }

    /** @return the multiplier applied to the space width when detecting a paragraph by indentation */
    public float getIndentThreshold() {
        return this.indentThreshold;
    }

    /** @return the string written by {@link #writeLineSeparator()} */
    public String getLineSeparator() {
        return this.lineSeparator;
    }

    /**
     * Returns the list-item patterns, compiling the built-in expressions on first use
     * unless a list was supplied through {@link #setListItemPatterns(List)}.
     */
    protected List<Pattern> getListItemPatterns() {
        if (this.liPatterns == null) {
            List<Pattern> compiled = new ArrayList<>(LIST_ITEM_EXPRESSIONS.length);
            for (String expression : LIST_ITEM_EXPRESSIONS) {
                compiled.add(Pattern.compile(expression));
            }
            this.liPatterns = compiled;
        }
        return this.liPatterns;
    }

    /** @return the writer set by the current/last {@link #writeText} call */
    protected Writer getOutput() {
        return this.output;
    }

    /** @return the string written by {@link #writePageEnd()} */
    public String getPageEnd() {
        return this.pageEnd;
    }

    /** @return the string written by {@link #writePageSeperator()} */
    public String getPageSeparator() {
        return this.pageSeparator;
    }

    /** @return the string written by {@link #writePageStart()} */
    public String getPageStart() {
        return this.pageStart;
    }

    /** @return the string written by {@link #writeParagraphEnd()} */
    public String getParagraphEnd() {
        return this.paragraphEnd;
    }

    /** @return the string written by {@link #writeParagraphStart()} */
    public String getParagraphStart() {
        return this.paragraphStart;
    }

    /** @return whether characters are bucketed by the page's thread beads */
    public boolean getSeparateByBeads() {
        return this.shouldSeparateByBeads;
    }

    /** @return whether each article is sorted with {@code TextPositionComparator} before output */
    public boolean getSortByPosition() {
        return this.sortByPosition;
    }

    /** @return the stored spacing tolerance (not read elsewhere in this class) */
    public float getSpacingTolerance() {
        return this.spacingTolerance;
    }

    /** @return the bookmark limiting the first extracted page, or {@code null} */
    public PDOutlineItem getStartBookmark() {
        return this.startBookmark;
    }

    /** @return the first (1-based, inclusive) page that is extracted */
    public int getStartPage() {
        return this.startPage;
    }

    /** @return whether a glyph repeated at (almost) the same position is dropped */
    public boolean getSuppressDuplicateOverlappingText() {
        return this.suppressDuplicateOverlappingText;
    }

    /**
     * Runs the extraction into an in-memory buffer.
     *
     * @param pDDocument  document to process
     * @param iProcessPdf listener that receives page counts and per-page text runs and can
     *                    skip pages or cancel; may be {@code null}, in which case every page
     *                    in range is processed and nothing is reported
     * @return everything written to the internal writer (the configured separator strings)
     * @throws IOException if processing or decryption fails
     */
    public String getText(PDDocument pDDocument, IProcessPdf iProcessPdf) throws IOException {
        this.mIProcessPdf = iProcessPdf;
        StringWriter buffer = new StringWriter();
        writeText(pDDocument, buffer);
        return buffer.toString();
    }

    /** @return the string written by {@link #writeWordSeparator()} */
    public String getWordSeparator() {
        return this.wordSeparator;
    }

    /**
     * Marks {@code positionWrapper} as a line start, classifies it via
     * {@link #isParagraphSeparation} and writes the matching separators: a line separator
     * for a plain new line, a paragraph start when the previous position began an article,
     * otherwise a line separator followed by a paragraph end/start pair.
     *
     * @return {@code positionWrapper}
     */
    protected PositionWrapper handleLineSeparation(PositionWrapper positionWrapper, PositionWrapper positionWrapper2, PositionWrapper positionWrapper3, float f) throws IOException {
        positionWrapper.setLineStart();
        isParagraphSeparation(positionWrapper, positionWrapper2, positionWrapper3, f);
        if (!positionWrapper.isParagraphStart()) {
            writeLineSeparator();
        } else if (positionWrapper2.isArticleStart()) {
            writeParagraphStart();
        } else {
            writeLineSeparator();
            writeParagraphSeparator();
        }
        return positionWrapper;
    }

    /**
     * Reverses {@code str} when position sorting is enabled, the string has at least two
     * characters and every character has directionality
     * {@link Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC}; otherwise returns it unchanged.
     */
    @Override // jp.favorite.pdf.reader.fumiko.pdfbox.util.PDFStreamEngine
    public String inspectFontEncoding(String str) {
        if (!this.sortByPosition || str == null || str.length() < 2) {
            return str;
        }
        for (int idx = 0; idx < str.length(); idx++) {
            if (Character.getDirectionality(str.charAt(idx)) != Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC) {
                return str;
            }
        }
        // Every char passed the check above, so the string holds no surrogates and
        // StringBuilder.reverse() is a plain char-by-char reversal here.
        return new StringBuilder(str).reverse().toString();
    }

    /**
     * Decides whether {@code positionWrapper} (the start of a new line) also starts a
     * paragraph and flags it accordingly.
     *
     * <p>A paragraph starts when there is no previous line start, when the vertical distance
     * to {@code positionWrapper2} exceeds {@code dropThreshold * f}, when the line is indented
     * relative to the previous line start (unless that one started a paragraph, in which case
     * this line is flagged as a hanging indent), when it is out-dented by more than one space
     * width and the previous line start was not a paragraph start, or when both line starts
     * are roughly aligned and match the same list-item pattern.
     *
     * @param positionWrapper  current line start
     * @param positionWrapper2 last position written before the line break
     * @param positionWrapper3 previous line start, or {@code null}
     * @param f                line height used to scale the drop threshold
     */
    protected void isParagraphSeparation(PositionWrapper positionWrapper, PositionWrapper positionWrapper2, PositionWrapper positionWrapper3, float f) {
        boolean startsParagraph = false;
        if (positionWrapper3 == null) {
            startsParagraph = true;
        } else {
            TextPosition current = positionWrapper.getTextPosition();
            float verticalGap = Math.abs(current.getYDirAdj() - positionWrapper2.getTextPosition().getYDirAdj());
            float horizontalShift = current.getXDirAdj() - positionWrapper3.getTextPosition().getXDirAdj();
            if (verticalGap > getDropThreshold() * f) {
                startsParagraph = true;
            } else if (horizontalShift > getIndentThreshold() * positionWrapper.getTextPosition().getWidthOfSpace()) {
                if (positionWrapper3.isParagraphStart()) {
                    positionWrapper.setHangingIndent();
                } else {
                    startsParagraph = true;
                }
            } else if (horizontalShift < (-positionWrapper.getTextPosition().getWidthOfSpace())) {
                if (!positionWrapper3.isParagraphStart()) {
                    startsParagraph = true;
                }
            } else if (Math.abs(horizontalShift) < 0.25d * positionWrapper.getTextPosition().getWidth()) {
                if (positionWrapper3.isHangingIndent()) {
                    positionWrapper.setHangingIndent();
                } else if (positionWrapper3.isParagraphStart()) {
                    Pattern previousMarker = matchListItemPattern(positionWrapper3);
                    // Identity comparison is intended: both lookups return elements of the
                    // same pattern list, so "same object" means "same list-marker style".
                    if (previousMarker != null && previousMarker == matchListItemPattern(positionWrapper)) {
                        startsParagraph = true;
                    }
                }
            }
        }
        if (startsParagraph) {
            positionWrapper.setParagraphStart();
        }
    }

    /** @return the list-item pattern matching the character string of the position, or {@code null} */
    protected Pattern matchListItemPattern(PositionWrapper positionWrapper) {
        return matchPattern(positionWrapper.getTextPosition().getCharacter(), getListItemPatterns());
    }

    /**
     * Processes one page if the current page number lies inside both the page range and the
     * bookmark range: resets the per-page buckets, runs the content stream through the
     * engine and writes the page.
     */
    protected void processPage(PDPage pDPage, COSStream cOSStream) throws IOException {
        if (this.currentPageNo < this.startPage || this.currentPageNo > this.endPage) {
            return;
        }
        if (this.startBookmarkPageNumber != -1 && this.currentPageNo < this.startBookmarkPageNumber) {
            return;
        }
        if (this.endBookmarkPageNumber != -1 && this.currentPageNo > this.endBookmarkPageNumber) {
            return;
        }
        startPage(pDPage);
        this.pageArticles = pDPage.getThreadBeads();
        // One bucket per bead plus one for the gap before each bead and one trailing bucket.
        int sectionCount = this.shouldSeparateByBeads ? (this.pageArticles.size() * 2) + 1 : 1;
        int previousCount = this.charactersByArticle.size();
        this.charactersByArticle.setSize(sectionCount);
        for (int section = 0; section < sectionCount; section++) {
            if (sectionCount < previousCount) {
                this.charactersByArticle.get(section).clear();
            } else {
                this.charactersByArticle.set(section, new ArrayList<TextPosition>());
            }
        }
        this.characterListMapping.clear();
        processStream(pDPage, pDPage.findResources(), cOSStream);
        writePage(this.currentPageNo);
        endPage(pDPage);
    }

    /**
     * Resolves the bookmark page limits, reports the page count to the listener and processes
     * every page that has contents and that the listener wants extracted. Stops early when
     * the listener reports cancellation.
     *
     * @param list all pages of the document; every element is cast to {@link PDPage}
     */
    protected void processPages(List<COSObjectable> list) throws IOException {
        // Always recompute: a bookmark cleared since the previous run must not leave a
        // stale page limit behind.
        this.startBookmarkPageNumber = this.startBookmark != null ? getPageNumber(this.startBookmark, list) : -1;
        this.endBookmarkPageNumber = this.endBookmark != null ? getPageNumber(this.endBookmark, list) : -1;
        if (this.startBookmarkPageNumber == -1 && this.startBookmark != null
                && this.endBookmarkPageNumber == -1 && this.endBookmark != null
                && this.startBookmark.getCOSObject() == this.endBookmark.getCOSObject()) {
            // Same bookmark on both ends and no destination page: page 0 never matches,
            // so no page is extracted.
            this.startBookmarkPageNumber = 0;
            this.endBookmarkPageNumber = 0;
        }
        final IProcessPdf listener = this.mIProcessPdf;
        if (listener != null) {
            listener.onFindTotalPages(list.size());
        }
        for (COSObjectable entry : list) {
            PDPage page = (PDPage) entry;
            PDStream contents = page.getContents();
            this.currentPageNo++;
            if (contents != null && (listener == null || listener.isNeedExtract(this.currentPageNo))) {
                processPage(page, contents.getStream());
            }
            if (listener != null && listener.isCanceled()) {
                return;
            }
        }
    }

    /**
     * Receives one text position from the engine, optionally drops it as a duplicate, and
     * appends it to the bucket of the article it belongs to. A diacritic contained in the
     * previous position is dropped; a previous diacritic contained in the new position is
     * replaced by the new position.
     */
    @Override // jp.favorite.pdf.reader.fumiko.pdfbox.util.PDFStreamEngine
    protected void processTextPosition(TextPosition textPosition) {
        if (this.suppressDuplicateOverlappingText && !registerGlyphLocation(textPosition)) {
            return;
        }
        int articleIndex = findArticleIndex(textPosition.getX(), textPosition.getY());
        List<TextPosition> article = this.charactersByArticle.get(articleIndex);
        if (article.isEmpty()) {
            article.add(textPosition);
            return;
        }
        int lastIndex = article.size() - 1;
        TextPosition previous = article.get(lastIndex);
        if (textPosition.isDiacritic() && previous.contains(textPosition)) {
            return;
        }
        if (previous.isDiacritic() && textPosition.contains(previous)) {
            article.remove(lastIndex);
        }
        article.add(textPosition);
    }

    /**
     * Records the location of a glyph unless the same character string was already seen on
     * this page within a third of the average character width in both x and y.
     *
     * @return {@code true} if the location was new and has been recorded, {@code false} if the
     *         glyph duplicates an earlier one
     */
    private boolean registerGlyphLocation(TextPosition textPosition) {
        String glyph = textPosition.getCharacter();
        float x = textPosition.getX();
        float y = textPosition.getY();
        TreeMap<Float, TreeSet<Float>> columns = this.characterListMapping.get(glyph);
        if (columns == null) {
            columns = new TreeMap<>();
            this.characterListMapping.put(glyph, columns);
        }
        float tolerance = (textPosition.getWidth() / glyph.length()) / 3.0f;
        for (TreeSet<Float> rows : columns.subMap(Float.valueOf(x - tolerance), Float.valueOf(x + tolerance)).values()) {
            if (!rows.subSet(Float.valueOf(y - tolerance), Float.valueOf(y + tolerance)).isEmpty()) {
                return false;
            }
        }
        TreeSet<Float> rows = columns.get(Float.valueOf(x));
        if (rows == null) {
            rows = new TreeSet<>();
            columns.put(Float.valueOf(x), rows);
        }
        rows.add(Float.valueOf(y));
        return true;
    }

    /**
     * Picks the bucket index for a character at {@code (x, y)}: {@code 2 * bead + 1} for the
     * first bead whose rectangle contains the point, otherwise {@code 2 * bead} for the first
     * bead the point lies left of or below the top of, otherwise the last bucket. Returns
     * {@code 0} when bead separation is disabled or a {@code null} bead is encountered.
     */
    private int findArticleIndex(float x, float y) {
        if (!this.shouldSeparateByBeads) {
            return 0;
        }
        int containing = -1;
        int leftOrAbove = -1;
        int leftOnly = -1;
        int aboveOnly = -1;
        for (int bead = 0; bead < this.pageArticles.size() && containing == -1; bead++) {
            PDThreadBead threadBead = this.pageArticles.get(bead);
            if (threadBead == null) {
                containing = 0;
                continue;
            }
            PDRectangle bounds = threadBead.getRectangle();
            if (bounds.contains(x, y)) {
                containing = (bead * 2) + 1;
            } else if ((x < bounds.getLowerLeftX() || y < bounds.getUpperRightY()) && leftOrAbove == -1) {
                leftOrAbove = bead * 2;
            } else if (x < bounds.getLowerLeftX() && leftOnly == -1) {
                leftOnly = bead * 2;
            } else if (y < bounds.getUpperRightY() && aboveOnly == -1) {
                aboveOnly = bead * 2;
            }
        }
        if (containing != -1) {
            return containing;
        }
        if (leftOrAbove != -1) {
            return leftOrAbove;
        }
        if (leftOnly != -1) {
            return leftOnly;
        }
        if (aboveOnly != -1) {
            return aboveOnly;
        }
        return this.charactersByArticle.size() - 1;
    }

    /** Resets the engine state and restarts page counting at zero. */
    @Override // jp.favorite.pdf.reader.fumiko.pdfbox.util.PDFStreamEngine
    public void resetEngine() {
        super.resetEngine();
        this.currentPageNo = 0;
    }

    /** Sets whether {@link #writeText} replaces several separators with the line separator. */
    public void setAddMoreFormatting(boolean z) {
        this.addMoreFormatting = z;
    }

    /** Sets the string written by {@link #endArticle()}. */
    public void setArticleEnd(String str) {
        this.articleEnd = str;
    }

    /** Sets the string written by {@link #startArticle()}. */
    public void setArticleStart(String str) {
        this.articleStart = str;
    }

    /** Stores the average-character tolerance. */
    public void setAverageCharTolerance(float f) {
        this.averageCharTolerance = f;
    }

    /** Sets the line-height multiplier used for drop-based paragraph detection. */
    public void setDropThreshold(float f) {
        this.dropThreshold = f;
    }

    /** Sets the bookmark limiting the last extracted page; {@code null} removes the limit. */
    public void setEndBookmark(PDOutlineItem pDOutlineItem) {
        this.endBookmark = pDOutlineItem;
    }

    /** Sets the last (1-based, inclusive) page to extract. */
    public void setEndPage(int i) {
        this.endPage = i;
    }

    /** Sets the space-width multiplier used for indent-based paragraph detection. */
    public void setIndentThreshold(float f) {
        this.indentThreshold = f;
    }

    /** Sets the string written by {@link #writeLineSeparator()}. */
    public void setLineSeparator(String str) {
        this.lineSeparator = str;
    }

    /** Replaces the list-item patterns; {@code null} restores the built-in ones on next use. */
    protected void setListItemPatterns(List<Pattern> list) {
        this.liPatterns = list;
    }

    /** Sets the string written by {@link #writePageEnd()}. */
    public void setPageEnd(String str) {
        this.pageEnd = str;
    }

    /** Sets the string written by {@link #writePageSeperator()}. */
    public void setPageSeparator(String str) {
        this.pageSeparator = str;
    }

    /** Sets the string written by {@link #writePageStart()}. */
    public void setPageStart(String str) {
        this.pageStart = str;
    }

    /** Sets the string written by {@link #writeParagraphEnd()}. */
    public void setParagraphEnd(String str) {
        this.paragraphEnd = str;
    }

    /** Sets the string written by {@link #writeParagraphStart()}. */
    public void setParagraphStart(String str) {
        this.paragraphStart = str;
    }

    /** Sets whether characters are bucketed by the page's thread beads. */
    public void setShouldSeparateByBeads(boolean z) {
        this.shouldSeparateByBeads = z;
    }

    /** Sets whether each article is sorted by position before output. */
    public void setSortByPosition(boolean z) {
        this.sortByPosition = z;
    }

    /** Stores the spacing tolerance. */
    public void setSpacingTolerance(float f) {
        this.spacingTolerance = f;
    }

    /** Sets the bookmark limiting the first extracted page; {@code null} removes the limit. */
    public void setStartBookmark(PDOutlineItem pDOutlineItem) {
        this.startBookmark = pDOutlineItem;
    }

    /** Sets the first (1-based, inclusive) page to extract. */
    public void setStartPage(int i) {
        this.startPage = i;
    }

    /** Sets whether a glyph repeated at (almost) the same position is dropped. */
    public void setSuppressDuplicateOverlappingText(boolean z) {
        this.suppressDuplicateOverlappingText = z;
    }

    /** Sets the string written by {@link #writeWordSeparator()}. */
    public void setWordSeparator(String str) {
        this.wordSeparator = str;
    }

    /** Equivalent to {@code startArticle(true)}. */
    protected void startArticle() throws IOException {
        startArticle(true);
    }

    /** Writes the article-start string to the output; the flag is not used. */
    protected void startArticle(boolean z) throws IOException {
        this.output.write(getArticleStart());
    }

    /** Hook invoked by {@link #writeText} before decryption and page processing; does nothing here. */
    protected void startDocument(PDDocument pDDocument) throws IOException {
    }

    /** Hook invoked by {@link #processPage} before a page is processed; does nothing here. */
    protected void startPage(PDPage pDPage) throws IOException {
    }

    /** Writes the character string of a single text position to the output. */
    protected void writeCharacters(TextPosition textPosition) throws IOException {
        this.output.write(textPosition.getCharacter());
    }

    /** Writes the line separator to the output. */
    protected void writeLineSeparator() throws IOException {
        this.output.write(getLineSeparator());
    }

    /**
     * Emits the collected characters of the current page. The characters of all articles are
     * concatenated in bucket order and split into {@link TextData} runs wherever the font size
     * (in points) changes; the runs are handed to the listener, if any. The page-start string
     * (when at least one bucket exists), one article-end string per bucket and the page-end
     * string are written to the output.
     *
     * @param i page number reported to the listener
     */
    protected void writePage(int i) throws IOException {
        if (this.charactersByArticle.size() > 0) {
            writePageStart();
        }
        ArrayList<TextData> runs = new ArrayList<>();
        float runFontSize = 0.0f;
        StringBuilder run = new StringBuilder(1024);
        for (int articleIdx = 0; articleIdx < this.charactersByArticle.size(); articleIdx++) {
            List<TextPosition> article = this.charactersByArticle.get(articleIdx);
            if (getSortByPosition()) {
                Collections.sort(article, new TextPositionComparator());
            }
            for (TextPosition position : article) {
                float fontSize = position.getFontSizeInPt();
                if (runFontSize != fontSize) {
                    // Font size changed: close the current run (if any) and start a new one.
                    if (run.length() > 0) {
                        runs.add(new TextData(run.toString(), runFontSize));
                    }
                    run.setLength(0);
                    runFontSize = fontSize;
                }
                run.append(position.getCharacter());
            }
            endArticle();
        }
        if (run.length() > 0) {
            runs.add(new TextData(run.toString(), runFontSize));
        }
        if (this.mIProcessPdf != null) {
            this.mIProcessPdf.onExtractPage(i, runs);
        }
        writePageEnd();
    }

    /** Writes the page-end string to the output. */
    protected void writePageEnd() throws IOException {
        this.output.write(getPageEnd());
    }

    /** Writes the page separator to the output and flushes it. */
    protected void writePageSeperator() throws IOException {
        this.output.write(getPageSeparator());
        this.output.flush();
    }

    /** Writes the page-start string to the output. */
    protected void writePageStart() throws IOException {
        this.output.write(getPageStart());
    }

    /** Writes the paragraph-end string to the output. */
    protected void writeParagraphEnd() throws IOException {
        this.output.write(getParagraphEnd());
    }

    /** Writes a paragraph end immediately followed by a paragraph start. */
    protected void writeParagraphSeparator() throws IOException {
        writeParagraphEnd();
        writeParagraphStart();
    }

    /** Writes the paragraph-start string to the output. */
    protected void writeParagraphStart() throws IOException {
        this.output.write(getParagraphStart());
    }

    /** Writes {@code str} to the output. */
    protected void writeString(String str) throws IOException {
        this.output.write(str);
    }

    /**
     * Processes all pages of the document, writing separators to {@code writer} and reporting
     * page text to the listener installed by the last {@link #getText} call (if any).
     *
     * <p>When "add more formatting" is enabled, the paragraph end, page start, article start
     * and article end strings are overwritten with the line separator. An encrypted document
     * is decrypted with the empty password.
     *
     * @throws IOException on processing errors; decryption failures are wrapped in a
     *                     {@link WrappedIOException}
     */
    public void writeText(PDDocument pDDocument, Writer writer) throws IOException {
        resetEngine();
        this.document = pDDocument;
        this.output = writer;
        if (getAddMoreFormatting()) {
            this.paragraphEnd = this.lineSeparator;
            this.pageStart = this.lineSeparator;
            this.articleStart = this.lineSeparator;
            this.articleEnd = this.lineSeparator;
        }
        startDocument(this.document);
        if (this.document.isEncrypted()) {
            try {
                this.document.decrypt("");
            } catch (CryptographyException e) {
                throw new WrappedIOException("Error decrypting document, details: ", e);
            } catch (InvalidPasswordException e2) {
                throw new WrappedIOException("Error: document is encrypted", e2);
            }
        }
        processPages(this.document.getDocumentCatalog().getAllPages());
        endDocument(this.document);
    }

    /** Writes the word separator to the output. */
    protected void writeWordSeparator() throws IOException {
        this.output.write(getWordSeparator());
    }
}
