package org.apache.tika.parser.pdf;

import java.io.IOException;
import java.util.List;
import org.antlr.stringtemplate.language.ASTExpr;
import org.apache.abdera.util.Constants;
import org.apache.fontbox.afm.AFMParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
import org.apache.pdfbox.pdmodel.interactive.measurement.PDNumberFormatDictionary;
import org.apache.pdfbox.util.PDFOperator;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.util.TextPosition;
import org.apache.pdfbox.util.operator.OperatorProcessor;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOExceptionWithCause;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:WEB-INF/lib/tika-parsers-0.7.jar:org/apache/tika/parser/pdf/PDF2XHTML.class */
/**
 * A {@link PDFTextStripper} that forwards the text it extracts to an
 * {@link XHTMLContentHandler} instead of collecting it as plain text.
 *
 * <p>Each page is emitted as {@code <div class="page"><p>...</p></div>}; the
 * extracted characters, word separators (a single space) and line separators
 * (a newline) are written as character data inside that paragraph.</p>
 *
 * <p>The PDFBox callbacks overridden here may only throw {@link IOException},
 * so every {@link SAXException} raised by the handler is wrapped in an
 * {@link IOExceptionWithCause}; {@link #process} unwraps it again.</p>
 */
class PDF2XHTML extends PDFTextStripper {

    /**
     * Content-stream operators that are bound to a do-nothing processor by the
     * constructor, so encountering one of them has no effect on extraction.
     * NOTE(review): presumably these are the painting / colour / marked-content
     * operators that text extraction does not need - confirm against PDFBox.
     *
     * <p>The names are spelled out as literals on purpose: they are PDF
     * operator names and must not be borrowed from unrelated constants that
     * merely happen to have the same value. {@code "EI"} (end inline image)
     * replaces the former {@code "El"} (lowercase L), which is not a PDF
     * operator.</p>
     */
    private static final String[] IGNORED_OPERATORS = {
        "b", "B", "b*", "B*", "BDC", "BI", "BMC", "BX",
        "c", "CS", "cs",
        "d", "d0", "d1", "DP",
        "EI", "EMC", "EX",
        "f", "F", "f*",
        "G", "g",
        "h",
        "i", "ID",
        "j", "J",
        "K", "k",
        "l",
        "m", "M", "MP",
        "n",
        "re", "RG", "rg", "ri",
        "s", "S", "SC", "sc", "SCN", "scn", "sh",
        "v",
        "W", "W*",
        "y"
    };

    /** Receives the XHTML events produced while the document is processed. */
    private final XHTMLContentHandler handler;

    /**
     * First failure raised by the handler while a separator was being written
     * from one of the separator getters, or {@code null} if there was none.
     * The getters cannot throw checked exceptions, so the failure is parked
     * here and rethrown by the next callback that is allowed to throw.
     */
    private SAXException pendingSeparatorFailure;

    /**
     * Extracts the text of the given PDF document and reports it to the given
     * content handler as XHTML.
     *
     * @param pDDocument     the PDF document to extract text from
     * @param contentHandler the SAX handler that receives the XHTML events
     * @param metadata       the metadata passed on to the XHTML handler
     * @throws SAXException  if the content handler failed; this is the original
     *                       exception, unwrapped from the I/O exception that
     *                       carried it through PDFBox
     * @throws TikaException if extraction failed with any other I/O error
     */
    public static void process(PDDocument pDDocument, ContentHandler contentHandler, Metadata metadata) throws SAXException, TikaException {
        try {
            new PDF2XHTML(contentHandler, metadata).getText(pDDocument);
        } catch (IOException e) {
            Throwable cause = e.getCause();
            if (cause instanceof SAXException) {
                // A handler failure tunnelled through an IOException: hand the
                // caller the original SAX exception.
                throw (SAXException) cause;
            }
            throw new TikaException("Unable to extract PDF content", e);
        }
    }

    /**
     * Creates a stripper that writes to the given handler and registers a
     * no-op processor for every operator in {@link #IGNORED_OPERATORS}.
     *
     * @param contentHandler the SAX handler to wrap in an XHTML handler
     * @param metadata       the metadata passed on to the XHTML handler
     * @throws IOException   if the superclass constructor fails
     */
    private PDF2XHTML(ContentHandler contentHandler, Metadata metadata) throws IOException {
        this.handler = new XHTMLContentHandler(contentHandler, metadata);

        // One shared, stateless processor is enough for all ignored operators.
        OperatorProcessor ignore = new OperatorProcessor() {
            @Override
            public void process(PDFOperator pDFOperator, List list) {
                // Intentionally empty: the operator is ignored.
            }
        };
        for (String operator : IGNORED_OPERATORS) {
            registerOperatorProcessor(operator, ignore);
        }
    }

    /**
     * Starts the XHTML document.
     *
     * @throws IOException if the handler fails; the SAX exception is the cause
     */
    @Override
    protected void startDocument(PDDocument pDDocument) throws IOException {
        try {
            this.handler.startDocument();
        } catch (SAXException e) {
            throw new IOExceptionWithCause("Unable to start a document", e);
        }
    }

    /**
     * Ends the XHTML document.
     *
     * @throws IOException if the handler fails or a separator could not be
     *                     written earlier; the SAX exception is the cause
     */
    @Override
    protected void endDocument(PDDocument pDDocument) throws IOException {
        rethrowPendingSeparatorFailure();
        try {
            this.handler.endDocument();
        } catch (SAXException e) {
            throw new IOExceptionWithCause("Unable to end a document", e);
        }
    }

    /**
     * Opens the {@code <div class="page">} wrapper and the paragraph that
     * holds the text of the page.
     *
     * @throws IOException if the handler fails or a separator could not be
     *                     written earlier; the SAX exception is the cause
     */
    @Override
    protected void startPage(PDPage pDPage) throws IOException {
        rethrowPendingSeparatorFailure();
        try {
            this.handler.startElement("div", "class", "page");
            this.handler.startElement("p");
        } catch (SAXException e) {
            throw new IOExceptionWithCause("Unable to start a page", e);
        }
    }

    /**
     * Closes the paragraph and the page wrapper opened by {@link #startPage}.
     *
     * @throws IOException if the handler fails or a separator could not be
     *                     written earlier; the SAX exception is the cause
     */
    @Override
    protected void endPage(PDPage pDPage) throws IOException {
        rethrowPendingSeparatorFailure();
        try {
            this.handler.endElement("p");
            this.handler.endElement("div");
        } catch (SAXException e) {
            throw new IOExceptionWithCause("Unable to end a page", e);
        }
    }

    /**
     * Writes a string as character data to the handler. Kept {@code public}
     * for compatibility (the decompiler noted the access modifier was changed
     * from {@code protected}).
     *
     * @param str the text to write
     * @throws IOException if the handler fails or a separator could not be
     *                     written earlier; the SAX exception is the cause
     */
    @Override
    public void writeString(String str) throws IOException {
        rethrowPendingSeparatorFailure();
        try {
            this.handler.characters(str);
        } catch (SAXException e) {
            throw new IOExceptionWithCause("Unable to write a string: " + str, e);
        }
    }

    /**
     * Writes the character(s) of a single text position to the handler.
     *
     * @param textPosition the text position whose character is written
     * @throws IOException if the handler fails or a separator could not be
     *                     written earlier; the SAX exception is the cause
     */
    @Override
    protected void writeCharacters(TextPosition textPosition) throws IOException {
        rethrowPendingSeparatorFailure();
        try {
            this.handler.characters(textPosition.getCharacter());
        } catch (SAXException e) {
            throw new IOExceptionWithCause("Unable to write a character: " + textPosition.getCharacter(), e);
        }
    }

    /**
     * Writes a single space to the handler as a side effect and then returns
     * the superclass's word separator.
     *
     * <p>This getter cannot throw a checked exception, so a handler failure is
     * remembered and rethrown by the next callback that declares
     * {@link IOException} instead of being lost.</p>
     *
     * @return the word separator of the superclass
     */
    @Override
    public String getWordSeparator() {
        try {
            this.handler.characters(" ");
        } catch (SAXException e) {
            rememberSeparatorFailure(e);
        }
        return super.getWordSeparator();
    }

    /**
     * Writes a newline to the handler as a side effect and then returns the
     * superclass's line separator.
     *
     * <p>This getter cannot throw a checked exception, so a handler failure is
     * remembered and rethrown by the next callback that declares
     * {@link IOException} instead of being lost.</p>
     *
     * @return the line separator of the superclass
     */
    @Override
    public String getLineSeparator() {
        try {
            this.handler.characters("\n");
        } catch (SAXException e) {
            rememberSeparatorFailure(e);
        }
        return super.getLineSeparator();
    }

    /** Parks a separator failure, keeping only the first one seen. */
    private void rememberSeparatorFailure(SAXException failure) {
        if (this.pendingSeparatorFailure == null) {
            this.pendingSeparatorFailure = failure;
        }
    }

    /** Rethrows (once) a parked separator failure, wrapped so that {@link #process} can unwrap it. */
    private void rethrowPendingSeparatorFailure() throws IOException {
        SAXException failure = this.pendingSeparatorFailure;
        if (failure != null) {
            this.pendingSeparatorFailure = null;
            throw new IOExceptionWithCause("Unable to write a separator", failure);
        }
    }
}
