package org.ow2.weblab.service.normaliser.tika.handlers;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import javax.imageio.ImageIO;
import javax.xml.bind.DatatypeConverter;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.common.PDPageLabelRange;
import org.apache.tika.language.ProfilingWriter;
import org.apache.xml.serialize.LineSeparator;
import org.ow2.weblab.content.api.ContentManager;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.factory.AnnotationFactory;
import org.ow2.weblab.core.extended.factory.MediaUnitFactory;
import org.ow2.weblab.core.extended.ontologies.DCTerms;
import org.ow2.weblab.core.extended.ontologies.DublinCore;
import org.ow2.weblab.core.extended.ontologies.WebLabProcessing;
import org.ow2.weblab.core.helper.impl.JenaPoKHelper;
import org.ow2.weblab.core.model.Annotation;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Image;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.service.normaliser.tika.TikaConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/**
 * SAX handler decorator that turns the element/character events it receives into WebLab media
 * units attached to the {@link Document} supplied through {@link #setDocument(Document)}.
 *
 * <p>What the code below does:
 * <ul>
 *   <li>character data found outside a table is accumulated into a {@link Text} unit; heading
 *       ({@code h1}..{@code h6}) and block ({@code div}, {@code p}) elements decide when a unit is
 *       closed and a new one opened;</li>
 *   <li>everything between {@code <table>} and {@code </table>} is accumulated into a separate
 *       {@link Text} unit, with a tab appended after each {@code td} and a newline after each
 *       {@code tr};</li>
 *   <li>{@code <img>} elements whose {@code src} is an {@code http://} URL ending in
 *       {@code .jpg}, {@code .png} or {@code .bmp}, and which decode to more than 100x100 pixels,
 *       become {@link Image} units annotated with alt/title/width/height;</li>
 *   <li>when a text or table unit is closed it is either removed (too little content) or
 *       annotated with the language reported by its {@link ProfilingWriter}.</li>
 * </ul>
 *
 * <p>The handler keeps per-document mutable state in instance fields and performs no
 * synchronisation.
 */
@Deprecated
/* loaded from: input_file:WEB-INF/classes/org/ow2/weblab/service/normaliser/tika/handlers/MediaUnitContentHandler.class */
public class MediaUnitContentHandler extends WebLabHandlerDecorator {
    /** An image must be strictly wider than this (in pixels) to be kept. */
    private static final int MIN_IMAGE_WIDTH = 100;
    /** An image must be strictly taller than this (in pixels) to be kept. */
    private static final int MIN_IMAGE_HEIGHT = 100;
    /** Language code written when the profiler is not certain and the text is short. */
    private static final String UNKNOWN_LANGUAGE = "unk";
    /** Prefix of the predicates created by this handler ({@code alt}, {@code width}, {@code height}). */
    private static final String BASE_URI = "http://weblab.ow2.org/services/tika/";
    /** Minimum stripped length a text unit must reach to be kept. */
    private static final int MIN_TEXT_LENGTH = 4;
    /** Minimum stripped length a table unit must reach to be kept (i.e. non-empty). */
    private static final int MIN_TABLE_LENGTH = 1;
    /** Above this stripped length the detected language is used even when not "reasonably certain". */
    private static final int LONG_CONTENT_LENGTH = 200;

    /** Start tags that add a line separator to the text unit in process. */
    private static final List<String> NEW_LINE_ELEMENTS = unmodifiableListOf("ol", "dt", "dl", "li");
    /** Start tags that add a tab to the text unit in process. */
    private static final List<String> TAB_LIST_ELEMENTS = unmodifiableListOf("li", "dd");
    /** Heading elements. */
    private static final List<String> TITLE_LIST_ELEMENTS = unmodifiableListOf("h1", "h2", "h3", "h4", "h5", "h6");
    /** Block elements. */
    private static final List<String> BLOC_LIST_ELEMENTS = unmodifiableListOf("div", "p");

    private Document document;
    /** Language profiler fed with the characters of {@link #textInProcess}. */
    private ProfilingWriter pWriter;
    /** Language profiler fed with the characters of {@link #tableInProcess}. */
    private ProfilingWriter pTableWriter;
    private TikaConfiguration configuration;
    private final Log logger = LogFactory.getLog(getClass());
    /** Name of the element that governs the current text unit; empty until {@code body} (or a heading/block) is seen. */
    private String muHtmlElement = "";
    /** Text unit currently being filled, or {@code null} when none is open. */
    private Text textInProcess = null;
    /** Table unit currently being filled, or {@code null} when no table is open. */
    private Text tableInProcess = null;

    /** Builds an unmodifiable list holding the given values in order. */
    private static List<String> unmodifiableListOf(String... values) {
        List<String> list = new ArrayList<String>(values.length);
        for (String value : values) {
            list.add(value);
        }
        return Collections.unmodifiableList(list);
    }

    /** Removes tabs, newlines and {@code LineSeparator.Macintosh} occurrences, then trims the result. */
    private static String stripLayoutWhitespace(String content) {
        return content.replaceAll("\t", "").replaceAll("\n", "").replaceAll(LineSeparator.Macintosh, "").trim();
    }

    /** Appends {@code suffix} to the content of the text unit in process (which must be open). */
    private void appendToText(String suffix) {
        this.textInProcess.setContent(this.textInProcess.getContent() + suffix);
    }

    /** Appends the platform line separator to the text unit in process (which must be open). */
    private void appendLineSeparatorToText() {
        appendToText(System.getProperty("line.separator"));
    }

    /** Appends {@code suffix} to the content of the table unit in process (which must be open). */
    private void appendToTable(String suffix) {
        this.tableInProcess.setContent(this.tableInProcess.getContent() + suffix);
    }

    /** Creates a new empty text unit on the document and a fresh language profiler for it. */
    private void openTextMediaUnit() {
        this.textInProcess = (Text) MediaUnitFactory.createAndLinkMediaUnit(this.document, Text.class);
        this.textInProcess.setContent("");
        this.pWriter = new ProfilingWriter();
    }

    /** Creates a new empty table unit on the document and a fresh language profiler for it. */
    private void openTableMediaUnit() {
        this.tableInProcess = (Text) MediaUnitFactory.createAndLinkMediaUnit(this.document, Text.class);
        this.tableInProcess.setContent("");
        this.pTableWriter = new ProfilingWriter();
    }

    /** Closes the current text unit, then opens a new one governed by {@code elementName}. */
    private void restartTextMediaUnit(String elementName) {
        closeTextMediaUnit();
        this.muHtmlElement = elementName;
        openTextMediaUnit();
    }

    /**
     * Finalises a unit: removes it from the document when its stripped content is shorter than
     * {@code minLength}, otherwise annotates it with a language code.
     *
     * @param unit      the unit being closed
     * @param profiler  the profiler that received the unit's characters
     * @param minLength minimum stripped length for the unit to be kept
     */
    private void finishMediaUnit(Text unit, ProfilingWriter profiler, int minLength) {
        String stripped = stripLayoutWhitespace(unit.getContent());
        if (stripped.length() < minLength) {
            this.document.getMediaUnit().remove(unit);
            return;
        }
        // Trust the profiler when it is certain, or when there is a lot of text to base the guess on.
        if (profiler.getLanguage().isReasonablyCertain() || stripped.length() > LONG_CONTENT_LENGTH) {
            annotate(unit, profiler.getLanguage().getLanguage());
        } else {
            annotate(unit, UNKNOWN_LANGUAGE);
        }
    }

    /** Finalises the text unit in process (must be open) and resets the text state. */
    private void closeTextMediaUnit() {
        finishMediaUnit(this.textInProcess, this.pWriter, MIN_TEXT_LENGTH);
        this.textInProcess = null;
        this.pWriter = null;
    }

    /** Finalises the table unit in process (must be open) and resets the table state. */
    private void closeTableMediaUnit() {
        finishMediaUnit(this.tableInProcess, this.pTableWriter, MIN_TABLE_LENGTH);
        this.tableInProcess = null;
        this.pTableWriter = null;
    }

    /** Returns whether {@code src} ends with one of the handled image extensions (case-sensitive). */
    private static boolean hasSupportedImageExtension(String src) {
        return src.endsWith(".jpg") || src.endsWith(".png") || src.endsWith(".bmp");
    }

    /**
     * Creates an {@link Image} unit for an {@code img} element when its source can be decoded and
     * is large enough; otherwise does nothing (failures are logged at info level).
     *
     * @param attributes the attributes of the {@code img} element
     */
    private void addImageMediaUnit(Attributes attributes) {
        String src = attributes.getValue("src");
        // An <img> without src used to trigger a NullPointerException here.
        if (src == null || !hasSupportedImageExtension(src)) {
            return;
        }
        try {
            BufferedImage decoded = null;
            String remoteLocation = "";
            if (src.startsWith("http://")) {
                decoded = ImageIO.read(new URL(src));
                remoteLocation = src;
            }
            if (decoded == null || decoded.getWidth() <= MIN_IMAGE_WIDTH || decoded.getHeight() <= MIN_IMAGE_HEIGHT) {
                return;
            }
            Image image = (Image) MediaUnitFactory.createAndLinkMediaUnit(this.document, Image.class);
            // NOTE(review): assigns the URI to itself; looks like a no-op left by decompilation — confirm.
            image.setUri(image.getUri());
            // NOTE(review): remoteLocation is always non-empty once decoded is non-null, so this
            // local-file copy is currently unreachable. Kept as-is; confirm the intended behaviour
            // before enabling reads of local paths taken from document markup.
            if (remoteLocation.isEmpty()) {
                copyLocalContent(src, image);
            }
            annotateAltAndTitle(attributes, image);
            annotateSize(decoded, image);
        } catch (MalformedURLException e) {
            this.logger.info("Unable to read image. " + src + " can not be transformed to URI.");
        } catch (IOException e) {
            this.logger.info("Unable to convert to image : " + src);
        }
    }

    /**
     * Streams the file at {@code path} into the content manager as the native content of {@code image}.
     *
     * @throws IOException if the file cannot be opened
     */
    private void copyLocalContent(String path, Image image) throws IOException {
        FileInputStream fileInputStream = new FileInputStream(new File(path));
        try {
            ContentManager.getInstance().writeNativeContent(fileInputStream, image);
        } catch (WebLabCheckedException e) {
            this.logger.warn("Unable to copy image.", e);
        } finally {
            IOUtils.closeQuietly((InputStream) fileInputStream);
        }
    }

    /** Adds one annotation per {@code alt} / {@code title} attribute, with {@code <br/>} removed from the value. */
    private void annotateAltAndTitle(Attributes attributes, Image image) {
        for (int i = 0; i < attributes.getLength(); i++) {
            String attributeName = attributes.getLocalName(i);
            boolean isAlt = "alt".equals(attributeName);
            if (!isAlt && !"title".equals(attributeName)) {
                continue;
            }
            JenaPoKHelper helper = new JenaPoKHelper(AnnotationFactory.createAndLinkAnnotation(image));
            String predicate = isAlt ? BASE_URI + "alt" : DublinCore.TITLE_PROPERTY_NAME;
            helper.createLitStat(image.getUri(), predicate, attributes.getValue(i).replace("<br/>", ""));
        }
    }

    /** Adds a single annotation carrying the decoded width and height of the image. */
    private void annotateSize(BufferedImage decoded, Image image) {
        try {
            JenaPoKHelper helper = new JenaPoKHelper(AnnotationFactory.createAndLinkAnnotation(image));
            helper.setAutoCommitMode(false);
            helper.createLitStat(image.getUri(), new URL(BASE_URI + "width").toURI().toString(), String.valueOf(decoded.getWidth()));
            helper.createLitStat(image.getUri(), new URL(BASE_URI + "height").toURI().toString(), String.valueOf(decoded.getHeight()));
            helper.commit();
        } catch (MalformedURLException e) {
            this.logger.warn("Malformed URL : " + BASE_URI + "width)", e);
        } catch (URISyntaxException e) {
            this.logger.warn("Unable to transform the property 'width' into a predicate (" + BASE_URI + "width)", e);
        }
    }

    /**
     * Forwards the event to the decorated handler, then updates the text/table/image state
     * according to the qualified name of the element being opened.
     */
    @Override // org.apache.tika.sax.ContentHandlerDecorator, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        if ("body".equals(qName)) {
            this.muHtmlElement = qName;
        }
        // Headings, blocks and nested tables are ignored while a table is being collected.
        // NOTE(review): a nested </table> will still close the single table unit — confirm whether
        // nested tables need a depth counter.
        if (this.tableInProcess == null) {
            if (TITLE_LIST_ELEMENTS.contains(qName)) {
                if (this.textInProcess == null) {
                    openTextMediaUnit();
                    this.muHtmlElement = qName;
                } else if (this.muHtmlElement.equals(qName) || this.muHtmlElement.equals("body")) {
                    restartTextMediaUnit(qName);
                } else {
                    appendLineSeparatorToText();
                }
            }
            if (BLOC_LIST_ELEMENTS.contains(qName)) {
                if (this.textInProcess == null) {
                    openTextMediaUnit();
                    this.muHtmlElement = qName;
                } else if (TITLE_LIST_ELEMENTS.contains(this.muHtmlElement) || BLOC_LIST_ELEMENTS.contains(this.muHtmlElement)) {
                    // A block inside a heading/block stays in the same unit, on a new line.
                    appendLineSeparatorToText();
                } else {
                    restartTextMediaUnit(qName);
                }
            }
            if ("table".equals(qName)) {
                // The table unit is created before the pending text unit is closed (original order kept).
                openTableMediaUnit();
                if (this.textInProcess != null) {
                    closeTextMediaUnit();
                }
            }
        }
        if ("img".equals(qName)) {
            addImageMediaUnit(attributes);
            if (this.tableInProcess == null && this.textInProcess != null) {
                if (BLOC_LIST_ELEMENTS.contains(this.muHtmlElement)) {
                    appendLineSeparatorToText();
                } else {
                    closeTextMediaUnit();
                }
            }
        }
        if (this.textInProcess != null) {
            if (NEW_LINE_ELEMENTS.contains(qName)) {
                appendLineSeparatorToText();
            }
            if (TAB_LIST_ELEMENTS.contains(qName)) {
                appendToText("\t");
            }
            // NOTE(review): PDPageLabelRange.STYLE_LETTERS_LOWER is presumably the constant "a"
            // (anchor element) substituted by the decompiler — confirm.
            if (PDPageLabelRange.STYLE_LETTERS_LOWER.equals(qName) || "span".equals(qName)) {
                appendToText(" ");
            }
        }
    }

    /**
     * Forwards the event to the decorated handler, then appends separators to, or closes, the
     * table or text unit in process according to the element being closed.
     */
    @Override // org.apache.tika.sax.ContentHandlerDecorator, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
        if (this.tableInProcess != null) {
            if ("tr".equals(qName)) {
                appendToTable("\n");
            } else if ("td".equals(qName)) {
                appendToTable("\t");
            } else if ("table".equals(qName)) {
                closeTableMediaUnit();
            }
            return;
        }
        if (this.textInProcess == null) {
            return;
        }
        if ("br".equals(qName)) {
            // No table is open on this path, so the newline belongs to the text unit
            // (writing to tableInProcess here always failed with a NullPointerException).
            appendToText("\n");
        }
        boolean closesGoverningElement = this.muHtmlElement.equals(qName) && !TITLE_LIST_ELEMENTS.contains(this.muHtmlElement);
        if (closesGoverningElement || "body".equals(qName)) {
            closeTextMediaUnit();
            this.muHtmlElement = "body";
        } else if (BLOC_LIST_ELEMENTS.contains(qName) || NEW_LINE_ELEMENTS.contains(qName) || TITLE_LIST_ELEMENTS.contains(qName)) {
            appendLineSeparatorToText();
        } else {
            appendToText(" ");
        }
    }

    /**
     * Forwards the event to the decorated handler, then appends the characters to the table unit
     * if one is open, otherwise to the text unit (opening one if needed). Nothing is collected
     * before a governing element has been seen, nor for runs made only of layout whitespace.
     */
    @Override // org.apache.tika.sax.ContentHandlerDecorator, org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        // Compare by content, not by reference.
        if (this.muHtmlElement.isEmpty()) {
            return;
        }
        String raw = new String(ch, start, length);
        if (stripLayoutWhitespace(raw).length() == 0) {
            return;
        }
        // Stored content has tabs/newlines flattened to spaces; the profiler receives the raw text.
        String flattened = raw.replaceAll("\t", " ").replaceAll("\n", " ").replaceAll(LineSeparator.Macintosh, " ").replaceAll("  ", " ");
        if (this.textInProcess == null && this.tableInProcess == null) {
            openTextMediaUnit();
        }
        if (this.tableInProcess != null) {
            appendToTable(flattened);
            try {
                this.pTableWriter.append(raw);
            } catch (IOException e) {
                this.logger.error(e.getMessage(), e);
            }
            return;
        }
        appendToText(flattened);
        try {
            this.pWriter.append(raw);
        } catch (IOException e) {
            this.logger.error(e.getMessage(), e);
        }
    }

    /**
     * Attaches a new annotation to {@code resource} stating its language. When the configuration
     * provides a service URI, the annotation additionally records that URI as its producer and
     * the current date/time as its creation date.
     *
     * @param resource the resource to annotate
     * @param language the language code to record
     */
    private void annotate(Resource resource, String language) {
        Annotation annotation = AnnotationFactory.createAndLinkAnnotation(resource);
        JenaPoKHelper helper = new JenaPoKHelper(annotation);
        if (this.configuration.getServiceUri() == null) {
            helper.createLitStat(resource.getUri(), DublinCore.LANGUAGE_PROPERTY_NAME, language);
            return;
        }
        // Batch the statements and commit them together.
        helper.setAutoCommitMode(false);
        helper.setNSPrefix(DCTerms.PREFERRED_PREFIX, "http://purl.org/dc/terms/");
        helper.setNSPrefix(WebLabProcessing.PREFERRED_PREFIX, "http://weblab.ow2.org/core/1.2/ontology/processing#");
        helper.createLitStat(resource.getUri(), DublinCore.LANGUAGE_PROPERTY_NAME, language);
        helper.createResStat(annotation.getUri(), "http://weblab.ow2.org/core/1.2/ontology/processing#isProducedBy", this.configuration.getServiceUri());
        helper.createLitStat(annotation.getUri(), DCTerms.CREATED, DatatypeConverter.printDateTime(Calendar.getInstance()));
        helper.commit();
    }

    /** Sets the document that receives the media units created by this handler. */
    @Override // org.ow2.weblab.service.normaliser.tika.handlers.WebLabHandlerDecorator
    public void setDocument(Document document) {
        this.document = document;
    }

    /** Sets the configuration whose service URI is consulted when annotating units. */
    @Override // org.ow2.weblab.service.normaliser.tika.handlers.WebLabHandlerDecorator
    public void setTikaConfiguration(TikaConfiguration tikaConfiguration) {
        this.configuration = tikaConfiguration;
    }
}
