package org.ow2.weblab.service.normaliser.tika;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import javax.jws.WebService;
import javax.xml.bind.DatatypeConverter;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.language.ProfilingHandler;
import org.apache.tika.metadata.ClimateForcast;
import org.apache.tika.metadata.CreativeCommons;
import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Message;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.TeeContentHandler;
import org.ow2.weblab.content.api.ContentManager;
import org.ow2.weblab.content.impl.FileContentManager;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.factory.AnnotationFactory;
import org.ow2.weblab.core.extended.jaxb.XMLStringCleaner;
import org.ow2.weblab.core.extended.ontologies.DCTerms;
import org.ow2.weblab.core.extended.ontologies.DublinCore;
import org.ow2.weblab.core.extended.ontologies.RDFS;
import org.ow2.weblab.core.extended.ontologies.WebLabProcessing;
import org.ow2.weblab.core.extended.util.ResourceUtil;
import org.ow2.weblab.core.helper.impl.JenaPoKHelper;
import org.ow2.weblab.core.model.Annotation;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.Resource;
import org.ow2.weblab.core.model.Text;
import org.ow2.weblab.core.services.Analyser;
import org.ow2.weblab.core.services.ContentNotAvailableException;
import org.ow2.weblab.core.services.InvalidParameterException;
import org.ow2.weblab.core.services.UnexpectedException;
import org.ow2.weblab.core.services.analyser.ProcessArgs;
import org.ow2.weblab.core.services.analyser.ProcessReturn;
import org.ow2.weblab.rdf.Value;
import org.ow2.weblab.service.normaliser.tika.handlers.WebLabHandlerDecorator;
import org.purl.dc.elements.DublinCoreAnnotator;
import org.springframework.beans.propertyeditors.StringArrayPropertyEditor;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

@WebService(endpointInterface = "org.ow2.weblab.core.services.Analyser")
/* loaded from: input_file:WEB-INF/classes/org/ow2/weblab/service/normaliser/tika/TikaExtractorService.class */
public class TikaExtractorService implements Analyser {
    protected final Log logger = LogFactory.getLog(getClass());
    protected final ContentManager contentManager = ContentManager.getInstance();
    protected final TikaConfiguration serviceConfig;
    protected final TikaConfig tikaConfig;
    protected final boolean removeContent;
    protected final DateFormat simpleDateFormat;

    public TikaExtractorService(TikaConfiguration tikaConfiguration) throws TikaException, IOException {
        this.serviceConfig = tikaConfiguration;
        this.removeContent = !(this.contentManager.getReader() instanceof FileContentManager) && this.serviceConfig.isRemoveTempContent();
        this.simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd");
        if (this.serviceConfig.getPathToXmlConfigurationFile() == null) {
            this.logger.debug(Messages.getString(Constants.KEY_DEBUG_DEFAULT_TIKA_CONFIG));
            this.tikaConfig = new TikaConfig();
        } else {
            try {
                this.tikaConfig = new TikaConfig(getClass().getClassLoader().getResource(this.serviceConfig.getPathToXmlConfigurationFile()));
            } catch (SAXException e) {
                throw new IOException(e);
            }
        }
        if (this.contentManager == null) {
            this.logger.fatal(Messages.getString(Constants.KEY_ERROR_UNABLE_TO_LOAD_CONTENT_MANAGER));
            throw new IOException(Messages.getString(Constants.KEY_ERROR_UNABLE_TO_LOAD_CONTENT_MANAGER));
        }
        if (!(this.tikaConfig.getParser() instanceof CompositeParser)) {
            this.logger.warn(Messages.getString(Constants.KEY_WARN_NOT_A_COMPOSITE_PARSER_1, this.tikaConfig.getParser().getClass().getCanonicalName()));
        }
        this.logger.info(Messages.getString(Constants.KEY_INFO_SERVICE_STARTED));
    }

    /**
     * Extracts the text (and optionally the metadata) of the native content of the received document.
     *
     * A first extraction relies on the format declared on the document. If it produces no Text unit, a second extraction is run letting Tika guess the
     * format. Metadata are annotated on the document when the configuration asks for it.
     *
     * @param processArgs
     *            The wrapper of the document to process
     * @return A wrapper of the same document, enriched with the extracted content
     * @throws InvalidParameterException
     *             If the arguments do not hold a document
     * @throws ContentNotAvailableException
     *             If the native content cannot be read
     * @throws UnexpectedException
     *             If the extraction fails
     */
    @Override
    public ProcessReturn process(ProcessArgs processArgs) throws InvalidParameterException, ContentNotAvailableException, UnexpectedException {
        this.logger.trace("Process method called.");
        final Document document = checkArgs(processArgs);
        this.logger.info(Messages.getString(Constants.KEY_INFO_PROCESS_DOCUMENT_1, document.getUri()));

        final File content = getContent(document);
        try {
            Map<String, List<String>> metadata = extractTextAndMetadata(document, content, false);
            if (ResourceUtil.getSelectedSubResources(document, Text.class).isEmpty()) {
                this.logger.warn(Messages.getString(Constants.KEY_WARN_NO_TEXT_FOUND_2, content.getAbsolutePath(), document.getUri()));
                // Second chance: ignore the declared format and let Tika detect it.
                metadata = extractTextAndMetadata(document, content, true);
            }
            if (this.serviceConfig.isAddMetadata()) {
                annotate(document, metadata);
            }
        } finally {
            // Remove the temporary content on failure too, so that a failed extraction does not leak files.
            if (this.removeContent && !content.delete()) {
                this.logger.warn(Messages.getString(Constants.KEY_WARN_UNABLE_TO_DELETE_TEMP_2, content.getAbsolutePath(), document.getUri()));
            }
        }

        final ProcessReturn processReturn = new ProcessReturn();
        processReturn.setResource(document);
        this.logger.info(Messages.getString(Constants.KEY_INFO_END_OF_PROCESS_1, document.getUri()));
        return processReturn;
    }

    /**
     * Validates the arguments of the process method and extracts the document they hold.
     *
     * @param processArgs
     *            The arguments received by the process method
     * @return The resource of the arguments, cast as a Document
     * @throws InvalidParameterException
     *             If the arguments are null, if their resource is null, or if the resource is not a Document
     */
    protected Document checkArgs(ProcessArgs processArgs) throws InvalidParameterException {
        final Resource candidate = processArgs == null ? null : processArgs.getResource();

        final String errorMessage;
        if (processArgs == null) {
            errorMessage = Messages.getString(Constants.KEY_ERROR_PROCESSARGS_NULL);
        } else if (candidate == null) {
            errorMessage = Messages.getString(Constants.KEY_ERROR_RESOURCE_NULL);
        } else if (candidate instanceof Document) {
            return (Document) candidate;
        } else {
            errorMessage = Messages.getString(Constants.KEY_ERROR_NOT_A_DOCUMENT_2, candidate.getUri(), candidate.getClass().getCanonicalName());
        }

        // Every rejection is logged and reported the same way; only the detailed message differs.
        this.logger.error(errorMessage);
        throw new InvalidParameterException(errorMessage, Messages.getString(Constants.KEY_ERROR_INVALID_PARAM));
    }

    /**
     * Reads the native content of the document through the content manager and checks that it is usable.
     *
     * @param document
     *            The document whose native content is needed
     * @return An existing and readable file
     * @throws ContentNotAvailableException
     *             If the content manager fails, or if the file does not exist or is not readable
     */
    private File getContent(Document document) throws ContentNotAvailableException {
        try {
            final File nativeFile = this.contentManager.readNativeContent(document);
            final boolean present = nativeFile.exists();
            if (present && nativeFile.canRead()) {
                return nativeFile;
            }

            final String problem;
            if (present) {
                problem = Messages.getString(Constants.KEY_ERROR_CONTENT_FILE_NOT_READABLE_2, nativeFile.getAbsolutePath(), document.getUri());
            } else {
                problem = Messages.getString(Constants.KEY_ERROR_CONTENT_FILE_NOT_FOUND_2, nativeFile.getAbsolutePath(), document.getUri());
            }
            this.logger.error(problem);
            throw new ContentNotAvailableException(problem, Messages.getString(Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_SIMPLE));
        } catch (WebLabCheckedException wlce) {
            final String problem = Messages.getString(Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_1, document.getUri());
            this.logger.error(problem, wlce);
            throw new ContentNotAvailableException(problem, Messages.getString(Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_SIMPLE), wlce);
        }
    }

    /**
     * Parses the content file with Tika, lets the configured WebLab handler process the parsing events for the document and, when configured, saves an
     * XHTML version of the content as normalised content.
     *
     * The input stream and the temporary XHTML file are always released, whether the parsing succeeds or not.
     *
     * @param document
     *            The document that receives the extracted content
     * @param file
     *            The native content file to parse
     * @param z
     *            When <code>true</code> the format declared on the document is ignored and Tika detects the type itself
     * @return The metadata to annotate, grouped by property URI, or an empty map when metadata annotation is disabled
     * @throws UnexpectedException
     *             If the handler cannot be created or if the parsing fails
     * @throws ContentNotAvailableException
     *             If the content file cannot be opened
     */
    public Map<String, List<String>> extractTextAndMetadata(Document document, File file, boolean z) throws UnexpectedException, ContentNotAvailableException {
        final Parser parser = chooseParser(document, z);
        final boolean guessLanguage = this.serviceConfig.isAddMetadata() && this.serviceConfig.isAnnotateDocumentWithLang();
        final ProfilingHandler profilingHandler = new ProfilingHandler();

        // The temporary XHTML file is only needed when an XHTML output has been requested.
        boolean generateHtml = this.serviceConfig.isGenerateHtml();
        File xhtmlFile = null;
        if (generateHtml) {
            try {
                xhtmlFile = File.createTempFile(TikaConfiguration.UNMAPPED_PROPERTIES_PREFIX, ".xhtml");
            } catch (IOException ioe) {
                this.logger.warn(Messages.getString(Constants.KEY_WARN_UNABLE_TO_CREATE_TEMP_FILE_1, document.getUri()), ioe);
                generateHtml = false;
            }
        }

        try {
            if (guessLanguage && generateHtml) {
                this.logger.trace("Create a TeeContentHandler for language guesser, MediaUnit creation and XHTML output creation.");
            } else if (generateHtml) {
                this.logger.trace("Create a TeeContentHandler for MediaUnit creation and XHTML output creation.");
            } else if (guessLanguage) {
                this.logger.trace("Create a TeeContentHandler for language guesser and MediaUnit creation.");
            } else {
                this.logger.trace("Create a ContentHandler for MediaUnit creation.");
            }

            // Handler order matters for the Tee: media unit creation first, then language profiling, then XHTML output.
            final List<ContentHandler> handlers = new ArrayList<ContentHandler>(3);
            handlers.add(getMUCreatorCHandler(document));
            if (guessLanguage) {
                handlers.add(profilingHandler);
            }
            if (generateHtml) {
                try {
                    handlers.add(getHtmlCreatorCHandler(xhtmlFile));
                } catch (TransformerConfigurationException tce) {
                    this.logger.warn(Messages.getString(Constants.KEY_WARN_UNABLE_TO_CREATE_TRANSFORMER_1, document.getUri()), tce);
                    generateHtml = false;
                }
            }
            final ContentHandler handler;
            if (handlers.size() == 1) {
                handler = handlers.get(0);
            } else {
                handler = new TeeContentHandler(handlers.toArray(new ContentHandler[handlers.size()]));
            }

            final Metadata metadata = new Metadata();
            final InputStream contentStream;
            try {
                contentStream = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                final String message = Messages.getString(Constants.KEY_ERROR_CONTENT_FILE_NOT_FOUND_2, file.getAbsolutePath(), document.getUri());
                this.logger.error(message, fnfe);
                throw new ContentNotAvailableException(message, Messages.getString(Constants.KEY_ERROR_CONTENT_NOT_AVAILABLE_SIMPLE), fnfe);
            }

            this.logger.debug("Start parsing " + file.getPath() + " for document " + document.getUri() + ".");
            try {
                try {
                    parser.parse(contentStream, handler, metadata, new ParseContext());
                } finally {
                    // Release the native content whatever the outcome of the parsing.
                    IOUtils.closeQuietly(contentStream);
                }
                this.logger.debug("Finished parsing " + file.getPath() + " for document " + document.getUri() + ".");

                if (guessLanguage && profilingHandler.getLanguage().isReasonablyCertain()) {
                    metadata.set("language", profilingHandler.getLanguage().getLanguage());
                } else if (this.serviceConfig.isAnnotateDocumentWithLang() && this.serviceConfig.getDefaultLang() != null) {
                    metadata.set("language", this.serviceConfig.getDefaultLang());
                }

                if (generateHtml) {
                    if (!xhtmlFile.exists()) {
                        this.logger.warn(Messages.getString(Constants.KEY_WARN_NO_OUTPUT_FILE_2, xhtmlFile.getPath(), document.getUri()));
                    } else if (FileUtils.sizeOf(xhtmlFile) <= 0) {
                        this.logger.warn(Messages.getString(Constants.KEY_WARN_EMPTY_OUTPUT_FILE_2, xhtmlFile.getPath(), document.getUri()));
                    } else {
                        try {
                            final InputStream xhtmlStream = new FileInputStream(xhtmlFile);
                            this.logger.debug("Save normalised content file: " + xhtmlFile);
                            try {
                                this.contentManager.writeNormalisedContent(xhtmlStream, document);
                            } catch (WebLabCheckedException wlce) {
                                // Saving the normalised content is best effort: the extraction itself succeeded.
                                this.logger.warn(Messages.getString(Constants.KEY_WARN_ERROR_SAVING_NORMALISED_2, xhtmlFile.getPath(), document.getUri()), wlce);
                            } finally {
                                IOUtils.closeQuietly(xhtmlStream);
                            }
                        } catch (FileNotFoundException fnfe) {
                            this.logger.warn(Messages.getString(Constants.KEY_WARN_NO_OUTPUT_FILE_2, xhtmlFile.getPath(), document.getUri()), fnfe);
                        }
                    }
                }

                if (this.serviceConfig.isAddMetadata()) {
                    return fillMapWithMetadata(metadata);
                }
                return Collections.<String, List<String>> emptyMap();
            } catch (IOException ioe) {
                final String message = Messages.getString(Constants.KEY_ERROR_IOE_ON_CONTENT_2, file.getPath(), document.getUri());
                this.logger.error(message, ioe);
                throw new UnexpectedException(message, Messages.getString(Constants.KEY_ERROR_IOE_ON_CONTENT_SIMPLE), ioe);
            } catch (TikaException te) {
                final String message = Messages.getString(Constants.KEY_ERROR_TIKA_EX_ON_CONTENT_2, file.getPath(), document.getUri());
                this.logger.error(message, te);
                throw new UnexpectedException(message, Messages.getString(Constants.KEY_ERROR_ERROR_ON_CONTENT_SIMPLE), te);
            } catch (SAXException saxe) {
                final String message = Messages.getString(Constants.KEY_ERROR_SAXE_ON_CONTENT_2, file.getPath(), document.getUri());
                this.logger.error(message, saxe);
                throw new UnexpectedException(message, Messages.getString(Constants.KEY_ERROR_ERROR_ON_CONTENT_SIMPLE), saxe);
            }
        } finally {
            // deleteQuietly accepts null, which is the case when no temporary file has been created.
            FileUtils.deleteQuietly(xhtmlFile);
        }
    }

    /**
     * Selects the parser to use for a document.
     *
     * @param document
     *            The document whose declared format may drive the choice
     * @param ignoreDeclaredFormat
     *            When <code>true</code> the format annotated on the document is not read
     * @return The parser registered in the Tika configuration for the declared format when there is one, the configured parser itself when it is not a
     *         composite parser, or an auto-detecting parser in every other case
     */
    private Parser chooseParser(Document document, boolean ignoreDeclaredFormat) {
        String mimeType = null;
        if (!ignoreDeclaredFormat) {
            final Value<String> declaredFormat = new DublinCoreAnnotator(document).readFormat();
            if (declaredFormat != null && declaredFormat.hasValue()) {
                mimeType = declaredFormat.firstTypedValue();
                if (declaredFormat.getValues().size() > 1) {
                    this.logger.warn(Messages.getString(Constants.KEY_WARN_MORE_THAN_ONE_TYPE_2, document.getUri(), mimeType));
                }
            }
            this.logger.debug("Mime type detected in Resource: " + mimeType);
        }

        if (mimeType == null) {
            return new AutoDetectParser(this.tikaConfig);
        }

        final Parser configured = this.tikaConfig.getParser();
        if (!(configured instanceof CompositeParser)) {
            this.logger.debug("Tika Config does not use an AutodetectParser but a " + configured.getClass().getCanonicalName() + ".");
            return configured;
        }

        final MediaType mediaType = MediaType.parse(mimeType);
        final Parser dedicated = ((CompositeParser) configured).getParsers().get(mediaType);
        if (dedicated != null) {
            return dedicated;
        }
        this.logger.debug("No parser for type " + mediaType + " let Tika guess type.");
        return new AutoDetectParser(this.tikaConfig);
    }

    /**
     * Writes the metadata into a new annotation linked to the document.
     *
     * Each non-empty entry of the map becomes one literal statement per value, with the document as subject and the key as predicate. When a service URI is
     * configured, the annotation is also stamped with its producer and its creation date. If the commit fails, the annotation is removed from the document.
     *
     * @param document
     *            The document to annotate
     * @param map
     *            The values to write, grouped by property URI
     */
    protected void annotate(Document document, Map<String, List<String>> map) {
        if (map.isEmpty()) {
            this.logger.warn(Messages.getString(Constants.KEY_WARN_NO_META_1, document.getUri()));
            return;
        }

        final Annotation annotation = AnnotationFactory.createAndLinkAnnotation(document);
        final JenaPoKHelper pokHelper = new JenaPoKHelper(annotation);
        pokHelper.setAutoCommitMode(false);

        // Track which namespaces are really used, so that only the needed prefixes are declared.
        boolean usesDcTerms = false;
        boolean usesUnmapped = false;
        boolean usesProcessing = false;
        for (final Map.Entry<String, List<String>> entry : map.entrySet()) {
            final String property = entry.getKey();
            final List<String> propertyValues = entry.getValue();
            if (propertyValues.isEmpty()) {
                continue;
            }
            if (property.startsWith("http://purl.org/dc/terms/")) {
                usesDcTerms = true;
            } else if (property.startsWith("http://weblab.ow2.org/core/1.2/ontology/processing#")) {
                usesProcessing = true;
            } else if (property.startsWith(this.serviceConfig.getUnmappedPropertiesBaseUri())) {
                usesUnmapped = true;
            }
            for (final String propertyValue : propertyValues) {
                pokHelper.createLitStat(document.getUri(), property, propertyValue);
            }
        }

        final boolean stampedByService = this.serviceConfig.getServiceUri() != null;
        if (stampedByService) {
            pokHelper.createResStat(annotation.getUri(), "http://weblab.ow2.org/core/1.2/ontology/processing#isProducedBy", this.serviceConfig.getServiceUri());
            pokHelper.createLitStat(annotation.getUri(), DCTerms.CREATED, DatatypeConverter.printDateTime(Calendar.getInstance()));
        }
        // The service stamp itself relies on both the DCTerms and the processing namespaces.
        if (stampedByService || usesDcTerms) {
            pokHelper.setNSPrefix(DCTerms.PREFERRED_PREFIX, "http://purl.org/dc/terms/");
        }
        if (stampedByService || usesProcessing) {
            pokHelper.setNSPrefix(WebLabProcessing.PREFERRED_PREFIX, "http://weblab.ow2.org/core/1.2/ontology/processing#");
        }
        if (usesUnmapped) {
            pokHelper.setNSPrefix(this.serviceConfig.getUnmappedPropertiesPrefix(), this.serviceConfig.getUnmappedPropertiesBaseUri());
        }

        try {
            pokHelper.commit();
        } catch (Exception e) {
            this.logger.warn(Messages.getString(Constants.KEY_WARN_ERROR_COMMIT_2, document.getUri(), map), e);
            document.getAnnotation().remove(annotation);
        }
    }

    /**
     * Instantiates the configured WebLab handler decorator and binds it to the document, to the service configuration and to a body content handler created
     * with a write limit of -1.
     *
     * @param document
     *            The document the handler will work on
     * @return The handler, ready to receive parsing events
     * @throws UnexpectedException
     *             If the configured handler class cannot be instantiated
     */
    private WebLabHandlerDecorator getMUCreatorCHandler(Document document) throws UnexpectedException {
        final WebLabHandlerDecorator decorator;
        try {
            decorator = this.serviceConfig.getWebLabHandlerDecoratorClass().newInstance();
        } catch (IllegalAccessException iae) {
            throw handlerCreationFailure(iae);
        } catch (InstantiationException ie) {
            throw handlerCreationFailure(ie);
        }
        decorator.setDocument(document);
        decorator.setTikaConfiguration(this.serviceConfig);
        decorator.setContentHandler(new BodyContentHandler(-1));
        return decorator;
    }

    /**
     * Logs the failure to instantiate the configured handler class and builds the exception to throw.
     *
     * @param cause
     *            The reflection error
     * @return The exception describing the failure
     */
    private UnexpectedException handlerCreationFailure(Exception cause) {
        final String message = Messages.getString(Constants.KEY_ERROR_BAD_HANDLER_1, this.serviceConfig.getWebLabHandlerDecoratorClass().getCanonicalName());
        this.logger.error(message, cause);
        return new UnexpectedException(message, message, cause);
    }

    /**
     * Creates a content handler that serialises the SAX events it receives as indented XML into the given file.
     *
     * @param file
     *            The file that receives the output
     * @return The serialising content handler
     * @throws TransformerConfigurationException
     *             If the transformer handler cannot be created
     */
    private ContentHandler getHtmlCreatorCHandler(File file) throws TransformerConfigurationException {
        final SAXTransformerFactory saxFactory = (SAXTransformerFactory) TransformerFactory.newInstance();
        final TransformerHandler serialiser = saxFactory.newTransformerHandler();
        serialiser.getTransformer().setOutputProperty("method", "xml");
        serialiser.getTransformer().setOutputProperty("indent", "yes");
        final StreamResult target = new StreamResult(file);
        serialiser.setResult(target);
        return serialiser;
    }

    /**
     * Cleans in place every value of the map with the XML string cleaner, drops the values that are null or blank once cleaned, and removes the keys left
     * without any value.
     *
     * @param map
     *            The map to clean; it is modified by this method
     */
    private void cleanMap(Map<String, List<String>> map) {
        final Iterator<Map.Entry<String, List<String>>> entries = map.entrySet().iterator();
        while (entries.hasNext()) {
            final List<String> values = entries.next().getValue();
            final ListIterator<String> valueIterator = values.listIterator();
            while (valueIterator.hasNext()) {
                final String raw = valueIterator.next();
                final String cleaned = raw == null ? null : XMLStringCleaner.getXMLRecommendedString(raw);
                // The emptiness test applies to the CLEANED value: a string made only of characters removed by the cleaner must not be kept.
                if (cleaned == null || cleaned.trim().length() == 0) {
                    valueIterator.remove();
                } else {
                    valueIterator.set(cleaned);
                }
            }
            if (values.isEmpty()) {
                // Removing through the iterator avoids a second pass over the keys.
                entries.remove();
            }
        }
    }

    /**
     * Converts the metadata extracted by Tika into a map of values grouped by the URI of the property used to annotate them.
     *
     * Metadata names are compared ignoring case and the first matching rule wins. Names without any rule are either ignored or, when the configuration asks
     * for it, mapped to a property built from the unmapped properties base URI and the name in which every non-word character is replaced by an underscore.
     * The resulting map is cleaned before being returned.
     *
     * @param metadata
     *            The metadata filled by the Tika parser
     * @return The values to annotate, grouped by property URI; never null
     */
    protected Map<String, List<String>> fillMapWithMetadata(Metadata metadata) {
        final Map<String, List<String>> toAnnot = new HashMap<String, List<String>>();
        for (String str : metadata.names()) {
            final String[] values = metadata.getValues(str);
            if (values.length == 0) {
                continue;
            }
            if (str.equalsIgnoreCase(ClimateForcast.CONTACT) || str.equalsIgnoreCase(ClimateForcast.INSTITUTION)) {
                addToAnnot(toAnnot, DublinCore.CONTRIBUTOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(ClimateForcast.REFERENCES)) {
                addToAnnot(toAnnot, DCTerms.REFERENCES, values);
            } else if (str.equalsIgnoreCase("source")) {
                // Single rule for "source": a second identical test further down the chain could never be reached.
                addToAnnot(toAnnot, DublinCore.SOURCE_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(CreativeCommons.LICENSE_LOCATION) || str.equalsIgnoreCase(CreativeCommons.LICENSE_URL)) {
                addToAnnot(toAnnot, DCTerms.LICENSE, values);
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.CONTRIBUTOR)) {
                addToAnnot(toAnnot, DublinCore.CONTRIBUTOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.COVERAGE)) {
                // Coverage is annotated as dc:coverage (it used to be written as dc:creator).
                addToAnnot(toAnnot, DublinCore.COVERAGE_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.CREATOR)) {
                addToAnnot(toAnnot, DublinCore.CREATOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase("description")) {
                addToAnnot(toAnnot, DublinCore.DESCRIPTION_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase("format")) {
                addToAnnot(toAnnot, DublinCore.FORMAT_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.IDENTIFIER)) {
                addToAnnot(toAnnot, DublinCore.IDENTIFIER_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase("language") && this.serviceConfig.isAnnotateDocumentWithLang()) {
                addToAnnot(toAnnot, DublinCore.LANGUAGE_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.MODIFIED)) {
                for (String rawDate : values) {
                    // Unreadable dates are converted to an empty string, which is dropped by the final cleaning.
                    addToAnnot(toAnnot, DCTerms.MODIFIED, new String[] { convertToISO8601Date(rawDate) });
                }
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.PUBLISHER)) {
                addToAnnot(toAnnot, DublinCore.PUBLISHER_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.RELATION)) {
                addToAnnot(toAnnot, DublinCore.RELATION_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase("rights")) {
                addToAnnot(toAnnot, DublinCore.RIGHTS_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.SUBJECT)) {
                addToAnnot(toAnnot, DublinCore.SUBJECT_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase("title")) {
                addToAnnot(toAnnot, DublinCore.TITLE_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase("type")) {
                addToAnnot(toAnnot, DublinCore.TYPE_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(org.apache.tika.metadata.DublinCore.DATE.getName())) {
                addDateToAnnot(toAnnot, DublinCore.DATE_PROPERTY_NAME, metadata.getDate(org.apache.tika.metadata.DublinCore.DATE));
            } else if (str.equalsIgnoreCase(HttpHeaders.LAST_MODIFIED.getName())) {
                addDateToAnnot(toAnnot, DCTerms.MODIFIED, metadata.getDate(HttpHeaders.LAST_MODIFIED));
            } else if (str.equalsIgnoreCase(Message.MESSAGE_FROM)) {
                addToAnnot(toAnnot, DublinCore.CREATOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(MSOffice.AUTHOR)) {
                addToAnnot(toAnnot, DublinCore.CREATOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(MSOffice.CHARACTER_COUNT)) {
                addToDCExtent(toAnnot, values, "characters");
            } else if (str.equalsIgnoreCase(MSOffice.CHARACTER_COUNT_WITH_SPACES)) {
                addToDCExtent(toAnnot, values, "characters (with spaces)");
            } else if (str.equalsIgnoreCase(MSOffice.COMMENTS)) {
                addToAnnot(toAnnot, RDFS.COMMENT, values);
            } else if (str.equalsIgnoreCase(MSOffice.COMPANY)) {
                addToAnnot(toAnnot, DublinCore.CONTRIBUTOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(MSOffice.KEYWORDS)) {
                addToAnnot(toAnnot, DublinCore.DESCRIPTION_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(MSOffice.LAST_AUTHOR)) {
                addToAnnot(toAnnot, DublinCore.CONTRIBUTOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(MSOffice.LINE_COUNT)) {
                addToDCExtent(toAnnot, values, "lines");
            } else if (str.equalsIgnoreCase(MSOffice.MANAGER)) {
                addToAnnot(toAnnot, DublinCore.CONTRIBUTOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(MSOffice.NOTES)) {
                addToAnnot(toAnnot, RDFS.COMMENT, values);
            } else if (str.equalsIgnoreCase(MSOffice.PAGE_COUNT)) {
                addToDCExtent(toAnnot, values, "pages");
            } else if (str.equalsIgnoreCase(MSOffice.PARAGRAPH_COUNT)) {
                addToDCExtent(toAnnot, values, "paragraphs");
            } else if (str.equalsIgnoreCase(MSOffice.SLIDE_COUNT)) {
                addToDCExtent(toAnnot, values, "slides");
            } else if (str.equalsIgnoreCase(MSOffice.WORD_COUNT)) {
                addToDCExtent(toAnnot, values, "words");
            } else if (str.equalsIgnoreCase(MSOffice.CREATION_DATE.getName())) {
                addDateToAnnot(toAnnot, DCTerms.CREATED, metadata.getDate(MSOffice.CREATION_DATE));
            } else if (str.equalsIgnoreCase(MSOffice.LAST_SAVED.getName())) {
                addDateToAnnot(toAnnot, DCTerms.MODIFIED, metadata.getDate(MSOffice.LAST_SAVED));
            } else if (str.equalsIgnoreCase(PagedText.N_PAGES.getName())) {
                addToDCExtent(toAnnot, values, "pages");
            } else if (str.equalsIgnoreCase(TIFF.ORIGINAL_DATE.getName())) {
                addDateToAnnot(toAnnot, DCTerms.CREATED, metadata.getDate(TIFF.ORIGINAL_DATE));
            } else if (str.equalsIgnoreCase(XMPDM.ARTIST.getName())) {
                addToAnnot(toAnnot, DublinCore.CONTRIBUTOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(XMPDM.AUDIO_MOD_DATE.getName())) {
                addDateToAnnot(toAnnot, DCTerms.MODIFIED, metadata.getDate(XMPDM.AUDIO_MOD_DATE));
            } else if (str.equalsIgnoreCase(XMPDM.COMPOSER.getName())) {
                addToAnnot(toAnnot, DublinCore.CONTRIBUTOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(XMPDM.COPYRIGHT.getName())) {
                addToAnnot(toAnnot, DublinCore.RIGHTS_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(XMPDM.ENGINEER.getName())) {
                addToAnnot(toAnnot, DublinCore.CONTRIBUTOR_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(XMPDM.GENRE.getName())) {
                addToAnnot(toAnnot, DublinCore.DESCRIPTION_PROPERTY_NAME, values);
            } else if (str.equalsIgnoreCase(XMPDM.LOG_COMMENT.getName())) {
                addToAnnot(toAnnot, RDFS.COMMENT, values);
            } else if (str.equalsIgnoreCase(XMPDM.METADATA_MOD_DATE.getName())) {
                addDateToAnnot(toAnnot, DCTerms.MODIFIED, metadata.getDate(XMPDM.METADATA_MOD_DATE));
            } else if (str.equalsIgnoreCase(XMPDM.RELEASE_DATE.getName())) {
                // Read the release date itself (the metadata modification date used to be read here).
                addDateToAnnot(toAnnot, DCTerms.AVAILABLE, metadata.getDate(XMPDM.RELEASE_DATE));
            } else if (str.equalsIgnoreCase(XMPDM.SHOT_DATE.getName())) {
                addDateToAnnot(toAnnot, DCTerms.CREATED, metadata.getDate(XMPDM.SHOT_DATE));
            } else if (this.serviceConfig.isAddUnmappedProperties()) {
                final String localName = str.replaceAll("\\W", "_");
                String propertyUri = null;
                Exception failure = null;
                try {
                    // Going through URL then URI validates the property built from the configured base.
                    propertyUri = new URL(this.serviceConfig.getUnmappedPropertiesBaseUri() + localName).toURI().toString();
                } catch (MalformedURLException murle) {
                    failure = murle;
                } catch (URISyntaxException urise) {
                    failure = urise;
                }
                if (failure == null) {
                    addToAnnot(toAnnot, propertyUri, values);
                } else {
                    // Arrays.toString prints the values; toString() on the array would only print its identity hash.
                    this.logger.warn(Messages.getString(Constants.KEY_WARN_UNMAPPED_PROPERTY_ERROR_4, str, this.serviceConfig.getUnmappedPropertiesBaseUri(), localName, Arrays.toString(values)), failure);
                }
            }
        }
        cleanMap(toAnnot);
        return toAnnot;
    }

    /**
     * Adds a date, formatted with the date format of the service, to the values of a property. Nothing is added when the date is null.
     *
     * @param map
     *            The map to complete
     * @param property
     *            The URI of the property
     * @param date
     *            The date to add; may be null when Tika could not read it
     */
    private void addDateToAnnot(Map<String, List<String>> map, String property, Date date) {
        if (date == null) {
            return;
        }
        final String formatted;
        // The date format is shared by every request handled by this instance and is not thread-safe.
        synchronized (this.simpleDateFormat) {
            formatted = this.simpleDateFormat.format(date);
        }
        addToAnnot(map, property, new String[] { formatted });
    }

    /**
     * Adds each value, followed by a space and the unit, to the values of the DCTerms extent property.
     *
     * @param map
     *            The map to complete
     * @param strArr
     *            The raw values
     * @param str
     *            The unit appended to each value
     */
    private void addToDCExtent(Map<String, List<String>> map, String[] strArr, String str) {
        for (int index = 0; index < strArr.length; index++) {
            final String withUnit = strArr[index] + " " + str;
            addToAnnot(map, DCTerms.EXTENT, new String[] { withUnit });
        }
    }

    /**
     * Appends values to the list registered for a property, creating the list on first use.
     *
     * @param map
     *            The map to complete
     * @param str
     *            The URI of the property
     * @param strArr
     *            The values to append, in order
     */
    private void addToAnnot(Map<String, List<String>> map, String str, String[] strArr) {
        if (!map.containsKey(str)) {
            final List<String> created = new LinkedList<String>();
            map.put(str, created);
        }
        final List<String> target = map.get(str);
        final List<String> additions = Arrays.asList(strArr);
        target.addAll(additions);
    }

    /**
     * Converts a textual date into the date format of the service.
     *
     * Three layouts are recognised, selected from the text itself: a text starting with a digit is read as <code>yyyy-MM-dd'T'HH:mm:ss</code>, a text
     * containing a comma as <code>EEE, d MMM yyyy HH:mm:ss Z</code>, and any other text as <code>EEE MMM d HH:mm:ss z yyyy</code>. Names are parsed with the
     * English locale.
     *
     * @param str
     *            The date to convert; may be null
     * @return The formatted date, or an empty string when the input is null, blank or unreadable
     */
    private String convertToISO8601Date(String str) {
        if (str == null) {
            return "";
        }
        final String trimmed = str.trim();
        if (trimmed.length() == 0) {
            return "";
        }

        final String pattern;
        if (Character.isDigit(trimmed.charAt(0))) {
            pattern = "yyyy-MM-dd'T'HH:mm:ss";
        } else if (trimmed.contains(",")) {
            pattern = "EEE, d MMM yyyy HH:mm:ss Z";
        } else {
            // 24-hour clock (HH): with the 12-hour pattern letter (hh), "12:30:00" would be read as 00:30:00.
            pattern = "EEE MMM d HH:mm:ss z yyyy";
        }

        final Date parsed;
        try {
            // The parsing format is created per call, it is therefore never shared between threads.
            parsed = new SimpleDateFormat(pattern, Locale.ENGLISH).parse(trimmed);
        } catch (ParseException pe) {
            this.logger.warn("Unable to read date: '" + trimmed + "'.", pe);
            return "";
        }

        // The output format is shared by every request handled by this instance and is not thread-safe.
        synchronized (this.simpleDateFormat) {
            return this.simpleDateFormat.format(parsed);
        }
    }

    /**
     * Builds a new list holding each value of the received list with the unit appended to it. The received list is not modified.
     *
     * @param list
     *            The values
     * @param str
     *            The text appended to each value
     * @return A new list, in the same order as the received one
     */
    protected static List<String> addUnitOnValues(List<String> list, String str) {
        final List<String> suffixed = new ArrayList<String>();
        for (final String value : list) {
            suffixed.add(value + str);
        }
        return suffixed;
    }
}
