package org.ow2.weblab.crawler;

import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.ow2.weblab.content.api.ContentManager;
import org.ow2.weblab.core.extended.exception.WebLabCheckedException;
import org.ow2.weblab.core.extended.exception.WebLabUncheckedException;
import org.ow2.weblab.core.extended.factory.ResourceFactory;
import org.ow2.weblab.core.model.ComposedResource;
import org.ow2.weblab.core.model.Document;
import org.ow2.weblab.core.model.processing.WProcessingAnnotator;
import org.purl.dc.elements.DublinCoreAnnotator;
import org.purl.dc.terms.DCTermsAnnotator;

/* loaded from: input_file:org/ow2/weblab/crawler/FolderCrawler.class */
/**
 * Lists the files of a folder (optionally descending into sub-folders) and
 * turns them, page by page, into WebLab {@link Document}s gathered in a
 * {@link ComposedResource}.
 *
 * <p>Typical usage: build an instance, call {@link #startCrawl()} to fill the
 * internal file list, then call {@link #getCrawledDocuments(int, int)} one or
 * more times to load slices of that list.</p>
 *
 * <p>{@link #startCrawl()} and {@link #getCrawledDocuments(int, int)} both
 * synchronize on the same private lock; {@link #getNbFiles()} does not.</p>
 */
public class FolderCrawler {
    /** First identifier argument given to {@code ResourceFactory.createResource} for every resource built here. */
    protected static final String CRAWLER_ID = "crawlerFolder";

    /** Identifier constant kept for subclasses; it is not referenced inside this class. */
    protected static final String CRAWLER_CONTENT_ID = "crawlerFolderContent";

    private static final Log LOG = LogFactory.getLog(FolderCrawler.class);

    /** Default folder filter: accepts every directory. */
    private static final FileFilter FOLDER_FILTER = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.isDirectory();
        }
    };

    /** Default file filter used by {@link #FolderCrawler(String)}: accepts every regular file. */
    private static final FileFilter FILE_FILTER = new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.isFile();
        }
    };

    /** Content manager that receives the native content of each crawled file. */
    protected final ContentManager contentManager;

    /** Root folder of the crawl. */
    protected final File folder;

    /** Filter selecting the files to add to the crawl list. */
    protected final FileFilter fileFilter;

    /** Filter selecting the sub-folders to visit when {@link #recursiveMode} is {@code true}. */
    protected final FileFilter folderFilter;

    /** Kept for subclasses; it is not read inside this class. */
    protected final int bufferSize = 10000;

    /** Whether {@link #listAndAddFiles(File)} descends into sub-folders. */
    protected final boolean recursiveMode;

    /** Files found so far, in discovery order; guarded by {@link #lock} in the crawl methods. */
    private final List<File> crawledFiles = new ArrayList<File>();

    /** Monitor guarding {@link #crawledFiles} in {@link #startCrawl()} and {@link #getCrawledDocuments(int, int)}. */
    private final byte[] lock = new byte[0];

    /**
     * Creates a crawler with every setting given explicitly.
     *
     * @param contentManager manager used to store file contents; must not be {@code null}
     * @param folder         folder to crawl; must exist, must not be a regular file and must be readable
     * @param fileFilter     filter applied to select files (checked for {@code null} only in {@link #startCrawl()})
     * @param recursiveMode  {@code true} to also crawl sub-folders
     * @param folderFilter   filter applied to select sub-folders in recursive mode
     * @throws WebLabCheckedException if the content manager is {@code null} or the folder is {@code null} or invalid
     */
    public FolderCrawler(ContentManager contentManager, File folder, FileFilter fileFilter, boolean recursiveMode, FileFilter folderFilter) throws WebLabCheckedException {
        if (contentManager == null) {
            throw new WebLabCheckedException("Content manager must be well instanciated.");
        }
        this.contentManager = contentManager;
        // Report a null folder with the documented checked exception instead of a NullPointerException.
        if (folder == null) {
            throw new WebLabCheckedException("Folder to crawl 'null' is unvalid.");
        }
        if (!folder.exists() || folder.isFile() || !folder.canRead()) {
            throw new WebLabCheckedException("Folder to crawl '" + folder.getAbsolutePath() + "' is unvalid.");
        }
        this.folder = folder;
        this.recursiveMode = recursiveMode;
        this.fileFilter = fileFilter;
        this.folderFilter = folderFilter;
    }

    /**
     * Creates a crawler using the default folder filter (every directory).
     *
     * @param contentManager manager used to store file contents; must not be {@code null}
     * @param folder         folder to crawl
     * @param fileFilter     filter applied to select files
     * @param recursiveMode  {@code true} to also crawl sub-folders
     * @throws WebLabCheckedException if the content manager is {@code null} or the folder is invalid
     */
    public FolderCrawler(ContentManager contentManager, File folder, FileFilter fileFilter, boolean recursiveMode) throws WebLabCheckedException {
        this(contentManager, folder, fileFilter, recursiveMode, FOLDER_FILTER);
    }

    /**
     * Creates a non-recursive crawler on the given path, using
     * {@code ContentManager.getInstance()} and the default folder filter.
     *
     * @param folderPath path of the folder to crawl
     * @param fileFilter filter applied to select files
     * @throws WebLabCheckedException if the content manager is {@code null} or the folder is invalid
     */
    public FolderCrawler(String folderPath, FileFilter fileFilter) throws WebLabCheckedException {
        this(folderPath, fileFilter, false, FOLDER_FILTER);
    }

    /**
     * Creates a crawler on the given path, using
     * {@code ContentManager.getInstance()} and the default folder filter.
     *
     * @param folderPath    path of the folder to crawl
     * @param fileFilter    filter applied to select files
     * @param recursiveMode {@code true} to also crawl sub-folders
     * @throws WebLabCheckedException if the content manager is {@code null} or the folder is invalid
     */
    public FolderCrawler(String folderPath, FileFilter fileFilter, boolean recursiveMode) throws WebLabCheckedException {
        this(ContentManager.getInstance(), new File(folderPath), fileFilter, recursiveMode, FOLDER_FILTER);
    }

    /**
     * Creates a crawler on the given path, using {@code ContentManager.getInstance()}.
     *
     * @param folderPath    path of the folder to crawl
     * @param fileFilter    filter applied to select files
     * @param recursiveMode {@code true} to also crawl sub-folders
     * @param folderFilter  filter applied to select sub-folders in recursive mode
     * @throws WebLabCheckedException if the content manager is {@code null} or the folder is invalid
     */
    public FolderCrawler(String folderPath, FileFilter fileFilter, boolean recursiveMode, FileFilter folderFilter) throws WebLabCheckedException {
        this(ContentManager.getInstance(), new File(folderPath), fileFilter, recursiveMode, folderFilter);
    }

    /**
     * Creates a non-recursive crawler on the given path that accepts every regular file.
     *
     * @param folderPath path of the folder to crawl
     * @throws WebLabCheckedException if the content manager is {@code null} or the folder is invalid
     */
    public FolderCrawler(String folderPath) throws WebLabCheckedException {
        this(folderPath, FILE_FILTER);
    }

    /**
     * @return the number of files currently held in the crawl list
     */
    public int getNbFiles() {
        return this.crawledFiles.size();
    }

    /**
     * Walks the configured folder and appends every accepted file that is not
     * already known to the crawl list.
     *
     * @throws WebLabUncheckedException if the folder or the file filter is {@code null}
     */
    public void startCrawl() {
        if (this.folder == null || this.fileFilter == null) {
            throw new WebLabUncheckedException("Folder to crawl and file filter should have been defined previously.");
        }
        synchronized (this.lock) {
            listAndAddFiles(this.folder);
        }
        LOG.info(this.crawledFiles.size() + " crawled files in FolderCrawler: " + toString());
        LOG.debug("Crawled files: " + this.folder);
    }

    /**
     * Adds to the crawl list the files of {@code file} accepted by the file
     * filter, then, in recursive mode, does the same for each sub-folder
     * accepted by the folder filter. Does nothing when {@code file} is not a
     * directory.
     *
     * @param file folder whose content must be listed
     */
    protected void listAndAddFiles(File file) {
        if (!file.isDirectory()) {
            return;
        }
        LOG.debug("Add content of folder: " + file.getAbsolutePath());

        // The guard matches the level actually used by the call below.
        final boolean traceEnabled = LOG.isTraceEnabled();
        final File[] files = file.listFiles(this.fileFilter);
        if (files == null) {
            // File.listFiles returns null when an I/O error occurs.
            LOG.warn("Unable to list files of folder: " + file.getAbsolutePath());
        } else {
            for (File candidate : files) {
                if (!this.crawledFiles.contains(candidate)) {
                    if (traceEnabled) {
                        LOG.trace("Add file: " + candidate.getAbsolutePath());
                    }
                    this.crawledFiles.add(candidate);
                }
            }
        }

        if (this.recursiveMode) {
            final File[] subFolders = file.listFiles(this.folderFilter);
            if (subFolders == null) {
                LOG.warn("Unable to list sub-folders of folder: " + file.getAbsolutePath());
                return;
            }
            for (File subFolder : subFolders) {
                listAndAddFiles(subFolder);
            }
        }
    }

    /**
     * Loads a slice of the crawl list into a new {@link ComposedResource}.
     *
     * <p>For each file of the slice that still exists, is a regular file and
     * is readable, a {@link Document} is created, its native content is handed
     * to the content manager and annotations are written by
     * {@link #writeWeblabAnnotations(Document, File)}. A file failing those
     * checks is removed from the crawl list (which shifts the following
     * indexes) and does not count towards the limit.</p>
     *
     * @param offset index of the first file to load; a negative value is replaced by 0
     * @param limit  maximum number of documents to load; a value lower than or
     *               equal to 0 is replaced by {@code Integer.MAX_VALUE}
     * @return the collection of loaded documents; it is empty when the crawl
     *         list is empty or when {@code offset} is beyond its end
     * @throws WebLabUncheckedException if the content manager fails or a file cannot be opened
     */
    public ComposedResource getCrawledDocuments(int offset, int limit) {
        int start = offset;
        int max = limit;
        synchronized (this.lock) {
            ComposedResource collection = ResourceFactory.createResource(CRAWLER_ID, "tempCollection-" + System.currentTimeMillis(), ComposedResource.class);
            if (this.crawledFiles.isEmpty()) {
                LOG.warn("Either you haven't done a startCrawl before or folder (" + this.folder + ") was empty.");
                return collection;
            }
            if (start >= this.crawledFiles.size()) {
                LOG.warn("Every files have already been crawled.");
                return collection;
            }
            if (start < 0) {
                LOG.warn("Offset was negative, 0 used instead.");
                start = 0;
            }
            if (max <= 0) {
                LOG.info("Limit was null or negative. Integer.MAX_VALUE will be used.");
                max = Integer.MAX_VALUE;
            }

            int index = start;
            // index only advances on success, so (index - start) is the number of loaded documents.
            while (index < this.crawledFiles.size() && index - start < max) {
                File file = this.crawledFiles.get(index);
                if (!file.exists() || !file.isFile() || !file.canRead()) {
                    // Drop the entry; the next file slides into the same index.
                    this.crawledFiles.remove(index);
                    LOG.warn("File (" + file + ") is not crawlable");
                    continue;
                }

                Document document = (Document) ResourceFactory.createResource(CRAWLER_ID, "file" + index, Document.class);
                LOG.debug("Loading file: " + file.getAbsolutePath());
                try {
                    FileInputStream stream = new FileInputStream(file);
                    try {
                        this.contentManager.writeNativeContent(stream, document);
                    } finally {
                        // NOTE(review): assumes writeNativeContent has fully consumed the stream
                        // when it returns; closing here releases the file handle in every case.
                        closeQuietly(stream, file);
                    }
                    writeWeblabAnnotations(document, file);
                    collection.getResource().add(document);
                    index++;
                } catch (WebLabCheckedException e) {
                    throw new WebLabUncheckedException("Unexpected error with content manager.", e);
                } catch (FileNotFoundException e) {
                    throw new WebLabUncheckedException("Cannot create an InputStream on file [" + file + "].", e);
                }
            }
            LOG.info((this.crawledFiles.size() - index) + " files remaining in foldercrawler " + toString());
            return collection;
        }
    }

    /**
     * Closes the stream, logging (rather than propagating) a failure so that it
     * cannot mask an exception raised while the content was being written.
     */
    private static void closeQuietly(FileInputStream stream, File file) {
        try {
            stream.close();
        } catch (IOException e) {
            LOG.warn("Unable to close the stream on file [" + file + "].", e);
        }
    }

    /**
     * Annotates the document with information taken from the file: gathering
     * date (now), original file name and size, source path (canonical path, or
     * absolute path when the canonical one cannot be resolved), extent in
     * bytes and last modification date.
     *
     * @param document document to annotate
     * @param file     file the document was created from
     */
    protected void writeWeblabAnnotations(Document document, File file) {
        String sourcePath;
        try {
            sourcePath = file.getCanonicalPath();
        } catch (IOException e) {
            LOG.warn("Unable to get canonical path of file: " + file.getAbsolutePath() + "; absolute path will be used instead.");
            sourcePath = file.getAbsolutePath();
        }

        WProcessingAnnotator processingAnnotator = new WProcessingAnnotator(document);
        processingAnnotator.writeGatheringDate(new Date());
        processingAnnotator.writeOriginalFileName(file.getName());
        processingAnnotator.writeOriginalFileSize(Long.valueOf(file.length()));

        new DublinCoreAnnotator(document).writeSource(sourcePath);

        DCTermsAnnotator termsAnnotator = new DCTermsAnnotator(document);
        termsAnnotator.writeExtent(file.length() + " bytes");
        termsAnnotator.writeModified(new Date(file.lastModified()));
    }

    /**
     * @return a short description containing the absolute path of the crawled folder
     */
    @Override
    public String toString() {
        return "Folder to crawl: '" + this.folder.getAbsolutePath() + "'.";
    }
}
