package gate.corpora;

import gate.Document;
import gate.Resource;
import gate.creole.ResourceInstantiationException;
import gate.event.StatusListener;
import gate.html.HtmlDocumentHandler;
import gate.util.DocumentFormatException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import javax.swing.text.html.parser.ParserDelegator;

/* loaded from: input_file:gate/corpora/HtmlDocumentFormat.class */
/**
 * Document format for {@code text/html} content.
 *
 * <p>Markup is unpacked by feeding the document's textual content to a Swing
 * {@link ParserDelegator} whose callbacks are handled by an
 * {@link HtmlDocumentHandler}. {@link #init()} registers this instance for the
 * {@code text/html} MIME type, the {@code html}/{@code htm} suffixes and the
 * {@code <html} magic string.
 */
public class HtmlDocumentFormat extends TextualDocumentFormat {
    private static final boolean DEBUG = false;

    /**
     * Reports that this format supports repositioning information.
     *
     * @return always {@link Boolean#TRUE}
     */
    @Override // gate.DocumentFormat
    public Boolean supportsRepositioning() {
        // Use the shared constant rather than the deprecated Boolean constructor.
        return Boolean.TRUE;
    }

    /**
     * Unpacks the markup of {@code document} without collecting repositioning
     * information; delegates to the three-argument overload with both
     * {@link RepositioningInfo} arguments set to {@code null}.
     *
     * @param document the document whose content is parsed
     * @throws DocumentFormatException if the document or its content is
     *         {@code null}, or if reading the content fails
     */
    @Override // gate.corpora.TextualDocumentFormat, gate.DocumentFormat
    public void unpackMarkup(Document document) throws DocumentFormatException {
        unpackMarkup(document, (RepositioningInfo) null, (RepositioningInfo) null);
    }

    /**
     * Parses the content of {@code document} as HTML, routing parser callbacks
     * to an {@link HtmlDocumentHandler} built from the document and this
     * format's markup elements map. Status messages emitted by the handler
     * during the parse are re-fired from this format.
     *
     * @param document the document whose content is parsed
     * @param repositioningInfo passed to the handler via
     *        {@code setRepositioningInfo}; may be {@code null}
     * @param repositioningInfo2 passed to the handler via
     *        {@code setAmpCodingInfo}; may be {@code null}
     * @throws DocumentFormatException if the document or its content is
     *         {@code null}, or if an {@link IOException} occurs while parsing
     */
    @Override // gate.corpora.TextualDocumentFormat, gate.DocumentFormat
    public void unpackMarkup(Document document, RepositioningInfo repositioningInfo, RepositioningInfo repositioningInfo2) throws DocumentFormatException {
        if (document == null || document.getContent() == null) {
            throw new DocumentFormatException("GATE document is null or no content found. Nothing to parse!");
        }

        // Read the characters straight from the string. Encoding to bytes and
        // decoding again with the platform default charset would replace any
        // character that charset cannot represent.
        StringReader contentReader = new StringReader(document.getContent().toString());

        ParserDelegator parserDelegator = new ParserDelegator();
        HtmlDocumentHandler htmlDocumentHandler = new HtmlDocumentHandler(document, this.markupElementsMap);

        // Forward the handler's status messages to this format's own listeners.
        StatusListener statusListener = new StatusListener() {
            @Override // gate.event.StatusListener
            public void statusChanged(String str) {
                HtmlDocumentFormat.this.fireStatusChanged(str);
            }
        };
        htmlDocumentHandler.addStatusListener(statusListener);
        htmlDocumentHandler.setRepositioningInfo(repositioningInfo);
        htmlDocumentHandler.setAmpCodingInfo(repositioningInfo2);

        try {
            // Third argument (ignoreCharSet) is true, as in the original call.
            parserDelegator.parse(contentReader, htmlDocumentHandler, true);
        } catch (IOException e) {
            throw new DocumentFormatException(e);
        } finally {
            // Always detach the forwarding listener, whether the parse succeeded or not.
            htmlDocumentHandler.removeStatusListener(statusListener);
        }
    }

    /**
     * Registers this format in the shared lookup maps under the
     * {@code text/html} MIME type, the {@code html} and {@code htm} suffixes
     * and the {@code <html} magic string, then sets that MIME type on this
     * instance.
     *
     * @return this instance
     * @throws ResourceInstantiationException declared by the overridden method;
     *         not thrown directly by this implementation
     */
    @Override // gate.corpora.TextualDocumentFormat, gate.creole.AbstractResource, gate.Resource
    public Resource init() throws ResourceInstantiationException {
        MimeType mimeType = new MimeType("text", "html");
        // Same "type/subtype" key is used for both MIME-string maps.
        String mimeKey = mimeType.getType() + "/" + mimeType.getSubtype();
        mimeString2ClassHandlerMap.put(mimeKey, this);
        mimeString2mimeTypeMap.put(mimeKey, mimeType);
        suffixes2mimeTypeMap.put("html", mimeType);
        suffixes2mimeTypeMap.put("htm", mimeType);
        magic2mimeTypeMap.put("<html", mimeType);
        setMimeType(mimeType);
        return this;
    }
}
