package gate.corpora;

import gate.Document;
import gate.Resource;
import gate.creole.ResourceInstantiationException;
import gate.creole.metadata.AutoInstance;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.util.DocumentFormatException;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * HTML document format registered with CREOLE under the name
 * "GATE HTML Document Format".
 *
 * <p>NOTE(review): this source was produced by a decompiler. The body of the
 * three-argument {@code unpackMarkup} could not be recovered and is only a
 * placeholder that throws; restore it from the original sources before relying
 * on this class.
 */
@CreoleResource(name = "GATE HTML Document Format", isPrivate = true, autoinstances = {@AutoInstance(hidden = true)})
/* loaded from: input_file:WEB-INF/lib/gate-core-6.1.jar:gate/corpora/NekoHtmlDocumentFormat.class */
public class NekoHtmlDocumentFormat extends HtmlDocumentFormat {
    private static final boolean DEBUG = false;

    /**
     * Matches the empty string at every line start. With
     * {@link Pattern#MULTILINE}, {@code ^} matches at the beginning of the
     * input and after any line terminator except at the end of the input.
     */
    private static final Pattern AFTER_NEWLINE_PATTERN = Pattern.compile("^", Pattern.MULTILINE);

    /** Tags whose text content should be ignored; {@code null} until set. */
    private Set<String> ignorableTags = null;

    /**
     * Sets the HTML tags whose text content should be ignored.
     *
     * @param set the tag names; stored by reference, not copied
     */
    @CreoleParameter(comment = "HTML tags whose text content should be ignored", defaultValue = "script;style")
    public void setIgnorableTags(Set<String> set) {
        this.ignorableTags = set;
    }

    /**
     * Returns the HTML tags whose text content should be ignored.
     *
     * @return the set passed to {@link #setIgnorableTags(Set)}, or {@code null}
     *         if it has not been called
     */
    public Set<String> getIgnorableTags() {
        return this.ignorableTags;
    }

    /**
     * Reports whether this format supports repositioning.
     *
     * @return always {@link Boolean#TRUE}
     */
    @Override // gate.corpora.HtmlDocumentFormat, gate.DocumentFormat
    public Boolean supportsRepositioning() {
        return Boolean.TRUE;
    }

    /**
     * Unpacks the markup of the given document by delegating to the
     * three-argument overload with both repositioning arguments set to
     * {@code null}.
     *
     * @param document the document to process
     * @throws DocumentFormatException as declared by the three-argument overload
     */
    @Override // gate.corpora.HtmlDocumentFormat, gate.corpora.TextualDocumentFormat, gate.DocumentFormat
    public void unpackMarkup(Document document) throws DocumentFormatException {
        unpackMarkup(document, null, null);
    }

    /*  JADX ERROR: NullPointerException in pass: RegionMakerVisitor
        java.lang.NullPointerException
        */
    /**
     * Placeholder for the real implementation, which the decompiler failed to
     * recover. As written here it does no work and always throws.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // gate.corpora.HtmlDocumentFormat, gate.corpora.TextualDocumentFormat, gate.DocumentFormat
    public void unpackMarkup(gate.Document r10, gate.corpora.RepositioningInfo r11, gate.corpora.RepositioningInfo r12) throws gate.util.DocumentFormatException {
        /*
            Method dump skipped, instructions count: 470
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: gate.corpora.NekoHtmlDocumentFormat.unpackMarkup(gate.Document, gate.corpora.RepositioningInfo, gate.corpora.RepositioningInfo):void");
    }

    /**
     * Computes the character offset at which each line of the text starts.
     *
     * @param str the text to scan; must not be {@code null}
     * @return the start offsets of all matches of {@link #AFTER_NEWLINE_PATTERN}
     *         in ascending order, one entry per match
     */
    private int[] buildLineOffsets(String str) {
        Matcher matcher = AFTER_NEWLINE_PATTERN.matcher(str);
        // Collect in a single pass, growing the buffer as needed, instead of
        // running the regex over the whole text twice (count, then fill).
        int[] offsets = new int[16];
        int count = 0;
        while (matcher.find()) {
            if (count == offsets.length) {
                offsets = java.util.Arrays.copyOf(offsets, count * 2);
            }
            offsets[count++] = matcher.start();
        }
        // Trim to the exact number of matches found.
        return java.util.Arrays.copyOf(offsets, count);
    }

    /**
     * Registers this instance in the inherited lookup maps for the MIME type
     * built from "text" and "html", for the file suffixes "html" and "htm",
     * and for the magic string "&lt;html", then sets that MIME type on this
     * instance.
     *
     * @return this instance
     * @throws ResourceInstantiationException declared by the overridden method;
     *         not thrown by the code visible here
     */
    @Override // gate.corpora.HtmlDocumentFormat, gate.corpora.TextualDocumentFormat, gate.creole.AbstractResource, gate.Resource
    public Resource init() throws ResourceInstantiationException {
        MimeType mimeType = new MimeType("text", "html");
        // Build the "<type>/<subtype>" key once; both maps are keyed by it.
        String mimeKey = mimeType.getType() + "/" + mimeType.getSubtype();
        mimeString2ClassHandlerMap.put(mimeKey, this);
        mimeString2mimeTypeMap.put(mimeKey, mimeType);
        suffixes2mimeTypeMap.put("html", mimeType);
        suffixes2mimeTypeMap.put("htm", mimeType);
        magic2mimeTypeMap.put("<html", mimeType);
        setMimeType(mimeType);
        return this;
    }
}
