package gate.creole.gazetteer;

import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.Resource;
import gate.Utils;
import gate.creole.ANNIEConstants;
import gate.creole.CustomDuplication;
import gate.creole.ExecutionException;
import gate.creole.ExecutionInterruptedException;
import gate.creole.ResourceInstantiationException;
import gate.creole.orthomatcher.OrthoMatcherRule;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.Strings;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/* loaded from: input_file:gate/creole/gazetteer/DefaultGazetteer.class */
public class DefaultGazetteer extends AbstractGazetteer implements CustomDuplication {
    /** Debug switch; constant {@code false} and not read anywhere in this file. */
    private static final boolean DEBUG = false;
    /** Parameter-name constant with the value {@code "document"}. */
    public static final String DEF_GAZ_DOCUMENT_PARAMETER_NAME = "document";
    /** Parameter-name constant with the value {@code "annotationSetName"}. */
    public static final String DEF_GAZ_ANNOT_SET_PARAMETER_NAME = "annotationSetName";
    /** Parameter-name constant with the value {@code "listsURL"}. */
    public static final String DEF_GAZ_LISTS_URL_PARAMETER_NAME = "listsURL";
    /** Parameter-name constant with the value {@code "encoding"}. */
    public static final String DEF_GAZ_ENCODING_PARAMETER_NAME = "encoding";
    /** Parameter-name constant with the value {@code "caseSensitive"}. */
    public static final String DEF_GAZ_CASE_SENSITIVE_PARAMETER_NAME = "caseSensitive";
    /** Parameter-name constant with the value {@code "longestMatchOnly"}. */
    public static final String DEF_GAZ_LONGEST_MATCH_ONLY_PARAMETER_NAME = "longestMatchOnly";
    /** Parameter-name constant with the value {@code "gazetteerFeatureSeparator"}. */
    public static final String DEF_GAZ_FEATURE_SEPARATOR_PARAMETER_NAME = "gazetteerFeatureSeparator";
    /** Separator string; unescaped and handed to the list definition in {@code init()}. */
    protected String gazetteerFeatureSeparator;
    /** Result of {@code definition.loadLists()}; {@code readList} looks lists up in it by their definition node. */
    protected Map listsByNode;
    /** Start state of the lookup state machine; every traversal in this class begins here. */
    protected FSMState initialState;
    /**
     * Created empty in {@code init()} and iterated as {@code FSMState} elements by {@code getFSMgml()}.
     * NOTE(review): nothing in this file adds to it; presumably FSMState registers itself — confirm.
     */
    protected Set fsmStates;
    /* loaded from: input_file:gate/creole/gazetteer/DefaultGazetteer$CharMap.class */
    /**
     * A compact map from {@code char} keys to arbitrary objects, backed by two
     * parallel arrays that are kept sorted by key so that reads can use a
     * binary search. Both arrays stay {@code null} until the first insertion.
     */
    public static class CharMap {
        /** Keys in ascending order. */
        char[] itemsKeys = null;
        /** Values; the element at position k belongs to {@code itemsKeys[k]}. */
        Object[] itemsObjs = null;

        /**
         * Replaces both arrays with copies that are one element longer and
         * have an unassigned gap at position {@code i}; existing elements at
         * {@code i} and beyond move one slot to the right.
         *
         * @param i position of the gap, between 0 and the current length inclusive
         */
        void resize(int i) {
            int grownLength = this.itemsKeys.length + 1;
            char[] grownKeys = new char[grownLength];
            Object[] grownObjs = new Object[grownLength];
            // Elements before the gap keep their position.
            System.arraycopy(this.itemsKeys, 0, grownKeys, 0, i);
            System.arraycopy(this.itemsObjs, 0, grownObjs, 0, i);
            // Elements from the gap onwards are shifted right by one.
            int tailLength = grownLength - i - 1;
            System.arraycopy(this.itemsKeys, i, grownKeys, i + 1, tailLength);
            System.arraycopy(this.itemsObjs, i, grownObjs, i + 1, tailLength);
            this.itemsKeys = grownKeys;
            this.itemsObjs = grownObjs;
        }

        /**
         * Returns the value stored for {@code c}.
         *
         * @param c the key to search for
         * @return the stored value, or {@code null} when the map is empty or has no such key
         */
        public Object get(char c) {
            if (this.itemsKeys == null) {
                return null;
            }
            int position = Arrays.binarySearch(this.itemsKeys, c);
            return position >= 0 ? this.itemsObjs[position] : null;
        }

        /**
         * Stores {@code obj} under {@code c} unless the key is already present.
         * An existing mapping is left untouched.
         *
         * @param c   the key
         * @param obj the value to store when the key is new
         * @return the value mapped to {@code c} after the call: {@code obj} for a new key,
         *         the previously stored value otherwise
         */
        public Object put(char c, Object obj) {
            if (this.itemsKeys == null) {
                this.itemsKeys = new char[] {c};
                this.itemsObjs = new Object[] {obj};
                return obj;
            }
            int position = Arrays.binarySearch(this.itemsKeys, c);
            if (position >= 0) {
                // Key already known: keep the old value and report it.
                return this.itemsObjs[position];
            }
            // A negative result encodes the insertion point as -(point) - 1.
            int insertAt = -position - 1;
            resize(insertAt);
            this.itemsKeys[insertAt] = c;
            this.itemsObjs[insertAt] = obj;
            return this.itemsObjs[insertAt];
        }
    }

    /* loaded from: input_file:gate/creole/gazetteer/DefaultGazetteer$Iter.class */
    /**
     * Minimal iterator contract over primitive {@code char} values.
     * It is declared here but not referenced anywhere else in this file.
     */
    public interface Iter {
        /** Reports whether a further call to {@link #next()} is possible. */
        boolean hasNext();

        /** Returns the next {@code char} of the iteration. */
        char next();
    }

    /**
     * Initialises this gazetteer: resets the state machine, loads the linear
     * definition found at {@code listsURL}, loads every list it references and
     * adds the entries of each list to the state machine.
     *
     * <p>Status and progress events are fired once per definition node, and a
     * process-finished event is fired at the end.
     *
     * @return this resource
     * @throws ResourceInstantiationException if {@code listsURL} is {@code null},
     *         or if thrown while a list is read
     */
    @Override // gate.creole.AbstractProcessingResource, gate.creole.AbstractResource, gate.Resource
    public Resource init() throws ResourceInstantiationException {
        // The state machine is reset before the URL check, so both fields are
        // replaced even when initialisation subsequently fails.
        this.fsmStates = new HashSet();
        this.initialState = new FSMState(this);
        if (this.listsURL == null) {
            throw new ResourceInstantiationException("No URL provided for gazetteer creation!");
        }

        this.definition = new LinearDefinition();
        this.definition.setSeparator(Strings.unescape(this.gazetteerFeatureSeparator));
        this.definition.setURL(this.listsURL);
        this.definition.load();

        int nodeTotal = this.definition.size();
        this.listsByNode = this.definition.loadLists();

        int nodesSeen = 0;
        for (Iterator nodes = this.definition.iterator(); nodes.hasNext(); ) {
            LinearNode node = (LinearNode) nodes.next();
            fireStatusChanged("Reading " + node.toString());
            nodesSeen++;
            fireProgressChanged((nodesSeen * 100) / nodeTotal);
            readList(node, true);
        }
        fireProcessFinished();
        return this;
    }

    /**
     * Walks all entries of the list attached to {@code linearNode} and either
     * adds them to, or removes them from, the state machine.
     *
     * <p>Entries without a feature map share a single {@code Lookup} instance;
     * every entry that has a feature map gets its own {@code Lookup} carrying
     * those features.
     *
     * @param linearNode the definition node whose list is processed
     * @param z          {@code true} to add the entries, {@code false} to remove them
     * @throws ResourceInstantiationException if {@code linearNode} is {@code null} or
     *         no list is registered for it in {@code listsByNode}
     */
    protected void readList(LinearNode linearNode, boolean z) throws ResourceInstantiationException {
        if (linearNode == null) {
            throw new ResourceInstantiationException(" LinearNode node is null ");
        }
        String listName = linearNode.getList();
        String major = linearNode.getMajorType();
        String minor = linearNode.getMinorType();
        String lang = linearNode.getLanguage();

        GazetteerList entries = (GazetteerList) this.listsByNode.get(linearNode);
        if (entries == null) {
            throw new ResourceInstantiationException("gazetteer list not found by node");
        }

        Iterator entryIterator = entries.iterator();
        // Shared by all entries that carry no features of their own.
        Lookup sharedLookup = lookupForNode(linearNode, listName, major, minor, lang);

        while (entryIterator.hasNext()) {
            GazetteerNode entryNode = (GazetteerNode) entryIterator.next();
            String text = entryNode.getEntry();
            Map entryFeatures = entryNode.getFeatureMap();

            Lookup target;
            if (entryFeatures != null) {
                target = lookupForNode(linearNode, listName, major, minor, lang);
                target.features = entryFeatures;
            } else {
                target = sharedLookup;
            }

            if (z) {
                addLookup(text, target);
            } else {
                removeLookup(text, target);
            }
        }
    }

    /**
     * Creates a {@code Lookup} for the given node and, when a mapping
     * definition exists and knows the node's list, copies the class and
     * ontology identifiers onto it.
     */
    private Lookup lookupForNode(LinearNode linearNode, String listName, String major, String minor, String lang) {
        Lookup created = new Lookup(listName, major, minor, lang);
        created.list = linearNode.getList();
        if (this.mappingDefinition != null) {
            MappingNode mapped = this.mappingDefinition.getNodeByList(created.list);
            if (mapped != null) {
                created.oClass = mapped.getClassID();
                created.ontology = mapped.getOntologyID();
            }
        }
        return created;
    }

    /**
     * Registers {@code lookup} for the text {@code str}, creating whatever
     * states are missing along the way.
     *
     * <p>Every whitespace character is stored as a single space, and the state
     * created for it loops back to itself on further spaces. Other characters
     * are upper-cased first when the gazetteer is not case sensitive.
     *
     * @param str    the entry text
     * @param lookup the lookup to attach to the state reached at the end of {@code str}
     */
    public void addLookup(String str, Lookup lookup) {
        FSMState state = this.initialState;
        int textLength = str.length();
        for (int pos = 0; pos < textLength; pos++) {
            char raw = str.charAt(pos);
            boolean whitespace = Character.isWhitespace(raw);

            char key;
            if (whitespace) {
                key = ' ';
            } else if (this.caseSensitive.booleanValue()) {
                key = raw;
            } else {
                key = Character.toUpperCase(raw);
            }

            FSMState target = state.next(key);
            if (target == null) {
                target = new FSMState(this);
                state.put(key, target);
                if (whitespace) {
                    // Self-loop on the new whitespace state.
                    target.put(' ', target);
                }
            }
            state = target;
        }
        state.addLookup(lookup);
    }

    /**
     * Detaches {@code lookup} from the state reached by the text {@code str}.
     * Nothing happens when the text does not lead to an existing state.
     *
     * <p>Characters are normalised exactly as {@code addLookup} normalises
     * them: whitespace becomes a single space, and other characters are
     * upper-cased when the gazetteer is not case sensitive. Without the case
     * step, entries of a case-insensitive gazetteer (stored upper-cased) could
     * never be reached unless the caller already passed upper-case text.
     *
     * @param str    the entry text
     * @param lookup the lookup to remove
     */
    public void removeLookup(String str, Lookup lookup) {
        FSMState state = this.initialState;
        for (int i = 0; i < str.length(); i++) {
            char raw = str.charAt(i);
            char key;
            if (Character.isWhitespace(raw)) {
                key = ' ';
            } else {
                key = this.caseSensitive.booleanValue() ? raw : Character.toUpperCase(raw);
            }
            FSMState next = state.next(key);
            if (next == null) {
                // The text was never added, so there is nothing to remove.
                return;
            }
            state = next;
        }
        state.removeLookup(lookup);
    }

    /**
     * Renders the state machine as a GML-style text: one {@code node[...]}
     * section per state in {@code fsmStates}, followed by the edge text each
     * state produces, wrapped in a {@code graph[ ... ]} section.
     *
     * <p>A node is labelled with its index; final states additionally get
     * {@code ,F}, a literal backslash-n sequence and their lookup set.
     *
     * @return the textual description of the graph
     */
    public String getFSMgml() {
        StringBuilder nodes = new StringBuilder(1024);
        StringBuilder edges = new StringBuilder(1024);
        // fsmStates is declared as a raw Set, so elements arrive as Object and
        // have to be cast; a typed for-each over the raw Set does not compile.
        for (Object element : this.fsmStates) {
            FSMState state = (FSMState) element;
            int index = state.getIndex();
            nodes.append("node[ id ");
            nodes.append(index);
            nodes.append(" label \"");
            nodes.append(index);
            if (state.isFinal()) {
                nodes.append(",F\\n");
                nodes.append(state.getLookupSet());
            }
            nodes.append("\"  ]\n");
            edges.append(state.getEdgesGML());
        }
        return "graph[ \ndirected 1\n" + nodes.toString() + edges.toString() + "]\n";
    }

    /**
     * Tells whether {@code c} counts as part of a word: a letter, a combining
     * spacing mark or a non-spacing mark.
     *
     * @param c the character to classify
     * @return {@code true} for letters and the two mark categories, {@code false} otherwise
     */
    public static boolean isWordInternal(char c) {
        if (Character.isLetter(c)) {
            return true;
        }
        int category = Character.getType(c);
        return category == Character.COMBINING_SPACING_MARK || category == Character.NON_SPACING_MARK;
    }

    /**
     * Runs the gazetteer over the current document and adds an annotation for
     * every match found.
     *
     * <p>The document text is scanned from each start offset in turn. From a
     * start offset the state machine is followed character by character
     * (whitespace is treated as a single space; other characters are
     * upper-cased unless the gazetteer is case sensitive). Each time a final
     * state is reached, and the whole-word condition holds when
     * {@code wholeWordsOnly} is set, it is remembered as the latest match.
     * When {@code longestMatchOnly} is off, the previously remembered match is
     * emitted before being replaced. When the machine cannot advance, or the
     * text ends, the remembered match is emitted and scanning restarts one
     * character after the previous start offset.
     *
     * <p>Progress is reported whenever the scan position has advanced by more
     * than 256 characters since the last report; interruption is checked at
     * the same points.
     *
     * @throws ExecutionException if no document is set
     * @throws ExecutionInterruptedException if an interrupt is detected during the scan
     */
    @Override // gate.creole.AbstractProcessingResource, gate.Executable
    public void execute() throws ExecutionException {
        this.interrupted = false;
        if (this.document == null) {
            throw new ExecutionException("No document to process!");
        }
        // NOTE(review): comparing the set name with OrthoMatcherRule.description looks like a
        // decompiler substitution for an empty-string check — confirm against the real sources.
        AnnotationSet annotations = (this.annotationSetName == null || this.annotationSetName.equals(OrthoMatcherRule.description)) ? this.document.getAnnotations() : this.document.getAnnotations(this.annotationSetName);
        fireStatusChanged("Performing look-up in " + this.document.getName() + "...");
        String content = this.document.getContent().toString();
        int length = content.length();
        FSMState currentState = this.initialState;
        // Most recent final state seen for the current start offset, if any.
        FSMState lastMatchingState = null;
        // Offset of the last character of the remembered match (inclusive).
        int matchedRegionEnd = 0;
        // Offset at which the current scan attempt started.
        int matchedRegionStart = 0;
        int charIdx = 0;
        int lastReportedIdx = 0;
        while (charIdx < length) {
            char currentChar = content.charAt(charIdx);
            char key;
            if (Character.isWhitespace(currentChar)) {
                key = ' ';
            } else {
                key = this.caseSensitive.booleanValue() ? currentChar : Character.toUpperCase(currentChar);
            }
            FSMState nextState = currentState.next(key);
            if (nextState == null) {
                // Dead end: emit what was remembered and restart one character further on.
                if (lastMatchingState != null) {
                    createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotations);
                    lastMatchingState = null;
                }
                charIdx = matchedRegionStart + 1;
                matchedRegionStart = charIdx;
                currentState = this.initialState;
            } else {
                currentState = nextState;
                if (currentState.isFinal() && (!this.wholeWordsOnly.booleanValue() || ((matchedRegionStart == 0 || !isWordInternal(content.charAt(matchedRegionStart - 1))) && (charIdx + 1 >= content.length() || !isWordInternal(content.charAt(charIdx + 1)))))) {
                    if (!this.longestMatchOnly.booleanValue() && lastMatchingState != null) {
                        // All matches are wanted: emit the shorter one before replacing it.
                        createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotations);
                    }
                    matchedRegionEnd = charIdx;
                    lastMatchingState = currentState;
                }
                charIdx++;
                if (charIdx == content.length()) {
                    // End of text reached mid-scan: emit and restart like a dead end.
                    if (lastMatchingState != null) {
                        createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotations);
                        lastMatchingState = null;
                    }
                    charIdx = matchedRegionStart + 1;
                    matchedRegionStart = charIdx;
                    currentState = this.initialState;
                }
            }
            if (charIdx - lastReportedIdx > 256) {
                // Multiply in long: 100 * charIdx overflows int beyond ~21.4 million characters.
                fireProgressChanged((int) ((100L * charIdx) / length));
                lastReportedIdx = charIdx;
                if (isInterrupted()) {
                    throw new ExecutionInterruptedException("The execution of the " + getName() + " gazetteer has been abruptly interrupted!");
                }
            }
        }
        if (lastMatchingState != null) {
            createLookups(lastMatchingState, matchedRegionStart, matchedRegionEnd, annotations);
        }
        fireProcessFinished();
        fireStatusChanged("Look-up complete!");
    }

    /**
     * Adds one annotation of the lookup annotation type to {@code annotationSet}
     * for every lookup attached to {@code fSMState}.
     *
     * <p>The feature map always holds the major type. Class and ontology are
     * added only when both are present; minor type and languages only when
     * present. Features carried by the lookup itself are copied in last, so
     * they override any of the former on a key clash.
     *
     * @param fSMState      the final state whose lookups are turned into annotations
     * @param j             offset of the first matched character
     * @param j2            offset of the last matched character (inclusive); the
     *                      annotation ends at {@code j2 + 1}
     * @param annotationSet the set receiving the annotations
     * @throws GateRuntimeException if the annotation set rejects the offsets; the
     *         original {@code InvalidOffsetException} is attached as the cause
     */
    protected void createLookups(FSMState fSMState, long j, long j2, AnnotationSet annotationSet) {
        // Iterate as Object and cast so this compiles whether getLookupSet()
        // is declared with a raw or a parameterised Set.
        for (Object element : fSMState.getLookupSet()) {
            Lookup lookup = (Lookup) element;
            FeatureMap newFeatureMap = Factory.newFeatureMap();
            newFeatureMap.put(ANNIEConstants.LOOKUP_MAJOR_TYPE_FEATURE_NAME, lookup.majorType);
            if (null != lookup.oClass && null != lookup.ontology) {
                newFeatureMap.put(ANNIEConstants.LOOKUP_CLASS_FEATURE_NAME, lookup.oClass);
                newFeatureMap.put(ANNIEConstants.LOOKUP_ONTOLOGY_FEATURE_NAME, lookup.ontology);
            }
            if (null != lookup.minorType) {
                newFeatureMap.put(ANNIEConstants.LOOKUP_MINOR_TYPE_FEATURE_NAME, lookup.minorType);
            }
            if (null != lookup.languages) {
                newFeatureMap.put(ANNIEConstants.LOOKUP_LANGUAGE_FEATURE_NAME, lookup.languages);
            }
            if (null != lookup.features) {
                newFeatureMap.putAll(lookup.features);
            }
            try {
                // Long.valueOf replaces the deprecated Long constructor.
                annotationSet.add(Long.valueOf(j), Long.valueOf(j2 + 1), ANNIEConstants.LOOKUP_ANNOTATION_TYPE, newFeatureMap);
            } catch (InvalidOffsetException e) {
                // Same exception type and message as before, now with the cause kept
                // so the original stack trace is not lost.
                GateRuntimeException wrapped = new GateRuntimeException(e.toString());
                wrapped.initCause(e);
                throw wrapped;
            }
        }
    }

    /**
     * Returns the lookups attached to the state reached by the text {@code str}.
     *
     * <p>Characters are normalised exactly as {@code addLookup} normalises
     * them: whitespace becomes a single space, and other characters are
     * upper-cased when the gazetteer is not case sensitive. Without the case
     * step, a case-insensitive gazetteer (which stores entries upper-cased)
     * would only answer queries that were already upper-case.
     *
     * @param str the text to look up
     * @return the lookup set of the reached state, or a new empty set when the
     *         text does not lead to a state
     */
    @Override // gate.creole.gazetteer.Gazetteer
    public Set lookup(String str) {
        FSMState state = this.initialState;
        for (int i = 0; i < str.length(); i++) {
            char raw = str.charAt(i);
            char key;
            if (Character.isWhitespace(raw)) {
                key = ' ';
            } else {
                key = this.caseSensitive.booleanValue() ? raw : Character.toUpperCase(raw);
            }
            state = state.next(key);
            if (state == null) {
                return new HashSet();
            }
        }
        Set found = state.getLookupSet();
        // NOTE(review): FSMState is defined elsewhere; if a state that never received a
        // lookup reports null here, answer with an empty set like the no-match case.
        return found != null ? found : new HashSet();
    }

    /**
     * Discards every lookup attached to the state reached by the text
     * {@code str} by replacing that state's lookup set with a new empty set.
     *
     * <p>Characters are normalised exactly as {@code addLookup} normalises
     * them: whitespace becomes a single space, and other characters are
     * upper-cased when the gazetteer is not case sensitive. Without the case
     * step, entries of a case-insensitive gazetteer could only be removed by
     * passing upper-case text.
     *
     * @param str the entry text
     * @return {@code true} when the text leads to a state (whether or not it held
     *         lookups), {@code false} otherwise
     */
    @Override // gate.creole.gazetteer.Gazetteer
    public boolean remove(String str) {
        FSMState state = this.initialState;
        for (int i = 0; i < str.length(); i++) {
            char raw = str.charAt(i);
            char key;
            if (Character.isWhitespace(raw)) {
                key = ' ';
            } else {
                key = this.caseSensitive.booleanValue() ? raw : Character.toUpperCase(raw);
            }
            FSMState next = state.next(key);
            if (next == null) {
                return false;
            }
            state = next;
        }
        state.lookupSet = new HashSet();
        return true;
    }

    /**
     * Adds {@code lookup} for the text {@code str} by delegating to
     * {@link #addLookup(String, Lookup)}.
     *
     * @param str    the entry text
     * @param lookup the lookup to attach
     * @return always {@code true}
     */
    @Override // gate.creole.gazetteer.Gazetteer
    public boolean add(String str, Lookup lookup) {
        this.addLookup(str, lookup);
        return true;
    }

    /**
     * Produces the duplicate of this gazetteer: a {@code SharedDefaultGazetteer}
     * resource that receives this instance as its bootstrap gazetteer, a
     * duplicate of this resource's features, and this resource's name.
     *
     * @param duplicationContext the context passed on when duplicating the features
     * @return the newly created resource
     * @throws ResourceInstantiationException if duplicating the features or creating
     *         the resource fails
     */
    @Override // gate.creole.CustomDuplication
    public Resource duplicate(Factory.DuplicationContext duplicationContext) throws ResourceInstantiationException {
        String sharedClassName = SharedDefaultGazetteer.class.getName();
        FeatureMap bootstrapParams = Utils.featureMap(SharedDefaultGazetteer.SDEF_GAZ_BOOTSTRAP_GAZETTEER_PROPERTY_NAME, this);
        FeatureMap copiedFeatures = Factory.duplicate(getFeatures(), duplicationContext);
        return Factory.createResource(sharedClassName, bootstrapParams, copiedFeatures, getName());
    }

    /**
     * Returns the configured feature separator, exactly as it was set
     * (unescaping only happens in {@code init()}).
     *
     * @return the separator string, possibly {@code null}
     */
    public String getGazetteerFeatureSeparator() {
        return gazetteerFeatureSeparator;
    }

    /**
     * Stores the feature separator. The value is only read by {@code init()},
     * so a change takes effect the next time the gazetteer is initialised.
     *
     * @param str the separator string
     */
    public void setGazetteerFeatureSeparator(String str) {
        gazetteerFeatureSeparator = str;
    }
}
