package gate.creole.annic.lucene;

import com.hp.hpl.jena.util.FileManager;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.annotation.AnnotationSetImpl;
import gate.creole.annic.Constants;
import gate.creole.annic.apache.lucene.analysis.Token;
import gate.creole.annic.apache.lucene.document.Document;
import gate.creole.annic.apache.lucene.document.Field;
import gate.util.Err;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.springframework.beans.propertyeditors.StringArrayPropertyEditor;

/* loaded from: input_file:WEB-INF/lib/gate-core-6.1.jar:gate/creole/annic/lucene/LuceneDocument.class */
public class LuceneDocument {

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:WEB-INF/lib/gate-core-6.1.jar:gate/creole/annic/lucene/LuceneDocument$OffsetGroup.class */
    /**
     * Holder for a pair of document offsets delimiting one region whose
     * annotations are turned into a single token stream. Within this file it is
     * only populated by {@code getTokens}: either with the start/end offsets of
     * an index-unit annotation, or with 0 and the content size when the whole
     * document is treated as one region.
     */
    public class OffsetGroup {
        /** Offset at which the region starts. */
        Long startOffset;
        /** Offset at which the region ends. */
        Long endOffset;

        // Private constructor: instances are only created by the enclosing class.
        private OffsetGroup() {
        }
    }

    /**
     * Converts a GATE document into the Lucene documents to be indexed and
     * serializes the token stream of each of them to disk.
     *
     * <p>One Lucene document is produced per annotation set to index and per
     * index unit found in the GATE document (or one per annotation set when no
     * index unit applies).
     *
     * @param str        corpus id; stored under {@code Constants.CORPUS_ID} when non-null
     * @param document   the GATE document to index
     * @param str2       document id; stored under {@code "DOCUMENT_ID"} and used,
     *                   suffixed with {@code "-" + serial}, as the serialized file id
     * @param arrayList  names of the annotation sets to index; when non-empty it is
     *                   used as is and {@code arrayList2} is ignored
     * @param arrayList2 names of the annotation sets to skip (only consulted when
     *                   {@code arrayList} is empty)
     * @param arrayList3 include list handed to {@code getTokens}
     * @param arrayList4 exclude list handed to {@code getTokens}
     * @param str3       index location handed to {@code writeOnDisk}
     * @param str4       base token annotation type, optionally written as
     *                   {@code setName.type}; may be null or empty
     * @param bool       whether whitespace-separated tokens may be generated when no
     *                   base tokens are found; {@code null} is treated as {@code false}
     * @param str5       index unit annotation type, optionally written as
     *                   {@code setName.type}; may be null or empty
     * @return the Lucene documents, or {@code null} when the document is ignored
     *         (no base tokens available, an annotation set yields no tokens, or a
     *         token stream cannot be written to disk)
     */
    public List<Document> createDocuments(String str, gate.Document document, String str2, ArrayList<String> arrayList, ArrayList<String> arrayList2, ArrayList<String> arrayList3, ArrayList<String> arrayList4, String str3, String str4, Boolean bool, String str5) {
        if (str4 != null) {
            str4 = str4.trim();
        }
        List<String> setNames = resolveAnnotationSetNames(document, arrayList, arrayList2);

        // ---- locate the base token annotations ----
        AnnotationSet baseTokens = null;
        boolean searchOtherSets = true;
        int dot = (str4 != null && str4.length() > 0) ? str4.lastIndexOf('.') : -1;
        if (dot >= 0) {
            String setName = str4.substring(0, dot);
            str4 = str4.substring(dot + 1);
            baseTokens = annotationsOfType(document, setName, str4);
            if (isNullOrEmpty(baseTokens)) {
                System.err.println("Base Tokens " + str4 + " counldn't be found under the specified annotation set " + setName + "\n searching them in other annotation sets");
            } else {
                searchOtherSets = false;
            }
        }
        boolean createManually = str4 == null || str4.length() == 0;
        if (!createManually && searchOtherSets) {
            AnnotationSet found = findFirstNonEmpty(document, setNames, str4);
            if (found != null) {
                baseTokens = found;
            } else {
                createManually = true;
            }
        }
        if (createManually) {
            // A null flag used to fail with a NullPointerException; treat it as "do not create".
            if (!Boolean.TRUE.equals(bool)) {
                System.out.println("Tokens couldn't be found in the document - Ignoring the document " + document.getName());
                return null;
            }
            str4 = "Token";
            // Generated tokens always go into a fresh set: an empty set left over from
            // a failed lookup is a query result and may not accept additions
            // (NOTE(review): confirm against the AnnotationSet implementation in use).
            if (isNullOrEmpty(baseTokens)) {
                baseTokens = new AnnotationSetImpl(document);
            }
            if (!createTokens(document, baseTokens)) {
                System.out.println("Tokens couldn't be created manually - Ignoring the document " + document.getName());
                return null;
            }
        }

        // ---- locate the index unit annotations ----
        AnnotationSet indexUnits = null;
        boolean searchUnitsElsewhere = true;
        int unitDot = (str5 != null && str5.trim().length() > 0) ? str5.lastIndexOf('.') : -1;
        if (unitDot >= 0) {
            String setName = str5.substring(0, unitDot);
            str5 = str5.substring(unitDot + 1);
            indexUnits = annotationsOfType(document, setName, str5);
            if (isNullOrEmpty(indexUnits)) {
                System.err.println("Index Unit " + str5 + " counldn't be found under the specified annotation set " + setName + "\n searching them in other annotation sets");
            } else {
                searchUnitsElsewhere = false;
            }
        }
        if (str5 != null && str5.length() > 0 && searchUnitsElsewhere) {
            AnnotationSet found = findFirstNonEmpty(document, setNames, str5);
            if (found != null) {
                indexUnits = found;
            }
        }
        if (indexUnits == null) {
            str5 = null;
        }

        // ---- build one Lucene document per annotation set and token stream ----
        List<Document> luceneDocuments = new ArrayList<Document>();
        int serial = 0; // runs across all annotation sets so every serialized file id is unique
        for (String setName : setNames) {
            AnnotationSet toIndex = setName.equals(Constants.DEFAULT_ANNOTATION_SET_NAME) ? document.getAnnotations() : document.getAnnotations(setName);
            Set<String> indexedFeatures = new HashSet<String>();
            ArrayList<Token>[] tokenStreams = getTokens(document, toIndex, arrayList3, arrayList4, str4, baseTokens, str5, indexUnits, indexedFeatures);
            if (tokenStreams == null) {
                return null;
            }
            String featureList = joinIndexedFeatures(indexedFeatures);
            for (ArrayList<Token> tokenStream : tokenStreams) {
                String serializedFileId = str2 + "-" + serial;
                Document luceneDocument = new Document();
                LuceneReader luceneReader = new LuceneReader(document, tokenStream);
                luceneDocument.add(Field.Keyword("DOCUMENT_ID", str2));
                luceneDocument.add(Field.Keyword(Constants.DOCUMENT_ID_FOR_SERIALIZED_FILE, serializedFileId));
                luceneDocument.add(Field.Keyword(Constants.INDEXED_FEATURES, featureList));
                if (str != null) {
                    luceneDocument.add(Field.Keyword(Constants.CORPUS_ID, str));
                }
                luceneDocument.add(Field.Keyword(Constants.ANNOTATION_SET_ID, setName));
                luceneDocument.add(Field.Text(org.apache.xalan.templates.Constants.ELEMNAME_CONTENTS_STRING, luceneReader));
                try {
                    writeOnDisk(tokenStream, str2, serializedFileId, str3);
                } catch (Exception e) {
                    Err.println("\nIgnoring the document : " + document.getName() + " since its token stream cannot be written on the disk");
                    Err.println("Reason: " + e.getMessage());
                    return null;
                }
                luceneDocuments.add(luceneDocument);
                serial++;
            }
        }
        return luceneDocuments;
    }

    /**
     * Works out which annotation sets are indexed: the include list when it is
     * non-empty, otherwise every named set of the document plus the default set,
     * minus the names on the exclude list.
     */
    private static List<String> resolveAnnotationSetNames(gate.Document document, List<String> included, List<String> excluded) {
        if (included.size() > 0) {
            return included;
        }
        List<String> names = new ArrayList<String>();
        if (document.getNamedAnnotationSets() != null && document.getNamedAnnotationSets().keySet() != null) {
            // Copy the names so that later getAnnotations(name) calls cannot disturb the iteration.
            for (String name : document.getNamedAnnotationSets().keySet()) {
                if (!excluded.contains(name)) {
                    names.add(name);
                }
            }
        }
        if (!excluded.contains(Constants.DEFAULT_ANNOTATION_SET_NAME)) {
            names.add(Constants.DEFAULT_ANNOTATION_SET_NAME);
        }
        return names;
    }

    /** Fetches the annotations of {@code type} from the named set, or from the default set. */
    private static AnnotationSet annotationsOfType(gate.Document document, String setName, String type) {
        return setName.equals(Constants.DEFAULT_ANNOTATION_SET_NAME) ? document.getAnnotations().get(type) : document.getAnnotations(setName).get(type);
    }

    /** Returns the first non-empty lookup of {@code type} among {@code setNames}, or {@code null}. */
    private static AnnotationSet findFirstNonEmpty(gate.Document document, List<String> setNames, String type) {
        for (String setName : setNames) {
            AnnotationSet candidate = annotationsOfType(document, setName, type);
            if (!isNullOrEmpty(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /** Tells whether a lookup produced no annotations at all. */
    private static boolean isNullOrEmpty(AnnotationSet annotations) {
        return annotations == null || annotations.size() == 0;
    }

    /**
     * Joins the indexed feature names with {@code FileManager.PATH_DELIMITER}.
     * Yields an empty string for an empty set, where the previous
     * "append delimiter, then chop the last character" code threw a
     * StringIndexOutOfBoundsException.
     */
    private static String joinIndexedFeatures(Set<String> features) {
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (String feature : features) {
            if (!first) {
                joined.append(FileManager.PATH_DELIMITER);
            }
            joined.append(feature);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Splits the document content on whitespace and adds one {@code "Token"}
     * annotation, carrying the covered text in its {@code "string"} feature, for
     * every run of non-whitespace characters.
     *
     * <p>A run that {@link String#trim()} reduces to nothing (i.e. made only of
     * control characters at or below U+0020 that
     * {@link Character#isWhitespace(char)} does not report as whitespace) gets no
     * annotation, but still counts as text having been found.
     *
     * @param document      document whose content is tokenised
     * @param annotationSet set receiving the generated annotations
     * @return {@code false} when the content holds no non-whitespace character or
     *         an annotation is rejected with an InvalidOffsetException;
     *         {@code true} otherwise
     */
    private boolean createTokens(gate.Document document, AnnotationSet annotationSet) {
        final String text = document.getContent().toString();
        final int length = text.length();
        boolean foundText = false;
        int cursor = 0;
        while (cursor < length) {
            if (Character.isWhitespace(text.charAt(cursor))) {
                cursor++;
                continue;
            }
            // cursor sits on the first character of a run; advance to the end of the run
            final int runStart = cursor;
            do {
                cursor++;
            } while (cursor < length && !Character.isWhitespace(text.charAt(cursor)));
            foundText = true;

            final String word = text.substring(runStart, cursor);
            if (word.trim().length() == 0) {
                continue;
            }
            FeatureMap features = Factory.newFeatureMap();
            features.put("string", word);
            try {
                annotationSet.add(Long.valueOf(runStart), Long.valueOf(cursor), "Token", features);
            } catch (InvalidOffsetException e) {
                e.printStackTrace();
                return false;
            }
        }
        return foundText;
    }

    /**
     * Turns an identifier into a string usable as a file or directory name by
     * replacing every slash, colon, asterisk, question mark, double quote, angle
     * bracket and pipe character with an underscore.
     *
     * <p>NOTE(review): the pattern's leading escape applies to the slash, so a
     * literal backslash is NOT replaced. This looks unintended, but any code that
     * derives file names with the same pattern must be changed in step before
     * altering it here.
     *
     * @param str the identifier to sanitise
     * @return the identifier with the listed characters replaced by {@code _}
     */
    private String getCompatibleName(String str) {
        final String unsafeCharacters = "[\\/:\\*\\?\"<>|]";
        final String substitute = "_";
        return str.replaceAll(unsafeCharacters, substitute);
    }

    /**
     * Serializes a token stream to the file
     * {@code <index location>/<Constants.SERIALIZED_FOLDER_NAME>/<document id>/<serialized file id>.annic},
     * creating the two directories when they do not exist yet. Both ids are
     * passed through {@code getCompatibleName} before being used as names.
     *
     * @param arrayList the token stream to serialize
     * @param str       document id, used as the per-document directory name
     * @param str2      serialized file id, used as the file name (without extension)
     * @param str3      index location; a leading {@code "file:/"} is stripped, and a
     *                  {@code "/"} is prepended unless the second character is
     *                  {@code ':'} (presumably a drive-letter path; verify against callers)
     * @throws IOException when one of the directories cannot be created or the
     *                     stream cannot be written
     * @throws Exception   declared for callers; anything else raised while preparing
     *                     the path propagates unchanged
     */
    private void writeOnDisk(ArrayList arrayList, String str, String str2, String str3) throws Exception {
        String compatibleName = getCompatibleName(str2);
        String compatibleName2 = getCompatibleName(str);
        if (str3.startsWith("file:/")) {
            str3 = str3.substring(6, str3.length());
        }
        if (str3.charAt(1) != ':') {
            str3 = "/" + str3;
        }
        File serializedFolder = new File(new File(str3), Constants.SERIALIZED_FOLDER_NAME);
        if (!serializedFolder.exists()) {
            serializedFolder.mkdirs();
        }
        if (!serializedFolder.exists()) {
            throw new IOException("Directory could not be created :" + serializedFolder.getAbsolutePath());
        }
        File documentFolder = new File(serializedFolder, compatibleName2);
        if (!documentFolder.exists()) {
            documentFolder.mkdirs();
        }
        if (!documentFolder.exists()) {
            throw new IOException("Directory could not be created :" + documentFolder.getAbsolutePath());
        }
        File target = new File(documentFolder, compatibleName + ".annic");
        // try-with-resources: the file handle is released even when serialization
        // (or building the object stream) fails, which the previous code leaked.
        try (FileOutputStream fileOut = new FileOutputStream(target);
             ObjectOutputStream objectOut = new ObjectOutputStream(new BufferedOutputStream(fileOut))) {
            objectOut.writeObject(arrayList);
        }
    }

    /**
     * Builds the token streams for one annotation set: one stream per index unit
     * annotation, or a single stream spanning the whole content when no index
     * unit applies.
     *
     * <p>For every annotation kept, the stream receives a token for its type
     * (marked {@code "*"}), a {@code type.string} token with the covered text
     * (unless the annotation is a base token that already has a {@code "string"}
     * feature), and, per non-null feature, a value token plus a
     * {@code type.feature} token marked {@code "**"}. Annotations starting at the
     * same offset as the previous one in the sorted list share its position.
     *
     * @param document       the GATE document being indexed
     * @param annotationSet  the annotation set to index
     * @param arrayList      include list of types / {@code type.feature} names; when
     *                       non-empty only listed entries are kept
     * @param arrayList2     exclude list, consulted only when the include list is empty
     * @param str            base token annotation type
     * @param annotationSet2 base token annotations (may be null or empty)
     * @param str2           index unit annotation type (may be null or blank)
     * @param annotationSet3 index unit annotations (may be null or empty)
     * @param set            receives every {@code type.feature} name that was tokenised
     * @return one token list per region, or {@code null} as soon as a region
     *         contains no annotation
     */
    private ArrayList<Token>[] getTokens(gate.Document document, AnnotationSet annotationSet, ArrayList<String> arrayList, ArrayList<String> arrayList2, String str, AnnotationSet annotationSet2, String str2, AnnotationSet annotationSet3, Set<String> set) {
        // The include list takes precedence; the exclude list only matters without it.
        final boolean includeOnly = !arrayList.isEmpty();
        final boolean excludeSome = !includeOnly && !arrayList2.isEmpty();

        // Regions to tokenise: the index units, or the whole content.
        final boolean haveIndexUnits = str2 != null && str2.trim().length() != 0 && annotationSet3 != null && annotationSet3.size() != 0;
        Set<OffsetGroup> regions = new HashSet<OffsetGroup>();
        if (haveIndexUnits) {
            for (Annotation unit : annotationSet3) {
                OffsetGroup region = new OffsetGroup();
                region.startOffset = unit.getStartNode().getOffset();
                region.endOffset = unit.getEndNode().getOffset();
                regions.add(region);
            }
        } else {
            OffsetGroup wholeDocument = new OffsetGroup();
            wholeDocument.startOffset = Long.valueOf(0L);
            wholeDocument.endOffset = document.getContent().size();
            regions.add(wholeDocument);
        }

        // Types that can be indexed: names containing a reserved separator are rejected.
        Set<String> indexableTypes = new HashSet<String>();
        for (String typeName : annotationSet.getAllTypes()) {
            boolean hasReservedCharacter = typeName.contains(".") || typeName.contains("=") || typeName.contains(FileManager.PATH_DELIMITER) || typeName.contains(StringArrayPropertyEditor.DEFAULT_SEPARATOR);
            if (hasReservedCharacter) {
                System.err.println("Annotations of type " + typeName + " cannot be indexed as the type name contains one of the ., =, or ; character");
                continue;
            }
            indexableTypes.add(typeName);
        }
        // Base tokens and index units are supplied through their own sets, so they
        // are not copied from the set being indexed.
        if (annotationSet2 != null && annotationSet2.size() > 0) {
            indexableTypes.remove(str);
        }
        if (annotationSet3 != null && annotationSet3.size() > 0) {
            indexableTypes.remove(str2);
        }

        // Working copy holding only the indexable annotations.
        AnnotationSetImpl workingCopy = new AnnotationSetImpl(document);
        for (String typeName : indexableTypes) {
            for (Annotation source : annotationSet.get(typeName)) {
                try {
                    workingCopy.add(source.getStartNode().getOffset(), source.getEndNode().getOffset(), source.getType(), source.getFeatures());
                } catch (InvalidOffsetException e) {
                    throw new GateRuntimeException(e);
                }
            }
        }

        ArrayList<Token>[] tokenStreams = new ArrayList[regions.size()];
        int slot = 0;
        for (OffsetGroup region : regions) {
            List<Annotation> contained = new ArrayList<Annotation>(workingCopy.getContained(region.startOffset, region.endOffset));
            if (annotationSet2 != null && annotationSet2.size() != 0) {
                contained.addAll(annotationSet2.getContained(region.startOffset, region.endOffset));
            }
            if (contained.size() == 0) {
                return null;
            }
            Collections.sort(contained, new OffsetComparator());

            ArrayList<Token> stream = new ArrayList<Token>();
            int position = -1;
            for (int index = 0; index < contained.size(); index++) {
                Annotation current = contained.get(index);
                String type = current.getType();
                if (excludeSome && arrayList2.contains(type)) {
                    continue;
                }
                if (includeOnly && !arrayList.contains(type)) {
                    continue;
                }
                int start = current.getStartNode().getOffset().intValue();
                int end = current.getEndNode().getOffset().intValue();
                String coveredText = document.getContent().toString().substring(start, end);

                // Token for the annotation type itself.
                Token typeToken = new Token(type, start, end, "*");
                int increment = 1;
                if (index > 0) {
                    long previousStart = contained.get(index - 1).getStartNode().getOffset().longValue();
                    if (current.getStartNode().getOffset().longValue() == previousStart) {
                        // same start as the previous annotation: stay on its position
                        typeToken.setPositionIncrement(0);
                        increment = 0;
                    }
                }
                position += increment;
                typeToken.setPosition(position);
                stream.add(typeToken);

                // Token for the covered text, unless a base token already provides "string".
                if (!type.equals(str) || current.getFeatures().get("string") == null) {
                    String stringFeature = type + ".string";
                    Token textToken = new Token(coveredText, start, end, stringFeature);
                    set.add(stringFeature);
                    textToken.setPositionIncrement(0);
                    textToken.setPosition(position);
                    stream.add(textToken);
                }

                // Tokens for every feature that passes the include/exclude lists.
                FeatureMap features = current.getFeatures();
                for (Object key : features.keySet()) {
                    String qualifiedName = type + "." + (String) key;
                    if (excludeSome && arrayList2.contains(qualifiedName)) {
                        continue;
                    }
                    if (includeOnly && !arrayList.contains(qualifiedName)) {
                        continue;
                    }
                    Object value = features.get(key);
                    if (value == null) {
                        continue;
                    }
                    Token valueToken = new Token(value.toString(), start, end, qualifiedName);
                    set.add(qualifiedName);
                    valueToken.setPositionIncrement(0);
                    valueToken.setPosition(position);
                    stream.add(valueToken);

                    Token featureNameToken = new Token(qualifiedName, start, end, "**");
                    featureNameToken.setPosition(position);
                    featureNameToken.setPositionIncrement(0);
                    stream.add(featureNameToken);
                }
            }
            tokenStreams[slot] = stream;
            slot++;
        }
        return tokenStreams;
    }
}
