package org.exoplatform.services.document.impl;

import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.exoplatform.container.xml.InitParams;
import org.htmlparser.Parser;
import org.htmlparser.beans.StringBean;

/* loaded from: input_file:org/exoplatform/services/document/impl/HTMLDocumentReader.class */
/**
 * Document reader that extracts plain text from HTML content.
 *
 * <p>The markup is walked with the htmlparser {@link StringBean} visitor, and any tag-like
 * residue left in the extracted text is stripped by {@link #delete(String)}.
 */
public class HTMLDocumentReader extends BaseDocumentReader {
    /** Size, in bytes, of the chunk buffer used to drain the input stream. */
    private static final int READ_BUFFER_SIZE = 4096;

    /**
     * Matches one or more {@code '<'}, then any run of characters other than {@code '>'},
     * then one or more {@code '>'}. Compiled once instead of on every call; the flags are
     * the same value (34) that was previously passed as a bare integer.
     */
    private static final Pattern TAG_PATTERN =
            Pattern.compile("<+[^>]*>+", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /**
     * Creates the reader.
     *
     * @param initParams container parameters; not used by this reader
     */
    public HTMLDocumentReader(InitParams initParams) {
    }

    /**
     * Returns the MIME type handled by this reader.
     *
     * @return the constant {@code "text/html"}
     */
    public String getMimeTypes() {
        return "text/html";
    }

    /**
     * Reads the whole stream, parses it as HTML and returns the extracted text with any
     * remaining tag-like sequences removed.
     *
     * <p>This method is best-effort: any failure while reading or parsing (including a
     * {@code null} stream) yields an empty string rather than an exception. The stream is
     * not closed here.
     *
     * @param inputStream the HTML content to read
     * @return the extracted text, or {@code ""} when the content could not be processed
     * @throws Exception declared for interface compatibility; failures are currently
     *         swallowed and reported as an empty result
     */
    public String getContentAsText(InputStream inputStream) throws Exception {
        String text = "";
        try {
            // available() only estimates the bytes readable without blocking, and a single
            // read() may return fewer bytes than requested, so drain the stream until EOF.
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[READ_BUFFER_SIZE];
            int count;
            while ((count = inputStream.read(chunk)) != -1) {
                collected.write(chunk, 0, count);
            }
            // Decoded with the platform default charset, exactly as before.
            // NOTE(review): the document's real encoding is not known here - confirm
            // whether callers expect a specific charset.
            String html = new String(collected.toByteArray());

            StringBean stringBean = new StringBean();
            Parser parser = Parser.createParser(html, (String) null);
            // NOTE(review): the result of the first pass is discarded. The call sequence
            // is kept unchanged because how StringBean carries state between passes is
            // not visible from this file.
            parser.visitAllNodesWith(stringBean);
            stringBean.getStrings();
            stringBean.setLinks(true);
            parser.reset();
            parser.visitAllNodesWith(stringBean);
            // Pass the extracted text on directly: re-encoding it through the platform
            // charset would replace every unrepresentable character with '?'.
            text = delete(stringBean.getStrings());
        } catch (Exception e) {
            // Deliberately best-effort: unreadable or unparsable content is reported to
            // the caller as empty text instead of failing the whole operation.
        }
        return text;
    }

    /**
     * Removes every tag-like sequence (a run of {@code '<'}, any characters other than
     * {@code '>'}, and a run of {@code '>'}) from the given text.
     *
     * <p>A single left-to-right pass is sufficient: because the middle part of the pattern
     * also consumes {@code '<'}, each match starts at the first {@code '<'} still present,
     * so removing a match can never expose a new one earlier in the text.
     *
     * @param str the text to clean; may be {@code null}
     * @return the text without tag-like sequences, or {@code ""} when {@code str} is
     *         {@code null}
     */
    public String delete(String str) {
        if (str == null) {
            return "";
        }
        return TAG_PATTERN.matcher(str).replaceAll("");
    }
}
