/*
 * Decompiled with CFR 0.152.
 */
package com.ontotext.trree.plugin.similarity.text;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.google.common.annotations.VisibleForTesting;
import com.ontotext.trree.plugin.similarity.FlagConfigUtils;
import com.ontotext.trree.plugin.similarity.SimilarityIndex;
import com.ontotext.trree.plugin.similarity.SimilarityIndexes;
import com.ontotext.trree.plugin.similarity.text.SimilarityTextIndexWriter;
import com.ontotext.trree.sdk.PluginException;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import pitt.search.semanticvectors.BuildIndex;
import pitt.search.semanticvectors.DocVectors;
import pitt.search.semanticvectors.ElementalVectorStore;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.LuceneUtils;
import pitt.search.semanticvectors.Search;
import pitt.search.semanticvectors.SearchResult;
import pitt.search.semanticvectors.VectorStoreUtils;
import pitt.search.semanticvectors.lsh.LSHStoreFactory;

/**
 * Text flavour of {@link SimilarityIndex}.
 *
 * <p>Documents are written through a {@link SimilarityTextIndexWriter}, the vector stores are
 * produced by {@link BuildIndex#buildIndex} and queries are answered by {@link Search#runSearch}.
 * Search terms may carry a boost in the form {@code term^weight}; a literal caret is written as
 * {@code \^}.
 */
public class SimilarityTextIndex
extends SimilarityIndex<SimilarityTextIndexWriter> {
    private static final Logger LOGGER = LoggerFactory.getLogger(SimilarityTextIndex.class);

    /** Separates a search term from its boost weight ({@code term^weight}). */
    public static final char CARET = '^';

    /** Placed directly before a {@link #CARET} to make it part of the term. */
    public static final char ESCAPE_CHAR = '\\';

    /** Flags that {@link #setConfig(String)} refuses to accept from the caller and sets itself. */
    private static final String[] IMMUTABLE_ARGUMENTS = new String[]{"luceneindexpath", "elementalmethod", "docindexing"};

    public static final String TEXT_TYPE = "text";
    public static final String LITERAL_TYPE = "textLiteral";

    /** Analyzer class chosen through {@link #setAnalyzer(String)}; {@code null} selects {@link StandardAnalyzer}. */
    @JsonInclude(value=JsonInclude.Include.NON_NULL)
    private Class<? extends Analyzer> analyzer;

    /** Lazily created by {@link #createAnalyzerInst()} and never replaced once set. */
    private Analyzer analyzerInst;

    /** Stop words handed to the analyzer and to the vector build; starts as Lucene's English stop set. */
    private List<String> stopList = StopAnalyzer.ENGLISH_STOP_WORDS_SET.stream().map(s -> new String((char[])s)).collect(Collectors.toList());

    /** Creates an index without name or parameters. */
    public SimilarityTextIndex() {
    }

    /**
     * Creates a named index of type {@link #TEXT_TYPE}.
     *
     * @param name the index name
     * @param initialConfigParams the flag string the index is created with
     */
    public SimilarityTextIndex(String name, String initialConfigParams) {
        super(name, initialConfigParams, TEXT_TYPE);
    }

    /**
     * Runs the base initialization, switches the type to {@link #LITERAL_TYPE} when the
     * configuration asks for a literal index, and migrates a legacy {@code stoplist.txt}.
     *
     * <p>When the legacy file exists its space-separated content becomes the stop list, the file
     * is deleted and the index is serialized again. An {@link IOException} raised during that
     * migration is logged and not propagated.
     *
     * @param parentDir passed through to the base class
     * @param overwrite passed through to the base class
     * @throws IOException if the base initialization fails
     */
    @Override
    public void initialize(File parentDir, boolean overwrite) throws IOException {
        super.initialize(parentDir, overwrite);
        if (this.config.literal_index()) {
            this.type = LITERAL_TYPE;
        }
        File oldStopListFile = this.getOldStopListFile();
        if (!oldStopListFile.exists()) {
            return;
        }
        try {
            String[] stopWords = FileUtils.readFileToString(oldStopListFile).split(" ");
            this.setStopWords(Arrays.asList(stopWords));
            if (!oldStopListFile.delete()) {
                // Not fatal: the file is simply migrated again on the next initialization.
                LOGGER.warn("Could not delete legacy stop list file {}", oldStopListFile);
            }
            SimilarityIndexes.serialize(this);
        }
        catch (IOException e) {
            LOGGER.error("Could not init stopwords", e);
        }
    }

    /**
     * Replaces the current writer with a fresh one over {@code indexDir}.
     *
     * <p>A previously open writer is closed first on a best-effort basis; a failure to close it
     * is logged and does not prevent the new writer from being created.
     *
     * @param indexDir directory handed to the new {@link SimilarityTextIndexWriter}
     * @throws IOException if the new writer cannot be created
     */
    @Override
    protected void initializeWriter(File indexDir) throws IOException {
        if (this.writer != null) {
            try {
                ((SimilarityTextIndexWriter)this.writer).close();
            }
            catch (Exception e) {
                LOGGER.warn("Could not close the previous writer of text index {}", this.getName(), e);
            }
        }
        this.createAnalyzerInst();
        this.writer = new SimilarityTextIndexWriter(indexDir, this.analyzerInst);
    }

    /**
     * Builds the vector stores for {@code config}.
     *
     * <p>A non-empty stop list is first copied into the {@code stopWordsList} flag. After a
     * successful build, and only when {@code lsh_hashes_num} differs from
     * {@code lsh_max_bits_diff}, the LSH stores for the term and document vector files are
     * requested from {@link LSHStoreFactory}.
     *
     * @param config the flags to build with
     * @return the outcome reported by {@link BuildIndex#buildIndex}
     * @throws PluginException if the stop word flag cannot be set
     * @throws IOException declared by the overridden method
     */
    @Override
    public boolean buildVectorsIndex(FlagConfig config) throws IOException {
        if (!this.stopList.isEmpty()) {
            try {
                FlagConfigUtils.setFlagValue(config, "stopWordsList", this.stopList);
            }
            catch (IllegalAccessException | NoSuchFieldException e) {
                LOGGER.error("Could not set list of stop words", e);
                throw new PluginException("Could not set list of stop words", e);
            }
        }
        if (!BuildIndex.buildIndex(config, (AtomicBoolean)this.interruptIndexCreation)) {
            return false;
        }
        if (config.lsh_hashes_num() != config.lsh_max_bits_diff()) {
            String termStoreName = VectorStoreUtils.getStoreFileName(config.termvectorsfile(), config);
            String docStoreName = VectorStoreUtils.getStoreFileName(config.docvectorsfile(), config);
            // The returned stores are not used here; presumably the call materializes them - TODO confirm.
            LSHStoreFactory.INSTANCE.getStore(new File(termStoreName), config);
            LSHStoreFactory.INSTANCE.getStore(new File(docStoreName), config);
        }
        return true;
    }

    /** @return the stop word list itself, not a copy */
    public List<String> getStopWords() {
        return this.stopList;
    }

    /**
     * Replaces the stop word list. An analyzer instance that already exists is not rebuilt.
     *
     * @param stopWords the new list, stored by reference
     */
    public void setStopWords(List<String> stopWords) {
        this.stopList = stopWords;
    }

    /** @return the legacy {@code stoplist.txt} location inside the index home directory */
    private File getOldStopListFile() {
        return new File(this.getHomeDir(), "stoplist.txt");
    }

    /** @return the configured analyzer class, or {@code null} when none was set */
    public Class<? extends Analyzer> getAnalyzer() {
        return this.analyzer;
    }

    /**
     * Selects the analyzer by class name. An analyzer instance that already exists is not rebuilt.
     *
     * <p>The class is not checked to be an {@link Analyzer} here; if it cannot be instantiated as
     * one, {@link #createAnalyzerInst()} logs the failure and uses {@link StandardAnalyzer}.
     *
     * @param analyzerClass fully qualified class name
     * @throws PluginException if the class cannot be found
     */
    public void setAnalyzer(String analyzerClass) {
        try {
            // Unchecked on purpose: rejecting non-analyzer classes here would change the
            // existing fall-back behaviour described above.
            @SuppressWarnings("unchecked")
            Class<? extends Analyzer> resolved = (Class<? extends Analyzer>)Class.forName(analyzerClass);
            this.analyzer = resolved;
        }
        catch (ClassNotFoundException e) {
            throw new PluginException("Could not find class: " + e.getMessage(), e);
        }
    }

    /**
     * Creates {@link #analyzerInst} once; later calls are no-ops.
     *
     * <p>A configured analyzer class is instantiated through its {@code (CharArraySet)}
     * constructor, or through its no-argument constructor when the former does not exist. Without
     * a configured class, or when instantiation fails, a {@link StandardAnalyzer} over the stop
     * list is used.
     */
    private void createAnalyzerInst() {
        if (this.analyzerInst != null) {
            return;
        }
        CharArraySet stopWords = new CharArraySet(this.stopList, false);
        if (this.analyzer == null) {
            this.analyzerInst = new StandardAnalyzer(stopWords);
            return;
        }
        try {
            try {
                Constructor<? extends Analyzer> constructor = this.analyzer.getConstructor(CharArraySet.class);
                this.analyzerInst = constructor.newInstance(stopWords);
            }
            catch (NoSuchMethodException e) {
                LOGGER.debug("Analyzer does not support stop words.");
                this.analyzerInst = this.analyzer.getConstructor().newInstance();
            }
        }
        catch (Exception e) {
            LOGGER.error("Could not initialize analyzer properly. Will use StandardAnalyzer.", e);
            this.analyzerInst = new StandardAnalyzer(stopWords);
        }
    }

    /**
     * Searches the index.
     *
     * <p>The search flags are parsed from {@code configurableArguments} and completed with the
     * index's own LSH settings (unless overridden), its Lucene path and the query/search vector
     * files selected by {@code searchType}. The search string is split on single spaces, boosts
     * are stripped and the terms are run through the analyzer before {@link Search#runSearch}.
     *
     * @param searchType decides whether query and result side use term or document vectors
     * @param searchString space-separated terms, optionally boosted as {@code term^weight}
     * @param configurableArguments extra search flags
     * @return the search results, or an empty list when the index is not searchable
     * @throws PluginException if a flag cannot be set or a term is malformed
     */
    @Override
    public List<SearchResult> search(SimilarityIndex.SearchType searchType, String searchString, String configurableArguments) {
        if (!this.isIndexSearchable()) {
            LOGGER.error("The text index {} could not be searched, because its status is: {}", this.getName(), this.getStatus());
            return new ArrayList<>();
        }
        FlagConfig searchConfiguration = FlagConfigUtils.fromString(configurableArguments);
        try {
            // The first concurrent search takes the read lock; the last one out releases it.
            if (this.activeSearchesCounter.incrementAndGet() == 1) {
                this.specialStamp.getAndSet(this.activeSearchLock.readLock());
            }
            if (!configurableArguments.contains("lsh_hashes_num")) {
                FlagConfigUtils.setFlagValue(searchConfiguration, "lsh_hashes_num", this.config.lsh_hashes_num());
            }
            if (!configurableArguments.contains("lsh_max_bits_diff")) {
                FlagConfigUtils.setFlagValue(searchConfiguration, "lsh_max_bits_diff", this.config.lsh_max_bits_diff());
            }
            String luceneIndexPath = this.config.luceneindexpath();
            String queryVectorFile = searchType.fromTerm() ? this.config.termvectorsfile() : this.config.docvectorsfile();
            String searchVectorFile = searchType.toTerm() ? this.config.termvectorsfile() : this.config.docvectorsfile();
            FlagConfigUtils.setFlagValue(searchConfiguration, "luceneindexpath", luceneIndexPath);
            FlagConfigUtils.setFlagValue(searchConfiguration, "queryvectorfile", queryVectorFile);
            FlagConfigUtils.setFlagValue(searchConfiguration, "searchvectorfile", searchVectorFile);
            FlagConfigUtils.setFlagValue(searchConfiguration, "remainingArgs", searchString.split(" "));
            if (!searchType.fromTerm()) {
                FlagConfigUtils.setFlagValue(searchConfiguration, "termweight", LuceneUtils.TermWeight.NONE);
            }
            // Locale.ROOT keeps the flag detection independent of the JVM's default locale.
            boolean createdAsText = this.initialConfigParams.toLowerCase(Locale.ROOT).contains("-indexfileformat text");
            boolean searchedAsText = configurableArguments.toLowerCase(Locale.ROOT).contains("-indexfileformat text");
            if (createdAsText && !searchedAsText) {
                LOGGER.info("Set indexfileformat ot text, found in create index parameters and not in search parameters.");
                FlagConfigUtils.setFlagValue(searchConfiguration, "indexfileformat", VectorStoreUtils.VectorStoreFormat.TEXT);
            }
            HashMap<String, Float> boostedTerms = new HashMap<>();
            this.preprocessSearchArguments(searchConfiguration, boostedTerms);
            return Search.runSearch(searchConfiguration, boostedTerms);
        }
        catch (IllegalAccessException | NoSuchFieldException e) {
            throw new PluginException("You should not see this. Error when setting search arguments", e);
        }
        finally {
            if (this.activeSearchesCounter.decrementAndGet() == 0) {
                this.activeSearchLock.unlockRead(this.specialStamp.getAndSet(0L));
            }
        }
    }

    /**
     * Strips boosts from {@code flagConfig.remainingArgs}, collects them into
     * {@code boostedTerms} and replaces the arguments with their analyzed tokens.
     *
     * @param flagConfig search flags whose {@code remainingArgs} are rewritten
     * @param boostedTerms receives the token-to-weight mappings
     */
    protected void preprocessSearchArguments(FlagConfig flagConfig, Map<String, Float> boostedTerms) {
        this.createAnalyzerInst();
        boostedTerms.putAll(SimilarityTextIndex.stripSearchTermsFromBoostingSymbol(flagConfig, this));
        flagConfig.remainingArgs = this.getTokenizedSearchArguments(flagConfig);
    }

    /**
     * Joins {@code flagConfig.remainingArgs} with spaces and runs the result through the analyzer.
     *
     * @param flagConfig search flags providing the arguments
     * @return the tokens in analyzer order
     */
    protected String[] getTokenizedSearchArguments(FlagConfig flagConfig) {
        String original = String.join(" ", flagConfig.remainingArgs);
        return this.getTokenizedString(original).toArray(new String[0]);
    }

    /**
     * Removes boost suffixes and caret escapes from {@code flagConfig.remainingArgs} in place.
     *
     * <p>For every argument the carets are visited left to right. An escaped caret ({@code \^})
     * loses its escape character and stays in the term. The first unescaped caret ends the term:
     * what follows must parse as a non-negative float and becomes the weight of every token the
     * analyzer produces for the term. Whether a term was seen before is decided by looking up the
     * raw term (not its tokens) in the result map; on a repeat the first weight is kept and a
     * differing weight is reported with a warning.
     *
     * @param flagConfig search flags whose {@code remainingArgs} are rewritten
     * @param idx index whose analyzer tokenizes the boosted terms
     * @return token-to-weight mappings
     * @throws PluginException if a term starts with an unescaped caret, or the weight is not a
     *     valid or is a negative number
     */
    public static Map<String, Float> stripSearchTermsFromBoostingSymbol(FlagConfig flagConfig, SimilarityTextIndex idx) {
        Map<String, Float> boostedTerms = new HashMap<>();
        for (int i = 0; i < flagConfig.remainingArgs.length; ++i) {
            String term = flagConfig.remainingArgs[i];
            int caretPosition = -1;
            while ((caretPosition = term.indexOf(CARET, caretPosition + 1)) != -1) {
                if (caretPosition == 0) {
                    throw new PluginException("Term can not start with unescaped caret: " + term);
                }
                if (term.charAt(caretPosition - 1) == ESCAPE_CHAR) {
                    // Drop the escape character; the caret moves one position to the left, so
                    // step back to resume the scan right after it.
                    term = term.substring(0, caretPosition - 1) + term.substring(caretPosition);
                    flagConfig.remainingArgs[i] = term;
                    --caretPosition;
                    continue;
                }
                String actualTerm = term.substring(0, caretPosition);
                Float weight;
                try {
                    weight = Float.valueOf(term.substring(caretPosition + 1));
                }
                catch (NumberFormatException e) {
                    throw new PluginException("There is either an unescaped caret or the weight for term is not a valid number: " + term + ". Either escape the caret using '\\' or set the weight to be e valid number.", e);
                }
                if (weight < 0.0f) {
                    throw new PluginException("Weight can not be negative: " + term);
                }
                Float currentValue = boostedTerms.get(actualTerm);
                if (currentValue == null) {
                    for (String token : idx.getTokenizedString(actualTerm)) {
                        boostedTerms.put(token, weight);
                    }
                } else if (!currentValue.equals(weight)) {
                    // Value comparison: identical weights on a repeated term are not a conflict.
                    LOGGER.warn("Term {} is set with multiple weights. Using the first weight occurrence: {}", term, currentValue);
                }
                flagConfig.remainingArgs[i] = actualTerm;
                // Everything after the first unescaped caret was consumed as the weight.
                break;
            }
        }
        return boostedTerms;
    }

    /**
     * Runs {@code original} through the index analyzer, creating the analyzer first if needed.
     *
     * @param original text to tokenize
     * @return the produced tokens in order
     * @throws PluginException if the token stream fails
     */
    protected List<String> getTokenizedString(String original) {
        // Callers such as the public static boost parser may arrive before any analyzer exists.
        this.createAnalyzerInst();
        List<String> tokens = new ArrayList<>();
        try (TokenStream tokenStream = this.analyzerInst.tokenStream("content", original)) {
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                tokens.add(charTermAttribute.toString());
            }
            tokenStream.end();
        }
        catch (IOException e) {
            throw new PluginException("Could not tokenize search arguments. " + e.getMessage(), e);
        }
        return tokens;
    }

    /**
     * Parses {@code configurableArguments} into the active flag configuration.
     *
     * <p>Arguments mentioning any of {@link #IMMUTABLE_ARGUMENTS} are rejected. The parsed flags
     * go to the rebuild configuration while the index is {@code REBUILDING} and to the regular
     * configuration otherwise; the immutable flags are then set on the current configuration.
     *
     * @param configurableArguments flag string supplied by the caller
     * @throws PluginException if an immutable flag is present or the fixed flags cannot be set
     */
    @Override
    protected void setConfig(String configurableArguments) {
        if (StringUtils.isNotEmpty(configurableArguments)) {
            for (String immutable : IMMUTABLE_ARGUMENTS) {
                if (configurableArguments.contains("-" + immutable)) {
                    throw new PluginException(String.format("Parameter \"%s\" is not configurable!", immutable));
                }
            }
        }
        if (this.getStatus() == SimilarityIndex.Status.REBUILDING) {
            this.rebuildFlagConfig = FlagConfigUtils.fromString(configurableArguments);
        } else {
            this.config = FlagConfigUtils.fromString(configurableArguments);
        }
        try {
            FlagConfigUtils.setFlagValue(this.getCurrentFlagConfig(), "luceneindexpath", this.relationalizeFile(this.getName()));
            FlagConfigUtils.setFlagValue(this.getCurrentFlagConfig(), "elementalmethod", ElementalVectorStore.ElementalGenerationMethod.CONTENTHASH);
            FlagConfigUtils.setFlagValue(this.getCurrentFlagConfig(), "docindexing", DocVectors.DocIndexingStrategy.INCREMENTAL);
        }
        catch (IOException | IllegalAccessException | NoSuchFieldException e) {
            throw new PluginException("Could not set index config", e);
        }
        this.relationalizeConfigFilePaths();
    }

    /**
     * Writes one document to the index, preparing the build first when no writer exists yet.
     *
     * <p>On success the key is XOR-ed into the transaction fingerprint and the document counter
     * is advanced (progress is logged every 10000 documents). An {@link IOException} from the
     * write itself is logged and not rethrown; fingerprint and counter are left untouched then.
     *
     * @param key document key
     * @param value document text
     * @throws IOException if preparing the build fails
     */
    public void indexData(long key, String value) throws IOException {
        if (this.writer == null) {
            this.prepareForBuild();
        }
        try {
            ((SimilarityTextIndexWriter)this.writer).write(key, value);
            this.transactionFingerprint ^= key;
            ++this.docCount;
            if (this.docCount % 10000 == 0) {
                LOGGER.info("{} documents indexed in lucene", this.docCount);
            }
        }
        catch (IOException e) {
            LOGGER.error(e.getMessage(), e);
        }
    }

    /** @return the analyzer instance, or {@code null} when none has been created yet */
    @VisibleForTesting
    public Analyzer analyserInstanceForTest() {
        return this.analyzerInst;
    }
}

