/*
 * Decompiled with CFR 0.152.
 */
package pitt.search.semanticvectors;

import gnu.trove.TCollections;
import gnu.trove.impl.sync.TSynchronizedObjectFloatMap;
import gnu.trove.impl.sync.TSynchronizedObjectIntMap;
import gnu.trove.map.hash.TObjectFloatHashMap;
import gnu.trove.map.hash.TObjectIntHashMap;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.util.ArrayList;
import java.util.List;
import java.util.TreeSet;
import java.util.logging.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.BaseCompositeReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import pitt.search.semanticvectors.DocVectors;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.utils.StringUtils;
import pitt.search.semanticvectors.utils.VerbatimLogger;

/**
 * Wraps a Lucene index and exposes the term statistics (document frequency,
 * total term frequency, IDF, log-entropy) used for term weighting, together
 * with stop-list / start-list based term filtering.
 *
 * <p>The per-term caches are wrapped with {@code TCollections.synchronizedMap}.
 * Cache keys have the form {@code field + "_" + text}.
 */
public class LuceneUtils {
    public static final Version LUCENE_VERSION = Version.LUCENE_6_6_0;
    private static final Logger logger = Logger.getLogger(LuceneUtils.class.getCanonicalName());

    /** Fraction of the document count used as the initial capacity of the term caches. */
    public static final double MAPS_INITIAL_CAPACITY_COEFFICIENT = 0.75;

    private final FlagConfig flagConfig;
    private final BaseCompositeReader<LeafReader> compositeReader;
    private final LeafReader leafReader;
    private final FSDirectory fsDirectory;
    private final TSynchronizedObjectFloatMap<String> termEntropy;
    private final TSynchronizedObjectFloatMap<String> termIDF;
    private final TSynchronizedObjectIntMap<String> termFreq;
    private boolean totalTermCountCaching = true;
    /** Words to exclude; {@code null} means no stop list is in effect. */
    private TreeSet<String> stopwords = null;
    /** Words to include exclusively; {@code null} means every word is allowed. */
    private TreeSet<String> startwords = null;

    /**
     * Opens the Lucene index named by {@code flagConfig.luceneindexpath()} and loads
     * the stop list and start list configured in {@code flagConfig}, if any.
     *
     * @param flagConfig configuration; its {@code luceneindexpath} must be non-empty
     * @throws IllegalArgumentException if {@code luceneindexpath} is empty
     * @throws IOException if the index or one of the word-list files cannot be read
     */
    public LuceneUtils(FlagConfig flagConfig) throws IOException {
        if (flagConfig.luceneindexpath().isEmpty()) {
            throw new IllegalArgumentException("-luceneindexpath is a required argument for initializing LuceneUtils instance.");
        }
        this.fsDirectory = FSDirectory.open(FileSystems.getDefault().getPath(flagConfig.luceneindexpath()));
        this.compositeReader = DirectoryReader.open(this.fsDirectory);

        int mapsInitialCapacity = (int) (MAPS_INITIAL_CAPACITY_COEFFICIENT * (double) this.compositeReader.numDocs());
        this.termEntropy = (TSynchronizedObjectFloatMap<String>) TCollections.synchronizedMap(
                new TObjectFloatHashMap<String>(mapsInitialCapacity));
        this.termIDF = (TSynchronizedObjectFloatMap<String>) TCollections.synchronizedMap(
                new TObjectFloatHashMap<String>(mapsInitialCapacity));
        this.termFreq = (TSynchronizedObjectIntMap<String>) TCollections.synchronizedMap(
                new TObjectIntHashMap<String>(mapsInitialCapacity));

        this.leafReader = SlowCompositeReaderWrapper.wrap(this.compositeReader);
        // Result intentionally unused; kept from the original code.
        // NOTE(review): presumably forces an early read of the index fields - confirm.
        MultiFields.getFields(this.compositeReader);
        this.flagConfig = flagConfig;

        // An explicit in-memory stop word list takes precedence over a stop list file.
        if (!flagConfig.stopWordsList().isEmpty()) {
            this.loadStopWords(flagConfig.stopWordsList());
        } else if (!flagConfig.stoplistfile().isEmpty()) {
            this.loadStopWords(flagConfig.stoplistfile());
        }
        if (!flagConfig.startlistfile().isEmpty()) {
            this.loadStartWords(flagConfig.startlistfile());
        }
        VerbatimLogger.info("Initialized LuceneUtils from Lucene index in directory: " + flagConfig.luceneindexpath() + "\n");
        VerbatimLogger.info("Fields in index are: " + String.join(", ", this.getFieldNames()) + "\n");
    }

    /**
     * Reads a word list with one entry per line. Lines are stored verbatim (no trimming).
     * The file is read with the platform default charset, as {@link FileReader} does.
     *
     * @param path location of the word-list file
     * @return the sorted set of lines in the file
     * @throws IOException if the file cannot be opened or read; the original failure is the cause
     */
    private static TreeSet<String> readWordList(String path) throws IOException {
        TreeSet<String> words = new TreeSet<>();
        // try-with-resources guarantees the reader is closed even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                words.add(line);
            }
        } catch (IOException e) {
            throw new IOException("Couldn't open file " + path, e);
        }
        return words;
    }

    /**
     * Replaces the stop list with the lines of the given file.
     * The current stop list is left untouched if loading fails.
     *
     * @param stoppath path of a file with one stop word per line
     * @throws IOException if the file cannot be read
     */
    public void loadStopWords(String stoppath) throws IOException {
        logger.info("Using stopword file: " + stoppath);
        this.stopwords = readWordList(stoppath);
    }

    /**
     * Replaces the stop list with the given words.
     *
     * @param stopWordsList words to treat as stop words
     */
    public void loadStopWords(List<String> stopWordsList) {
        this.stopwords = new TreeSet<>(stopWordsList);
    }

    /**
     * Replaces the start list with the lines of the given file.
     * The current start list is left untouched if loading fails.
     *
     * @param startpath path of a file with one start word per line
     * @throws IOException if the file cannot be read
     */
    public void loadStartWords(String startpath) throws IOException {
        TreeSet<String> loaded = readWordList(startpath);
        this.startwords = loaded;
        VerbatimLogger.info(String.format("Loading startword file: '%s'. Only these %d words will be indexed.\n", startpath, loaded.size()));
    }

    /**
     * @return {@code true} if a stop list is loaded and contains {@code x};
     *         {@code false} when no stop list is loaded
     */
    public boolean stoplistContains(String x) {
        return this.stopwords != null && this.stopwords.contains(x);
    }

    /**
     * @return {@code true} if no start list is loaded, or if the loaded start list contains {@code x}
     */
    public boolean startlistContains(String x) {
        return this.startwords == null || this.startwords.contains(x);
    }

    /**
     * @param docID Lucene-internal document number
     * @return the stored document with that number
     * @throws IOException if the index cannot be read
     */
    public Document getDoc(int docID) throws IOException {
        return this.leafReader.document(docID);
    }

    /**
     * Returns the external identifier of a document: the Lucene document number itself
     * when {@code -docidfield} is {@code "luceneID"}, otherwise the string value of the
     * configured field.
     *
     * @param docID Lucene-internal document number
     * @return the external document identifier
     * @throws IOException if the document cannot be read
     * @throws NullPointerException if the document has no field named by {@code -docidfield}
     */
    public String getExternalDocId(int docID) throws IOException {
        String idField = this.flagConfig.docidfield();
        if (idField.equals("luceneID")) {
            return Integer.toString(docID);
        }
        try {
            return this.getDoc(docID).getField(idField).stringValue();
        } catch (IOException | NullPointerException e) {
            // Logged here because the failure is otherwise hard to trace back to a wrong -docidfield.
            logger.severe(String.format("Failed to get external doc ID from doc no. %d in Lucene index.\nThis is almost certain to lead to problems.\nCheck that -docidfield was set correctly and exists in the Lucene index", docID));
            throw e;
        }
    }

    /**
     * @param field name of an indexed field
     * @return the terms of that field
     * @throws NullPointerException if the index has no terms for {@code field}
     * @throws IOException if the index cannot be read
     */
    public Terms getTermsForField(String field) throws IOException {
        Terms terms = this.leafReader.terms(field);
        if (terms == null) {
            throw new NullPointerException(String.format("No terms for field: '%s'.\nKnown fields are: '%s'.", field, StringUtils.join(this.getFieldNames())));
        }
        // Return the instance already fetched instead of asking the reader a second time.
        return terms;
    }

    /**
     * @param term term to look up
     * @return whatever {@code LeafReader.postings(term)} returns for the term
     * @throws IOException if the index cannot be read
     */
    public PostingsEnum getDocsForTerm(Term term) throws IOException {
        return this.leafReader.postings(term);
    }

    /**
     * @param docID Lucene-internal document number
     * @param field field whose term vector is wanted
     * @return whatever {@code LeafReader.getTermVector(docID, field)} returns
     * @throws IOException if the index cannot be read
     */
    public Terms getTermVector(int docID, String field) throws IOException {
        return this.leafReader.getTermVector(docID, field);
    }

    /** @return the field infos of the wrapped index reader */
    public FieldInfos getFieldInfos() {
        return this.leafReader.getFieldInfos();
    }

    /** @return a new list with the names of all fields known to the index */
    public List<String> getFieldNames() {
        List<String> fieldNames = new ArrayList<>();
        for (FieldInfo fieldInfo : this.leafReader.getFieldInfos()) {
            fieldNames.add(fieldInfo.name);
        }
        return fieldNames;
    }

    /**
     * @param term term to look up
     * @return the number of documents containing {@code term}, or 1 if the index cannot be read
     */
    public int getGlobalDocFreq(Term term) {
        try {
            return this.compositeReader.docFreq(term);
        } catch (IOException e) {
            logger.info("Couldn't get term frequency for term " + term.text());
            return 1;
        }
    }

    /**
     * Returns the total number of occurrences of {@code term} across the index,
     * caching the result per field/text pair.
     *
     * <p>A reader result of -1 is reported as 0, and counts above
     * {@code Integer.MAX_VALUE} are clamped rather than wrapped. The normalized value
     * is what gets cached, so repeated calls agree with the first one.
     *
     * @param term term to look up
     * @return the total term frequency, or 1 if the index cannot be read
     */
    public int getGlobalTermFreq(Term term) {
        String key = cacheKey(term);
        if (this.totalTermCountCaching && this.termFreq.containsKey(key)) {
            return this.termFreq.get(key);
        }
        long total;
        try {
            total = this.compositeReader.totalTermFreq(term);
        } catch (IOException e) {
            logger.info("Couldn't get term frequency for term " + term.text());
            return 1;
        }
        int tf;
        if (total == -1) {
            logger.warning("Lucene StandardDirectoryReader returned -1 for term: '" + term.text() + "' in field: '" + term.field() + "'. Changing to 0.\nThis may be due to a version-mismatch and might be solved by rebuilding your Lucene index.");
            tf = 0;
        } else {
            tf = (int) Math.min(total, (long) Integer.MAX_VALUE);
        }
        if (this.totalTermCountCaching) {
            this.termFreq.put(key, tf);
        }
        return tf;
    }

    /**
     * Sums {@link #getGlobalTermWeight(Term)} for {@code termString} over every
     * configured contents field.
     *
     * @param termString text of the term
     * @return the summed global weight
     */
    public float getGlobalTermWeightFromString(String termString) {
        float weight = 0.0f;
        for (String field : this.flagConfig.contentsfields()) {
            weight += this.getGlobalTermWeight(new Term(field, termString));
        }
        return weight;
    }

    /**
     * Returns the global weight of {@code term} under the configured
     * {@link TermWeight} scheme.
     *
     * <p>Note that {@code LOGFREQ} is the natural log of the global frequency, so a
     * frequency of 0 yields negative infinity.
     *
     * @param term term to weigh
     * @return the weight, or 1 for an unrecognized scheme
     */
    public float getGlobalTermWeight(Term term) {
        switch (this.flagConfig.termweight()) {
            case NONE:
                return 1.0f;
            case SQRT:
                return (float) Math.sqrt(this.getGlobalTermFreq(term));
            case IDF:
                return this.getIDF(term);
            case LOGENTROPY:
                return this.getEntropy(term);
            case FREQ:
                return this.getGlobalTermFreq(term);
            case LOGFREQ:
                return (float) Math.log(this.getGlobalTermFreq(term));
            default:
                break;
        }
        VerbatimLogger.severe("Unrecognized termweight option: " + this.flagConfig.termweight() + ". Returning 1.\n");
        return 1.0f;
    }

    /**
     * Returns the local (within-document) weight for a raw in-document frequency
     * under the configured {@link TermWeight} scheme.
     *
     * <p>Only {@code NONE}, {@code IDF}, {@code LOGENTROPY} and {@code SQRT} have a
     * dedicated case; {@code FREQ} and {@code LOGFREQ} reach the fallback, which logs
     * a severe message and returns 1.
     *
     * @param docfreq frequency of the term within one document
     * @return the local weight
     */
    public float getLocalTermWeight(int docfreq) {
        switch (this.flagConfig.termweight()) {
            case NONE:
                return 1.0f;
            case IDF:
                return docfreq;
            case LOGENTROPY:
                return (float) Math.log10(1 + docfreq);
            case SQRT:
                return (float) Math.sqrt(docfreq);
            default:
                break;
        }
        VerbatimLogger.severe("Unrecognized termweight option: " + this.flagConfig.termweight() + ". Returning 1.");
        return 1.0f;
    }

    /** @return the number of documents reported by the index reader */
    public int getNumDocs() {
        return this.compositeReader.numDocs();
    }

    /** Builds the key under which a term's statistics are cached. */
    private static String cacheKey(Term term) {
        return term.field() + "_" + term.text();
    }

    /**
     * Returns {@code log10(numDocs / docFreq)} for the term, cached per field/text pair.
     *
     * @return the IDF; 0 (uncached) when no document contains the term; 1 if the index cannot be read
     */
    private float getIDF(Term term) {
        String key = cacheKey(term);
        if (this.termIDF.containsKey(key)) {
            return this.termIDF.get(key);
        }
        try {
            int docFreq = this.compositeReader.docFreq(term);
            if (docFreq == 0) {
                return 0.0f;
            }
            float idf = (float) Math.log10((float) this.compositeReader.numDocs() / (float) docFreq);
            this.termIDF.put(key, idf);
            return idf;
        } catch (IOException e) {
            logger.warning("Couldn't get IDF for term " + term.text() + ": " + e);
            return 1.0f;
        }
    }

    /**
     * Returns the entropy-based global weight
     * {@code 1 + sum(p * log2(p)) / log2(numDocs)}, where {@code p} is the term's
     * frequency in one document divided by its global frequency. The result is cached
     * per field/text pair.
     *
     * <p>The weight is 1 when the term has no postings, when its global frequency is
     * 0, or when {@code log2(numDocs)} is not positive (the division would otherwise
     * produce NaN or an infinity).
     */
    private float getEntropy(Term term) {
        String key = cacheKey(term);
        if (this.termEntropy.containsKey(key)) {
            return this.termEntropy.get(key);
        }
        int globalFreq = this.getGlobalTermFreq(term);
        double entropy = 0.0;
        try {
            PostingsEnum postings = this.getDocsForTerm(term);
            // postings is null for a term that is not in the index.
            if (postings != null && globalFreq > 0) {
                while (postings.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
                    double p = postings.freq() / (double) globalFreq;
                    entropy += p * (Math.log(p) / Math.log(2.0));
                }
                double log2n = Math.log(this.getNumDocs()) / Math.log(2.0);
                if (log2n > 0.0) {
                    entropy /= log2n;
                }
            }
        } catch (IOException e) {
            // As in the original code, whatever was accumulated so far is still cached and returned.
            logger.info("Couldn't get term entropy for term " + term.text());
        }
        // Compute the float once so the cached value and the returned value are identical.
        float weight = (float) (1.0 + entropy);
        this.termEntropy.put(key, weight);
        return weight;
    }

    /**
     * Applies the term filter using the limits configured in {@code flagConfig}.
     *
     * @param term term to test
     * @return {@code true} if the term passes every configured filter
     */
    public boolean termFilter(Term term) {
        return this.termFilter(term, this.flagConfig.contentsfields(), this.flagConfig.minfrequency(), this.flagConfig.maxfrequency(), this.flagConfig.maxnonalphabetchars(), this.flagConfig.filteroutnumbers(), this.flagConfig.mintermlength());
    }

    /**
     * Decides whether a term should be kept.
     *
     * <p>A term is rejected if it is in the stop list, is missing from a loaded start
     * list, belongs to none of {@code desiredFields} (case-insensitive), or has a
     * global frequency outside {@code [minFreq, maxFreq]}. When
     * {@code maxNonAlphabet != -1} it is also rejected if it is shorter than
     * {@code minTermLength} or has more than {@code maxNonAlphabet} non-letter chars.
     * The length check is skipped entirely when {@code maxNonAlphabet == -1}.
     *
     * @param term term to test
     * @param desiredFields fields whose terms are accepted
     * @param minFreq minimum global term frequency, inclusive
     * @param maxFreq maximum global term frequency, inclusive
     * @param maxNonAlphabet maximum number of non-letter chars, or -1 to disable the character checks
     * @param minTermLength minimum term length, applied only with the character checks
     * @return {@code true} if the term passes every filter
     */
    protected boolean termFilter(Term term, String[] desiredFields, int minFreq, int maxFreq, int maxNonAlphabet, int minTermLength) {
        String termText = term.text();
        if (this.stoplistContains(termText)) {
            return false;
        }
        if (!this.startlistContains(termText)) {
            return false;
        }

        boolean isDesiredField = false;
        for (String desiredField : desiredFields) {
            if (term.field().equalsIgnoreCase(desiredField)) {
                isDesiredField = true;
                break;
            }
        }
        if (!isDesiredField) {
            return false;
        }

        if (maxNonAlphabet != -1) {
            if (termText.length() < minTermLength) {
                return false;
            }
            int nonLetter = 0;
            for (int i = 0; i < termText.length(); ++i) {
                if (!Character.isLetter(termText.charAt(i)) && ++nonLetter > maxNonAlphabet) {
                    return false;
                }
            }
        }

        int termfreq = this.getGlobalTermFreq(term);
        return termfreq >= minFreq && termfreq <= maxFreq;
    }

    /**
     * Variant of the term filter that can first reject numeric terms.
     *
     * <p>With {@code filterNumbers} set, any text accepted by
     * {@link Double#parseDouble(String)} is rejected; besides plain numbers that
     * includes forms such as {@code "NaN"}, {@code "Infinity"} and {@code "1e5"}.
     */
    private boolean termFilter(Term term, String[] desiredFields, int minFreq, int maxFreq, int maxNonAlphabet, boolean filterNumbers, int minTermLength) {
        if (filterNumbers) {
            try {
                Double.parseDouble(term.text());
                return false;
            } catch (NumberFormatException ignored) {
                // Not a number: fall through to the remaining filters.
            }
        }
        return this.termFilter(term, desiredFields, minFreq, maxFreq, maxNonAlphabet, minTermLength);
    }

    /**
     * Closes the leaf reader, the composite reader and the index directory.
     * Each resource is closed independently, so a failure on one does not prevent
     * the others from being closed. Failures are logged, not thrown.
     */
    public void closeLuceneDir() {
        closeAndLog(this.leafReader);
        closeAndLog(this.compositeReader);
        closeAndLog(this.fsDirectory);
    }

    /** Closes one resource, logging instead of propagating an {@link IOException}. */
    private static void closeAndLog(java.io.Closeable resource) {
        try {
            resource.close();
        } catch (IOException e) {
            logger.severe("Could not close lucene index dir: " + e);
        }
    }

    /** Term weighting schemes selectable through {@code flagConfig.termweight()}. */
    public enum TermWeight {
        NONE,
        IDF,
        LOGENTROPY,
        FREQ,
        SQRT,
        LOGFREQ;
    }
}

