/*
 * Decompiled with CFR 0.152.
 */
package pitt.search.semanticvectors;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.NoSuchFileException;
import java.util.Enumeration;
import java.util.Random;
import java.util.logging.Logger;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.LuceneUtils;
import pitt.search.semanticvectors.ObjectVector;
import pitt.search.semanticvectors.VectorStore;
import pitt.search.semanticvectors.VectorStoreRAM;
import pitt.search.semanticvectors.VectorStoreUtils;
import pitt.search.semanticvectors.VectorStoreWriter;
import pitt.search.semanticvectors.utils.FileUtil;
import pitt.search.semanticvectors.utils.VerbatimLogger;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;

/**
 * Builds term vectors from document vectors by walking a Lucene index one
 * document at a time.
 *
 * <p>Every term accepted by {@link LuceneUtils#termFilter} starts as a zero
 * vector. For each document, the document's vector is superposed onto the
 * vector of every such term occurring in the document, weighted by the local
 * term weight of the term's frequency. Once all documents have been processed
 * the term vectors are normalized.
 *
 * <p>Document vectors are read sequentially from the file named by
 * {@code flagConfig.docvectorsfile()}. If that file does not exist, or runs out
 * before all documents have been processed, randomly generated vectors are
 * used for the remaining documents.
 */
public class IncrementalTermVectors implements VectorStore {
    public static String usageMessage = "\nIncrementalTermVectors class in package pitt.search.semanticvectors\nUsage: java pitt.search.semanticvectors.IncrementalTermVectors [-docvectorsfile ...] [-luceneindexpath ...]\nIncrementalTermVectors creates termvectors files in local directory from docvectors file.";
    private static final Logger logger = Logger.getLogger(IncrementalTermVectors.class.getCanonicalName());

    private final FlagConfig flagConfig;
    private final LuceneUtils luceneUtils;
    /** Term vectors under construction; populated by {@link #initializeVectorStore()}. */
    private VectorStoreRAM termVectorData;

    /**
     * Creates the term vectors immediately; when the constructor returns the
     * store is fully built and normalized.
     *
     * @param flagConfig configuration; may be updated with flags read from the
     *        header of the document vectors file
     * @param luceneUtils access to the Lucene index the document vectors describe
     * @throws IOException if the index or the document vectors file cannot be read
     */
    public IncrementalTermVectors(FlagConfig flagConfig, LuceneUtils luceneUtils) throws IOException {
        this.flagConfig = flagConfig;
        this.luceneUtils = luceneUtils;
        this.createIncrementalTermVectorsFromLucene();
    }

    /**
     * Creates the in-memory store and registers a zero vector for every term
     * of every configured contents field that passes the term filter.
     *
     * @throws IOException if the term dictionary cannot be read
     */
    private void initializeVectorStore() throws IOException {
        this.termVectorData = new VectorStoreRAM(this.flagConfig);
        for (String fieldName : this.flagConfig.contentsfields()) {
            Terms terms = this.luceneUtils.getTermsForField(fieldName);
            if (terms == null) {
                // NOTE(review): assumes getTermsForField returns null for a field that is
                // absent from the index (as Lucene's own term lookups do) - confirm.
                // Skipping mirrors how a missing per-document term vector is handled below.
                logger.warning("No terms found for field '" + fieldName + "'; skipping it.");
                continue;
            }
            TermsEnum termEnum = terms.iterator();
            int tc = 0;
            BytesRef bytes;
            while ((bytes = termEnum.next()) != null) {
                Term term = new Term(fieldName, bytes);
                // Skip terms already registered (e.g. via another field) and filtered-out terms.
                if (this.termVectorData.getVector(term.text()) != null || !this.luceneUtils.termFilter(term)) {
                    continue;
                }
                ++tc;
                Vector termVector = VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension());
                this.termVectorData.putVector(term.text(), termVector);
            }
            VerbatimLogger.info(String.format("There are %d terms (and %d docs)", tc, this.luceneUtils.getNumDocs()));
        }
    }

    /**
     * Drives the whole build: opens the document vectors file (if present),
     * initializes the term store, accumulates every document into the term
     * vectors and finally normalizes them.
     *
     * <p>The directory and the input stream are closed on every exit path,
     * including when an exception is thrown part-way through.
     *
     * @throws IOException if reading the index or the document vectors fails
     */
    private void createIncrementalTermVectorsFromLucene() throws IOException {
        int numdocs = this.luceneUtils.getNumDocs();
        File vectorFile = new File(this.flagConfig.docvectorsfile());
        String parentPath = vectorFile.getParent();
        if (parentPath == null) {
            // No directory component: open the directory denoted by the empty path.
            parentPath = "";
        }
        Random random = new Random();

        // Resources are closed in reverse order (input first, then directory).
        // A null input is permitted by try-with-resources and is simply not closed.
        try (FSDirectory fsDirectory = FSDirectory.open(FileSystems.getDefault().getPath(parentPath));
             IndexInput docVectorsInput = this.openDocVectorsInput(fsDirectory, vectorFile)) {
            // Must run after the header has been merged into flagConfig, because the
            // zero vectors are created with flagConfig's vector type and dimension.
            this.initializeVectorStore();

            for (int dc = 0; dc < numdocs; ++dc) {
                // Report every 1000 docs for the first 10000, then every 10000.
                if (dc % 10000 == 0 || dc < 10000 && dc % 1000 == 0) {
                    VerbatimLogger.info(dc + " ... ");
                    FileUtil.checkDiskSpace(vectorFile.getParentFile());
                }
                Vector docVector = this.nextDocVector(docVectorsInput, random);
                this.addDocumentToTermVectors(dc, docVector);
            }

            this.normalizeTermVectors();
        }
    }

    /**
     * Opens the document vectors file and merges the flags stored in its
     * header into {@link #flagConfig}.
     *
     * @return the input positioned just after the header, or {@code null} if
     *         the file does not exist
     * @throws IOException on any other I/O failure; the input is closed first
     */
    private IndexInput openDocVectorsInput(FSDirectory fsDirectory, File vectorFile) throws IOException {
        IndexInput input = null;
        try {
            input = fsDirectory.openInput(VectorStoreUtils.getStoreFileName(this.flagConfig.docvectorsfile(), this.flagConfig), IOContext.DEFAULT);
            logger.info("Reading vectors incrementally from file " + vectorFile);
            String header = input.readString();
            FlagConfig.mergeWriteableFlagsFromString(header, this.flagConfig);
            return input;
        } catch (NoSuchFileException e) {
            closeAfterFailure(input, e);
            logger.info("No file '" + vectorFile + "' so will use random elemental vectors instead.");
            return null;
        } catch (IOException | RuntimeException e) {
            // Do not leak the handle when the header cannot be read or parsed.
            closeAfterFailure(input, e);
            throw e;
        }
    }

    /** Closes {@code input} if non-null, attaching any close failure to {@code cause}. */
    private static void closeAfterFailure(IndexInput input, Exception cause) {
        if (input == null) {
            return;
        }
        try {
            input.close();
        } catch (IOException closeFailure) {
            cause.addSuppressed(closeFailure);
        }
    }

    /**
     * Returns the vector to use for the next document: the next one stored in
     * {@code docVectorsInput} while data remains, otherwise a random vector.
     *
     * @param docVectorsInput the open document vectors file, or {@code null}
     * @param random source of randomness for generated vectors
     */
    private Vector nextDocVector(IndexInput docVectorsInput, Random random) throws IOException {
        if (docVectorsInput != null && docVectorsInput.getFilePointer() < docVectorsInput.length() - 1L) {
            Vector docVector = VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension());
            // Each stored vector is preceded by a string (presumably the document's
            // name); it is read and discarded because documents are matched by position.
            docVectorsInput.readString();
            docVector.readFromLuceneStream(docVectorsInput);
            return docVector;
        }
        return VectorFactory.generateRandomVector(this.flagConfig.vectortype(), this.flagConfig.dimension(), this.flagConfig.seedlength(), random);
    }

    /**
     * Superposes {@code docVector} onto the vector of every registered term
     * that occurs in document {@code dc}, weighted by the local term weight of
     * the term's frequency in that document.
     */
    private void addDocumentToTermVectors(int dc, Vector docVector) throws IOException {
        for (String fieldName : this.flagConfig.contentsfields()) {
            Terms docTerms = this.luceneUtils.getTermVector(dc, fieldName);
            if (docTerms == null) {
                logger.severe("No term vector for document " + dc);
                continue;
            }
            TermsEnum termsEnum = docTerms.iterator();
            BytesRef bytes;
            while ((bytes = termsEnum.next()) != null) {
                // getVector returns null for terms that were never registered
                // (initializeVectorStore relies on the same behavior), so a plain
                // null check replaces the former catch of NullPointerException.
                Vector termVector = this.termVectorData.getVector(bytes.utf8ToString());
                if (termVector == null || termVector.getDimension() <= 0) {
                    continue;
                }
                PostingsEnum docs = termsEnum.postings(null);
                // Advance to the first posting so that freq() is valid.
                docs.nextDoc();
                float freq = this.luceneUtils.getLocalTermWeight(docs.freq());
                termVector.superpose(docVector, freq, null);
            }
        }
    }

    /** Normalizes every term vector in the store. */
    private void normalizeTermVectors() {
        Enumeration<ObjectVector> allVectors = this.termVectorData.getAllVectors();
        while (allVectors.hasMoreElements()) {
            ObjectVector obVec = allVectors.nextElement();
            Vector termVector = obVec.getVector();
            termVector.normalize();
            obVec.setVector(termVector);
        }
    }

    /** Returns the vector stored for {@code term}, as reported by the underlying RAM store. */
    @Override
    public Vector getVector(Object term) {
        return this.termVectorData.getVector(term);
    }

    /** Returns an enumeration over all term vectors in the underlying RAM store. */
    @Override
    public Enumeration<ObjectVector> getAllVectors() {
        return this.termVectorData.getAllVectors();
    }

    /** Returns the number of term vectors in the underlying RAM store. */
    @Override
    public int getNumVectors() {
        return this.termVectorData.getNumVectors();
    }

    /**
     * Command-line entry point: builds the term vectors and writes them to
     * {@code flagConfig.termvectorsfile()}.
     *
     * @param args command-line flags parsed by {@link FlagConfig#getFlagConfig}
     * @throws IOException if building or writing the vectors fails
     * @throws IllegalArgumentException if the flags are invalid (the usage
     *         message is printed to standard error first)
     */
    public static void main(String[] args) throws IOException {
        FlagConfig flagConfig;
        try {
            flagConfig = FlagConfig.getFlagConfig(args);
        }
        catch (IllegalArgumentException e) {
            System.err.println(usageMessage);
            throw e;
        }
        IncrementalTermVectors termVectors = new IncrementalTermVectors(flagConfig, new LuceneUtils(flagConfig));
        VectorStoreWriter.writeVectors(flagConfig.termvectorsfile(), flagConfig, termVectors);
    }

    /** Returns {@code true} if {@link #getVector(Object)} yields a non-null vector for {@code object}. */
    @Override
    public boolean containsVector(Object object) {
        return this.getVector(object) != null;
    }
}

