/*
 * Decompiled with CFR 0.152.
 */
package pitt.search.semanticvectors;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Logger;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.eclipse.rdf4j.query.QueryInterruptedException;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.LuceneUtils;
import pitt.search.semanticvectors.VectorStore;
import pitt.search.semanticvectors.VectorStoreRAM;
import pitt.search.semanticvectors.VectorStoreUtils;
import pitt.search.semanticvectors.VectorStoreWriter;
import pitt.search.semanticvectors.utils.FileUtil;
import pitt.search.semanticvectors.utils.VerbatimLogger;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;

/**
 * Builds document vectors one document at a time and streams each one to a
 * vector store file as soon as it is complete.
 *
 * <p>For every document index in {@code [0, luceneUtils.getNumDocs())} the
 * term vectors of the configured {@code contentsfields} are read, each term's
 * vector from the supplied {@link VectorStore} is superposed onto a zero
 * vector (weighted by local, global and optional field weight), and the
 * normalized result is written after the document's external id.
 *
 * <p>Instances are only created internally; use the static
 * {@code createIncrementalDocVectors} entry points.
 */
public class IncrementalDocVectors {
    private static final Logger logger = Logger.getLogger(IncrementalDocVectors.class.getCanonicalName());
    private final FlagConfig flagConfig;
    private final VectorStore termVectorData;
    private final LuceneUtils luceneUtils;

    private IncrementalDocVectors(VectorStore termVectorData, FlagConfig flagConfig, LuceneUtils luceneUtils) {
        this.termVectorData = termVectorData;
        this.flagConfig = flagConfig;
        this.luceneUtils = luceneUtils;
    }

    /**
     * Creates document vectors and writes them to the doc vectors file named by
     * {@code flagConfig}, without any possibility of user interruption.
     *
     * @param termVectorData store from which term vectors are looked up
     * @param flagConfig configuration (output file, vector type, dimension, fields, ...)
     * @param luceneUtils accessor for the underlying Lucene index
     * @throws IOException if reading the index or writing the output file fails
     */
    public static void createIncrementalDocVectors(VectorStore termVectorData, FlagConfig flagConfig, LuceneUtils luceneUtils) throws IOException {
        IncrementalDocVectors.createIncrementalDocVectors(termVectorData, flagConfig, luceneUtils, new AtomicBoolean());
    }

    /**
     * Creates document vectors and writes them to the doc vectors file named by
     * {@code flagConfig}. The flag {@code isCreationInterruptedByUser} is polled
     * before each document; once it is {@code true} a
     * {@link QueryInterruptedException} is thrown.
     *
     * @param termVectorData store from which term vectors are looked up
     * @param flagConfig configuration (output file, vector type, dimension, fields, ...)
     * @param luceneUtils accessor for the underlying Lucene index
     * @param isCreationInterruptedByUser cooperative cancellation flag
     * @throws IOException if reading the index or writing the output file fails
     */
    public static void createIncrementalDocVectors(VectorStore termVectorData, FlagConfig flagConfig, LuceneUtils luceneUtils, AtomicBoolean isCreationInterruptedByUser) throws IOException {
        IncrementalDocVectors incrementalDocVectors = new IncrementalDocVectors(termVectorData, flagConfig, luceneUtils);
        incrementalDocVectors.trainIncrementalDocVectors(isCreationInterruptedByUser);
    }

    /**
     * Writes the header followed by one (docID, normalized vector) record per document.
     *
     * <p>Both the directory and the output stream are managed by
     * try-with-resources so they are released even when the loop is aborted
     * by an I/O error or by a user interruption.
     */
    private void trainIncrementalDocVectors(AtomicBoolean isCreationInterruptedByUser) throws IOException {
        int numdocs = this.luceneUtils.getNumDocs();
        File vectorFile = new File(VectorStoreUtils.getStoreFileName(this.flagConfig.docvectorsfile(), this.flagConfig));
        String parentPath = vectorFile.getParent();
        if (parentPath == null) {
            // No directory component in the file name: resolve against the empty path.
            parentPath = "";
        }

        try (FSDirectory fsDirectory = FSDirectory.open(FileSystems.getDefault().getPath(parentPath))) {
            // Remove any previous output first; createOutput is asked for a fresh file of the same name.
            Files.deleteIfExists(vectorFile.toPath());
            try (IndexOutput outputStream = fsDirectory.createOutput(vectorFile.getName(), IOContext.DEFAULT)) {
                VerbatimLogger.info("Writing vectors incrementally to file " + vectorFile + " ... ");
                outputStream.writeString(VectorStoreWriter.generateHeaderString(this.flagConfig));

                for (int dc = 0; dc < numdocs; ++dc) {
                    if (isCreationInterruptedByUser.get()) {
                        throw new QueryInterruptedException("Transaction was aborted by the user");
                    }
                    // Report every 1000 documents up to 10000, then every 10000.
                    if (dc > 0 && (dc % 10000 == 0 || (dc < 10000 && dc % 1000 == 0))) {
                        VerbatimLogger.info("Processed " + dc + " documents ... ");
                        // NOTE(review): getParentFile() is null when the file name has no
                        // directory component; confirm FileUtil.checkDiskSpace accepts that.
                        FileUtil.checkDiskSpace(vectorFile.getParentFile());
                    }

                    String docID = this.luceneUtils.getExternalDocId(dc);
                    Vector docVector = this.buildDocVector(dc, docID);
                    if (docVector.isZeroVector()) {
                        logger.warning(String.format("Outputting zero vector for document '%s'. This probably means that none of the -contentsfields were populated, or all terms failed the LuceneUtils termsfilter. You may want to investigate.", docID));
                    }
                    docVector.normalize();
                    outputStream.writeString(docID);
                    docVector.writeToLuceneStream(outputStream);
                }
                VerbatimLogger.info("Finished writing vectors.\n");
            }
        }
    }

    /**
     * Returns the (not yet normalized) vector for document {@code dc}: a zero
     * vector onto which the weighted term vectors of every configured contents
     * field have been superposed. Fields without a term vector are skipped.
     */
    private Vector buildDocVector(int dc, String docID) throws IOException {
        Vector docVector = VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension());
        for (String fieldName : this.flagConfig.contentsfields()) {
            Terms terms = this.luceneUtils.getTermVector(dc, fieldName);
            if (terms == null) {
                VerbatimLogger.fine(String.format("When building document vectors, no term vector for field: '%s' in document '%s'.", fieldName, docID));
                continue;
            }
            this.addFieldTerms(docVector, terms, fieldName);
        }
        return docVector;
    }

    /**
     * Superposes onto {@code docVector} the vector of every term in
     * {@code terms} that has a usable entry in the term vector store.
     */
    private void addFieldTerms(Vector docVector, Terms terms, String fieldName) throws IOException {
        TermsEnum termsEnum = terms.iterator();
        BytesRef bytes;
        while ((bytes = termsEnum.next()) != null) {
            String termString = new Term(fieldName, bytes).text();
            // Position on the first posting and read the term's frequency from it.
            PostingsEnum docs = termsEnum.postings(null);
            docs.nextDoc();
            int freq = docs.freq();
            try {
                Vector termVector = this.termVectorData.getVector(termString);
                if (termVector == null || termVector.getDimension() <= 0) {
                    continue;
                }
                float localweight = this.luceneUtils.getLocalTermWeight(freq);
                float globalweight = this.luceneUtils.getGlobalTermWeight(new Term(fieldName, termString));
                float fieldweight = 1.0f;
                if (this.flagConfig.fieldweight()) {
                    // Scale by the inverse square root of the number of terms in this field.
                    fieldweight = (float)(1.0 / Math.sqrt(terms.size()));
                }
                docVector.superpose(termVector, localweight * globalweight * fieldweight, null);
            }
            catch (NullPointerException npe) {
                // Deliberate best-effort: a term that cannot be resolved is skipped, not fatal.
                logger.finest("term " + termString + " not represented");
            }
        }
    }

    /**
     * Command line entry point. After flag parsing exactly two arguments must
     * remain; the first is the term vector file loaded into memory. Document
     * vectors are then written as configured by the flags.
     *
     * @throws IllegalArgumentException if the number of remaining arguments is not 2
     */
    public static void main(String[] args) throws Exception {
        FlagConfig flagConfig = FlagConfig.getFlagConfig(args);
        args = flagConfig.remainingArgs;
        if (args.length != 2) {
            throw new IllegalArgumentException("After parsing command line flags, there were " + args.length + " arguments, instead of the expected 2.");
        }
        VectorStoreRAM vsr = new VectorStoreRAM(flagConfig);
        vsr.initFromFile(args[0]);
        logger.info("Minimum frequency = " + flagConfig.minfrequency());
        logger.info("Maximum frequency = " + flagConfig.maxfrequency());
        logger.info("Number non-alphabet characters = " + flagConfig.maxnonalphabetchars());
        logger.info("Contents fields are: " + Arrays.toString(flagConfig.contentsfields()));
        IncrementalDocVectors.createIncrementalDocVectors(vsr, flagConfig, new LuceneUtils(flagConfig));
    }
}

