/*
 * Decompiled with CFR 0.152.
 */
package pitt.search.semanticvectors.orthography;

import java.io.IOException;
import java.nio.file.FileSystems;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.StringTokenizer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.LuceneUtils;
import pitt.search.semanticvectors.ObjectVector;
import pitt.search.semanticvectors.VectorStore;
import pitt.search.semanticvectors.VectorStoreDeterministic;
import pitt.search.semanticvectors.VectorStoreRAM;
import pitt.search.semanticvectors.VectorStoreWriter;
import pitt.search.semanticvectors.orthography.NumberRepresentation;
import pitt.search.semanticvectors.utils.Bobcat;
import pitt.search.semanticvectors.utils.VerbatimLogger;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;

/**
 * Command-line tool that reads per-document term vectors from a Lucene index,
 * rebuilds each document's token sequence from the stored term positions, and
 * produces one position-bound "sentence vector" per document.
 *
 * <p>Four vector stores are written to the working directory:
 * {@code sentencevectors.bin}, {@code sentencenumbervectors.bin},
 * {@code sentencetermvectors.bin} and {@code positionalritermvectors.bin}.
 */
public class SentenceVectors {
    /**
     * Source of global term weights. While this is {@code null},
     * {@link #getPhraseVector} weights every term 1.0. Assigned by {@link #main}.
     */
    static LuceneUtils lUtils;

    /**
     * Builds the vector for one space-separated sentence.
     *
     * <p>For every token that {@code theWords} knows, the token's vector is copied,
     * bound to the number vector of its 1-based position, and superposed (weighted by
     * the global term weight when {@link #lUtils} is set) onto the result. As a side
     * effect, a sentence-specific random vector bound to the same position vector is
     * superposed onto the token's entry in {@code semanticWords}; that entry is created
     * as a zero vector when absent. Tokens unknown to {@code theWords} are skipped but
     * still consume a position.
     *
     * @param theSentence   space-separated tokens; also used to seed {@code random}
     * @param theNumbers    store queried with the Integer positions 1..tokenCount
     * @param theWords      store of the input term vectors
     * @param semanticWords store that accumulates per-term output vectors (mutated)
     * @param random        generator that is re-seeded from the sentence text on every call
     * @param flagConfig    supplies vector type, dimension, seed length and the contents field
     * @return the normalized superposition of the position-bound term vectors
     * @throws IllegalStateException if a position vector is missing or binding fails
     */
    public static Vector getPhraseVector(String theSentence, VectorStoreRAM theNumbers, VectorStore theWords, VectorStoreRAM semanticWords, Random random, FlagConfig flagConfig) {
        Vector theVector = VectorFactory.createZeroVector(flagConfig.vectortype(), flagConfig.dimension());
        StringTokenizer theTokenizer = new StringTokenizer(theSentence, " ");
        int allTokens = theTokenizer.countTokens();
        // Seeding from the sentence text makes the sentence's random vector reproducible.
        random.setSeed(Bobcat.asLong(theSentence));
        Vector elementalDocVector = VectorFactory.generateRandomVector(flagConfig.vectortype(), flagConfig.dimension(), flagConfig.seedlength, random);
        for (int q = 0; q < allTokens; ++q) {
            String word = theTokenizer.nextToken();
            if (!theWords.containsVector(word)) {
                continue;
            }
            float theweight = 1.0f;
            if (lUtils != null) {
                theweight = lUtils.getGlobalTermWeight(new Term(flagConfig.contentsfields()[0], word));
            }
            Vector incoming = theWords.getVector(word).copy();
            if (!semanticWords.containsVector(word)) {
                semanticWords.putVector(word, VectorFactory.createZeroVector(flagConfig.vectortype(), flagConfig.dimension()));
            }
            Vector outgoing = semanticWords.getVector(word);
            Vector posVector = theNumbers.getVector(q + 1);
            if (posVector == null) {
                // Previously this path called System.exit(0), reporting success on failure.
                throw new IllegalStateException("No number vector for position " + (q + 1) + " in a sentence of " + allTokens + " tokens");
            }
            Vector docVector = elementalDocVector.copy();
            try {
                incoming.bind(posVector);
                docVector.bind(posVector);
            }
            catch (Exception e) {
                throw new IllegalStateException("Could not bind term '" + word + "' to position " + (q + 1), e);
            }
            theVector.superpose(incoming, theweight, null);
            outgoing.superpose(docVector, 1.0, null);
        }
        theVector.normalize();
        return theVector;
    }

    /**
     * Rebuilds a document's token sequence from its term vector.
     *
     * <p>Each term's positions are recorded, then positions 0..(n-1) are visited in
     * order, where n is the number of distinct positions seen. Every emitted token is
     * followed by a single space, so a non-empty result ends with a space.
     *
     * <p>An {@link IOException} while reading is logged and whatever was collected so
     * far is used; if nothing could be read the result is the empty string.
     *
     * @param terms the term vector of one document; must not be {@code null}
     * @return the reconstructed, space-terminated token sequence (possibly empty)
     */
    private static String reconstructSentence(Terms terms) {
        ArrayList<String> localTerms = new ArrayList<String>();
        Hashtable<Integer, Integer> localTermPositions = new Hashtable<Integer, Integer>();
        try {
            TermsEnum termsEnum = terms.iterator();
            int termcount = 0;
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                String theTerm = text.utf8ToString();
                PostingsEnum docsAndPositions = termsEnum.postings(null);
                if (docsAndPositions == null) {
                    continue;
                }
                docsAndPositions.nextDoc();
                localTerms.add(theTerm);
                int freq = docsAndPositions.freq();
                for (int y = 0; y < freq; ++y) {
                    localTermPositions.put(docsAndPositions.nextPosition(), termcount);
                }
                ++termcount;
            }
        }
        catch (IOException e) {
            // Best effort: keep what was read so far rather than aborting the whole run.
            e.printStackTrace();
        }
        // NOTE(review): only positions 0..size-1 are visited, so if the index contains
        // position gaps, tokens stored at higher positions are dropped. Confirm intended.
        int allTokens = localTermPositions.size();
        StringBuilder sentence = new StringBuilder();
        for (int q = 0; q < allTokens; ++q) {
            Integer termIndex = localTermPositions.get(q);
            if (termIndex == null) {
                continue;
            }
            sentence.append(localTerms.get(termIndex)).append(" ");
        }
        return sentence.toString();
    }

    /**
     * Entry point: builds sentence vectors for every document in the configured
     * Lucene index and writes the resulting vector stores.
     *
     * @param args command-line flags parsed by {@link FlagConfig#getFlagConfig}
     * @throws IllegalArgumentException if no Lucene index path is configured
     * @throws Exception if the index cannot be read or the vector stores cannot be written
     */
    public static void main(String[] args) throws Exception {
        FlagConfig flagConfig = FlagConfig.getFlagConfig(args);
        if (flagConfig.luceneindexpath().isEmpty()) {
            // Without an index there are no documents to read and lUtils would stay null.
            throw new IllegalArgumentException("A Lucene index path is required to build sentence vectors");
        }
        lUtils = new LuceneUtils(flagConfig);

        // The reader is only needed for the document count, so release it immediately.
        int numdocs;
        try (FSDirectory directory = FSDirectory.open(FileSystems.getDefault().getPath(flagConfig.luceneindexpath()));
             DirectoryReader indexReader = DirectoryReader.open(directory)) {
            numdocs = indexReader.numDocs();
        }

        VectorStoreRAM sentenceVectors = new VectorStoreRAM(flagConfig);
        VectorStoreRAM semanticWords = new VectorStoreRAM(flagConfig);
        VectorStore theWords;
        if (!flagConfig.initialtermvectors().equals("random")) {
            VectorStoreRAM loadedWords = new VectorStoreRAM(flagConfig);
            loadedWords.initFromFile(flagConfig.initialtermvectors());
            theWords = loadedWords;
        } else {
            theWords = new VectorStoreDeterministic(flagConfig);
        }

        // OOV collects every generated number vector under the key "<sentenceLength>:<object>".
        VectorStoreRAM OOV = new VectorStoreRAM(flagConfig);
        // Number-vector stores, keyed by sentence length in tokens.
        Hashtable<Integer, VectorStoreRAM> allNumbers = new Hashtable<Integer, VectorStoreRAM>();
        NumberRepresentation NR = new NumberRepresentation(flagConfig, "*STARTSENTENCE*", "*ENDSENTENCE*");
        VectorStoreRAM theNumbers = NR.getNumberVectors(0, 11);
        // NOTE(review): this pre-seeded store for 10-token sentences is never copied into
        // OOV, unlike stores created inside the loop. Confirm whether that is intended.
        allNumbers.put(10, theNumbers);

        System.err.println("Numdocs " + numdocs);
        String contentsField = flagConfig.contentsfields()[0];
        // One generator suffices: getPhraseVector re-seeds it for every sentence.
        Random random = new Random();
        for (int x = 0; x < numdocs; ++x) {
            if (x % 10000 == 0) {
                System.err.print(x + "...");
            }
            Terms terms = lUtils.getTermVector(x, contentsField);
            if (terms == null) {
                // No term vector available for this document in the contents field.
                continue;
            }
            String theSentence = reconstructSentence(terms);
            int numTokens = new StringTokenizer(theSentence, " ").countTokens();
            if (numTokens < 2) {
                continue;
            }
            theNumbers = allNumbers.get(numTokens);
            if (theNumbers == null) {
                theNumbers = NR.getNumberVectors(0, numTokens + 1);
                allNumbers.put(numTokens, theNumbers);
                Enumeration<ObjectVector> newNumbers = theNumbers.getAllVectors();
                while (newNumbers.hasMoreElements()) {
                    ObjectVector nextObjectVector = newNumbers.nextElement();
                    String key = numTokens + ":" + nextObjectVector.getObject();
                    if (OOV.getVector(key) == null) {
                        OOV.putVector(key, nextObjectVector.getVector());
                    }
                }
            }
            Vector sentenceVector = SentenceVectors.getPhraseVector(theSentence, theNumbers, theWords, semanticWords, random, flagConfig);
            sentenceVectors.putVector(theSentence, sentenceVector);
        }

        VectorStoreWriter.writeVectorsInLuceneFormat("sentencevectors.bin", flagConfig, sentenceVectors);
        VectorStoreWriter.writeVectorsInLuceneFormat("sentencenumbervectors.bin", flagConfig, OOV);
        VectorStoreWriter.writeVectorsInLuceneFormat("sentencetermvectors.bin", flagConfig, theWords);
        VerbatimLogger.info("\nNormalizing semantic term vectors ...\n");
        Enumeration<ObjectVector> docEnum = semanticWords.getAllVectors();
        while (docEnum.hasMoreElements()) {
            docEnum.nextElement().getVector().normalize();
        }
        VectorStoreWriter.writeVectorsInLuceneFormat("positionalritermvectors.bin", flagConfig, semanticWords);
    }
}

