/*
 * Decompiled with CFR 0.152.
 */
package pitt.search.semanticvectors;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.netlib.blas.BLAS;
import pitt.search.semanticvectors.CompressedVectorStoreRAM;
import pitt.search.semanticvectors.DocVectors;
import pitt.search.semanticvectors.ElementalVectorStore;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.LuceneUtils;
import pitt.search.semanticvectors.ObjectVector;
import pitt.search.semanticvectors.VectorStore;
import pitt.search.semanticvectors.VectorStoreRAM;
import pitt.search.semanticvectors.VectorStoreUtils;
import pitt.search.semanticvectors.VectorStoreWriter;
import pitt.search.semanticvectors.orthography.NumberRepresentation;
import pitt.search.semanticvectors.utils.SigmoidTable;
import pitt.search.semanticvectors.utils.VerbatimLogger;
import pitt.search.semanticvectors.vectors.PermutationUtils;
import pitt.search.semanticvectors.vectors.PermutationVector;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;
import pitt.search.semanticvectors.vectors.VectorType;
import pitt.search.semanticvectors.vectors.VectorUtils;

public class TermTermVectorsFromLucene {
    // The literal 6 also appears as the first SigmoidTable argument below and as the +/-6.0 clipping
    // bounds in processEmbeddings; presumably those are this constant inlined by the decompiler.
    private static final int MAX_EXP = 6;
    // Configuration supplied to the constructor; read throughout and occasionally mutated
    // (vector type is temporarily switched while permutation stores are created).
    private FlagConfig flagConfig;
    // Set to true by populateQueue() once no further documents can be loaded and the queue is empty.
    private AtomicBoolean exhaustedQ = new AtomicBoolean();
    // Number of consecutive documents loaded per populateQueue() call; shrunk in
    // trainTermTermVectors() when the index holds fewer documents than this.
    private int qsize = 100000;
    // True when the constructor received pre-existing elemental term vectors.
    private boolean retraining = false;
    // Vectors being trained, keyed by term text.
    private volatile VectorStoreRAM semanticTermVectors;
    // Elemental (input) vectors: either passed to the constructor or a fresh ElementalVectorStore.
    private volatile VectorStore elementalTermVectors;
    // Only created for EMBEDDINGS encoding with INMEMORY document indexing; written out keyed by doc id.
    private volatile VectorStoreRAM embeddingDocVectors;
    // Only created when flagConfig.subword_embeddings() is set; looked up by character n-gram.
    private volatile CompressedVectorStoreRAM subwordEmbeddingVectors;
    // Index access helper, created at the start of trainTermTermVectors().
    private LuceneUtils luceneUtils;
    // Filled by initializeNumberRepresentations() for non-REAL vector types.
    private VectorStoreRAM positionalNumberVectors;
    // Unseeded generator used for initial vectors and subsampling decisions.
    private Random random;
    // Maps a running cumulative weight (see totalPool) to the term that ends at that weight.
    // NOTE(review): presumably used to draw weighted random terms (negative sampling) - the reader is outside this view.
    private ConcurrentSkipListMap<Double, String> termDic;
    // Keyed by "field:term"; value is the probability that an occurrence of the term is dropped.
    private ConcurrentHashMap<String, Double> subsamplingProbabilities;
    // Work queue of per-document term vectors consumed via drawFromQueue().
    private ConcurrentLinkedQueue<DocIdTerms> theQ;
    // Running sum of (optionally discounted) global term frequencies raised to the power 0.5.
    private double totalPool = 0.0;
    // Sum of global term frequencies over all terms that pass the term filter.
    private long totalCount = 0L;
    // Starting learning rate; the 0.05 default is overwritten from flagConfig in the constructor.
    private double initial_alpha;
    // Current learning rate; starts equal to initial_alpha.
    private double alpha = this.initial_alpha = 0.05;
    // Evaluated at field-initialisation time, i.e. from the 0.05 default; the visible constructor
    // reassigns initial_alpha afterwards without recomputing this value.
    private double minimum_alpha = 1.0E-4 * this.initial_alpha;
    // Reset to zero in trainTermTermVectors(); its other uses are outside this view.
    private AtomicInteger totalDocCount = new AtomicInteger();
    // Incremented in populateQueue() and compared against the document count to decide when to stop refilling.
    private AtomicInteger totalQueueCount = new AtomicInteger();
    // Lookup table used by processEmbeddings for scalar products within [-6, 6].
    private SigmoidTable sigmoidTable = new SigmoidTable(6, 1000);
    // totalCount divided by the number of documents (integer division).
    private long tpd_average;
    // Permutations keyed by integer window offset; inverse permutations are keyed by "_" + offset.
    private VectorStoreRAM permutationCache;
    // Shuffled first-document ids of the chunks that populateQueue() loads, one chunk per call.
    private ConcurrentLinkedQueue<Integer> randomStartpoints;

    /**
     * Returns the store holding the semantic term vectors of this instance.
     *
     * @return the current value of the {@code semanticTermVectors} field
     */
    public VectorStore getSemanticTermVectors() {
        final VectorStore trainedTermVectors = this.semanticTermVectors;
        return trainedTermVectors;
    }

    /**
     * Rebuilds {@code randomStartpoints} with the first document id of every chunk of
     * {@code incrementSize} documents, in shuffled order.
     *
     * @param incrementSize number of documents per chunk; a value of zero makes the division below
     *                      throw {@link ArithmeticException}
     */
    private void initializeRandomizationStartpoints(int incrementSize) {
        this.randomStartpoints = new ConcurrentLinkedQueue<Integer>();

        // Ceiling division: a trailing partial chunk still needs its own start point.
        int chunkCount = this.luceneUtils.getNumDocs() / incrementSize;
        if (this.luceneUtils.getNumDocs() % incrementSize > 0) {
            chunkCount++;
        }

        ArrayList<Integer> chunkStarts = new ArrayList<Integer>();
        int chunkIndex = 0;
        while (chunkIndex < chunkCount) {
            chunkStarts.add(chunkIndex * incrementSize);
            chunkIndex++;
        }

        Collections.shuffle(chunkStarts);
        this.randomStartpoints.addAll(chunkStarts);
    }

    /**
     * Lists the character n-grams of a term after wrapping it in {@code <} and {@code >}.
     *
     * <p>For every length from {@code flagConfig.minimum_ngram_length()} up to and including
     * {@code flagConfig.maximum_ngram_length()}, all substrings of that length are emitted from left
     * to right. The one substring that equals the whole bracketed term is left out.
     *
     * @param incomingString the term to decompose (without brackets)
     * @return the n-grams, shortest lengths first; duplicates are kept
     */
    public ArrayList<String> getComponentNgrams(String incomingString) {
        final String bracketed = "<" + incomingString + ">";
        final ArrayList<String> ngrams = new ArrayList<String>();

        int size = this.flagConfig.minimum_ngram_length();
        while (size <= this.flagConfig.maximum_ngram_length()) {
            final int lastStart = bracketed.length() - size;
            // A substring can only equal the whole bracketed term when it has the same length.
            final boolean coversWholeTerm = size == bracketed.length();
            for (int start = 0; start <= lastStart; start++) {
                final String piece = bracketed.substring(start, start + size);
                if (!coversWholeTerm) {
                    ngrams.add(piece);
                }
            }
            size++;
        }
        return ngrams;
    }

    public TermTermVectorsFromLucene(FlagConfig flagConfig, VectorStore elementalTermVectors) throws IOException {
        this.flagConfig = flagConfig;
        this.random = new Random();
        this.alpha = this.initial_alpha = flagConfig.initial_alpha();
        if (flagConfig.subword_embeddings()) {
            VerbatimLogger.info("Using subword embeddings\n");
            this.subwordEmbeddingVectors = new CompressedVectorStoreRAM(flagConfig);
        }
        if (elementalTermVectors != null) {
            this.retraining = true;
            this.elementalTermVectors = elementalTermVectors;
            VerbatimLogger.info("Reusing basic term vectors; number of terms: " + elementalTermVectors.getNumVectors() + "\n");
            if (flagConfig.encodingmethod().equals((Object)EncodingMethod.EMBEDDINGS)) {
                this.semanticTermVectors = new VectorStoreRAM(flagConfig);
                this.semanticTermVectors.initFromFile(flagConfig.initialtermvectors().replaceAll("elemental", "embedding"));
            }
        } else {
            this.elementalTermVectors = new ElementalVectorStore(flagConfig);
        }
        if (flagConfig.encodingmethod().equals((Object)EncodingMethod.EMBEDDINGS)) {
            if (!flagConfig.vectortype().equals((Object)VectorType.BINARY)) {
                flagConfig.seedlength = flagConfig.dimension();
                VerbatimLogger.info("Setting seedlength=dimensionality, to initialize embedding weights");
            } else {
                VerbatimLogger.info("Warning: binary vector embeddings are in the experimental phase");
            }
        }
        if (flagConfig.positionalmethod() == PositionalMethod.PERMUTATION || flagConfig.positionalmethod() == PositionalMethod.PERMUTATIONPLUSBASIC) {
            this.initializePermutations();
        } else if (flagConfig.positionalmethod() == PositionalMethod.DIRECTIONAL) {
            this.initializeDirectionalPermutations();
        } else if (flagConfig.positionalmethod() == PositionalMethod.PROXIMITY) {
            this.initializeNumberRepresentations();
        }
        this.trainTermTermVectors();
    }

    /**
     * Fills {@code permutationCache} with one permutation per window offset in
     * {@code [-windowradius, windowradius]}.
     *
     * <p>Offset 0 maps to the identity permutation. Every other offset gets a random permutation
     * under the integer key and its inverse under the string key {@code "_" + offset}.
     */
    private void initializePermutations() {
        // The store is created while the config reports PERMUTATION, then the type is restored.
        VectorType configuredType = this.flagConfig.vectortype();
        this.flagConfig.setVectortype(VectorType.PERMUTATION);
        this.permutationCache = new VectorStoreRAM(this.flagConfig);
        this.flagConfig.setVectortype(configuredType);

        int offset = -1 * this.flagConfig.windowradius();
        while (offset <= this.flagConfig.windowradius()) {
            if (offset != 0) {
                int[] forward = PermutationUtils.getRandomPermutation(this.flagConfig.vectortype(), this.flagConfig.dimension());
                this.permutationCache.putVector(offset, new PermutationVector(forward));
                int[] stored = ((PermutationVector)this.permutationCache.getVector(offset)).getCoordinates();
                this.permutationCache.putVector("_" + offset, new PermutationVector(PermutationUtils.getInversePermutation(stored)));
            } else {
                int[] identity = new int[this.flagConfig.dimension()];
                for (int slot = 0; slot < this.flagConfig.dimension(); ++slot) {
                    identity[slot] = slot;
                }
                this.permutationCache.putVector(0, new PermutationVector(identity));
            }
            ++offset;
        }
    }

    /**
     * Loads the next chunk of documents into {@code theQ}.
     *
     * <p>One start point is taken from {@code randomStartpoints} and up to {@code qsize} consecutive
     * documents from there are read. For each document, the term vector of every contents field is
     * queued (fields without a term vector are skipped). When every document has been visited or no
     * start points remain, nothing is loaded; if the queue is also empty, {@code exhaustedQ} is set.
     *
     * <p>{@code totalQueueCount} is advanced once per document. It was previously advanced once per
     * (document, field) pair while being compared against the document count, so with several
     * contents fields refilling stopped after only a fraction of the index had been queued.
     */
    private synchronized void populateQueue() {
        if (this.totalQueueCount.get() >= this.luceneUtils.getNumDocs() || this.randomStartpoints.isEmpty()) {
            if (this.theQ.isEmpty()) {
                this.exhaustedQ.set(true);
            }
            return;
        }
        int added = 0;
        // Safe to unbox: the emptiness check above and this poll run under the same monitor.
        int startdoc = this.randomStartpoints.poll();
        int stopdoc = Math.min(startdoc + this.qsize, this.luceneUtils.getNumDocs());
        for (int docID = startdoc; docID < stopdoc; ++docID) {
            for (String field : this.flagConfig.contentsfields()) {
                try {
                    Terms incomingTermVector = this.luceneUtils.getTermVector(docID, field);
                    if (incomingTermVector == null) continue;
                    this.theQ.add(new DocIdTerms(docID, incomingTermVector));
                    ++added;
                }
                catch (IOException e) {
                    // Best effort: a field that cannot be read is skipped, but say which one it was.
                    System.err.println("Could not read term vector for document " + docID + ", field " + field);
                    e.printStackTrace();
                }
            }
            // Count the document itself, independent of how many fields it contributed.
            this.totalQueueCount.incrementAndGet();
        }
        if (added > 0) {
            System.err.println("Initialized TermVector Queue with " + added + " documents");
        }
    }

    /**
     * Takes the next queued document, refilling the queue first when it is empty.
     *
     * @return the head of {@code theQ}, or {@code null} if the queue is still empty after the refill attempt
     */
    private synchronized DocIdTerms drawFromQueue() {
        boolean needsRefill = this.theQ.isEmpty();
        if (needsRefill) {
            this.populateQueue();
        }
        return this.theQ.poll();
    }

    /**
     * Reports whether {@code populateQueue()} has flagged the document queue as exhausted.
     *
     * @return the current value of {@code exhaustedQ}
     */
    private boolean queueExhausted() {
        final boolean exhausted = this.exhaustedQ.get();
        return exhausted;
    }

    /**
     * Prepares the positional encoding used for the PROXIMITY method.
     *
     * <p>REAL vectors delegate to {@code initializeProximityPermutations()}. For any other vector type,
     * number vectors for 1 through {@code 2 * windowradius + 2} are generated and written to
     * "numbervectors.bin"; a failure to write is printed and otherwise ignored.
     */
    private void initializeNumberRepresentations() {
        boolean realVectors = this.flagConfig.vectortype().equals(VectorType.REAL);
        if (realVectors) {
            this.initializeProximityPermutations();
            return;
        }

        NumberRepresentation generator = new NumberRepresentation(this.flagConfig);
        int highestNumber = 2 * this.flagConfig.windowradius() + 2;
        this.positionalNumberVectors = generator.getNumberVectors(1, highestNumber);
        try {
            VectorStoreWriter.writeVectorsInLuceneFormat("numbervectors.bin", this.flagConfig, this.positionalNumberVectors);
        }
        catch (IOException writeFailure) {
            writeFailure.printStackTrace();
        }
    }

    /**
     * Fills {@code permutationCache} with permutations in which each offset is derived from its neighbour.
     *
     * <p>Offset 0 is the identity; offsets 1 and -1 are independent random permutations. Each further
     * offset is produced by {@code PermutationUtils.getSwapPermutation(..., 0.25)} from the permutation
     * of the offset one step closer to zero. Inverses are stored under {@code "_" + offset}.
     */
    private void initializeProximityPermutations() {
        // The store is created while the config reports PERMUTATION, then the type is restored.
        VectorType configuredType = this.flagConfig.vectortype();
        this.flagConfig.setVectortype(VectorType.PERMUTATION);
        this.permutationCache = new VectorStoreRAM(this.flagConfig);
        this.flagConfig.setVectortype(configuredType);

        int[] identity = new int[this.flagConfig.dimension()];
        for (int slot = 0; slot < this.flagConfig.dimension(); ++slot) {
            identity[slot] = slot;
        }
        this.permutationCache.putVector(0, new PermutationVector(identity));

        // The random draw for +1 happens before the one for -1, as in the original ordering.
        for (int seedOffset = 1; seedOffset >= -1; seedOffset -= 2) {
            this.permutationCache.putVector(seedOffset, new PermutationVector(PermutationUtils.getRandomPermutation(this.flagConfig.vectortype(), this.flagConfig.dimension())));
            int[] stored = ((PermutationVector)this.permutationCache.getVector(seedOffset)).getCoordinates();
            this.permutationCache.putVector("_" + seedOffset, new PermutationVector(PermutationUtils.getInversePermutation(stored)));
        }

        // Walk outwards on the negative side, deriving each offset from the one before it.
        int offset = -2;
        while (offset >= -1 * this.flagConfig.windowradius()) {
            int[] derived = PermutationUtils.getSwapPermutation(this.flagConfig.vectortype(), ((PermutationVector)this.permutationCache.getVector(offset + 1)).getCoordinates(), 0.25);
            this.permutationCache.putVector(offset, new PermutationVector(derived));
            this.permutationCache.putVector("_" + offset, new PermutationVector(PermutationUtils.getInversePermutation(derived)));
            --offset;
        }

        // Then the same on the positive side.
        offset = 2;
        while (offset <= this.flagConfig.windowradius()) {
            int[] derived = PermutationUtils.getSwapPermutation(this.flagConfig.vectortype(), ((PermutationVector)this.permutationCache.getVector(offset - 1)).getCoordinates(), 0.25);
            this.permutationCache.putVector(offset, new PermutationVector(derived));
            this.permutationCache.putVector("_" + offset, new PermutationVector(PermutationUtils.getInversePermutation(derived)));
            ++offset;
        }
    }

    /**
     * Fills {@code permutationCache} with the three permutations used by the DIRECTIONAL method:
     * a random one for -1, the identity for 0 and a random one for +1. The inverses of the two
     * random permutations are stored under "_-1" and "_1".
     */
    private void initializeDirectionalPermutations() {
        // The store is created while the config reports PERMUTATION, then the type is restored.
        VectorType configuredType = this.flagConfig.vectortype();
        this.flagConfig.setVectortype(VectorType.PERMUTATION);
        this.permutationCache = new VectorStoreRAM(this.flagConfig);
        this.flagConfig.setVectortype(configuredType);

        // Direction -1 is drawn first, then the identity is stored, then direction +1.
        for (int direction = -1; direction <= 1; ++direction) {
            if (direction == 0) {
                int[] identity = new int[this.flagConfig.dimension()];
                for (int slot = 0; slot < this.flagConfig.dimension(); ++slot) {
                    identity[slot] = slot;
                }
                this.permutationCache.putVector(0, new PermutationVector(identity));
            } else {
                this.permutationCache.putVector(direction, new PermutationVector(PermutationUtils.getRandomPermutation(this.flagConfig.vectortype(), this.flagConfig.dimension())));
                int[] stored = ((PermutationVector)this.permutationCache.getVector(direction)).getCoordinates();
                this.permutationCache.putVector("_" + direction, new PermutationVector(PermutationUtils.getInversePermutation(stored)));
            }
        }
    }

    /**
     * Runs the complete training pipeline and writes the resulting vector stores.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Open the index and verify that it stores term vectors.</li>
     *   <li>Walk every term of every contents field that passes the term filter, creating its semantic
     *       vector (random for EMBEDDINGS, zero otherwise) and making sure an elemental vector exists.</li>
     *   <li>If a sampling threshold strictly between 0 and 1 is configured, compute per-term subsampling
     *       probabilities; in either case fill {@code termDic} with cumulative sqrt-frequency weights.</li>
     *   <li>Run {@code trainingcycles() + 1} passes, each with {@code numthreads()} worker threads fed
     *       from the document queue.</li>
     *   <li>Optionally fold subword vectors into the term vectors and normalize.</li>
     *   <li>Write elemental vectors, the permutation cache (if any) and, for in-memory EMBEDDINGS
     *       document indexing, the document vectors.</li>
     * </ol>
     *
     * <p>Fixes relative to the previous version: the chunk size is never reduced to zero for indexes
     * with fewer than ten documents (which made chunk planning divide by zero), the per-document
     * average no longer divides by zero on an empty index, and the document-vector output stream and
     * directory are closed even when writing fails.
     *
     * @throws IOException if the index has no term vectors or any read/write fails
     */
    private void trainTermTermVectors() throws IOException, RuntimeException {
        this.luceneUtils = new LuceneUtils(this.flagConfig);
        this.termDic = new ConcurrentSkipListMap<>();
        if (this.flagConfig.encodingmethod().equals(EncodingMethod.EMBEDDINGS) && this.flagConfig.docindexing().equals(DocVectors.DocIndexingStrategy.INMEMORY)) {
            this.embeddingDocVectors = new VectorStoreRAM(this.flagConfig);
        }
        this.totalPool = 0.0;
        FieldInfos fieldsWithPositions = this.luceneUtils.getFieldInfos();
        if (!fieldsWithPositions.hasVectors()) {
            throw new IOException("Term-term indexing requires a Lucene index containing TermPositionVectors.\nTry rebuilding Lucene index using pitt.search.lucene.IndexFilePositions");
        }
        // May already have been loaded from file by the constructor when retraining embeddings.
        if (this.semanticTermVectors == null) {
            this.semanticTermVectors = new VectorStoreRAM(this.flagConfig);
        }

        // Pass 1: vocabulary. Create vectors and accumulate frequency statistics.
        int tc = 0;
        for (String fieldName : this.flagConfig.contentsfields()) {
            BytesRef bytes;
            TermsEnum terms = this.luceneUtils.getTermsForField(fieldName).iterator();
            while ((bytes = terms.next()) != null) {
                Term term = new Term(fieldName, bytes);
                if (!this.luceneUtils.termFilter(term)) continue;
                ++tc;
                this.totalCount += (long)this.luceneUtils.getGlobalTermFreq(term);
                // Without subsampling the cumulative weights are built here; otherwise in pass 2 below.
                if (this.flagConfig.samplingthreshold() <= 0.0 || this.flagConfig.samplingthreshold() >= 1.0) {
                    this.totalPool += Math.pow(this.luceneUtils.getGlobalTermFreq(term), 0.5);
                    this.termDic.put(this.totalPool, term.text());
                }
                Vector termVector = this.flagConfig.encodingmethod().equals(EncodingMethod.EMBEDDINGS) ? VectorFactory.generateRandomVector(this.flagConfig.vectortype(), this.flagConfig.dimension(), this.flagConfig.seedlength(), this.random) : VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension());
                if (!this.semanticTermVectors.containsVector(term.text())) {
                    this.semanticTermVectors.putVector(term.text(), termVector);
                }
                if (!this.retraining) {
                    // Result intentionally discarded.
                    // NOTE(review): presumably ElementalVectorStore creates the vector on first lookup - confirm.
                    this.elementalTermVectors.getVector(term.text());
                    continue;
                }
                // Retraining embeddings: terms missing from the supplied store get a fresh random vector.
                if (!this.retraining || !this.flagConfig.encodingmethod().equals(EncodingMethod.EMBEDDINGS) || this.elementalTermVectors.containsVector(term.text())) continue;
                ((VectorStoreRAM)this.elementalTermVectors).putVector(term.text(), VectorFactory.generateRandomVector(this.flagConfig.vectortype(), this.flagConfig.dimension(), this.flagConfig.seedlength, this.random));
            }
        }
        VerbatimLogger.info("\nNumber term vectors " + this.semanticTermVectors.getNumVectors() + "\t" + this.elementalTermVectors.getNumVectors());

        int numDocs = this.luceneUtils.getNumDocs();
        // Guard against an empty index, which previously raised ArithmeticException here.
        this.tpd_average = numDocs > 0 ? this.totalCount / (long)numDocs : 0L;

        // Pass 2 (only with subsampling): per-term drop probabilities and cumulative weights.
        if (this.flagConfig.samplingthreshold() > 0.0 && this.flagConfig.samplingthreshold() < 1.0) {
            this.subsamplingProbabilities = new ConcurrentHashMap<>();
            VerbatimLogger.info("Populating subsampling probabilities - total term count = " + this.totalCount + " which is " + this.tpd_average + " per doc on average");
            int count = 0;
            for (String fieldName : this.flagConfig.contentsfields()) {
                BytesRef bytes;
                TermsEnum terms = this.luceneUtils.getTermsForField(fieldName).iterator();
                while ((bytes = terms.next()) != null) {
                    Term term = new Term(fieldName, bytes);
                    if (++count % 10000 == 0) {
                        VerbatimLogger.info(".");
                    }
                    if (!this.semanticTermVectors.containsVector(term.text())) continue;
                    double globalFreq = (double)this.luceneUtils.getGlobalTermFreq(term) / (double)this.totalCount;
                    double subdiscount = 1.0;
                    if (globalFreq > this.flagConfig.samplingthreshold()) {
                        double subsample_probability = 1.0 - Math.sqrt(this.flagConfig.samplingthreshold() / globalFreq);
                        if (this.flagConfig.aggressivesubsampling()) {
                            subsample_probability = 1.0 - (Math.sqrt(this.flagConfig.samplingthreshold() / globalFreq) + this.flagConfig.samplingthreshold() / globalFreq);
                        }
                        this.subsamplingProbabilities.put(fieldName + ":" + bytes.utf8ToString(), subsample_probability);
                        if (this.flagConfig.discountnegativesampling()) {
                            subdiscount = 1.0 - subsample_probability;
                        }
                    }
                    this.totalPool += Math.pow(subdiscount * (double)this.luceneUtils.getGlobalTermFreq(term), 0.5);
                    this.termDic.put(this.totalPool, term.text());
                }
            }
            VerbatimLogger.info("\n");
            if (this.subsamplingProbabilities != null && this.subsamplingProbabilities.size() > 0) {
                VerbatimLogger.info("Selected for subsampling: " + this.subsamplingProbabilities.size() + " terms.\n");
            }
        }
        VerbatimLogger.info("There are now elemental term vectors for " + tc + " terms (and " + numDocs + " docs).\n");
        this.totalDocCount.set(0);
        if (this.qsize > numDocs) {
            // numDocs / 10 is 0 for fewer than ten documents; a zero chunk size made
            // initializeRandomizationStartpoints divide by zero, so keep it at least 1.
            this.qsize = Math.max(1, numDocs / 10);
        }

        // Training passes. Note the inclusive bound: trainingcycles() + 1 passes are run.
        for (int trainingcycle = 0; trainingcycle <= this.flagConfig.trainingcycles(); ++trainingcycle) {
            this.initializeRandomizationStartpoints(this.qsize);
            this.exhaustedQ.set(false);
            this.theQ = new ConcurrentLinkedQueue<>();
            this.totalQueueCount.set(0);
            this.populateQueue();
            double cycleStart = System.currentTimeMillis();
            int numthreads = this.flagConfig.numthreads();
            ExecutorService executor = Executors.newFixedThreadPool(numthreads);
            for (int q = 0; q < numthreads; ++q) {
                executor.execute(new TrainTermVectorThread(q));
                VerbatimLogger.info("Started thread " + q + "\n");
            }
            executor.shutdown();
            // This thread acts as the producer: keep the queue at least half full until the workers finish.
            // NOTE(review): this is a busy-wait loop; it spins without sleeping while the queue is full enough.
            while (!executor.isTerminated()) {
                if (this.theQ.size() >= this.qsize / 2) continue;
                this.populateQueue();
            }
            VerbatimLogger.info("\nTime for training cycle " + ((double)System.currentTimeMillis() - cycleStart) + "ms \n");
            VerbatimLogger.info("\nProcessed " + this.totalQueueCount.get() + " documents");
        }
        VerbatimLogger.info("\nCreated " + this.semanticTermVectors.getNumVectors() + " term vectors ...\n");

        // Post-process semantic vectors: fold in subword vectors and/or normalize.
        if (this.flagConfig.encodingmethod().equals(EncodingMethod.EMBEDDINGS) && (!this.flagConfig.notnormalized || this.flagConfig.subword_embeddings())) {
            Enumeration<ObjectVector> g = this.semanticTermVectors.getAllVectors();
            while (g.hasMoreElements()) {
                ObjectVector nextObjectVector = g.nextElement();
                if (this.flagConfig.subword_embeddings()) {
                    ArrayList<String> subwordStrings = this.getComponentNgrams(nextObjectVector.getObject().toString());
                    // Unbalanced: word and each n-gram share the same weight 1/(n+1).
                    // Balanced: the word gets weight 1 and the n-grams share weight 1 between them.
                    float weightReduction = 1.0f / ((float)subwordStrings.size() + 1.0f);
                    Vector wordVec = VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension());
                    if (this.flagConfig.balanced_subwords()) {
                        weightReduction = 1.0f;
                    }
                    wordVec.superpose(nextObjectVector.getVector(), weightReduction, null);
                    if (this.flagConfig.balanced_subwords()) {
                        weightReduction = 1.0f / (float)subwordStrings.size();
                    }
                    for (String subword : subwordStrings) {
                        Vector subwordVector = this.subwordEmbeddingVectors.getVector(subword, false);
                        wordVec.superpose(subwordVector, weightReduction, null);
                    }
                    nextObjectVector.setVector(wordVec);
                }
                if (this.flagConfig.notnormalized()) continue;
                nextObjectVector.getVector().normalize();
            }
        }

        // Operator precedence here is A || B || (C && D); it is kept exactly as found.
        if (this.flagConfig.encodingmethod().equals(EncodingMethod.EMBEDDINGS) || this.flagConfig.positionalmethod() == PositionalMethod.PERMUTATION || this.flagConfig.positionalmethod() == PositionalMethod.PERMUTATIONPLUSBASIC && (!this.retraining || this.flagConfig.encodingmethod().equals(EncodingMethod.EMBEDDINGS))) {
            VerbatimLogger.info("Normalizing and writing elemental vectors to " + this.flagConfig.elementalvectorfile() + "\n");
            Enumeration<ObjectVector> f = this.elementalTermVectors.getAllVectors();
            if (!this.flagConfig.notnormalized()) {
                while (f.hasMoreElements()) {
                    f.nextElement().getVector().normalize();
                }
            }
        }
        // The elemental vectors are written regardless of the condition above.
        VectorStoreWriter.writeVectorsInLuceneFormat(this.flagConfig.elementalvectorfile() + ".bin", this.flagConfig, this.elementalTermVectors);
        if (this.permutationCache != null) {
            // The writer takes the vector type from the config, so switch it for the duration of the write.
            VectorType typeA = this.flagConfig.vectortype();
            this.flagConfig.setVectortype(VectorType.PERMUTATION);
            VectorStoreWriter.writeVectorsInLuceneFormat(this.flagConfig.permutationcachefile() + ".bin", this.flagConfig, this.permutationCache);
            this.flagConfig.setVectortype(typeA);
        }
        if (this.flagConfig.encodingmethod().equals(EncodingMethod.EMBEDDINGS) && this.flagConfig.docindexing().equals(DocVectors.DocIndexingStrategy.INMEMORY)) {
            Enumeration<ObjectVector> f = this.embeddingDocVectors.getAllVectors();
            File vectorFile = new File(VectorStoreUtils.getStoreFileName(this.flagConfig.docvectorsfile(), this.flagConfig));
            String parentPath = vectorFile.getParent();
            if (parentPath == null) {
                parentPath = "";
            }
            // Resources close in reverse order (stream, then directory), also when a write throws.
            try (FSDirectory fsDirectory = FSDirectory.open(FileSystems.getDefault().getPath(parentPath, new String[0]));
                 IndexOutput outputStream = fsDirectory.createOutput(vectorFile.getName(), IOContext.DEFAULT)) {
                VerbatimLogger.info("Writing vectors incrementally to file " + vectorFile + " ... ");
                outputStream.writeString(VectorStoreWriter.generateHeaderString(this.flagConfig));
                while (f.hasMoreElements()) {
                    ObjectVector nextObjectVector = f.nextElement();
                    Vector nextVector = nextObjectVector.getVector();
                    if (!this.flagConfig.notnormalized()) {
                        nextVector.normalize();
                    }
                    int docID = (Integer)nextObjectVector.getObject();
                    String docName = "" + docID;
                    outputStream.writeString(docName);
                    nextVector.writeToLuceneStream(outputStream);
                }
                VerbatimLogger.info("Finished writing vectors.\n");
            }
        }
    }

    /**
     * Performs one gradient-style update of a single embedding vector against a list of context vectors.
     *
     * <p>For each context vector the scalar product with the embedding is compared to the context's
     * label to obtain an error. For non-binary vectors the prediction is the table sigmoid, treated
     * as 1 above 6.0 and as 0 below -6.0. For binary vectors the prediction is the scalar product
     * floored at zero and the error is scaled by 100 and rounded. A non-zero error updates both the
     * context vector and the embedding, the latter from a copy of the context taken before the update.
     *
     * @param embeddingVector    vector being trained; modified in place
     * @param contextVectors     context vectors; modified in place
     * @param contextLabels      one label per context vector, in the same order
     * @param learningRate       step size multiplied into every update
     * @param blas               BLAS instance handed through to {@code VectorUtils}
     * @param permutation        permutation handed to the scalar product and the embedding update; may be null
     * @param inversePermutation permutation handed to the context update; may be null
     */
    private void processEmbeddings(Vector embeddingVector, ArrayList<Vector> contextVectors, ArrayList<Integer> contextLabels, double learningRate, BLAS blas, int[] permutation, int[] inversePermutation) {
        int labelIndex = 0;
        for (Vector contextVec : contextVectors) {
            // Snapshot taken so the embedding update uses the context as it was before this step.
            Vector contextSnapshot = contextVec.copy();
            double similarity = VectorUtils.scalarProduct(embeddingVector, contextSnapshot, this.flagConfig, blas, permutation);
            boolean binaryVectors = this.flagConfig.vectortype().equals(VectorType.BINARY);
            int label = contextLabels.get(labelIndex++);

            double error;
            if (binaryVectors) {
                similarity = Math.max(similarity, 0.0);
                error = (double)label - similarity;
                error = Math.round(error * 100.0);
            } else if (similarity > 6.0) {
                error = (double)(label - 1);
            } else if (similarity < -6.0) {
                error = (double)label;
            } else {
                error = (double)label - this.sigmoidTable.sigmoid(similarity);
            }

            if (error != 0.0) {
                double step = learningRate * error;
                VectorUtils.superposeInPlace(embeddingVector, contextVec, this.flagConfig, blas, step, inversePermutation);
                VectorUtils.superposeInPlace(contextSnapshot, embeddingVector, this.flagConfig, blas, step, permutation);
            }
        }
    }

    /**
     * Performs one gradient-style update for a term represented by several vectors
     * (e.g. a word vector followed by its subword vectors).
     *
     * <p>The component vectors are first combined into one composite vector. With
     * {@code balanced_subwords} the first component has weight 1 and the rest weight 1/size;
     * otherwise every component has weight 1/size. The error for each context vector is computed
     * against the composite exactly as in the single-vector overload, except that the sigmoid error
     * is narrowed to float precision. A non-zero error updates the context vector from the composite
     * and every component vector from a pre-update copy of the context; in that update the first
     * component always uses weight 1, later components use 1/size only when {@code balanced_subwords}
     * is set and weight 1 otherwise.
     *
     * @param embeddingVectors   component vectors of the term; each is modified in place
     * @param contextVectors     context vectors; modified in place
     * @param contextLabels      one label per context vector, in the same order
     * @param learningRate       step size multiplied into every update
     * @param blas               BLAS instance handed through to {@code VectorUtils}
     * @param permutation        permutation handed to the scalar product and the component updates; may be null
     * @param inversePermutation permutation handed to the context update; may be null
     */
    private void processEmbeddings(ArrayList<Vector> embeddingVectors, ArrayList<Vector> contextVectors, ArrayList<Integer> contextLabels, double learningRate, BLAS blas, int[] permutation, int[] inversePermutation) {
        Vector composite = VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension());
        for (int component = 0; component < embeddingVectors.size(); ++component) {
            boolean keepsFullWeight = this.flagConfig.balanced_subwords && component == 0;
            float mixWeight = keepsFullWeight ? 1.0f : 1.0f / (float)embeddingVectors.size();
            composite.superpose(embeddingVectors.get(component), mixWeight, null);
        }

        int labelIndex = 0;
        for (Vector contextVec : contextVectors) {
            // Snapshot taken so the component updates use the context as it was before this step.
            Vector contextSnapshot = contextVec.copy();
            double similarity = VectorUtils.scalarProduct(composite, contextSnapshot, this.flagConfig, blas, permutation);
            boolean binaryVectors = this.flagConfig.vectortype().equals(VectorType.BINARY);
            int label = contextLabels.get(labelIndex++);

            double error;
            if (binaryVectors) {
                similarity = Math.max(similarity, 0.0);
                error = (double)label - similarity;
                error = Math.round(error * 100.0);
            } else if (similarity > 6.0) {
                error = (double)(label - 1);
            } else if (similarity < -6.0) {
                error = (double)label;
            } else {
                // Deliberately narrowed to float and widened again, matching the existing arithmetic.
                error = (double)((float)((double)label - this.sigmoidTable.sigmoid(similarity)));
            }

            if (error == 0.0) {
                continue;
            }

            VectorUtils.superposeInPlace(composite, contextVec, this.flagConfig, blas, learningRate * error, inversePermutation);
            for (int component = 0; component < embeddingVectors.size(); ++component) {
                boolean sharesWeight = this.flagConfig.balanced_subwords && component > 0;
                float updateWeight = sharesWeight ? 1.0f / (float)embeddingVectors.size() : 1.0f;
                VectorUtils.superposeInPlace(contextSnapshot, embeddingVectors.get(component), this.flagConfig, blas, (double)updateWeight * learningRate * error, permutation);
            }
        }
    }

    private void processTermPositionVector(DocIdTerms terms, String field, BLAS blas) throws ArrayIndexOutOfBoundsException, IOException {
        BytesRef text;
        if (terms == null) {
            return;
        }
        Hashtable<Integer, String> localTermPositions = new Hashtable<Integer, String>();
        ArrayList<Integer> thePositions = new ArrayList<Integer>();
        TermsEnum termsEnum = terms.terms.iterator();
        Integer docID = terms.docID;
        while ((text = termsEnum.next()) != null) {
            PostingsEnum docsAndPositions;
            String theTerm = text.utf8ToString();
            if (!this.semanticTermVectors.containsVector(theTerm) || (docsAndPositions = termsEnum.postings(null)) == null) continue;
            docsAndPositions.nextDoc();
            int freq = docsAndPositions.freq();
            for (int x = 0; x < freq; ++x) {
                int thePosition = docsAndPositions.nextPosition();
                if (this.subsamplingProbabilities != null && this.subsamplingProbabilities.containsKey(field + ":" + theTerm)) {
                    StringBuilder stringBuilder = new StringBuilder();
                    if (this.random.nextDouble() <= this.subsamplingProbabilities.get(stringBuilder.append(field).append(":").append(theTerm).toString())) {
                        if (!this.flagConfig.exactwindowpositions()) continue;
                        localTermPositions.put(thePosition, "_BLANK_");
                        thePositions.add(thePosition);
                        continue;
                    }
                }
                localTermPositions.put(thePosition, theTerm);
                thePositions.add(thePosition);
            }
        }
        Collections.sort(thePositions);
        for (int occupiedPositionNumber = 0; occupiedPositionNumber < thePositions.size(); ++occupiedPositionNumber) {
            int focusposn = (Integer)thePositions.get(occupiedPositionNumber);
            String focusterm = (String)localTermPositions.get(focusposn);
            if (this.flagConfig.exactwindowpositions() && focusterm.equals("_BLANK_")) continue;
            int effectiveWindowRadius = this.flagConfig.windowradius();
            if (this.flagConfig.subsampleinwindow) {
                effectiveWindowRadius = this.random.nextInt(this.flagConfig.windowradius()) + 1;
            }
            int windowstart = Math.max(0, occupiedPositionNumber - effectiveWindowRadius);
            int windowend = Math.min(occupiedPositionNumber + effectiveWindowRadius, thePositions.size() - 1);
            for (int cursorPositionNumber = windowstart; cursorPositionNumber <= windowend; ++cursorPositionNumber) {
                if (cursorPositionNumber == occupiedPositionNumber && (!this.flagConfig.encodingmethod().equals((Object)EncodingMethod.EMBEDDINGS) || !this.flagConfig.docindexing().equals((Object)DocVectors.DocIndexingStrategy.INMEMORY))) continue;
                String coterm = (String)localTermPositions.get(thePositions.get(cursorPositionNumber));
                if (this.flagConfig.exactwindowpositions() && coterm.equals("_BLANK_")) continue;
                int[] permutation = null;
                int[] inversePermutation = null;
                int desiredPermutation = cursorPositionNumber - occupiedPositionNumber;
                if (this.flagConfig.positionalmethod().equals((Object)PositionalMethod.PERMUTATION) || this.flagConfig.positionalmethod().equals((Object)PositionalMethod.PROXIMITY)) {
                    permutation = ((PermutationVector)this.permutationCache.getVector(desiredPermutation)).getCoordinates();
                    inversePermutation = ((PermutationVector)this.permutationCache.getVector("_" + desiredPermutation)).getCoordinates();
                    if (permutation == null) {
                        VerbatimLogger.info("null permutation");
                    }
                    if (inversePermutation == null) {
                        VerbatimLogger.info("null inverse permutation");
                    }
                } else if (this.flagConfig.positionalmethod().equals((Object)PositionalMethod.DIRECTIONAL)) {
                    permutation = ((PermutationVector)this.permutationCache.getVector((int)Math.signum(desiredPermutation))).getCoordinates();
                    inversePermutation = ((PermutationVector)this.permutationCache.getVector("_" + (int)Math.signum(desiredPermutation))).getCoordinates();
                    if (permutation == null) {
                        VerbatimLogger.info("null permutation");
                    }
                    if (inversePermutation == null) {
                        VerbatimLogger.info("null inverse permutation");
                    }
                }
                Vector toSuperpose = this.elementalTermVectors.getVector(coterm);
                if (this.flagConfig.encodingmethod().equals((Object)EncodingMethod.EMBEDDINGS)) {
                    ArrayList<Vector> contextVectors = new ArrayList<Vector>();
                    ArrayList<Integer> contextLabels = new ArrayList<Integer>();
                    contextVectors.add(toSuperpose);
                    contextLabels.add(1);
                    while (contextVectors.size() <= this.flagConfig.negsamples) {
                        Vector randomTerm = null;
                        double max = this.totalPool;
                        while (randomTerm == null) {
                            String testTerm;
                            double test = this.random.nextDouble() * max;
                            if (this.termDic.ceilingEntry(test) == null || (testTerm = this.termDic.ceilingEntry(test).getValue()).equals(coterm)) continue;
                            randomTerm = this.elementalTermVectors.getVector(testTerm);
                        }
                        contextVectors.add(randomTerm);
                        contextLabels.add(0);
                    }
                    if (cursorPositionNumber != occupiedPositionNumber) {
                        if (this.flagConfig.subword_embeddings()) {
                            ArrayList<String> subWords = this.getComponentNgrams(focusterm);
                            ArrayList<Vector> subWordVectors = new ArrayList<Vector>();
                            subWordVectors.add(this.semanticTermVectors.getVector(focusterm));
                            for (String subword : subWords) {
                                subWordVectors.add(this.subwordEmbeddingVectors.getVector(subword, false));
                            }
                            this.processEmbeddings(subWordVectors, contextVectors, contextLabels, this.alpha, blas, permutation, inversePermutation);
                        } else {
                            this.processEmbeddings(this.semanticTermVectors.getVector(focusterm), contextVectors, contextLabels, this.alpha, blas, permutation, inversePermutation);
                        }
                    }
                    if (!this.flagConfig.docindexing().equals((Object)DocVectors.DocIndexingStrategy.INMEMORY)) continue;
                    if (!this.embeddingDocVectors.containsVector(docID)) {
                        this.embeddingDocVectors.putVector(docID, VectorFactory.generateRandomVector(this.flagConfig.vectortype(), this.flagConfig.dimension(), this.flagConfig.seedlength, this.random));
                    }
                    this.processEmbeddings(this.embeddingDocVectors.getVector(docID), contextVectors, contextLabels, this.alpha, blas, permutation, inversePermutation);
                    continue;
                }
                float globalweight = this.luceneUtils.getGlobalTermWeight(new Term(field, coterm));
                if (this.flagConfig.positionalmethod() == PositionalMethod.BASIC || this.flagConfig.positionalmethod() == PositionalMethod.PERMUTATIONPLUSBASIC) {
                    this.semanticTermVectors.getVector(focusterm).superpose(toSuperpose, globalweight, null);
                }
                if (this.flagConfig.positionalmethod() == PositionalMethod.PERMUTATION || this.flagConfig.positionalmethod() == PositionalMethod.PERMUTATIONPLUSBASIC || this.flagConfig.positionalmethod() == PositionalMethod.PROXIMITY) {
                    Enumeration<ObjectVector> theVecs = this.permutationCache.getAllVectors();
                    permutation = ((PermutationVector)this.permutationCache.getVector(cursorPositionNumber - occupiedPositionNumber)).getCoordinates();
                    this.semanticTermVectors.getVector(focusterm).superpose(toSuperpose, globalweight, permutation);
                    continue;
                }
                if (this.flagConfig.positionalmethod() != PositionalMethod.DIRECTIONAL) continue;
                permutation = ((PermutationVector)this.permutationCache.getVector((int)Math.signum(cursorPositionNumber - occupiedPositionNumber))).getCoordinates();
                this.semanticTermVectors.getVector(focusterm).superpose(toSuperpose, globalweight, permutation);
            }
        }
        this.totalDocCount.incrementAndGet();
    }

    /**
     * Worker that keeps drawing queued documents and feeding them to
     * {@code processTermPositionVector} until the enclosing instance reports that its
     * queue is exhausted.
     *
     * <p>Every worker logs its own progress. In addition, the worker whose thread number
     * is 0 periodically recomputes the enclosing instance's {@code alpha} field when the
     * encoding method is {@link EncodingMethod#EMBEDDINGS}.
     */
    private class TrainTermVectorThread
    implements Runnable {
        /** Number of completed passes of the main loop in {@link #run()}; drives log cadence. */
        int dcnt = 0;
        /** Identifier of this worker; printed in log lines, and 0 selects the alpha updater. */
        int threadno = 0;
        /** Value of {@code System.currentTimeMillis()} at construction, stored as a double. */
        double time = 0.0;
        /** BLAS instance handed to {@code processTermPositionVector}. */
        BLAS blas = null;

        /**
         * Creates a worker and records its start time.
         *
         * @param threadno identifier of this worker
         */
        public TrainTermVectorThread(int threadno) {
            this.threadno = threadno;
            this.blas = BLAS.getInstance();
            this.time = System.currentTimeMillis();
        }

        /**
         * Processes queued documents until the queue is exhausted, logging progress and
         * (on worker 0, for embeddings) decaying the learning rate along the way.
         */
        @Override
        public void run() {
            while (!TermTermVectorsFromLucene.this.queueExhausted()) {
                for (String field : TermTermVectorsFromLucene.this.flagConfig.contentsfields()) {
                    try {
                        // A fresh queue entry is drawn for every configured field.
                        DocIdTerms terms = TermTermVectorsFromLucene.this.drawFromQueue();
                        if (terms == null) continue;
                        TermTermVectorsFromLucene.this.processTermPositionVector(terms, field, this.blas);
                    }
                    catch (IOException | ArrayIndexOutOfBoundsException e) {
                        // Best effort: a failure on one entry is reported and the worker moves on.
                        e.printStackTrace();
                    }
                }
                ++this.dcnt;

                // Report every 1000 passes up to 10000, and every 10000 passes afterwards.
                boolean progressTick = this.dcnt % 10000 == 0 || this.dcnt < 10000 && this.dcnt % 1000 == 0;
                if (progressTick) {
                    // Truncate numerically. Stripping the fraction from Double.toString() misreports
                    // sub-millisecond-scale values, which are rendered in scientific notation.
                    long elapsedMinutes = (long) (((double) System.currentTimeMillis() - this.time) / 60000.0);
                    VerbatimLogger.info("[T" + this.threadno + "] processed " + this.dcnt + " documents in " + elapsedMinutes + " min..");
                }

                // Only worker 0 adjusts alpha, only every tpd_average passes, and only for embeddings.
                if (this.threadno != 0
                        || (long) this.dcnt % TermTermVectorsFromLucene.this.tpd_average != 0L
                        || !TermTermVectorsFromLucene.this.flagConfig.encodingmethod().equals(EncodingMethod.EMBEDDINGS)) {
                    continue;
                }

                // Multiply in double precision: the integer product of cycle count and document
                // count can overflow on large indexes and would turn the proportion negative.
                double expectedTotal = (1.0 + TermTermVectorsFromLucene.this.flagConfig.trainingcycles())
                        * (double) TermTermVectorsFromLucene.this.luceneUtils.getNumDocs();
                double proportionComplete = (double) TermTermVectorsFromLucene.this.totalDocCount.get() / expectedTotal;

                // Linear decay from initial_alpha, clamped from below at minimum_alpha.
                TermTermVectorsFromLucene.this.alpha = TermTermVectorsFromLucene.this.initial_alpha * (1.0 - proportionComplete);
                if (TermTermVectorsFromLucene.this.alpha < TermTermVectorsFromLucene.this.minimum_alpha) {
                    TermTermVectorsFromLucene.this.alpha = TermTermVectorsFromLucene.this.minimum_alpha;
                }

                if (progressTick) {
                    VerbatimLogger.info("..Updated alpha to " + TermTermVectorsFromLucene.this.alpha + "..");
                }
            }
        }
    }

    /**
     * Simple holder that pairs a document id with a {@link Terms} instance, so the two can
     * travel together through the work queue to {@code processTermPositionVector}.
     */
    private class DocIdTerms {
        /** The terms associated with {@link #docID}. */
        Terms terms;
        /** Identifier of the document the terms belong to. */
        int docID;

        /**
         * @param docID identifier of the document
         * @param terms terms to associate with that document
         */
        public DocIdTerms(int docID, Terms terms) {
            this.terms = terms;
            this.docID = docID;
        }
    }

    /**
     * Selects how co-occurrence information is encoded while training term vectors.
     *
     * <p>In {@code processTermPositionVector}, {@link #EMBEDDINGS} routes each focus/context
     * pair through {@code processEmbeddings} together with randomly drawn negative samples;
     * any other value superposes the context term's elemental vector onto the focus term's
     * semantic vector, weighted by the context term's global weight.
     */
    public enum EncodingMethod {
        /** Superposition of elemental vectors (the non-embedding training path). */
        RANDOM_INDEXING,
        /** Training through {@code processEmbeddings} with negative sampling. */
        EMBEDDINGS
    }

    /**
     * Selects how the relative position of a context term is reflected when it is combined
     * with the focus term in {@code processTermPositionVector}.
     *
     * <p>"Offset" below means the difference between the context slot and the focus slot
     * within the sliding window.
     */
    public enum PositionalMethod {
        /** Context vectors are superposed without any permutation. */
        BASIC,
        /** The permutation is looked up by the sign of the offset only (before / after). */
        DIRECTIONAL,
        /** The permutation is looked up by the exact offset. */
        PERMUTATION,
        /** Performs both the {@link #BASIC} and the {@link #PERMUTATION} superposition. */
        PERMUTATIONPLUSBASIC,
        /**
         * Follows the same branches as {@link #PERMUTATION} in the training loop; any
         * difference presumably lies in how the permutation cache is populated (not
         * visible here, to be confirmed).
         */
        PROXIMITY
    }
}

