/*
 * Decompiled with CFR 0.152.
 */
package pitt.search.semanticvectors.orthography;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import pitt.search.semanticvectors.FlagConfig;
import pitt.search.semanticvectors.VectorStoreDeterministic;
import pitt.search.semanticvectors.VectorStoreOrthographical;
import pitt.search.semanticvectors.VectorStoreRAM;
import pitt.search.semanticvectors.VectorStoreWriter;
import pitt.search.semanticvectors.vectors.Vector;
import pitt.search.semanticvectors.vectors.VectorFactory;

/**
 * Builds character-level and term-level vector stores from the text of a directory of files.
 *
 * <p>Two kinds of character vectors are involved: the "elemental" vectors supplied by a
 * {@link VectorStoreDeterministic}, and the "semantic" vectors accumulated here, where the
 * vector for a character is the superposition of the elemental vectors of the characters
 * found immediately before and after it in the input. Term vectors are then produced by a
 * {@link VectorStoreOrthographical} built on top of either set of character vectors.
 *
 * <p>Characters are handled as individual UTF-16 {@code char} values, so a code point outside
 * the Basic Multilingual Plane is treated as two separate "letters".
 */
public class CharRepresentation {
    /** Charset name used for every file this class reads. */
    public static final String ENCODING = "utf8";
    private FlagConfig flagConfig;
    private VectorStoreDeterministic elementalCharVectors;
    private VectorStoreRAM semanticCharVectors;
    private VectorStoreOrthographical semanticTermVectors;

    /**
     * Creates an empty representation.
     *
     * @param config flag configuration providing the vector type and dimension; it is also
     *               handed to the underlying vector stores
     */
    public CharRepresentation(FlagConfig config) {
        this.flagConfig = config;
        this.elementalCharVectors = new VectorStoreDeterministic(this.flagConfig);
        this.semanticCharVectors = new VectorStoreRAM(this.flagConfig);
    }

    /** Returns the single {@code char} at index {@code i} of {@code source} as a String. */
    private static String letterAt(String source, int i) {
        return String.valueOf(source.charAt(i));
    }

    /**
     * Adds the neighbour context of every character in {@code inputString} to the semantic
     * character vectors: for each position, the elemental vectors of the preceding and the
     * following character (where they exist) are superposed, with weight 1.0, onto the
     * semantic vector of the character at that position.
     */
    private void addStringToCharRep(String inputString) {
        int length = inputString.length();
        for (int i = 0; i < length; ++i) {
            String letter = CharRepresentation.letterAt(inputString, i);
            // First sighting of this character: start it off with a zero vector.
            if (this.semanticCharVectors.getVector(letter) == null) {
                this.semanticCharVectors.putVector(letter, VectorFactory.createZeroVector(this.flagConfig.vectortype(), this.flagConfig.dimension()));
            }
            // The accumulation below relies on getVector returning the stored (live) vector,
            // exactly as the per-call lookups in the previous implementation did.
            Vector target = this.semanticCharVectors.getVector(letter);
            if (i > 0) {
                Vector previous = this.elementalCharVectors.getVector(CharRepresentation.letterAt(inputString, i - 1));
                target.superpose(previous, 1.0, null);
            }
            if (i < length - 1) {
                Vector next = this.elementalCharVectors.getVector(CharRepresentation.letterAt(inputString, i + 1));
                target.superpose(next, 1.0, null);
            }
        }
    }

    /**
     * Parses {@code inputFile} as HTML, splits its text content on spaces and feeds each token
     * to {@link #addStringToCharRep(String)}.
     *
     * @throws IOException if the file cannot be read or parsed
     */
    private void addHtmlFileToCharRep(File inputFile) throws IOException {
        Document doc = Jsoup.parse(inputFile, ENCODING);
        StringTokenizer tokenizer = new StringTokenizer(doc.text(), " ");
        while (tokenizer.hasMoreTokens()) {
            this.addStringToCharRep(tokenizer.nextToken());
        }
    }

    /**
     * Reads {@code inputFile} as plain text and feeds each whole line to
     * {@link #addStringToCharRep(String)}.
     *
     * <p>The file is decoded with {@link #ENCODING} (rather than the platform default charset)
     * so that it agrees with the other readers in this class, and the reader is closed even
     * when reading fails.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private void addFileToCharRep(File inputFile) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile), ENCODING))) {
            String line;
            while ((line = reader.readLine()) != null) {
                this.addStringToCharRep(line);
            }
        }
    }

    /**
     * Splits {@code text} on spaces and makes sure every token has been looked up in the
     * current term vector store.
     */
    private void addStringToTermRep(String text) {
        StringTokenizer tokenizer = new StringTokenizer(text, " ");
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (!this.semanticTermVectors.containsVector(token)) {
                // Return value intentionally unused: presumably the lookup makes the store
                // create and retain the term vector -- confirm in VectorStoreOrthographical.
                this.semanticTermVectors.getVector(token);
            }
        }
    }

    /**
     * Parses {@code inputFile} as HTML and passes its text content to
     * {@link #addStringToTermRep(String)}.
     *
     * @throws IOException if the file cannot be read or parsed
     */
    private void addHtmlFileToTermRep(File inputFile) throws IOException {
        Document doc = Jsoup.parse(inputFile, ENCODING);
        this.addStringToTermRep(doc.text());
    }

    /**
     * Reads {@code inputFile} as plain text in {@link #ENCODING} and passes each line to
     * {@link #addStringToTermRep(String)}. The reader is closed even when reading fails.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private void addFileToTermRep(File inputFile) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile), ENCODING))) {
            String line;
            while ((line = reader.readLine()) != null) {
                this.addStringToTermRep(line);
            }
        }
    }

    /**
     * Appends every regular file found under {@code rootDir}, recursively, to
     * {@code listToAppend}.
     *
     * <p>If {@code rootDir} is itself a regular file it is appended directly. A path that
     * cannot be listed (not a directory, or an I/O error while listing) contributes nothing
     * instead of causing a {@link NullPointerException}.
     *
     * @param rootDir      directory (or single file) to scan
     * @param listToAppend list that receives the files found; existing entries are kept
     */
    public static void listDirRecursive(File rootDir, List<File> listToAppend) {
        if (rootDir.isFile()) {
            listToAppend.add(rootDir);
            return;
        }
        // listFiles() returns null when the path is not a directory or cannot be read.
        File[] contents = rootDir.listFiles();
        if (contents == null) {
            return;
        }
        for (File file : contents) {
            if (file.isFile()) {
                listToAppend.add(file);
            } else if (file.isDirectory()) {
                CharRepresentation.listDirRecursive(file, listToAppend);
            }
        }
    }

    /**
     * Command-line entry point. The first non-flag argument names the file or directory to
     * index. Every file found is parsed as HTML, and three vector stores are written:
     * {@code charvectors} (semantic character vectors), {@code termvectors_semchar} (term
     * vectors built from the semantic character vectors) and {@code termvectors_elemchar}
     * (term vectors built from the elemental character vectors).
     *
     * @param args command-line flags followed by the path to index
     * @throws IOException              if a file cannot be read
     * @throws IllegalArgumentException if no path is given or the path does not exist
     */
    public static void main(String[] args) throws IOException {
        FlagConfig flagConfig = FlagConfig.getFlagConfig(args);
        if (flagConfig.remainingArgs == null || flagConfig.remainingArgs.length == 0) {
            throw new IllegalArgumentException("No file or directory given to index.");
        }
        // Report the path that was actually tested; args[0] may be a flag, not the path.
        String rootPath = flagConfig.remainingArgs[0];
        File rootDir = new File(rootPath);
        if (!rootDir.exists()) {
            throw new IllegalArgumentException("Not a file or directory: '" + rootPath + "'.");
        }
        List<File> files = new ArrayList<File>();
        CharRepresentation.listDirRecursive(rootDir, files);
        CharRepresentation charRepresentation = new CharRepresentation(flagConfig);

        // Pass 1: accumulate the semantic character vectors.
        for (File file : files) {
            System.out.println("Indexing chars from: " + file.getAbsolutePath());
            charRepresentation.addHtmlFileToCharRep(file);
        }
        VectorStoreWriter.writeVectors("charvectors", flagConfig, charRepresentation.semanticCharVectors);

        // Pass 2: term vectors composed from the semantic character vectors.
        charRepresentation.semanticTermVectors = new VectorStoreOrthographical(flagConfig, charRepresentation.semanticCharVectors);
        for (File file : files) {
            System.out.println("Indexing words from: " + file.getAbsolutePath());
            charRepresentation.addHtmlFileToTermRep(file);
        }
        VectorStoreWriter.writeVectors("termvectors_semchar", flagConfig, charRepresentation.semanticTermVectors);

        // Pass 3: term vectors composed from the elemental character vectors.
        charRepresentation.semanticTermVectors = new VectorStoreOrthographical(flagConfig, charRepresentation.elementalCharVectors);
        for (File file : files) {
            System.out.println("Indexing words from: " + file.getAbsolutePath());
            charRepresentation.addHtmlFileToTermRep(file);
        }
        VectorStoreWriter.writeVectors("termvectors_elemchar", flagConfig, charRepresentation.semanticTermVectors);
    }
}

