/*
 * Decompiled with CFR 0.152.
 */
package pitt.search.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.util.StringTokenizer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import pitt.search.semanticvectors.utils.VerbatimLogger;

/**
 * Command-line tool that builds a Lucene index from a tab-delimited text file of
 * predications, one predication per line.
 *
 * <p>Each line is expected to carry nine tab-separated fields, in this order:
 * subject, subject CUI, subject semantic type, predicate, object, object CUI,
 * object semantic type, PMID and source text. Every well-formed line becomes one
 * Lucene {@link Document}. Because {@link StringTokenizer} is used, empty fields
 * (consecutive tabs) are not counted as tokens.
 */
public class LuceneIndexFromSemrepTriples {
    /** Directory the index is written to; {@link #main} refuses to run if it already exists. */
    static Path INDEX_DIR = FileSystems.getDefault().getPath("predication_index");

    /** Number of tab-delimited tokens consumed from each input line by {@link #indexDoc}. */
    private static final int REQUIRED_FIELDS = 9;

    private LuceneIndexFromSemrepTriples() {
    }

    /**
     * Entry point: indexes the predication file named by {@code args[0]} into {@link #INDEX_DIR}.
     *
     * <p>Prints a usage message and exits with status 1 when no argument is given.
     * I/O failures (including an unreadable input file) are reported on standard output.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the triples text file
     * @throws IllegalArgumentException if {@link #INDEX_DIR} already exists
     */
    public static void main(String[] args) {
        String usage = "java pitt.search.lucene.LuceneIndexFromSemrepTriples [triples text file] ";
        if (args.length == 0) {
            System.err.println("Usage: " + usage);
            System.exit(1);
        }
        if (Files.exists(INDEX_DIR)) {
            throw new IllegalArgumentException("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        }
        try {
            // Validate the input before opening the writer: opening the writer creates
            // INDEX_DIR, and a leftover empty directory would make the next run fail
            // the "please delete it first" check above.
            File triplesTextFile = new File(args[0]);
            if (!triplesTextFile.exists() || !triplesTextFile.canRead()) {
                throw new IOException("Document file '" + triplesTextFile.getAbsolutePath() + "' does not exist or is not readable, please check the path");
            }
            IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
            // try-with-resources closes the writer and directory even if indexing fails.
            try (FSDirectory indexDirectory = FSDirectory.open(INDEX_DIR);
                 IndexWriter writer = new IndexWriter(indexDirectory, writerConfig)) {
                System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
                LuceneIndexFromSemrepTriples.indexDoc(writer, triplesTextFile);
            }
        }
        catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }

    /**
     * Reads {@code triplesTextFile} line by line and adds one document per well-formed
     * line to {@code fsWriter}.
     *
     * <p>Lines with fewer than {@value #REQUIRED_FIELDS} tab-delimited tokens are logged
     * as warnings and skipped. A line that fails for any other reason while being turned
     * into a document is printed together with the stack trace, and processing continues
     * with the next line. Progress is logged every 1000 lines up to 10000, then every
     * 10000 lines.
     *
     * @param fsWriter        writer that receives the documents; not closed by this method
     * @param triplesTextFile tab-delimited predication file to read
     * @throws IOException if the file cannot be opened or read
     */
    static void indexDoc(IndexWriter fsWriter, File triplesTextFile) throws IOException {
        // The field type for "source" is the same for every document, so build it once.
        FieldType sourceFieldType = new FieldType();
        sourceFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        sourceFieldType.setStored(true);
        sourceFieldType.setTokenized(true);
        sourceFieldType.setStoreTermVectors(true);
        sourceFieldType.setStoreTermVectorPositions(true);

        try (BufferedReader theReader = new BufferedReader(new FileReader(triplesTextFile))) {
            String lineIn;
            int linecnt = 0;
            while ((lineIn = theReader.readLine()) != null) {
                if (++linecnt % 10000 == 0 || linecnt < 10000 && linecnt % 1000 == 0) {
                    VerbatimLogger.info(linecnt + " ... ");
                }
                StringTokenizer theTokenizer = new StringTokenizer(lineIn, "\t");
                if (theTokenizer.countTokens() < REQUIRED_FIELDS) {
                    VerbatimLogger.warning("Line in predication file does not have " + REQUIRED_FIELDS + " tab-delimited fields: " + lineIn + "\n");
                    // Just move on: the loop condition reads the next line. Reading here
                    // as well would silently drop the line after every malformed one.
                    continue;
                }
                try {
                    // Concept names may carry alternatives after "|||"; only the first part is kept.
                    String subject = normalize(theTokenizer.nextToken()).replaceAll("\\|\\|\\|.*", "");
                    String subject_CUI = normalize(theTokenizer.nextToken());
                    String subject_semtype = normalize(theTokenizer.nextToken());
                    String predicate = theTokenizer.nextToken().trim().toUpperCase().replaceAll(" ", "_");
                    String object = normalize(theTokenizer.nextToken()).replaceAll("\\|\\|\\|.*", "");
                    String object_CUI = normalize(theTokenizer.nextToken());
                    String object_semtype = normalize(theTokenizer.nextToken());
                    String PMID = theTokenizer.nextToken();
                    String source = theTokenizer.nextToken();

                    Document doc = new Document();
                    doc.add(new TextField("subject", subject, Field.Store.YES));
                    doc.add(new TextField("subject_CUI", subject_CUI, Field.Store.YES));
                    doc.add(new TextField("subject_semtype", subject_semtype, Field.Store.YES));
                    doc.add(new TextField("predicate", predicate, Field.Store.YES));
                    doc.add(new TextField("object", object, Field.Store.YES));
                    doc.add(new TextField("object_CUI", object_CUI, Field.Store.YES));
                    doc.add(new TextField("object_semtype", object_semtype, Field.Store.YES));
                    // Concatenated triple, indexed but not stored.
                    doc.add(new TextField("predication", subject + predicate + object, Field.Store.NO));
                    doc.add(new TextField("PMID", PMID, Field.Store.YES));
                    doc.add(new Field("source", source, sourceFieldType));
                    fsWriter.addDocument(doc);
                }
                catch (Exception e) {
                    // Best effort: report the offending line and keep indexing the rest.
                    System.out.println(lineIn);
                    e.printStackTrace();
                }
            }
            VerbatimLogger.info("\n");
        }
    }

    /** Trims the token, lower-cases it and replaces spaces with underscores. */
    private static String normalize(String token) {
        return token.trim().toLowerCase().replaceAll(" ", "_");
    }
}

