/*
 * Decompiled with CFR 0.152.
 */
package edu.berkeley.nlp.lm.phrasetable;

import edu.berkeley.nlp.lm.WordIndexer;
import edu.berkeley.nlp.lm.collections.Iterators;
import edu.berkeley.nlp.lm.io.IOUtils;
import edu.berkeley.nlp.lm.io.LmReader;
import edu.berkeley.nlp.lm.io.LmReaderCallback;
import edu.berkeley.nlp.lm.phrasetable.MosesPhraseTableReaderCallback;
import edu.berkeley.nlp.lm.phrasetable.PhraseTableCounts;
import edu.berkeley.nlp.lm.util.Logger;
import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Pattern;

/**
 * Reads a phrase table file line by line and hands each phrase pair to a callback.
 *
 * <p>Each line must split on {@code |||} into either three or five fields. The first
 * field is the source phrase and the second is the target phrase; the feature values
 * are taken from the third field of a three-field line and from the fifth field of a
 * five-field line. Source and target words are converted to indices with the supplied
 * {@link WordIndexer} and joined into one array with the index of {@link #SEP_WORD}
 * between them.
 *
 * @param <W> word type of the {@link WordIndexer}
 */
public class MosesPhraseTableReader<W>
implements LmReader<PhraseTableCounts, MosesPhraseTableReaderCallback<W>> {
    /** Marker word whose index is placed between the source and target word indices. */
    static final String SEP_WORD = "<<sep>>";

    /** Field delimiter of a phrase table line; compiled once instead of once per line. */
    private static final Pattern FIELD_SEPARATOR = Pattern.compile("\\|\\|\\|");

    /** Delimiter between words and between feature values within a field. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Progress is logged every this many lines. */
    private static final long LOG_INTERVAL = 10000L;

    private final WordIndexer<W> wordIndexer;
    private final String file;

    /**
     * @param file        path of the phrase table file to read
     * @param wordIndexer indexer used to map source and target words to integer indices
     */
    public MosesPhraseTableReader(String file, WordIndexer<W> wordIndexer) {
        this.file = file;
        this.wordIndexer = wordIndexer;
    }

    /**
     * Reads the whole file, invoking {@code callback} once per line and calling its
     * {@code cleanup()} after the last line.
     *
     * @param callback receiver of the parsed phrase pairs
     * @throws RuntimeException         if the file cannot be read or a feature value is not a number
     * @throws IllegalArgumentException if a line does not have three or five fields
     */
    @Override
    public void parse(MosesPhraseTableReaderCallback<W> callback) {
        this.readFromFiles(callback);
    }

    /**
     * Opens the file and feeds its lines to {@link #countPhrases}, wrapping any
     * {@link IOException} in a {@link RuntimeException}.
     */
    private void readFromFiles(LmReaderCallback<PhraseTableCounts> callback) {
        Logger.startTrack("Reading from file " + this.file, new Object[0]);
        try {
            Iterable<String> allLinesIterator = Iterators.able(IOUtils.lineIterator(this.file));
            this.countPhrases(allLinesIterator, callback);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        finally {
            // Close the logging track even when reading or parsing fails, so the
            // startTrack above is always balanced.
            Logger.endTrack();
        }
    }

    /**
     * Parses every line into a word-index array plus feature values and passes them to
     * {@code callback}, then calls {@code callback.cleanup()}.
     *
     * @throws IllegalArgumentException if a line does not have three or five fields
     * @throws RuntimeException         if a parsed feature value is not a valid float
     */
    private void countPhrases(Iterable<String> allLinesIterator, LmReaderCallback<PhraseTableCounts> callback) {
        long numLines = 0L;
        for (String line : allLinesIterator) {
            if (numLines % LOG_INTERVAL == 0L) {
                Logger.logs("On line " + numLines);
            }
            ++numLines;
            String[] parts = FIELD_SEPARATOR.split(line.trim());
            if (parts.length != 5 && parts.length != 3) {
                throw new IllegalArgumentException("Bad Moses phrase table file line " + line);
            }
            // NOTE(review): five-field lines are assumed to carry the features in the
            // last field; confirm against the phrase table format in use.
            String featureField = parts.length == 5 ? parts[4] : parts[2];

            int[] srcInts = this.toWordIndices(parts[0]);
            int[] trgInts = this.toWordIndices(parts[1]);
            // Looked up per line, after the phrase words, so the order in which words
            // are added to the indexer stays exactly as before.
            int sepIndex = this.wordIndexer.getOrAddIndexFromString(SEP_WORD);
            float[] features = parseFeatures(featureField, line);

            // Layout: [source words..., separator, target words...]
            int[] concat = new int[srcInts.length + trgInts.length + 1];
            System.arraycopy(srcInts, 0, concat, 0, srcInts.length);
            concat[srcInts.length] = sepIndex;
            System.arraycopy(trgInts, 0, concat, srcInts.length + 1, trgInts.length);
            callback.call(concat, 0, concat.length, new PhraseTableCounts(features), line);
        }
        callback.cleanup();
    }

    /** Splits a phrase on whitespace and maps each word to its index. */
    private int[] toWordIndices(String phrase) {
        String[] words = WHITESPACE.split(phrase.trim());
        return WordIndexer.StaticMethods.toArrayFromStrings(this.wordIndexer, Arrays.asList(words));
    }

    /**
     * Converts a whitespace-separated feature field into an array of {@code -log(value)}.
     *
     * <p>Non-finite values are logged as a warning and left at {@code 0.0f}.
     *
     * @param featureField the raw feature field of the line
     * @param line         the full line, used only for the error message
     * @return one slot per feature string in the field
     * @throws RuntimeException if a parsed feature string is not a valid float; the
     *                          {@link NumberFormatException} is attached as the cause
     */
    private static float[] parseFeatures(String featureField, String line) {
        String[] featStrings = WHITESPACE.split(featureField.trim());
        float[] features = new float[featStrings.length];
        // NOTE(review): the last feature string is never parsed and its slot stays
        // 0.0f. This bound is preserved from the existing behavior; confirm it is intended.
        for (int i = 0; i < featStrings.length - 1; ++i) {
            float val;
            try {
                val = Float.parseFloat(featStrings[i]);
            }
            catch (NumberFormatException e) {
                throw new RuntimeException("Bad Moses phrase table file line: " + line, e);
            }
            if (Float.isInfinite(val) || Float.isNaN(val)) {
                Logger.warn("Non-finite feature: " + featStrings[i]);
                continue;
            }
            features[i] = (float)(-Math.log(val));
        }
        return features;
    }
}

