/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.secondstring.expt;

import com.wcohen.secondstring.expt.Blocker;
import com.wcohen.secondstring.expt.MatchData;
import com.wcohen.secondstring.tokens.SimpleTokenizer;
import com.wcohen.secondstring.tokens.Token;
import com.wcohen.secondstring.tokens.Tokenizer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

public class TokenBlocker
extends Blocker {
    private static double defaultMaxFraction = 1.0;
    private ArrayList pairList;
    private Tokenizer tokenizer;
    double maxFraction;
    private static final Set STOPWORD_TOKEN_MARKER;
    int numCorrectPairs;

    public TokenBlocker(Tokenizer tokenizer, double d) {
        this.tokenizer = tokenizer;
        this.maxFraction = d;
    }

    public TokenBlocker() {
        this(SimpleTokenizer.DEFAULT_TOKENIZER, defaultMaxFraction);
    }

    public void block(MatchData matchData) {
        Set<Integer> set;
        int n;
        Token[] tokenArray;
        String string;
        this.numCorrectPairs = this.countCorrectPairs(matchData);
        this.pairList = new ArrayList();
        if (!this.clusterMode && matchData.numSources() != 2) {
            throw new IllegalArgumentException("need exactly two sources out of clusterMode");
        }
        if (this.clusterMode && matchData.numSources() != 1) {
            throw new IllegalArgumentException("need exactly one source in clusterMode");
        }
        String string2 = matchData.getSource(0);
        String string3 = string = this.clusterMode ? matchData.getSource(0) : matchData.getSource(1);
        if (matchData.numInstances(string2) > matchData.numInstances(string)) {
            String string4 = string2;
            string2 = string;
            string = string4;
        }
        double d = (double)matchData.numInstances(string2) * this.maxFraction;
        TreeMap<Token, Set> treeMap = new TreeMap<Token, Set>();
        int n2 = 0;
        while (n2 < matchData.numInstances(string2)) {
            tokenArray = this.tokenizer.tokenize(matchData.getInstance(string2, n2).getText().unwrap());
            n = 0;
            while (n < tokenArray.length) {
                set = (Set)treeMap.get(tokenArray[n]);
                if (set != STOPWORD_TOKEN_MARKER && set == null) {
                    set = new TreeSet<Integer>();
                    treeMap.put(tokenArray[n], set);
                }
                set.add(new Integer(n2));
                if ((double)set.size() > d) {
                    treeMap.put(tokenArray[n], STOPWORD_TOKEN_MARKER);
                }
                ++n;
            }
            ++n2;
        }
        tokenArray = new TreeSet();
        n = 0;
        while (n < matchData.numInstances(string)) {
            set = matchData.getInstance(string, n);
            tokenArray.clear();
            Token[] tokenArray2 = this.tokenizer.tokenize(((MatchData.Instance)((Object)set)).getText().unwrap());
            int n3 = 0;
            while (n3 < tokenArray2.length) {
                Set set2 = (Set)treeMap.get(tokenArray2[n3]);
                if (set2 != null && set2 != STOPWORD_TOKEN_MARKER) {
                    Iterator iterator = set2.iterator();
                    while (iterator.hasNext()) {
                        Integer n4 = (Integer)iterator.next();
                        int n5 = n4;
                        if (tokenArray.contains(n4) || string2 == string && n5 <= n) continue;
                        MatchData.Instance instance = matchData.getInstance(string2, n5);
                        this.pairList.add(new Blocker.Pair((MatchData.Instance)((Object)set), instance));
                        tokenArray.add(n4);
                    }
                }
                ++n3;
            }
            ++n;
        }
    }

    public int size() {
        return this.pairList.size();
    }

    public Blocker.Pair getPair(int n) {
        return (Blocker.Pair)this.pairList.get(n);
    }

    public String toString() {
        return "[TokenBlocker:clusterMode=" + this.clusterMode + ",maxFraction=" + this.maxFraction + "]";
    }

    public int numCorrectPairs() {
        return this.numCorrectPairs;
    }

    private void showIndex(Map map) {
        Iterator iterator = map.keySet().iterator();
        while (iterator.hasNext()) {
            Token token = (Token)iterator.next();
            System.out.print(token.toString());
            Set set = (Set)map.get(token);
            Iterator iterator2 = set.iterator();
            while (iterator2.hasNext()) {
                Integer n = (Integer)iterator2.next();
                System.out.print(" " + n);
            }
            System.out.println();
        }
    }

    static {
        try {
            String string = System.getProperty("blockerMaxFraction");
            if (string != null) {
                defaultMaxFraction = Double.parseDouble(string);
            }
        }
        catch (NumberFormatException numberFormatException) {
            // empty catch block
        }
        STOPWORD_TOKEN_MARKER = new HashSet();
    }
}

