/*
 * Decompiled with CFR 0.152.
 */
package net.psammead.mwapi.scrapper;

import au.id.jericho.lib.html.Attribute;
import au.id.jericho.lib.html.Element;
import au.id.jericho.lib.html.Segment;
import au.id.jericho.lib.html.Source;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.psammead.mwapi.config.ConfigInfo;
import net.psammead.mwapi.connection.TitleUtil;
import net.psammead.mwapi.net.IllegalFormException;
import net.psammead.mwapi.net.JerichoUtil;
import net.psammead.mwapi.scrapper.BasicInfo;
import net.psammead.mwapi.scrapper.HttpResult;
import net.psammead.mwapi.scrapper.HttpUtil;
import net.psammead.mwapi.scrapper.HttpUtilCommons;
import net.psammead.mwapi.scrapper.SiteInfo;
import net.psammead.mwapi.ui.UnsupportedURLException;
import net.psammead.util.IOUtil;
import net.psammead.util.Logger;
import net.psammead.util.XMLCodec;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Scrapes configuration data (charset, namespaces, special page names and
 * interface messages) from the pages of a wiki site via HTTP.
 *
 * <p>All URLs are built by plain concatenation of {@code protocol + host + path},
 * so callers must pass the pieces with whatever separators they need
 * (e.g. a protocol that already ends in {@code "://"}).
 */
public class Scrapper {
    private static final Logger log = new Logger(Scrapper.class);

    /** A single-quoted value; backslash-escaped characters are allowed inside. Group = the quoted content. */
    private static final String Q_VALUE = "'((?:[^'\\\\]*+|\\\\.)*)'";
    /** One {@code 'key' => 'value',} array entry. */
    private static final String BASE = Q_VALUE + " => " + Q_VALUE + ",";
    /** An entry on a line of its own. */
    private static final String PLAIN = BASE + "\n";
    /** An entry prefixed with {@code #}. */
    private static final String HASHED = "#" + BASE + "\n";
    /** An entry wrapped in a block comment. */
    private static final String SLASHED = "/\\* " + BASE + " \\*/\n";
    /** Any of the three entry forms; key/value are groups 1/2, 3/4 or 5/6 depending on the form. */
    private static final String COMBINED = SLASHED + "|" + HASHED + "|" + PLAIN;

    /** Locates the messages array anywhere in a page body. */
    private static final Pattern MESSAGE_ARRAY = Pattern.compile(
            "\n\\$(wgAllMessages|messages).*? = array\\(\n(.*,\n)\\);", Pattern.DOTALL);
    /** Matches a whole page body; group 1 is the array content between the start/end content markers. */
    private static final Pattern MESSAGE_CONTENT = Pattern.compile(
            ".*?<!-- start content -->.*?\n\\$(?:wgAllMessages|messages).*? = array\\(\n(.*,\n)\\);.*?<!-- end content -->.*?",
            Pattern.DOTALL);
    /** Matches one array entry in any of the forms described by {@link #COMBINED}. */
    private static final Pattern MESSAGE_ENTRY = Pattern.compile(COMBINED, Pattern.DOTALL);

    /** Where an unparseable page body is written before the failure is reported. */
    private static final String DUMP_FILE = "/tmp/scrapped.html";

    private final HttpUtil http = new HttpUtilCommons();

    /**
     * Creates a scrapper whose HTTP helper is told to use the system proxy.
     *
     * @throws MalformedURLException declared for callers; presumably raised by the proxy setup
     */
    public Scrapper() throws MalformedURLException {
        this.http.useSystemProxy();
    }

    /**
     * Downloads {@code protocol + host + prettyPath} and derives the basic site information
     * from its search form.
     *
     * <p>The special namespace name is taken from the form's {@code action} attribute as the
     * text between a {@code '/'} and the last {@code ':'}; if the action does not have that
     * shape the whole action string is used unchanged.
     *
     * @return the charset reported for the downloaded page together with the special namespace name
     * @throws IOException if the download fails
     * @throws IllegalFormException declared for callers; presumably raised when the search form is missing
     */
    public BasicInfo fetchBasicInfo(String protocol, String host, String prettyPath) throws IOException, IllegalFormException {
        URL url = new URL(protocol + host + prettyPath);
        HttpResult content = this.http.download(url);
        Source source = JerichoUtil.createSource(content.body, log);
        Element form = JerichoUtil.fetchForm((Segment)source, "searchform", "searchform", -1);
        String searchAction = JerichoUtil.fetchAttributeValue(form.getStartTag(), "action");
        String specialNs = searchAction.replaceAll(".*/(.*):.*", "$1");
        return new BasicInfo(content.charset, specialNs);
    }

    /**
     * Downloads a {@code Special:Export} page and reads the {@code siteinfo} element from it:
     * site name, base, generator, title case and the namespace table. The localized special
     * page names are then looked up with {@link #fetchSpecialPages}.
     *
     * @throws IOException if a download fails or the response contains no {@code siteinfo} element
     * @throws RuntimeException if a {@code namespace} element has no {@code key} attribute
     */
    public SiteInfo fetchSiteInfo(String protocol, String host, String prettyPath) throws IOException {
        // The requested page title looks like a deliberately random one, presumably so that
        // the export carries only the siteinfo header -- TODO confirm.
        URL url = new URL(protocol + host + prettyPath + "Special:Export?action=submit&pages=23kl5jskdjfhskdfhslkfjsdkqweuh23&curonly=checked");
        HttpResult content = this.http.download(url);
        Source source = JerichoUtil.createSource(content.body, log);
        Element siteinfo = JerichoUtil.firstElement((Segment)source, "siteinfo");
        if (siteinfo == null) {
            // Defensive: report a missing element as an I/O problem instead of a NullPointerException.
            throw new IOException("siteinfo not found: " + url);
        }
        HashMap<Integer, String> nameSpaces = new HashMap<Integer, String>();
        // Iterate as Object and cast: findAllElements may be declared with a raw List.
        for (Object item : siteinfo.findAllElements("namespace")) {
            Element element = (Element)item;
            Attribute key = element.getAttributes().get("key");
            if (key == null) {
                throw new RuntimeException("namespace.key not found");
            }
            int index = Integer.parseInt(key.getValue());
            String name = element.getContent().toString();
            nameSpaces.put(Integer.valueOf(index), name);
        }
        String sitename = JerichoUtil.firstElementText((Segment)siteinfo, "sitename");
        String base = JerichoUtil.firstElementText((Segment)siteinfo, "base");
        String generator = JerichoUtil.firstElementText((Segment)siteinfo, "generator");
        String titleCase = JerichoUtil.firstElementText((Segment)siteinfo, "case");
        // Namespace key -1 is used as the special namespace.
        String specialNS = nameSpaces.get(Integer.valueOf(-1));
        Map<String, String> specialPages = this.fetchSpecialPages(protocol, host, prettyPath, content.charset, specialNS);
        return new SiteInfo(sitename, base, generator, titleCase, specialPages, nameSpaces);
    }

    /**
     * Resolves every name in {@code ConfigInfo.SPECIAL_PAGES} with {@link #fetchSpecialPage}.
     *
     * @return a map from canonical special page name to the resolved name
     * @throws IOException if any single lookup fails
     */
    public Map<String, String> fetchSpecialPages(String protocol, String host, String prettyPath, String charset, String specialNS) throws IOException {
        Map<String, String> out = new HashMap<String, String>();
        for (String canonical : ConfigInfo.SPECIAL_PAGES) {
            out.put(canonical, this.fetchSpecialPage(protocol, host, prettyPath, charset, specialNS, canonical));
        }
        return out;
    }

    /**
     * Requests the page {@code specialNS + ":" + canonical} and, if the server redirects,
     * derives the page name from the redirect location.
     *
     * @return {@code canonical} when there is no redirect; otherwise the last path segment of
     *         the redirect location, decoded with {@code charset}, with everything up to and
     *         including the last {@code ':'} removed
     * @throws IOException if the request fails or the title cannot be encoded/decoded
     */
    public String fetchSpecialPage(String protocol, String host, String prettyPath, String charset, String specialNS, String canonical) throws IOException {
        try {
            URL url = new URL(protocol + host + prettyPath + TitleUtil.encodeTitle(specialNS + ":" + canonical, charset));
            String location = this.http.redirectsTo(url);
            if (location == null) {
                return canonical;
            }
            String raw = location.replaceAll(".*/", "");
            String title = TitleUtil.spaces(TitleUtil.decodeTitle(raw, charset));
            return title.replaceAll(".*:", "");
        }
        catch (UnsupportedURLException e) {
            IOException wrapped = new IOException("cannot decode specialPage title: " + canonical);
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /** Turns every backslash-escaped single quote ({@code \'}) into a plain single quote. */
    private final String q_decode(String s) {
        return s.replace("\\'", "'");
    }

    /**
     * Writes {@code body} to {@link #DUMP_FILE} for later inspection and builds the exception
     * the caller should throw.
     *
     * @throws IOException declared in case the dump itself cannot be written
     */
    private static RuntimeException dumpAndFail(String body, String message) throws IOException {
        IOUtil.writeStringToFile(new File(DUMP_FILE), body, "UTF-8");
        return new RuntimeException(message);
    }

    /**
     * Downloads {@code Special:Allmessages} in its PHP output format and parses the
     * {@code 'key' => 'value',} entries of the messages array.
     *
     * @return message key to message value; values have {@code \'} unescaped, keys are taken verbatim
     * @throws IOException if the download fails
     * @throws RuntimeException if the page lacks the content markers or the messages array
     *         (the page body is dumped to {@link #DUMP_FILE} first in the first three checks)
     */
    private Map<String, String> fetchMessagesPHP(String protocol, String host, String rawPath, String uselang) throws IOException {
        URL url = new URL(protocol + host + rawPath + "?title=Special:Allmessages&ot=php&useskin=monobook&uselang=" + uselang);
        HttpResult content = this.http.download(url);
        String body = content.body;
        if (!body.contains("<!-- start content -->")) {
            throw dumpAndFail(body, "### start content not found");
        }
        if (!body.contains("<!-- end content -->")) {
            throw dumpAndFail(body, "### end content not found");
        }
        if (!MESSAGE_ARRAY.matcher(body).find()) {
            throw dumpAndFail(body, "### content not found");
        }
        Matcher page = MESSAGE_CONTENT.matcher(body);
        if (!page.matches()) {
            throw new RuntimeException("### no content matches found: " + url);
        }
        String decoded = XMLCodec.decode(page.group(1), true, false);
        Map<String, String> out = new HashMap<String, String>();
        Matcher entry = MESSAGE_ENTRY.matcher(decoded);
        while (entry.find()) {
            // Each alternative of COMBINED owns one (key, value) group pair: 1/2, 3/4 or 5/6.
            for (int keyGroup = 1; keyGroup <= 5; keyGroup += 2) {
                String key = entry.group(keyGroup);
                if (key != null) {
                    out.put(key, this.q_decode(entry.group(keyGroup + 1)));
                    break;
                }
            }
        }
        return out;
    }

    /**
     * Downloads {@code Special:Allmessages} in its XML output format and collects every
     * {@code message} element.
     *
     * @return the {@code name} attribute of each element mapped to its decoded text content;
     *         empty when the response contains no {@code message} elements
     * @throws IOException if the download fails
     */
    private Map<String, String> fetchMessagesXML(String protocol, String host, String rawPath, String uselang) throws IOException {
        URL url = new URL(protocol + host + rawPath + "?title=Special:Allmessages&ot=xml&uselang=" + uselang);
        HttpResult content = this.http.download(url);
        Map<String, String> out = new HashMap<String, String>();
        Source source = JerichoUtil.createSource(content.body, log);
        // Iterate as Object and cast: findAllElements may be declared with a raw List.
        for (Object item : source.findAllElements("message")) {
            Element element = (Element)item;
            String key = element.getAttributeValue("name");
            String value = JerichoUtil.decodedTextOnly(source, element.getContent());
            out.put(key, value);
        }
        return out;
    }

    /**
     * Fetches the interface messages for {@code uselang}, trying the XML format first and
     * falling back to the PHP format when the XML result is empty.
     *
     * @return a non-empty map from message key to message text
     * @throws IOException if a download fails or both formats yield no messages
     */
    public Map<String, String> fetchMessages(String protocol, String host, String rawPath, String uselang) throws IOException {
        Map<String, String> messagesXML = this.fetchMessagesXML(protocol, host, rawPath, uselang);
        if (!messagesXML.isEmpty()) {
            return messagesXML;
        }
        log.info(host + ": could not get XML messages, trying PHP");
        Map<String, String> messagesPHP = this.fetchMessagesPHP(protocol, host, rawPath, uselang);
        if (!messagesPHP.isEmpty()) {
            return messagesPHP;
        }
        throw new IOException("could not fetch messages");
    }
}

