package cn.com.duiba.spider.util.maiquan.spider;

import cn.com.duiba.spider.util.maiquan.constant.ContentSource;
import cn.com.duiba.spider.util.maiquan.dto.DefaultDto;
import cn.com.duiba.spider.util.maiquan.proxy.Data5UProxyProvider;
import com.google.common.collect.Lists;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.util.List;
import java.util.Map;

/**
 * Spider for the Sogou WeChat search site ({@code weixin.sogou.com}).
 *
 * <p>{@link #unitedStatesSmashPlus(RequestParam)} requests the search result page for the
 * query carried by the request parameter, reads at most {@link #MAX_RESULTS} entries of the
 * {@code .news-list} list and, for every entry, follows the Sogou redirect link to recover
 * the target article URL. The single-result variant
 * {@link #unitedStatesSmash(RequestParam)} is not supported.
 *
 * <p>Scraping is best-effort: a failed proxy lookup falls back to a direct connection, a
 * failed search request yields an empty list and a failed entry is skipped.
 *
 * @author: Administrator
 * @date: 2019-07-09 9:58
 */
public class SougouWeiXinSpider extends AbstractSpider {

    public static final SougouWeiXinSpider SPIDER = new SougouWeiXinSpider();
    private static final String SOUGOU_HOST = "https://weixin.sogou.com";
    private static final String SOUGOU_URL = SOUGOU_HOST + "/weixin?type=2&s_from=input&query=%s&ie=utf8";
    /** Value sent as the {@code k} parameter; also part of the index used to pick {@code h}. */
    private static final int K = 51;
    /** Suffix appended to a redirect link: {@code k} is {@link #K}, {@code h} one character of the link. */
    private static final String RANDOM_PARAM = "&k=%d&h=%s";
    /** Marker inside the redirect link from which the {@code h} character is located. */
    private static final String URL_MARKER = "url=";
    /**
     * Fixed part of the offset from {@link #URL_MARKER} to the {@code h} character.
     * NOTE(review): presumably mirrors the link-rewriting script on the Sogou result page
     * (length of "url=" plus a constant) - confirm against the live page.
     */
    private static final int H_OFFSET = 30;
    /** Maximum number of search results converted per request. */
    private static final int MAX_RESULTS = 5;
    /** Statement that precedes the URL fragments in the redirect page script. */
    private static final String URL_SCRIPT_START = "var url = '';";
    /** Statement that follows the URL fragments in the redirect page script. */
    private static final String URL_SCRIPT_END = " url.replace";

    private SougouWeiXinSpider() {
    }

    @Override
    protected ContentSource getContentSource() {
        return ContentSource.SOUGOUWEIXIN;
    }

    /**
     * Not supported by this spider.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public DefaultDto unitedStatesSmash(RequestParam param) {
        throw new UnsupportedOperationException("不支持方法");
    }

    /**
     * Searches Sogou WeChat for the query stored in {@code param.getUrl()} and converts the
     * first {@link #MAX_RESULTS} results into DTOs.
     *
     * <p>Side effects on {@code param}: the proxy settings are filled in when a proxy is
     * available, the URL is replaced by the full search URL and a {@code Referer} header is
     * added to its header map.
     *
     * @param param request parameter whose URL field holds the search query
     *              (NOTE(review): the query is inserted as-is; presumably the caller has
     *              already URL-encoded it - confirm)
     * @return the converted results; empty when the search request fails or nothing is found
     */
    @Override
    public List<DefaultDto> unitedStatesSmashPlus(RequestParam param) {
        Data5UProxyProvider.Proxy p = null;
        try {
            p = Data5UProxyProvider.getProxy();
            if (p != null) {
                param.setProxy(true);
                param.setHost(p.getHost());
                param.setPort(p.getPort());
            }
        } catch (Exception ignored) {
            // Best-effort: without a proxy the request is sent directly.
        }
        final Data5UProxyProvider.Proxy proxy = p;
        param.setUrl(String.format(SOUGOU_URL, param.getUrl()));

        final Document document;
        final Map<String, String> cookies;
        try {
            Connection.Response response = doRequest(param);
            document = response.parse();
            // Read once: the same cookies are replayed for every redirect request below.
            cookies = response.cookies();
        } catch (Exception ignored) {
            // Best-effort: a failed search is reported as "no results".
            return Lists.newArrayList();
        }

        // select() never returns null, only an empty Elements.
        Elements lis = document.body().select(".news-list").select("li");
        if (lis.isEmpty()) {
            return Lists.newArrayList();
        }

        List<DefaultDto> defaultDtos = Lists.newArrayList();
        Map<String, String> headers = param.getHeaders();
        headers.put("Referer", "https://weixin.sogou.com/weixin");
        lis.stream().limit(MAX_RESULTS).forEach(li -> {
            try {
                String imgSrc = "https:" + StringEscapeUtils.unescapeHtml(li.select("img").attr("src"));
                Elements titleLink = li.select("h3").select("a");
                String title = titleLink.text();
                String path = titleLink.attr("href");
                String text = li.select(".txt-info").text();
                String url = addRandomParam(SOUGOU_HOST + path);
                DefaultDto defaultDto = new DefaultDto(getRealUrl(url, headers, cookies, proxy), ContentSource.SOUGOUWEIXIN.getCode());
                defaultDto.setPostTitle(title);
                defaultDto.setSourceAbstract(text);
                defaultDto.setContentType("0");
                defaultDto.setImageList(Lists.newArrayList(imgSrc));
                defaultDtos.add(defaultDto);
            } catch (Exception ignored) {
                // Best-effort: an entry that cannot be resolved is skipped, the rest are kept.
            }
        });
        return defaultDtos;
    }

    /**
     * Requests the Sogou redirect page and extracts the target URL from its script.
     *
     * @param url     redirect link including the {@code k}/{@code h} parameters
     * @param headers request headers to send
     * @param cookies cookies obtained from the search request
     * @param proxy   proxy to use, or {@code null} for a direct connection
     * @return the target URL assembled from the script fragments
     * @throws Exception if the request fails or the page does not contain the expected script
     */
    private String getRealUrl(String url, Map<String, String> headers, Map<String, String> cookies, Data5UProxyProvider.Proxy proxy) throws Exception {
        Connection connection = Jsoup.connect(url).ignoreContentType(true).ignoreHttpErrors(true)
                .headers(headers).timeout(3000).cookies(cookies);
        if (proxy != null) {
            connection.proxy(proxy.getHost(), proxy.getPort());
        }
        return extractRealUrl(connection.execute().body());
    }

    /**
     * Reassembles the target URL from a redirect page whose script builds it piecewise
     * ({@code var url = ''; url += '...'; ... url.replace("@", "")}).
     *
     * @param body body of the redirect page
     * @return the concatenated fragments with whitespace and {@code @} removed
     * @throws IllegalStateException if the script or its URL fragments are missing
     */
    private static String extractRealUrl(String body) {
        int start = body == null ? -1 : body.indexOf(URL_SCRIPT_START);
        if (start < 0) {
            throw new IllegalStateException("redirect page does not contain the url script");
        }
        String fragments = body.substring(start + URL_SCRIPT_START.length());
        int end = fragments.indexOf(URL_SCRIPT_END);
        if (end >= 0) {
            fragments = fragments.substring(0, end);
        }
        // Literal replacements: drop the "url += '" prefixes, the "';" terminators,
        // line breaks, indentation and the '@' characters the page script itself removes.
        String realUrl = fragments
                .replace("url += '", "")
                .replace("';", "")
                .replace("\r", "")
                .replace("\n", "")
                .replace(" ", "")
                .replace("@", "");
        if (realUrl.isEmpty()) {
            throw new IllegalStateException("redirect page script contains no url fragments");
        }
        return realUrl;
    }

    /**
     * Manual check of the URL extraction against a captured redirect page; prints the
     * extracted URL to standard error.
     */
    public static void main(String[] args) {
        String body = "<script>\n" +
                "    var url = '';\n" +
                "    url += 'http://mp.w';\n" +
                "    url += 'eixin.qq.co';\n" +
                "    url += 'm/s?src=11&';\n" +
                "    url += 'timestamp=1';\n" +
                "    url += '562739655&v';\n" +
                "    url += 'er=1719&sig';\n" +
                "    url += 'nature=V-dG';\n" +
                "    url += 'LMk-mIsFY1V';\n" +
                "    url += '13iLZPuOEeX';\n" +
                "    url += 'CQ304Uqilvm';\n" +
                "    url += 'khPx86gXa9oa3GcUuigkhRNon1NfhbcC-mQ8bq*COz*DQ5ug3P4LzTxBN8aM2VgGg37-LJhRkxuIuX2OH7V3yJoSbl5&new=1';\n" +
                "    url.replace(\"@\", \"\");\n" +
                "    window.location.replace(url)\n" +
                "</script>";
        System.err.println(extractRealUrl(body));
    }

    /**
     * Appends the {@code k} and {@code h} parameters to a Sogou redirect link.
     * {@code h} is the single character found {@code K + H_OFFSET} positions after the
     * start of the {@code url=} marker.
     *
     * @param url redirect link taken from the search result page
     * @return the link with {@code &k=..&h=..} appended
     * @throws IllegalArgumentException if the link has no {@code url=} marker or is too
     *                                  short to contain the {@code h} character
     */
    private static String addRandomParam(String url) {
        int marker = StringUtils.indexOf(url, URL_MARKER);
        if (marker < 0) {
            throw new IllegalArgumentException("link has no '" + URL_MARKER + "' parameter: " + url);
        }
        int i = marker + K + H_OFFSET;
        if (i >= url.length()) {
            throw new IllegalArgumentException("link too short to derive the h parameter: " + url);
        }
        String substring = url.substring(i, i + 1);
        return url + String.format(RANDOM_PARAM, K, substring);
    }

    /**
     * Intentionally performs no host validation for this spider.
     */
    @Override
    protected void checkHost(String url) {
        //do nothing
    }
}
