package cn.com.duiba.spider.util.maiquan;

import cn.com.duiba.spider.util.maiquan.constant.ContentSource;
import cn.com.duiba.spider.util.maiquan.dto.DefaultDto;
import cn.com.duiba.spider.util.maiquan.spider.AbstractSpider;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;

import java.io.IOException;
import java.util.List;


/**
 * Static entry points for the MaiQuan content spiders: maps a URL to its
 * {@link ContentSource}, delegates crawling to the spider bound to that source,
 * and offers keyword-based helpers (Sogou WeChat search, Tencent News hot words).
 *
 * @author: sjx
 * @date: 2019-05-27 14:49
 */
public final class MaiQuanSpiderUtil {

    /** Tencent News search page endpoint; its JSON response is read for hot words. */
    private static final String TX_NEWS_SEARCH_URL = "https://w.inews.qq.com/searchPage?pagefrom=top";

    /** Timeout, in milliseconds, for the Tencent News hot-word request. */
    private static final int TX_NEWS_TIMEOUT_MILLIS = 3000;

    private MaiQuanSpiderUtil() {
    }

    /**
     * Resolves the content source type that the given URL belongs to.
     *
     * <p>Only the part of the URL before the first {@code ?} is inspected, so
     * query-string parameters can never trigger a match.
     *
     * @param url the URL to classify; may be {@code null} or blank
     * @return the matching {@link ContentSource}, or {@link ContentSource#UNKNOWN}
     *         when the URL is blank, has nothing in front of its query string,
     *         or matches no known source
     */
    public static ContentSource resloveSourceByUrl(String url) {
        if (StringUtils.isBlank(url)) {
            return ContentSource.UNKNOWN;
        }

        // StringUtils.split drops empty tokens, so a URL made only of '?'
        // characters yields an empty array; guard before indexing into it.
        String[] parts = StringUtils.split(url, "?");
        if (parts.length == 0) {
            return ContentSource.UNKNOWN;
        }
        String host = parts[0];

        if (StringUtils.contains(host, "www.meipian")) {
            // Meipian
            return ContentSource.MEIPIAN;
        }

        if (StringUtils.contains(host, "mp.weixin.qq.com")) {
            // WeChat official account
            return ContentSource.GONGZHONGHAO;
        }

        if (StringUtils.contains(host, "pipix.com") || StringUtils.contains(host, "ppx.ixigua.com")) {
            // Pipixia
            return ContentSource.PIPIXIA;
        }

        // NOTE(review): the pattern includes the "http://" scheme, so https links
        // to the same host are not matched - confirm whether that is intended.
        if (StringUtils.contains(host, "http://app.tiaoba360.com")) {
            // Tiaoba square dance
            return ContentSource.TIAOBA;
        }

        if (StringUtils.contains(host, "//www.pearvideo.com")) {
            // Pear Video
            return ContentSource.LISHIPIN;
        }

        if (StringUtils.contains(host, "//www.ixigua.com")) {
            // Xigua Video: currently resolved to UNKNOWN, same as the fall-through below.
            return ContentSource.UNKNOWN;
        }

        // No known source matched.
        return ContentSource.UNKNOWN;
    }

    /**
     * Crawls content for the given request parameter, using the spider bound to
     * the source resolved from {@code param.getUrl()}.
     *
     * @param param the request parameter carrying the URL to crawl
     * @return the result of the resolved spider's {@code unitedStatesSmash} call
     */
    public static DefaultDto smash(AbstractSpider.RequestParam param) {
        return resloveSourceByUrl(param.getUrl()).getSpider().unitedStatesSmash(param);
    }


    /**
     * Crawls content for the given URL.
     *
     * @param url the URL to crawl
     * @return the crawled content, as returned by {@link #smash(AbstractSpider.RequestParam)}
     */
    public static DefaultDto smash(String url) {
        return smash(new AbstractSpider.RequestParam(url));
    }

    /**
     * Crawls content for the given request parameter; on error the result is
     * downgraded to {@code ContentSource.UNKNOWN}-type content (only the source
     * page's link, title and icon).
     *
     * @param param the request parameter carrying the URL to crawl
     * @return the result of the resolved spider's {@code smashOnErrorWithUnknown} call
     */
    public static DefaultDto smashOnErrorWithUnknown(AbstractSpider.RequestParam param) {
        return resloveSourceByUrl(param.getUrl()).getSpider().smashOnErrorWithUnknown(param);
    }

    /**
     * Crawls content for the given URL; on error the result is downgraded to
     * {@code ContentSource.UNKNOWN}-type content (only the source page's link,
     * title and icon).
     *
     * @param url the URL to crawl
     * @return the crawled content, as returned by
     *         {@link #smashOnErrorWithUnknown(AbstractSpider.RequestParam)}
     */
    public static DefaultDto smashOnErrorWithUnknown(String url) {
        return smashOnErrorWithUnknown(new AbstractSpider.RequestParam(url));
    }

    /**
     * Searches Sogou WeChat for articles related to the given keywords.
     *
     * @param keywords the keyword list; joined with single spaces into one query
     * @return the matching articles, or a new empty list when {@code keywords}
     *         is {@code null} or empty
     */
    public static List<DefaultDto> recommandBySougou(List<String> keywords) {
        if (CollectionUtils.isEmpty(keywords)) {
            return Lists.newArrayList();
        }
        String keyword = StringUtils.join(keywords, " ");
        return recommandBySougou(keyword);
    }

    /**
     * Fetches the current Tencent News hot-search words.
     *
     * <p>This is a best-effort lookup: a network failure, a non-JSON response
     * body, or a response without the expected {@code topWords.alternate} array
     * results in an empty (or partial) list rather than an exception.
     *
     * @return the hot words found in the response; never {@code null}, possibly empty
     */
    public static List<String> hotWord4TencentNews() {
        List<String> hotWords = Lists.newArrayList();
        try {
            Connection.Response response = Jsoup.connect(TX_NEWS_SEARCH_URL).ignoreContentType(true)
                    .ignoreHttpErrors(true).timeout(TX_NEWS_TIMEOUT_MILLIS).execute();

            // parseObject returns null for an empty body; each level below may be absent too.
            JSONObject root = JSON.parseObject(response.body());
            JSONObject topWords = root == null ? null : root.getJSONObject("topWords");
            JSONArray alternates = topWords == null ? null : topWords.getJSONArray("alternate");
            if (alternates == null) {
                return hotWords;
            }

            for (int i = 0; i < alternates.size(); i++) {
                JSONObject alternate = alternates.getJSONObject(i);
                String word = alternate == null ? null : alternate.getString("word");
                if (StringUtils.isNotBlank(word)) {
                    hotWords.add(word);
                }
            }
        } catch (IOException | JSONException ignored) {
            // Deliberately ignored: hot words are optional, so a failed request or an
            // unparsable body (HTTP errors are not raised, so an error page can reach
            // the parser) degrades to whatever has been collected so far.
        }
        return hotWords;
    }

    /**
     * Searches Sogou WeChat for articles related to the given keyword string.
     *
     * @param keyword the keyword(s); separate multiple keywords with spaces, e.g. {@code a b c}
     * @return the result of the Sogou WeChat spider's {@code unitedStatesSmashPlus} call
     */
    public static List<DefaultDto> recommandBySougou(String keyword) {
        return ContentSource.SOUGOUWEIXIN.getSpider().unitedStatesSmashPlus(new AbstractSpider.RequestParam(keyword));
    }

    /**
     * Manual smoke test: crawls one sample URL and prints it together with the
     * current Tencent News hot words to standard error. Performs live network requests.
     *
     * @param args unused
     * @throws Exception if the sample crawl fails
     */
    public static void main(String[] args) throws Exception {
        DefaultDto smash = smash("https://www.ixigua.com/i6702308019285279243");
        System.err.println(JSON.toJSONString(smash));
        System.err.println(hotWord4TencentNews());
    }
}
