package cn.com.duiba.spider.util.maiquan.spider;

import cn.com.duiba.spider.util.maiquan.ImageUtil;
import cn.com.duiba.spider.util.maiquan.constant.ContentSource;
import cn.com.duiba.spider.util.maiquan.dto.DefaultDto;
import cn.com.duiba.spider.util.maiquan.exception.ErrorCode;
import cn.com.duiba.spider.util.maiquan.exception.MaiQuanSpiderException;
import cn.com.duiba.spider.util.maiquan.spider.AbstractSpider;
import cn.com.duiba.spider.util.maiquan.tts.TextUtil;
import cn.com.duiba.spider.util.maiquan.tts.baidu.keyword.BaiduKeywordUtil;
import cn.com.duiba.spider.util.maiquan.tts.baidu.summary.BaiduSummaryUtil;
import com.alibaba.fastjson.JSON;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: input_file:cn/com/duiba/spider/util/maiquan/spider/GongZhongHaoSpider.class */
/**
 * Spider for official-account (公众号) article pages such as the
 * {@code mp.weixin.qq.com} URL used in {@link #main(String[])}.
 *
 * <p>It pulls the title, author, images and text out of the fetched HTML and
 * packs them into a {@link DefaultDto}. The class is a stateless singleton;
 * use {@link #SPIDER}.
 */
public class GongZhongHaoSpider extends AbstractSpider {
    private static final Logger LOGGER = Logger.getLogger(GongZhongHaoSpider.class.getName());

    /** URL template of the account-info endpoint; {@code %s} is the account's {@code user_name}. */
    private static final String WXB_USER_INFO_URL = "https://data.wxb.com/account/index/%s?is_new=1";

    /** Text that precedes the account id inside the page's inline script. */
    private static final String USER_NAME_PREFIX = "var user_name = \"";

    /** Text that terminates the account id inside the page's inline script. */
    private static final String USER_NAME_SUFFIX = "\";";

    /** Images with a declared width below this many pixels are skipped. */
    private static final int MIN_IMAGE_WIDTH = 300;

    public static final GongZhongHaoSpider SPIDER = new GongZhongHaoSpider();

    private GongZhongHaoSpider() {
    }

    @Override // cn.com.duiba.spider.util.maiquan.spider.AbstractSpider
    public ContentSource getContentSource() {
        return ContentSource.GONGZHONGHAO;
    }

    /**
     * Fetches the article at {@code requestParam}'s URL and converts it to a {@link DefaultDto}.
     *
     * <p>Fills in title, source id (MD5 of the title), author (nickname plus avatar),
     * image list, full text, abstract and tags. Tag extraction is best-effort: an
     * {@link IOException} there is logged and the DTO is returned without tags.
     *
     * @param requestParam request describing the article URL to fetch
     * @return the populated DTO
     * @throws MaiQuanSpiderException if the title or the content container cannot be found
     * @throws RuntimeException if the account id is missing from the page, so no avatar can be resolved
     */
    @Override // cn.com.duiba.spider.util.maiquan.spider.AbstractSpider
    public DefaultDto unitedStatesSmash(AbstractSpider.RequestParam requestParam) {
        DefaultDto dto = new DefaultDto(requestParam.getUrl(), Integer.valueOf(getContentSource().getCode()));
        Document document = doRequestDocument(requestParam);
        Elements divs = document.body().select("div");

        Elements titleElements = divs.select(".rich_media_title").select("#activity-name");
        if (CollectionUtils.isEmpty(titleElements)) {
            throw new MaiQuanSpiderException(ErrorCode.E004.getCode(), "公众号文章标题获取失败");
        }
        String title = titleElements.text();
        dto.setPostTitle(title);
        dto.setSourceId(DigestUtils.md5Hex(title));

        Elements contentElements = divs.select(".rich_media_content");
        if (CollectionUtils.isEmpty(contentElements)) {
            throw new MaiQuanSpiderException(ErrorCode.E004.getCode(), "公众号文章内容获取失败");
        }

        String nickname = divs.select(".profile_nickname").text();
        String userName = extractUserName(document.toString());
        dto.setAuthor(new DefaultDto.Author(nickname, getHeadImg(userName)));

        List<String> imageUrls = collectImageUrls(contentElements.get(0));
        dto.setImageList(imageUrls);
        List<DefaultDto.Image> images = ImageUtil.listByUrls(imageUrls);
        dto.setImages(images);
        if (imageUrls.size() != images.size()) {
            // listByUrls returned a different number of entries; keep the URL list in sync with it.
            dto.setImageList(images.stream().map(DefaultDto.Image::getUrl).collect(Collectors.toList()));
        }

        List<String> paragraphs = Lists.newArrayList();
        text4Elements(contentElements, paragraphs);
        StringBuilder fullText = new StringBuilder(title).append("。");
        for (String paragraph : paragraphs) {
            fullText.append(paragraph).append("\n");
        }
        dto.setFullText(fullText.toString());
        dto.setSourceAbstract(BaiduSummaryUtil.summary4Text(title, fullText.toString()));

        try {
            dto.setTags(BaiduKeywordUtil.tag4Text(dto.getPostTitle(), dto.getFullText()));
        } catch (IOException e) {
            // Tags are optional: report the failure instead of hiding it, but still return the article.
            LOGGER.log(Level.WARNING, "Tag extraction failed for " + requestParam.getUrl(), e);
        }
        return dto;
    }

    /**
     * Extracts the account id from the {@code var user_name = "...";} assignment in the page source.
     *
     * @param html full page source
     * @return the account id, or an empty string when the assignment is absent or unterminated
     */
    private static String extractUserName(String html) {
        int prefixAt = html.indexOf(USER_NAME_PREFIX);
        if (prefixAt < 0) {
            return "";
        }
        int valueStart = prefixAt + USER_NAME_PREFIX.length();
        int valueEnd = html.indexOf(USER_NAME_SUFFIX, valueStart);
        if (valueEnd < 0) {
            return "";
        }
        return html.substring(valueStart, valueEnd);
    }

    /**
     * Collects the {@code data-src} of every {@code img} under {@code content} that is wide
     * enough and is not a GIF.
     */
    private static List<String> collectImageUrls(Element content) {
        return content.select("img").stream()
                .filter(GongZhongHaoSpider::isWideEnough)
                .map(img -> img.attr("data-src"))
                .filter(src -> !isGif(src))
                .collect(Collectors.toList());
    }

    /**
     * Tells whether the image's {@code width} attribute is at least {@link #MIN_IMAGE_WIDTH}.
     * A missing or non-numeric width counts as wide enough, so such images are kept.
     */
    private static boolean isWideEnough(Element img) {
        try {
            return Integer.parseInt(StringUtils.replace(img.attr("width"), "px", "")) >= MIN_IMAGE_WIDTH;
        } catch (NumberFormatException ignored) {
            // Width unknown: keep the image rather than drop it.
            return true;
        }
    }

    /** Tells whether the image URL carries one of the GIF markers used by the image host. */
    private static boolean isGif(String src) {
        return StringUtils.containsIgnoreCase(src, "mmbiz_gif") || StringUtils.containsIgnoreCase(src, "wx_fmt=gif");
    }

    /**
     * Resolves the avatar URL for an account by querying {@link #WXB_USER_INFO_URL}.
     *
     * @param str account id ({@code user_name}) taken from the article page
     * @return the {@code data.avatar} field of the JSON response
     * @throws RuntimeException if {@code str} is blank
     */
    private String getHeadImg(String str) {
        if (StringUtils.isNotBlank(str)) {
            // NOTE(review): fails with an NPE if the response has no "data" object — confirm the endpoint contract.
            return doRequestJSON(new AbstractSpider.RequestParam(String.format(WXB_USER_INFO_URL, str))).getJSONObject("data").getString("avatar");
        }
        throw new RuntimeException("头像获取失败");
    }

    /**
     * Appends the non-blank text of each element to {@code list}, after passing it through
     * {@link TextUtil#joinSplitor4Text}.
     *
     * @param elements elements whose text is collected
     * @param list destination list, mutated in place
     */
    private void text4Elements(Elements elements, List<String> list) {
        for (Element element : elements) {
            String text = element.text();
            if (StringUtils.isNotBlank(text)) {
                list.add(TextUtil.joinSplitor4Text(text));
            }
        }
    }

    /**
     * Intentionally empty override.
     * NOTE(review): presumably disables a host check done by the base class — confirm against AbstractSpider.
     */
    @Override // cn.com.duiba.spider.util.maiquan.spider.AbstractSpider
    protected void checkHost(String str) {
    }

    /** Manual smoke test: scrapes one sample article and prints the DTO (full text blanked) as JSON. */
    public static void main(String[] strArr) {
        DefaultDto unitedStatesSmash = SPIDER.unitedStatesSmash(new AbstractSpider.RequestParam("https://mp.weixin.qq.com/s/NMocN_ciG9p6o0IVBZbiEQ"));
        unitedStatesSmash.setFullText("");
        System.err.println(JSON.toJSONString(unitedStatesSmash));
    }
}
