package cn.com.duiba.nezha.alg.alg.material;

import cn.com.duiba.nezha.alg.alg.vo.material.*;
import cn.com.duiba.nezha.alg.common.util.AssertUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Recall ("match") stage for materials.
 *
 * <p>Given the real-time statistics of the currently deliverable materials and an offline
 * recall list, {@link #match(MaterialRecallDo)} builds a {@link MaterialExtractDo} holding:
 * <ul>
 *   <li>the CTR-ranked quality pool (Wilson lower-bound score),</li>
 *   <li>the trial ("expose") pool of low-exposure materials,</li>
 *   <li>the offline recall list restricted to deliverable materials,</li>
 *   <li>the advert-related materials, kept separately from the quality pool,</li>
 *   <li>a lookup map from material id to its scored entry.</li>
 * </ul>
 */
public class MaterialMatch {

    private static final Logger logger = LoggerFactory.getLogger(MaterialMatch.class);

    /** Pools with at most this many materials are returned whole: no CTR truncation, no trial pool. */
    private static final int SHORT_CUT_SIZE = 100;

    /** Number of top-scored materials kept when the pool is larger than {@link #SHORT_CUT_SIZE}. */
    private static final int CTR_TOP_SIZE = SHORT_CUT_SIZE / 2;

    /**
     * A material whose exposure count is at or below this value enters the trial pool.
     * Per the original comment the exposure count covers the last three days.
     */
    private static final double EXPOSE_MAX_COUNT = 500;

    /** z value of the Wilson score; the original comment describes it as the 90% confidence level. */
    private static final double WILSON_Z = 1.645;

    /**
     * Ids of the advert-benefit-point materials. These are kept out of the CTR quality pool and
     * out of the offline recall list, and are returned in their own list instead. (The original
     * comment notes they could alternatively be told apart by material tags at ranking time.)
     *
     * <p>NOTE(review): the field is public, static, mutable and non-final. It is left that way
     * so existing external readers/writers keep working; consider making it a final
     * unmodifiable set once all usages are known.
     */
    public static Set<Long> AdvertRelatedMaterialSet = new HashSet<>(Arrays.asList(
            28548L, 28549L, 28550L, 28551L, 28552L, 28553L,
            28562L, 28563L, 28564L, 28565L, 28566L, 28567L,
            28966L, 28967L, 28968L, 28969L, 28970L, 28971L,
            28574L, 28575L, 28576L, 28577L, 28578L, 28579L,

            28985L, 28986L, 28987L, 28989L, 28990L, 28992L,

            28614L, 28615L, 28616L, 28617L, 28618L, 28619L,
            28622L, 28623L, 28624L, 28625L, 28626L, 28627L,
            28654L, 28655L, 28656L, 28657L, 28658L, 28659L,
            28661L, 28662L, 28663L, 28664L, 28665L, 28666L,
            28874L, 28875L, 28876L, 28877L, 28878L, 28879L,
            28880L, 28881L, 28882L, 28883L, 28884L, 28885L,
            28892L, 28893L, 28894L, 28895L, 28914L, 28982L,
            28896L, 28897L, 28898L, 28899L, 28900L, 28901L,
            28902L, 28903L, 28904L, 28905L, 28906L, 28907L,
            28960L, 28961L, 28962L, 28963L, 28964L, 28965L,
            28921L, 28922L, 28923L, 28924L, 28925L, 28950L,

            28983L, 28984L, 28988L, 28991L, 28993L, 28994L,

            28932L, 28933L, 28934L, 28935L, 28936L, 28937L,
            28938L, 28939L, 28940L, 28941L, 28942L, 28943L,
            28954L, 28955L, 28956L, 28957L, 28958L, 28959L
    ));

    /**
     * Recall entry point.
     *
     * @param materialRecallDo real-time material statistics plus the offline recall list
     * @return the extracted pools (CTR quality pool, trial pool, offline recall list,
     *         advert-related list and id lookup map)
     */
    public static MaterialExtractDo match(MaterialRecallDo materialRecallDo) {
        return onlineVersionAddNewUsrRclWay(materialRecallDo);
    }

    /**
     * Random strategy intended for the initial launch, to collect training samples: every
     * deliverable material goes into the quality pool with ctr 0 and zero counts, and the trial
     * pool is left empty. Not called from within this class at present.
     *
     * @param materialList deliverable materials; must not be null
     * @return a result with only the quality pool and the (empty) trial pool set
     */
    private static MaterialExtractDo randomStrategy(List<MaterialStatInfo> materialList) {
        List<MaterialMatchDo> qualityPool = new ArrayList<>(materialList.size() + 10);
        for (MaterialStatInfo stat : materialList) {
            qualityPool.add(new MaterialMatchDo(stat.getMaterialId(), 0.0, 0L, 0L));
        }
        List<Long> trialPool = new ArrayList<>();

        MaterialExtractDo extractDo = new MaterialExtractDo();
        extractDo.setMaterialMatchDoList(qualityPool);
        extractDo.setMaterialExposeDoList(trialPool);
        return extractDo;
    }

    /**
     * Production recall (version dated 2020/7/21). Its logic is identical to
     * {@link #onlineVersionAddNewUsrRclWay(MaterialRecallDo)}, so both share {@link #recall}.
     */
    private static MaterialExtractDo onlineVersion(MaterialRecallDo materialRecallDo) {
        return recall(materialRecallDo);
    }

    /**
     * Production recall (version dated 2020/11/30). The original comments describe an extra
     * time-bucketed recall channel for new users; that code was disabled, so this currently
     * behaves exactly like {@link #onlineVersion(MaterialRecallDo)}.
     */
    private static MaterialExtractDo onlineVersionAddNewUsrRclWay(MaterialRecallDo materialRecallDo) {
        return recall(materialRecallDo);
    }

    /**
     * Shared recall implementation.
     *
     * <ul>
     *   <li>No real-time data: every pool is empty and the offline recall list is passed through
     *       unfiltered.</li>
     *   <li>At most {@link #SHORT_CUT_SIZE} materials: all non-advert materials form the quality
     *       pool; the trial pool and the offline recall list are empty.</li>
     *   <li>Otherwise: the top {@link #CTR_TOP_SIZE} non-advert materials by Wilson score form
     *       the quality pool, low-exposure materials form the trial pool, and the offline recall
     *       list is intersected with the deliverable, non-advert materials.</li>
     * </ul>
     */
    private static MaterialExtractDo recall(MaterialRecallDo materialRecallDo) {
        List<MaterialStatInfo> materialList = materialRecallDo.getMaterialList();

        List<MaterialMatchDo> candidates = new ArrayList<>(600);
        List<Long> trialPool = new ArrayList<>(600);
        List<MaterialMatchDo> advertRelated = new ArrayList<>(30);
        Map<Long, MaterialMatchDo> matchDoById = new HashMap<>();
        MaterialExtractDo extractDo = new MaterialExtractDo();

        // No real-time statistics: nothing can be scored or intersected.
        if (!AssertUtil.isNotEmpty(materialList) || materialList.isEmpty()) {
            extractDo.setMaterialMatchDoList(candidates);
            extractDo.setMaterialExposeDoList(trialPool);
            // Passed through as-is (whatever the caller supplied, possibly null).
            extractDo.setMaterialCostMatchDoList(materialRecallDo.getMaterialCostMatchDoList());
            extractDo.setMaterialAdvertRelatedDoList(advertRelated);
            extractDo.setMaterialMatchDoMap(matchDoById);
            return extractDo;
        }

        // Small pool: everything is returned, split only into advert-related / other.
        if (materialList.size() <= SHORT_CUT_SIZE) {
            for (MaterialStatInfo stat : materialList) {
                Long materialId = stat.getMaterialId();
                MaterialMatchDo matchDo = toMatchDo(stat);
                if (AdvertRelatedMaterialSet.contains(materialId)) {
                    // NOTE(review): advert-related materials are NOT put into the lookup map
                    // here, whereas the large-pool branch below does include them. Kept as in
                    // the original; confirm which behaviour downstream code expects.
                    advertRelated.add(matchDo);
                } else {
                    candidates.add(matchDo);
                    matchDoById.put(materialId, matchDo);
                }
            }
            extractDo.setMaterialMatchDoList(candidates);
            extractDo.setMaterialExposeDoList(trialPool);                 // empty trial pool
            extractDo.setMaterialCostMatchDoList(new ArrayList<>());     // empty offline recall
            extractDo.setMaterialMatchDoMap(matchDoById);
            extractDo.setMaterialAdvertRelatedDoList(advertRelated);
            return extractDo;
        }

        // Large pool: top-N by score plus a trial pool.
        Set<Long> deliverableIds = new HashSet<>();
        for (MaterialStatInfo stat : materialList) {
            Long materialId = stat.getMaterialId();
            MaterialMatchDo matchDo = toMatchDo(stat);
            deliverableIds.add(materialId);
            if (AdvertRelatedMaterialSet.contains(materialId)) {
                advertRelated.add(matchDo);
            } else {
                candidates.add(matchDo);
            }
            // A missing exposure count is treated as "not exposed yet", i.e. trial-eligible,
            // instead of failing on auto-unboxing.
            Long exposeCnt = stat.getExposeCnt();
            if (exposeCnt == null || exposeCnt <= EXPOSE_MAX_COUNT) {
                trialPool.add(materialId);
            }
            matchDoById.put(materialId, matchDo);
        }

        // Rank by score, best first, and truncate.
        List<MaterialMatchDo> topByCtr = candidates.stream()
                .sorted(Comparator.comparing(MaterialMatchDo::getCtr).reversed())
                .limit(CTR_TOP_SIZE)
                .collect(Collectors.toList());

        // Keep only offline-recalled materials that are currently deliverable (drops blocked
        // materials) and that are not advert-related.
        List<MaterialCostMatchDo> offlineRecall = materialRecallDo.getMaterialCostMatchDoList();
        List<MaterialCostMatchDo> filteredOfflineRecall = new ArrayList<>();
        if (AssertUtil.isNotEmpty(offlineRecall)) {
            filteredOfflineRecall = offlineRecall.stream()
                    .filter(costDo -> deliverableIds.contains(costDo.getMaterialId())
                            && !AdvertRelatedMaterialSet.contains(costDo.getMaterialId()))
                    .collect(Collectors.toList());
        }

        extractDo.setMaterialMatchDoList(topByCtr);
        extractDo.setMaterialExposeDoList(trialPool);
        extractDo.setMaterialMatchDoMap(matchDoById);
        extractDo.setMaterialCostMatchDoList(filteredOfflineRecall);
        extractDo.setMaterialAdvertRelatedDoList(advertRelated);
        return extractDo;
    }

    /** Builds the scored entry (id, Wilson score, exposure count, click count) for one material. */
    private static MaterialMatchDo toMatchDo(MaterialStatInfo stat) {
        Long exposeCnt = stat.getExposeCnt();
        Long clickCnt = stat.getClickCnt();
        return new MaterialMatchDo(stat.getMaterialId(), calWilsonScore(exposeCnt, clickCnt), exposeCnt, clickCnt);
    }

    /**
     * Lower bound of the Wilson score interval for the click-through rate
     * {@code clickCnt / exposeCnt}, using z = {@link #WILSON_Z}.
     *
     * @param exposeCnt number of exposures
     * @param clickCnt  number of clicks
     * @return the Wilson lower bound, or 0.0 when either count is missing, the exposure count is
     *         not positive, the click count is negative, or clicks exceed exposures
     */
    private static double calWilsonScore(Long exposeCnt, Long clickCnt) {
        if (exposeCnt == null || clickCnt == null || AssertUtil.isAnyEmpty(exposeCnt, clickCnt)) {
            return 0.0;
        }
        // Invalid counts would otherwise produce NaN (negative value under the square root) or
        // a meaningless score; NaN in particular sorts ahead of every real score.
        if (exposeCnt <= 0 || clickCnt < 0 || clickCnt > exposeCnt) {
            return 0.0;
        }
        double n = exposeCnt;
        double ratio = clickCnt / n;
        double zSquare = WILSON_Z * WILSON_Z;
        double centre = ratio + zSquare / (2 * n);
        double margin = WILSON_Z * Math.sqrt(4 * n * ratio * (1 - ratio) + zSquare) / (2 * n);
        return (centre - margin) / (1 + zSquare / n);
    }

}
