package cn.com.duiba.wolf.utils;

import com.alibaba.dubbo.common.utils.StringUtils;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.commons.lang.StringEscapeUtils;
import org.owasp.html.*;

import java.util.Map;
import java.util.Set;

/**
 * HTML utilities: whitelist-based XSS sanitizing for HTML fragments and for the
 * string values found inside JSON documents.
 * Created by wenqi.huang on 16/8/29.
 */
public class HtmlUtils {

    /**
     * Whitelist policy shared by every sanitizing call in this class. It is built once
     * and never reassigned. Only the elements and attributes listed here survive
     * sanitizing; everything else (for example {@code <script>}) is dropped.
     */
    private static final PolicyFactory POLICY = new HtmlPolicyBuilder()
            .allowAttributes("src", "title").onElements("img")
            .allowAttributes("href").onElements("a")
            .allowAttributes("class", "id").onElements("span")
            //.allowAttributes("class").globally()
            // Allow some URLs through.
            .allowStandardUrlProtocols()
            .allowElements(
                    "a", "label", "h1", "h2", "h3", "h4", "h5", "h6",
                    "p", "i", "b", "u", "strong", "em",
                    "sub", "sup", "strike", "center", "blockquote",
                    "hr", "br", "font", "span", "div", "img",
                    "ul", "ol", "li",
                    "dd", "dt", "dl",
                    "tbody", "thead", "tfoot", "table", "td", "th", "tr", "colgroup", "col",
                    "fieldset", "legend", "samp", "cite", "pre", "code", "big", "small"
            )
            .toFactory();

    /**
     * Strips every tag that is not on the whitelist, keeping only whitelisted tags such as
     * {@code <a>}, {@code <b>} and {@code <p>} (see the policy definition for the full list).
     *
     * @param htmlString HTML source to sanitize; a value that {@code StringUtils.isBlank}
     *                   reports as blank (including {@code null}) is returned unchanged
     * @return the sanitized HTML
     */
    public static String cleanXss(String htmlString) {
        if (StringUtils.isBlank(htmlString)) {
            return htmlString;
        }
        return POLICY.sanitize(preprocess(htmlString));
    }

    /**
     * Replaces the full-width punctuation marks {@code ？} and {@code ！} with their ASCII
     * counterparts before sanitizing.
     *
     * TODO: the sanitizer library was found to turn the Chinese full-width ？ and ！ into
     * garbled text. A question has been raised with the upstream project; this replacement
     * is a temporary workaround until that is resolved.
     *
     * @param htmlString text about to be sanitized
     * @return the text with full-width question and exclamation marks replaced
     */
    @Deprecated
    private static String preprocess(String htmlString) {
        String replaced = org.apache.commons.lang.StringUtils.replace(htmlString, "？", "?");
        // NOTE(review): the original author left a trailing "￥" marker here, presumably as
        // another character to look into - confirm before extending the workaround.
        return org.apache.commons.lang.StringUtils.replace(replaced, "！", "!");
    }

    /**
     * Sanitizes every string value inside a JSON document with {@link #cleanXss(String)},
     * descending into nested arrays and objects. Keys and non-string values are copied
     * without being sanitized.
     * <br/><br/>
     * Example input: {"name":"jack","address":"hangzhou &lt;script&gt;alert('i am bad');&lt;/script&gt;"}
     * <br/>
     * Output: {"name":"jack","address":"hangzhou"}
     * <br/><br/>
     * Text that is neither a JSON array nor a JSON object (after ignoring surrounding
     * whitespace) is returned without being parsed or sanitized; only the full-width
     * punctuation replacement is applied to it.
     *
     * @param jsonString JSON text; a value that {@code StringUtils.isBlank} reports as
     *                   blank (including {@code null}) is returned unchanged
     * @return the JSON text with its string values sanitized
     */
    public static String cleanXssOfJsonString(String jsonString) {
        if (StringUtils.isBlank(jsonString)) {
            return jsonString;
        }

        jsonString = preprocess(jsonString);

        // JSON permits whitespace before the opening bracket. Detect the container type on
        // the trimmed text so that such documents are sanitized instead of slipping
        // through the fall-through return below untouched.
        String trimmed = jsonString.trim();

        if (trimmed.startsWith("[")) { // JSON array
            return cleanXssOfJsonArray(JSONArray.fromObject(trimmed)).toString();
        }
        if (trimmed.startsWith("{")) { // JSON object
            return cleanXssOfJsonObject(JSONObject.fromObject(trimmed)).toString();
        }

        return jsonString;
    }

    /**
     * Builds a new array whose elements are the sanitized counterparts of the given
     * array's elements, in the same order.
     *
     * @param jsonArray array to sanitize; {@code null} or empty is returned as is
     * @return a new sanitized array, or the argument itself when it is {@code null} or empty
     */
    private static JSONArray cleanXssOfJsonArray(JSONArray jsonArray) {
        if (jsonArray == null || jsonArray.isEmpty()) {
            return jsonArray;
        }
        JSONArray cleaned = new JSONArray();
        for (int i = 0; i < jsonArray.size(); i++) {
            cleaned.add(cleanXssOfValue(jsonArray.get(i)));
        }
        return cleaned;
    }

    /**
     * Builds a new object holding the same keys as the given object, each mapped to the
     * sanitized counterpart of its value. Keys are copied unchanged.
     *
     * @param jsonObject object to sanitize; {@code null} or empty is returned as is
     * @return a new sanitized object, or the argument itself when it is {@code null} or empty
     */
    private static JSONObject cleanXssOfJsonObject(JSONObject jsonObject) {
        if (jsonObject == null || jsonObject.isEmpty()) {
            return jsonObject;
        }
        JSONObject cleaned = new JSONObject();
        // Iterate with wildcard types: entrySet() is inherited from the raw Map interface,
        // so assigning it to a parameterized Set would be an unchecked conversion.
        for (Object rawEntry : jsonObject.entrySet()) {
            Map.Entry<?, ?> entry = (Map.Entry<?, ?>) rawEntry;
            String key = (String) entry.getKey();
            cleaned.put(key, cleanXssOfValue(entry.getValue()));
        }
        return cleaned;
    }

    /**
     * Sanitizes a single JSON value according to its runtime type: arrays and objects are
     * walked recursively, strings go through {@link #cleanXss(String)}, and every other
     * value is returned untouched.
     *
     * @param value a value taken from a JSON array or object
     * @return the sanitized value
     */
    private static Object cleanXssOfValue(Object value) {
        if (value instanceof JSONArray) {
            return cleanXssOfJsonArray((JSONArray) value);
        }
        if (value instanceof JSONObject) {
            return cleanXssOfJsonObject((JSONObject) value);
        }
        if (value instanceof String) {
            return cleanXss((String) value);
        }
        return value;
    }

    /**
     * Manual smoke test: prints the sanitized form of a sample HTML fragment, then of a
     * JSON array and a JSON object that embed the same fragment.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String html = "<img class='a' src='b.jpg'></img><script>alert('？？');</script><a class='a'  href='a.html'>？？？http://www.baidu.com？</a><div></div><h1 class='aa'>hello world发达</h1><h2 style='display:none;'></h2>";

        String cleanedHtml = HtmlUtils.cleanXss(html);
        System.out.println(cleanedHtml);

        JSONObject jsonObject = new JSONObject();
        jsonObject.put("name", "");
        jsonObject.put("address", html);
        JSONArray jsonArray = new JSONArray();
        jsonArray.add(jsonObject);
        jsonArray.add(jsonObject);
        jsonArray.add(jsonObject);
        String cleanedJsonString = HtmlUtils.cleanXssOfJsonString(jsonArray.toString());
        System.out.println(cleanedJsonString);

        cleanedJsonString = HtmlUtils.cleanXssOfJsonString(jsonObject.toString());
        System.out.println();
        System.out.println(cleanedJsonString);
    }

}
