package cn.com.duiba.nezha.compute.common.util

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.recommendation.Rating
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

import scala.collection.Map
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConverters._

/**
 * Helpers that turn raw input RDDs into MLlib training structures
 * (`Rating` and `LabeledPoint`).
 *
 * Created by pc on 2016/11/23.
 */
object SampleParser {

  /**
   * Parses delimited text lines into MLlib `Rating`s and prints summary statistics.
   *
   * Each line is split with `sep`; field 0 becomes the user, field 1 the product and
   * field 2 the rating value. The original in-body note referenced the MovieLens rating
   * labels (Must see, Will enjoy, It's okay, Fairly bad, Awful).
   *
   * Malformed lines are not filtered: a missing or non-numeric field raises an exception
   * when the RDD is evaluated (which already happens inside this method, because
   * `ratingsDescPrint` runs actions on the result).
   *
   * @param data          one rating record per line
   * @param implicitPrefs when true, `reduceValue` is subtracted from the parsed rating;
   *                      when false the rating is used unchanged
   * @param sep           field separator; it is handed to `String.split`, so it is
   *                      interpreted as a regular expression
   * @param reduceValue   amount subtracted from each rating when `implicitPrefs` is true
   * @return the parsed ratings
   */
  def ratingParse(data: RDD[String], implicitPrefs: Boolean, sep: String, reduceValue: Double): RDD[Rating] = {
    val parsed = data.map { record =>
      val cols = record.split(sep)
      val userId = cols(0).toInt
      val productId = cols(1).toInt
      val rawScore = cols(2).toDouble
      // Only the implicit-preference mode shifts the score.
      val score = if (implicitPrefs) rawScore - reduceValue else rawScore
      Rating(userId, productId, score)
    }

    // Side effect: prints counts of ratings, users and products to stdout.
    ratingsDescPrint(parsed)
    parsed
  }

  /**
   * Parses delimited text lines into `(key, Rating)` pairs.
   *
   * Each line is split with `sep`; field 0 becomes the integer key of the pair
   * (presumably an app id, judging by the method name -- confirm with callers),
   * field 1 the user, field 2 the product and field 3 the rating value.
   * Unlike `ratingParse`, no statistics are printed.
   *
   * @param data          one record per line
   * @param implicitPrefs when true, `reduceValue` is subtracted from the parsed rating;
   *                      when false the rating is used unchanged
   * @param sep           field separator; it is handed to `String.split`, so it is
   *                      interpreted as a regular expression
   * @param reduceValue   amount subtracted from each rating when `implicitPrefs` is true
   * @return pairs of (field 0 as Int, parsed rating)
   */
  def appRatingParse(data: RDD[String], implicitPrefs: Boolean, sep: String, reduceValue: Double): RDD[(Int, Rating)] =
    data.map { record =>
      val cols = record.split(sep)
      val key = cols(0).toInt
      val userId = cols(1).toInt
      val productId = cols(2).toInt
      val rawScore = cols(3).toDouble
      // Only the implicit-preference mode shifts the score.
      val score = if (implicitPrefs) rawScore - reduceValue else rawScore
      (key, Rating(userId, productId, score))
    }

  /**
   * Prints how many ratings, distinct users and distinct products `data` contains.
   *
   * Runs three separate `count()` actions over `data`.
   *
   * @param data ratings to summarise
   */
  def ratingsDescPrint(data: RDD[Rating]): Unit = {
    // Total number of samples.
    val total = data.count()
    // Number of distinct users.
    val users = data.map(rating => rating.user).distinct().count()
    // Number of distinct products (items).
    val products = data.map(rating => rating.product).distinct().count()

    println(s"Got $total ratings from $users users on $products numActivityTopics.")
  }

  /**
   * Builds a `LabeledPoint` for every input row and keeps the row next to it.
   *
   * For each row, the value of every feature in `featureIdxList` is looked up at the
   * position given by `featuerIdxLocMap`, lower-cased, and the resulting list is passed
   * together with `featureIdxList` to `dictUtil.oneHotDoubleEncode` (presumably a one-hot
   * encoding, per its name -- its implementation is not visible here). The encoded values
   * become a dense feature vector. The label is `LabelUtil.getLabel` applied to the first
   * element of the row.
   *
   * A feature id missing from `featuerIdxLocMap`, or a position outside the row, raises
   * an exception when the RDD is evaluated.
   *
   * @param data             rows of string fields; element 0 is used as the label source
   * @param featureIdxList   ordered feature ids to extract and encode
   * @param featuerIdxLocMap maps a feature id to its position inside a row
   * @param dictUtil         encoder used to turn the feature values into doubles
   * @return pairs of (original row, labeled point built from it)
   */
  def lRCTRSmapleParse(data: RDD[List[String]], featureIdxList: List[String], featuerIdxLocMap: Map[String, Int], dictUtil: CategoryFeatureDictUtil): RDD[(List[String], LabeledPoint)] =
    data.map { row =>
      // Feature values in the same order as featureIdxList, lower-cased.
      val rawValues = featureIdxList.map(featureIdx => row(featuerIdxLocMap(featureIdx)).toLowerCase)

      val encoded = dictUtil.oneHotDoubleEncode(featureIdxList.asJava, rawValues.asJava).toList

      (row, LabeledPoint(LabelUtil.getLabel(row(0)), Vectors.dense(encoded.toArray)))
    }

  /**
   * Same as `lRCTRSmapleParse`, but returns only the labeled points and drops the rows.
   *
   * @param data             rows of string fields; element 0 is used as the label source
   * @param featureIdxList   ordered feature ids to extract and encode
   * @param featuerIdxLocMap maps a feature id to its position inside a row
   * @param dictUtil         encoder used to turn the feature values into doubles
   * @return the labeled points, one per input row
   */
  def lRCTRSmaple(data: RDD[List[String]], featureIdxList: List[String], featuerIdxLocMap: Map[String, Int], dictUtil: CategoryFeatureDictUtil): RDD[LabeledPoint] =
    lRCTRSmapleParse(data, featureIdxList, featuerIdxLocMap, dictUtil).map { case (_, point) => point }

}
