package spark.jobserver;

import com.typesafe.config.Config;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.spark.SparkContext;
import org.apache.spark.ml.clustering.KMeans;
import org.apache.spark.ml.feature.StandardScaler;
import org.apache.spark.ml.feature.VectorAssembler;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import scala.Array$;
import scala.Option;
import scala.Predef$;
import scala.Tuple3;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import spark.jobserver.NamedRddSupport;

/* compiled from: KMeansExample.scala */
/* loaded from: input_file:spark/jobserver/KMeansExample$.class */
/**
 * Singleton job (decompiled from the Scala object in {@code KMeansExample.scala}) that
 * clusters a flight-data set with Spark ML KMeans and returns a small JSON sample of the
 * clustered rows.
 *
 * <p>The clustered result is stored through {@link NamedRddSupport} under the name
 * {@code "kmeans"}; when that entry already exists, {@link #runJob} skips the pipeline and
 * samples the stored data instead.
 */
public final class KMeansExample$ implements SparkJob, NamedRddSupport {
    /** The single instance; assigned from the private constructor, which the static initializer invokes. */
    public static final KMeansExample$ MODULE$ = null;

    /** Approximate number of rows {@link #sampleAndReturn} aims to return. */
    private static final long TARGET_SAMPLE_ROWS = 1000L;

    /** Upper bound on the number of input rows read from the parquet source. */
    private static final int MAX_INPUT_ROWS = 5000000;

    private final int NUM_ITERATIONS;
    private final int K;
    private final AtomicReference<NamedRdds> namedRddsPrivate;

    static {
        new KMeansExample$();
    }

    /** Returns the holder for the named-RDD registry used by {@link NamedRddSupport}. */
    public AtomicReference<NamedRdds> namedRddsPrivate() {
        return this.namedRddsPrivate;
    }

    /** Compiler-generated trait setter through which {@link NamedRddSupport} installs its holder. */
    public void spark$jobserver$NamedRddSupport$_setter_$namedRddsPrivate_$eq(AtomicReference atomicReference) {
        this.namedRddsPrivate = atomicReference;
    }

    /** Returns the named-RDD registry, delegating to the {@link NamedRddSupport} trait implementation. */
    public NamedRdds namedRdds() {
        return NamedRddSupport.class.namedRdds(this);
    }

    /** Maximum number of KMeans iterations (set to 100 in the constructor). */
    public int NUM_ITERATIONS() {
        return this.NUM_ITERATIONS;
    }

    /** Number of clusters requested from KMeans (set to 7 in the constructor). */
    public int K() {
        return this.K;
    }

    /**
     * Validates the job request. This job performs no checks and always reports success.
     *
     * @param sparkContext the context the job would run in (unused)
     * @param config the job configuration (unused)
     * @return {@code SparkJobValid$.MODULE$} unconditionally
     */
    public SparkJobValidation validate(SparkContext sparkContext, Config config) {
        return SparkJobValid$.MODULE$;
    }

    /**
     * Runs the clustering pipeline, or reuses a previously stored result.
     *
     * <p>If a named RDD called {@code "kmeans"} exists, it is cached, turned back into a
     * DataFrame using the schema of its first row, and sampled. Otherwise the parquet input is
     * read (limited to {@code MAX_INPUT_ROWS} rows), assembled into a {@code "Features"}
     * vector, scaled into {@code "ScaledFeatures"}, clustered into the {@code "Output"} column,
     * stored under {@code "kmeans"}, and sampled.
     *
     * @param sparkContext the Spark context to run against
     * @param config the job configuration (not read by this job)
     * @return the tuple produced by {@link #sampleAndReturn}
     */
    public Tuple3<String[], String[], Object> runJob(SparkContext sparkContext, Config config) {
        SQLContext sQLContext = new SQLContext(sparkContext);
        NamedRdds namedRdds = namedRdds();
        Option option = namedRdds.get("kmeans", namedRdds.get$default$2("kmeans"));
        if (option.isDefined()) {
            // Reuse the stored result; the schema is recovered from the first row.
            // NOTE(review): an empty stored RDD would make the [0] access below fail.
            RDD cache = ((RDD) option.get()).cache();
            return sampleAndReturn(sQLContext.createDataFrame(cache, ((Row[]) cache.take(1))[0].schema()));
        }
        DataFrame limit = sQLContext.read().parquet(Predef$.MODULE$.wrapRefArray(new String[]{"s3n://us-east-1.elasticmapreduce.samples/flightdata/input"})).limit(MAX_INPUT_ROWS);
        // Column selection: dtypes are filtered and mapped by anonymous functions that are not
        // visible in this file (presumably picking the columns usable as numeric features).
        Column[] columnArr = (Column[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(limit.dtypes()).filter(new KMeansExample$$anonfun$1())).map(new KMeansExample$$anonfun$2(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).map(new KMeansExample$$anonfun$3(limit), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)));
        // Select those columns, repartition to 50 partitions, replace missing values with 0.0,
        // and assemble them into the "Features" vector column.
        DataFrame cache2 = new VectorAssembler().setInputCols((String[]) Predef$.MODULE$.refArrayOps(columnArr).map(new KMeansExample$$anonfun$4(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).setOutputCol("Features").transform(limit.select(Predef$.MODULE$.wrapRefArray(columnArr)).repartition(50).na().fill(0.0d)).cache();
        StandardScaler standardScaler = new StandardScaler();
        standardScaler.setWithMean(false);
        standardScaler.setWithStd(true);
        standardScaler.setInputCol("Features");
        standardScaler.setOutputCol("ScaledFeatures");
        DataFrame cache3 = standardScaler.fit(cache2).transform(cache2).cache();
        // Run an action over the scaled frame before releasing the assembled frame.
        // NOTE(review): the foreach body is not visible here; presumably a no-op used only to
        // materialize the cache so that unpersisting cache2 is safe -- confirm.
        cache3.foreach(new KMeansExample$$anonfun$runJob$1());
        cache2.unpersist();
        KMeans kMeans = new KMeans();
        kMeans.setK(K());
        kMeans.setMaxIter(NUM_ITERATIONS());
        kMeans.setFeaturesCol("ScaledFeatures");
        kMeans.setPredictionCol("Output");
        DataFrame cache4 = kMeans.fit(cache3).transform(cache3).cache();
        // Publish the clustered frame under "kmeans" so later runs take the fast path above.
        NamedRdds namedRdds2 = namedRdds();
        namedRdds2.update("kmeans", new KMeansExample$$anonfun$runJob$2(cache4), namedRdds2.update$default$3(), namedRdds2.update$default$4());
        return sampleAndReturn(cache4);
    }

    /**
     * Drops the {@code "Features"} and {@code "ScaledFeatures"} columns and returns a random
     * sample (without replacement) of roughly {@code TARGET_SAMPLE_ROWS} rows.
     *
     * @param dataFrame the frame to sample
     * @return a tuple of (column names of the sample, the sampled rows as JSON strings, the
     *         boxed number of sampled rows)
     */
    public Tuple3<String[], String[], Object> sampleAndReturn(DataFrame dataFrame) {
        DataFrame withoutVectors = dataFrame.drop("Features").drop("ScaledFeatures");
        double fraction = sampleFraction(dataFrame.count());
        DataFrame sample = withoutVectors.sample(false, fraction);
        return new Tuple3<>(sample.columns(), sample.toJSON().collect(), BoxesRunTime.boxToLong(sample.count()));
    }

    /**
     * Computes the sampling fraction that yields about {@code TARGET_SAMPLE_ROWS} rows.
     *
     * <p>The division is done in floating point: integer division would truncate to 0 for any
     * frame larger than the target (producing an empty sample) and would throw on an empty
     * frame. The result is clamped to [0, 1] because sampling is done without replacement.
     *
     * @param rowCount total number of rows in the frame being sampled
     * @return a fraction in [0, 1]; 0 when {@code rowCount} is not positive
     */
    private static double sampleFraction(long rowCount) {
        if (rowCount <= 0L) {
            return 0.0d;
        }
        return Math.min(1.0d, (double) TARGET_SAMPLE_ROWS / (double) rowCount);
    }

    /** Registers this instance as the singleton, initializes the trait state, and sets the KMeans parameters. */
    private KMeansExample$() {
        MODULE$ = this;
        NamedRddSupport.class.$init$(this);
        this.NUM_ITERATIONS = 100;
        this.K = 7;
    }
}
