
Commit 0225342
Author: Andrea Baita
Update to spark 3.4.1, remove obsolete sbt plugins
Parent: db01dcf

8 files changed, +36 -63 lines

build.sbt

Lines changed: 2 additions & 4 deletions

@@ -8,18 +8,16 @@ lazy val root = Project("spark-knn", file(".")).
 lazy val core = knnProject("spark-knn-core").
   settings(
     name := "spark-knn",
-    spName := "saurfang/spark-knn",
     credentials += Credentials(Path.userHome / ".ivy2" / ".sbtcredentials"),
     licenses += "Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0")
   ).
   settings(Dependencies.core).
   settings(
-    scalafixDependencies in ThisBuild += "org.scalatest" %% "autofix" % "3.1.0.0",
+    ThisBuild / scalafixDependencies += "org.scalatest" %% "autofix" % "3.1.0.1",
     addCompilerPlugin(scalafixSemanticdb) // enable SemanticDB
   )

 lazy val examples = knnProject("spark-knn-examples").
   dependsOn(core).
   settings(fork in run := true, coverageExcludedPackages := ".*examples.*").
-  settings(Dependencies.examples).
-  settings(SparkSubmit.settings: _*)
+  settings(Dependencies.examples)
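
Besides bumping the autofix version, the scalafix line above migrates from the old `key in scope` form to the slash syntax (`scope / key`) that sbt 1.x standardizes on, which the sbt 1.9.7 upgrade below effectively requires. A minimal before/after sketch of the same setting:

// sbt 0.13-era scoping, deprecated in recent sbt 1.x releases:
scalafixDependencies in ThisBuild += "org.scalatest" %% "autofix" % "3.1.0.1"

// sbt 1.x unified slash syntax, as used in this commit:
ThisBuild / scalafixDependencies += "org.scalatest" %% "autofix" % "3.1.0.1"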

project/Common.scala

Lines changed: 3 additions & 7 deletions

@@ -2,23 +2,19 @@ import com.typesafe.sbt.GitVersioning
 import sbt._
 import Keys._
 import com.typesafe.sbt.GitPlugin.autoImport._
-import sbtsparkpackage.SparkPackagePlugin.autoImport._

 import scala.language.experimental.macros
 import scala.reflect.macros.Context

 object Common {
   val commonSettings = Seq(
     organization in ThisBuild := "com.github.saurfang",
-    javacOptions ++= Seq("-source", "1.8", "-target", "1.8"),
-    scalacOptions ++= Seq("-target:jvm-1.8", "-deprecation", "-feature"),
+    javacOptions ++= Seq("-source", "11", "-target", "11"),
+    scalacOptions ++= Seq("-deprecation", "-feature"),
     //git.useGitDescribe := true,
     git.baseVersion := "0.0.1",
     parallelExecution in test := false,
-    updateOptions := updateOptions.value.withCachedResolution(true),
-    sparkVersion := "3.1.2",
-    sparkComponents += "mllib",
-    spIgnoreProvided := true
+    updateOptions := updateOptions.value.withCachedResolution(true)
   )

   def knnProject(path: String): Project = macro knnProjectMacroImpl
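
The javac/scalac change above moves compilation from a Java 8 to a Java 11 toolchain, consistent with Spark 3.4.x, which supports Java 8, 11, and 17. As a sketch of a stricter variant, and an assumption rather than what this commit does, `--release` would also validate API usage against the JDK 11 class library:

// Assumption: stricter alternative to -source/-target (not used by this commit).
// --release rejects accidental use of APIs that do not exist in JDK 11.
javacOptions ++= Seq("--release", "11")
scalacOptions ++= Seq("-release", "11") // supported by Scala 2.12.18 and 2.13.12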

project/Dependencies.scala

Lines changed: 10 additions & 6 deletions

@@ -3,23 +3,27 @@ import Keys._

 object Dependencies {
   val Versions = Seq(
-    crossScalaVersions := Seq("2.12.8", "2.11.12"),
+    crossScalaVersions := Seq("2.12.18", "2.13.12"),
     scalaVersion := crossScalaVersions.value.head
   )

   object Compile {
-    val breeze_natives = "org.scalanlp" %% "breeze-natives" % "1.0" % "provided"
+    val spark_version = "3.4.1"
+    val spark_core = "org.apache.spark" %% "spark-core" % spark_version % "provided"
+    val spark_mllib = "org.apache.spark" %% "spark-mllib" % spark_version % "provided"
+    val breeze = "org.scalanlp" %% "breeze" % "2.1.0" % "provided"
+    val netlib = "com.github.fommil.netlib" % "core" % "1.1.2"

     object Test {
-      val scalatest = "org.scalatest" %% "scalatest" % "3.1.0" % "test"
-      val sparktest = "org.apache.spark" %% "spark-core" % "3.1.2" % "test" classifier "tests"
+      val scalatest = "org.scalatest" %% "scalatest" % "3.2.17" % "test"
+      val sparktest = "org.apache.spark" %% "spark-core" % spark_version % "test" classifier "tests"
     }
   }

   import Compile._
   import Test._
   val l = libraryDependencies

-  val core = l ++= Seq(scalatest, sparktest)
-  val examples = core +: (l ++= Seq(breeze_natives))
+  val core = l ++= Seq(spark_core, spark_mllib, scalatest, sparktest)
+  val examples = core +: (l ++= Seq(breeze, netlib))
 }
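
With spark-core and spark-mllib now declared as "provided" (the removed sbt-spark-package plugin used to inject them), downstream builds have to supply Spark themselves. A hedged sketch of what a consumer build.sbt might declare; the spark-knn version below is illustrative, not taken from this commit:

// Hypothetical downstream build.sbt (spark-knn version is illustrative).
scalaVersion := "2.12.18"

libraryDependencies ++= Seq(
  // "provided": the Spark runtime on the cluster supplies these jars.
  "org.apache.spark" %% "spark-core"  % "3.4.1" % "provided",
  "org.apache.spark" %% "spark-mllib" % "3.4.1" % "provided",
  "com.github.saurfang" %% "spark-knn" % "0.3.0"
)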

project/SparkSubmit.scala

Lines changed: 0 additions & 25 deletions
This file was deleted.

project/build.properties

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-sbt.version = 0.13.18
+sbt.version = 1.9.7

project/plugins.sbt

Lines changed: 6 additions & 20 deletions

@@ -1,20 +1,6 @@
-addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.3")
-
-addSbtPlugin("me.lessis" % "bintray-sbt" % "0.3.0")
-
-addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "0.8.5")
-
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")
-
-addSbtPlugin("com.github.saurfang" % "sbt-spark-submit" % "0.0.4")
-
-addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.0")
-
-addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.8.0"
-  excludeAll ExclusionRule(organization = "com.danieltrinh"))
-libraryDependencies += "org.scalariform" %% "scalariform" % "0.1.8"
-
-resolvers += "bintray-spark-packages" at "https://dl.bintray.com/spark-packages/maven/"
-addSbtPlugin("org.spark-packages" % "sbt-spark-package" % "0.2.6")
-
-addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.4")
+addSbtPlugin("com.github.sbt" % "sbt-release" % "1.1.0")
+addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "1.0.2")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.4")
+addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
+addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.9")
+addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.11.1")

spark-knn-core/src/main/scala/org/apache/spark/ml/classification/KNNClassifier.scala

Lines changed: 7 additions & 0 deletions

@@ -13,6 +13,7 @@ import org.apache.spark.sql.types.{DoubleType, StructType}
 import org.apache.spark.sql.{DataFrame, Dataset, Row}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.SparkException
+import org.apache.spark.sql.functions.col

 import scala.collection.mutable.ArrayBuffer

@@ -77,6 +78,12 @@ with KNNParams with HasWeightCol {
   /** @group setParam */
   def setSeed(value: Long): this.type = set(seed, value)

+  /** Reimplemented here because it was removed from Spark 3.4.0. */
+  private def extractLabeledPoints(dataset: Dataset[_]): RDD[LabeledPoint] = {
+    import dataset.sparkSession.implicits._
+    dataset.select(col($(labelCol)), col($(featuresCol))).as[LabeledPoint].rdd
+  }
+
   override protected def train(dataset: Dataset[_]): KNNClassificationModel = {
     // Extract columns from data. If dataset is persisted, do not persist oldDataset.
     val instances = extractLabeledPoints(dataset).map {
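
The private helper above re-creates the extractLabeledPoints method that the Spark ML base classes provided before 3.4.0; the same helper is duplicated in NaiveKNN.scala below. A self-contained sketch of the underlying select-then-as[LabeledPoint] pattern, runnable against Spark 3.4.1, with column names and data invented for the demo:

// Standalone sketch of the conversion used by this commit (demo data invented).
import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

object ExtractLabeledPointsDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()
    import spark.implicits._

    // A DataFrame shaped like (label: Double, features: Vector).
    val df = Seq(
      (0.0, Vectors.dense(1.0, 2.0)),
      (1.0, Vectors.dense(3.0, 4.0))
    ).toDF("label", "features")

    // Select the two columns, decode each row into a LabeledPoint via the
    // Dataset encoder, then drop down to an RDD, as train() expects.
    val points = df.select(col("label"), col("features")).as[LabeledPoint].rdd
    points.collect().foreach(println)

    spark.stop()
  }
}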

spark-knn-examples/src/main/scala/org/apache/spark/ml/classification/NaiveKNN.scala

Lines changed: 7 additions & 0 deletions

@@ -14,6 +14,7 @@ import org.apache.spark.sql.types.{ArrayType, DoubleType, StructType}
 import org.apache.spark.sql.{DataFrame, Dataset, Row}
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.mllib.rdd.MLPairRDDFunctions._
+import org.apache.spark.sql.functions.col

 import scala.collection.mutable.ArrayBuffer

@@ -26,6 +27,12 @@ class NaiveKNNClassifier(override val uid: String, val distanceMetric: DistanceM

   override def copy(extra: ParamMap): NaiveKNNClassifier = defaultCopy(extra)

+  /** Reimplemented here because it was removed from Spark 3.4.0. */
+  private def extractLabeledPoints(dataset: Dataset[_]): RDD[LabeledPoint] = {
+    import dataset.sparkSession.implicits._
+    dataset.select(col($(labelCol)), col($(featuresCol))).as[LabeledPoint].rdd
+  }
+
   override protected def train(dataset: Dataset[_]): NaiveKNNClassifierModel = {
     // Extract columns from data. If dataset is persisted, do not persist oldDataset.
     val instances = extractLabeledPoints(dataset).map {
