build.sbt
name := "BigData"
version := "0.1"
scalaVersion := "2.12.7"
libraryDependencies += "com.github.tototoshi" %% "scala-csv" % "1.3.5"
// https://mvnrepository.com/artifact/org.apache.spark/spark-core
libraryDependencies += "org.apache.spark" %% "spark-core" % "2.4.0"
// https://mvnrepository.com/artifact/org.apache.spark/spark-sql
libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.0"
// https://mvnrepository.com/artifact/com.microsoft.sqlserver/mssql-jdbc
libraryDependencies += "com.microsoft.sqlserver" % "mssql-jdbc" % "6.1.0.jre8" % Test
SparkMSSQL.scala
import org.apache.spark.sql.SparkSession
object SparkMSSQL {
def main(args: Array[String]): Unit = {
val spark = SparkSession.
builder.master("local[*]")
.appName("Simple Application")
.getOrCreate()
val url = "jdbc:sqlserver://localhost;databaseName=scalatest;integratedSecurity=true";
// Define database table to load into DataFrame
val jdbcDbTable = "dbo.user_profiles"
val df = spark
.read
.format("jdbc")
.option("url", url)
.option("dbtable", "dbo.user_profiles")
.load()
df.printSchema()
}
}
编译后出错
Exception in thread "main" java.sql.SQLException: No suitable driver
at java.sql.DriverManager.getDriver(DriverManager.java:315)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions.$anonfun$driverClass$2(JDBCOptions.scala:105)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions.<init>(JDBCOptions.scala:105)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions.<init>(JDBCOptions.scala:35)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:32)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:318)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:167)
请指教,我的代码有什么问题?
最佳答案
首先,您的 jdbc 驱动程序位于 test
范围内,因此该 jar 可能不会在运行时加载。而且,spark 需要驱动程序类信息来创建 JDBC 连接,因此请尝试将以下选项添加到 DF 初始值设定项:
.option("driver", "com.microsoft.sqlserver.jdbc.SQLServerDriver")
关于sql-server - SparkSQL MS SQL Server,编译后获取消息 "No suitable driver",我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53577243/