package com.jxd

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import java.sql.Connection
import java.sql.DriverManager

object hello {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Hello World")
    val sc = new SparkContext(conf)
    val input = sc.textFile("test/hello", 2)
    val count = input
      .flatMap(line => line.split(" "))
      .map((_, 1))
      .reduceByKey((a, b) => a + b)
    // foreachPartition calls insertToMysql on the executors, once per partition
    count.foreachPartition(insertToMysql)
  }

  def insertToMysql(iterator: Iterator[(String, Int)]): Unit = {
    val driver = "com.mysql.jdbc.Driver"
    val url = "jdbc:mysql://192.168.10.58:3306/test"
    val username = "root"
    val password = "1"
    // This runs inside the executor JVM, so the driver class must be on the
    // executors' classpath, not only on the submitting machine
    Class.forName(driver)
    val connection: Connection = DriverManager.getConnection(url, username, password)
    try {
      val sql = "INSERT INTO t_spark (`name`,`num`) VALUES (?,?)"
      iterator.foreach { data =>
        val statement = connection.prepareStatement(sql)
        statement.setString(1, data._1)
        statement.setInt(2, data._2)
        val result = statement.executeUpdate()
        if (result == 1) {
          println("Inserted into MySQL successfully.............")
        }
        statement.close()
      }
    } finally {
      connection.close()
    }
  }
}
Submitted to the cluster, the job fails on the executors with:

Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
	at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
	at java.lang.Class.forName0(Native Method)
	at java.lang.Class.forName(Class.java:264)
	at com.jxd.hello$.insertToMysql(hello.scala:22)
	at com.jxd.hello$$anonfun$main$1.apply(hello.scala:13)
	at com.jxd.hello$$anonfun$main$1.apply(hello.scala:13)
	at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
	at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
	at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
	at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
	at org.apache.spark.scheduler.Task.run(Task.scala:108)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:745)
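The frames at the bottom (Executor$TaskRunner, Task.run) show that Class.forName is evaluated inside the executor JVMs, so the MySQL driver JAR has to be visible to the executors, not just to the machine that submits the job. As a quick check, here is a sketch (not part of the original post, and it reuses the sc from the program above) that probes whether the executors can load the driver class, scheduling permitting:

// Run one small task per default partition and try to load the driver there
val visible = sc.parallelize(1 to sc.defaultParallelism).map { _ =>
  try { Class.forName("com.mysql.jdbc.Driver"); true }
  catch { case _: ClassNotFoundException => false }
}.collect()
println(s"driver visible on executors: ${visible.forall(identity)}")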
In Spark versions before 1.4, placing the MySQL driver JAR under /spark/jars has no effect; the driver JAR must be specified when the job is submitted.
For example:
spark-submit --master spark://192.168.10.160:7077 --driver-class-path /usr/spark/jars/mysql-connector-java-5.1.18-bin.jar --class com.jxd.hello /usr/spark/wc.jar /usr/spark/test/hello
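--driver-class-path only puts the JAR on the driver's classpath. The executor-side counterpart can also be expressed in code via SparkConf; a sketch, assuming the JAR sits at the same path on every worker:

import org.apache.spark.SparkConf

// Executor-side equivalent of the command-line classpath flag.
// Assumes the driver JAR exists at this path on every worker node.
val conf = new SparkConf()
  .setAppName("Hello World")
  .set("spark.executor.extraClassPath",
    "/usr/spark/jars/mysql-connector-java-5.1.18-bin.jar")

Note that the corresponding spark.driver.extraClassPath setting only takes effect if it is applied before the driver JVM starts, which is why the command-line flag (or spark-defaults.conf) is the more reliable place for the driver side.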
Newer versions, such as Spark 2.2, have fixed this issue.
Note: every server in the cluster needs the MySQL driver JAR (tip: add it on one node first, then copy it to the others over the network).
Once the driver JAR is in place on every node, the job can be submitted directly:
spark-submit --master spark://192.168.10.160:7077 --class com.jxd.hello /usr/spark/wc.jar /usr/spark/test/hello
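Separately from the classpath issue, the per-record prepareStatement in insertToMysql can be avoided once the job runs. A sketch of a batched variant, assuming the same t_spark schema, URL, and credentials as above, that reuses one PreparedStatement per partition:

import java.sql.DriverManager

// Batched variant: one connection and one PreparedStatement per partition,
// all rows flushed to MySQL in a single batch per partition.
def insertToMysqlBatched(iterator: Iterator[(String, Int)]): Unit = {
  Class.forName("com.mysql.jdbc.Driver")
  val connection = DriverManager.getConnection(
    "jdbc:mysql://192.168.10.58:3306/test", "root", "1")
  try {
    connection.setAutoCommit(false)
    val statement = connection.prepareStatement(
      "INSERT INTO t_spark (`name`,`num`) VALUES (?,?)")
    iterator.foreach { case (name, num) =>
      statement.setString(1, name)
      statement.setInt(2, num)
      statement.addBatch()
    }
    statement.executeBatch()
    connection.commit()
    statement.close()
  } finally {
    connection.close()
  }
}

It plugs in unchanged as count.foreachPartition(insertToMysqlBatched), and cuts the round trips to MySQL from one per word to one per partition.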