/**
 * Loads an Avro file into a DataFrame using the short data source name "avro".
 *
 * The short name is only resolvable when the Avro data source module has been deployed
 * with the application; otherwise `load` fails with an AnalysisException.
 *
 * @param sparkSession session whose reader is used for loading
 * @param fileName     path handed to the reader's `load`
 */
class ProcessAvro(sparkSession: SparkSession, fileName: String) {

  /** DataFrame read once, while the instance is being constructed. */
  val df: sql.DataFrame = readAvro()

  /** Reads `fileName` with the "avro" format; every call performs a new load. */
  def readAvro(): sql.DataFrame =
    sparkSession.read
      .format("avro")
      .load(fileName)
}
Exception in thread "main" org.apache.spark.sql.AnalysisException: Failed to find data source: avro. Avro is built-in but external data source module since Spark 2.4. Please deploy the application as per the deployment section of "Apache Avro Data Source Guide".;
Problem: Spark cannot find the Avro data source. Since Spark 2.4, Avro ships as a separate module (spark-avro) that has to be deployed with the application.
Solution: https://bmwieczorek.wordpress.com/tag/avro/
scala> val df = spark.read.format("avro").load("myclass_avro")
org.apache.spark.sql.AnalysisException: Failed to find data source: avro. Please find an Avro package at http://spark.apache.org/third-party-projects.html;
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:634)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:190)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:174)
... 49 elided
scala> val df = spark.read.format("com.databricks.spark.avro").load("myclass_avro")
df: org.apache.spark.sql.DataFrame = [mystring: string, myboolean: boolean ... 4 more fields]
scala> df.show
+--------+---------+-----+------+----------+------------------+
|mystring|myboolean|myint|myLong| myfloat| mydouble|
+--------+---------+-----+------+----------+------------------+
| a| true| 1| 1|0.14285715|0.1428571492433548|
| b| false| 2| 2| 2.0| 2.0|
+--------+---------+-----+------+----------+------------------+
Fix: pass the fully qualified data source name to format() instead of the short name "avro".
// Use the fully qualified data source name rather than the short name "avro".
val df = sparkSession.read.format("com.databricks.spark.avro")
.load(fileName)
package org.example
import org.apache.spark
import org.apache.spark.sql
import org.apache.spark.sql.SparkSession
/**
 * Reads an Avro file into a Spark DataFrame.
 *
 * @param sparkSession session whose reader is used for loading
 * @param fileName     path handed to the reader's `load`
 */
class ProcessAvro(sparkSession: SparkSession, fileName: String) {

  /**
   * DataFrame for `fileName`, loaded on first access.
   *
   * Declared lazy so that constructing the class performs no load by itself: a caller that
   * only uses `readAvro()` no longer triggers a second, redundant load of the same file.
   */
  lazy val df: sql.DataFrame = readAvro()

  /**
   * Loads `fileName` through the "com.databricks.spark.avro" data source.
   *
   * @return the DataFrame produced by the reader; every call performs a new load
   */
  def readAvro(): sql.DataFrame =
    sparkSession.read
      .format("com.databricks.spark.avro")
      .load(fileName)
}
/**
* Example entry point: prints a greeting, obtains a SparkSession configured with
* spark.master = "local", and reads "file.avro" through ProcessAvro.
*/
object Main extends App{
println( "Hello World!" )
// Get or create a session named "Simple Application" with the master set to "local".
val sparkSession = SparkSession.builder
.appName("Simple Application")
.config("spark.master", "local")
.getOrCreate()
val processAvro = new ProcessAvro(sparkSession,"file.avro")
// Read the Avro file into a DataFrame.
val df = processAvro.readAvro()
No comments:
Post a Comment