I downloaded the necessary libraries to connect to Redshift from a locally installed Spark cluster and launched pyspark with the command below, but I am getting the error message shown.
pyspark --conf spark.executor.extraClassPath=/usr/share/java/redshift-jdbc42-2.0.0.4.jar --driver-class-path /usr/share/java/redshift-jdbc42-2.0.0.4.jar --jars /usr/share/java/redshift-jdbc42-2.0.0.4.jar
df = (
    spark.read.format("jdbc")
    .option("url", "jdbc:redshift://host/dbname")
    .option("driver", "com.amazon.redshift.jdbc42.driver")
    .option("dbtable", "tablename")
    .option("user", "username")
    .option("password", "password")
    .load()
)
Error:
File "/usr/local/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o42.load.
: java.lang.ClassNotFoundException: com.amazon.redshift.jdbc42.driver
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
at org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry$.register(DriverRegistry.scala:46)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions.$anonfun$driverClass$1(JDBCOptions.scala:102)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions.$anonfun$driverClass$1$adapted(JDBCOptions.scala:102)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions.<init>(JDBCOptions.scala:102)
at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions.<init>(JDBCOptions.scala:38)
at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:32)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:354)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:326)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$3(DataFrameReader.scala:308)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:308)
at org.apache.spark
The driver class is not "com.amazon.redshift.jdbc42.driver" but "com.amazon.redshift.jdbc42.Driver". Java class names are case-sensitive, so the Driver at the end must be capitalized; with the lowercase name, the classloader cannot find the class and throws the ClassNotFoundException above.
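For reference, here is the same read with the corrected driver class; everything else is unchanged from the question (host, dbname, tablename, username, and password are placeholders):

df = (
    spark.read.format("jdbc")
    .option("url", "jdbc:redshift://host/dbname")
    # note the capital D: the class packaged in redshift-jdbc42-2.0.0.4.jar
    .option("driver", "com.amazon.redshift.jdbc42.Driver")
    .option("dbtable", "tablename")
    .option("user", "username")
    .option("password", "password")
    .load()
)

The driver option should also be unnecessary here: with the jar on the classpath via --jars, the Redshift JDBC driver typically registers itself with DriverManager for jdbc:redshift: URLs, so Spark can usually resolve the driver from the URL alone.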