Spark: IllegalArgumentException: "Error while instantiating 'org.apache.spark.sql.hive.HiveSessionStateBuilder':"

73 views Asked by At

While initializing my Spark session, I get an error related to Hive.

My Spark session config is as follows:

# Assuming available resources: 250 vcores, 1TB memory, 2 people sharing the queue
# Optimized for 250M rows, 1,000 columns, 50% sparsity

from os import environ
from os.path import abspath

from pyspark.sql import SparkSession

import re        # NOTE(review): unused in this snippet — presumably used further down
import getpass   # NOTE(review): unused in this snippet — presumably used further down

# Interpreter and cluster environment.
# NOTE(review): YARN_CONF_DIR and HADOOP_CONF_DIR conventionally point at
# /etc/hadoop/conf, not /etc/hive/conf. A missing/wrong Hadoop client config
# is a common cause of "Error while instantiating
# 'org.apache.spark.sql.hive.HiveSessionStateBuilder'" — verify on the cluster.
environ["PYSPARK_PYTHON"] = "/opt/cloudera/parcels/Anaconda/envs/python37/bin/python"
environ["PYSPARK_DRIVER_PYTHON"] = "/opt/cloudera/parcels/Anaconda/envs/python37/bin/python"
environ["SPARK_HOME"] = "/opt/cloudera/parcels/CDH/lib/spark"
environ["YARN_CONF_DIR"] = "/etc/hive/conf"
environ["ARROW_LIBHDFS_DIR"] = "/opt/cloudera/parcels/CDH/lib/hadoop/lib/native/"
# BUG FIX: these URLs must be string literals — the original lines had no
# quotes, which is a SyntaxError before the session is ever built.
environ["HTTPS_PROXY"] = "https://proxy.p1at.s-group.cc:8080"
environ["HTTP_PROXY"] = "http://proxy.p1at.s-group.cc:8080"
environ["HADOOP_CONF_DIR"] = "/etc/hive/conf"

# Build (or reuse) a Hive-enabled SparkSession on YARN.
# NOTE(review): with enableHiveSupport(), spark.sql.warehouse.dir normally
# points at the shared HDFS warehouse; overriding it with a *local* path
# (abspath('spark-warehouse')) can itself break Hive session-state
# instantiation — try removing this override if the error persists.
spark = (
    SparkSession.builder
    .config("spark.master", "yarn")
    .appName("CRM: AT-PRK-EY-LFTO")
    .config("spark.yarn.queue", "root.project.mars")
    .config("spark.sql.warehouse.dir", abspath('spark-warehouse'))
    # Executor/driver sizing: 25 executors x 5 cores = 125 cores max,
    # i.e. half of the 250-vcore queue (shared with one other user).
    .config("spark.executor.memory", "16g")
    .config("spark.driver.memory", "20g")
    .config("spark.dynamicAllocation.enabled", "true")
    .config("spark.dynamicAllocation.minExecutors", "1")
    .config("spark.dynamicAllocation.maxExecutors", "25")
    .config("spark.executor.cores", "5")
    .config("spark.memory.offHeap.enabled", "true")
    .config("spark.memory.offHeap.size", "20g")
    # 750 = 125 cores * ~6 partitions per core, applied to both RDD and SQL paths.
    .config("spark.default.parallelism", "750")
    .config("spark.sql.shuffle.partitions", "750")
    .config("spark.kryoserializer.buffer.max", "512m")
    .config("spark.network.timeout", "300s")
    .config("spark.executor.heartbeatInterval", "60s")
    # Speculative re-launch of slow tasks (straggler mitigation).
    .config("spark.speculation", "true")
    .config("spark.speculation.interval", "10000ms")
    .config("spark.speculation.multiplier", "1.5")
    .config("spark.speculation.quantile", "0.75")
    .config("spark.sql.files.maxPartitionBytes", "512m")
    # G1 GC tuning applied identically to executors and driver.
    .config("spark.executor.extraJavaOptions", "-XX:+UseG1GC -XX:G1HeapRegionSize=16m -XX:MaxGCPauseMillis=500 -XX:InitiatingHeapOccupancyPercent=35")
    .config("spark.driver.extraJavaOptions", "-XX:+UseG1GC -XX:G1HeapRegionSize=16m -XX:MaxGCPauseMillis=500 -XX:InitiatingHeapOccupancyPercent=35")
    .enableHiveSupport()
    .getOrCreate()
)

print("Spark Session OK - for all settings run spark.sparkContext.getConf().getAll()")
print(f'- Spark UI link: http://bdagb1node03.eb.lan.at:18088/history/{spark.sparkContext.applicationId}')
print(f'- application_id: {spark.sparkContext.applicationId}')

# .config('spark.submit.deployMode', deployMode)\

The error message is:


Py4JJavaError Traceback (most recent call last)

/opt/cloudera/parcels/MINIFORGE-2022.2/envs/crm_at_prk/lib/python3.7/site-packages/pyspark/sql/utils.py in deco(*a, **kw)

62 try:

63 return f(*a, **kw)

64 except py4j.protocol.Py4JJavaError as e:

/opt/cloudera/parcels/MINIFORGE-2022.2/envs/crm_at_prk/lib/python3.7/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)

327 "An error occurred while calling {0}{1}{2}.\n".

328 format(target_id, ".", name), value)

329 else:

Py4JJavaError: An error occurred while calling o140.sessionState.

java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionStateBuilder':

During handling of the above exception, another exception occurred:

IllegalArgumentException Traceback (most recent call last)

/tmp/ipykernel_72512/653573609.py in

42 .config("spark.sql.files.maxPartitionBytes", "512m") \

43 .config("spark.executor.extraJavaOptions", "-XX:+UseG1GC -XX:G1HeapRegionSize=16m -XX:MaxGCPauseMillis=500 -XX:InitiatingHeapOccupancyPercent=35") \

44 .config("spark.driver.extraJavaOptions", "-XX:+UseG1GC -XX:G1HeapRegionSize=16m -XX:MaxGCPauseMillis=500 -XX:InitiatingHeapOccupancyPercent=35") \

45 .enableHiveSupport() \

46 .getOrCreate()

/opt/cloudera/parcels/MINIFORGE-2022.2/envs/crm_at_prk/lib/python3.7/site-packages/pyspark/sql/session.py in getOrCreate(self)

181 session = SparkSession(sc)

182 for key, value in self._options.items():

183 session._jsparkSession.sessionState().conf().setConfString(key, value)

184 for key, value in self._options.items():

185 session.sparkContext._conf.set(key, value)

/opt/cloudera/parcels/MINIFORGE-2022.2/envs/crm_at_prk/lib/python3.7/site-packages/py4j/java_gateway.py in call(self, *args)

1321 answer = self.gateway_client.send_command(command)

1322 return_value = get_return_value(

1323 answer, self.gateway_client, self.target_id, self.name)

1324

1325 for temp_arg in temp_args:

/opt/cloudera/parcels/MINIFORGE-2022.2/envs/crm_at_prk/lib/python3.7/site-packages/pyspark/sql/utils.py in deco(*a, **kw)

77 raise QueryExecutionException(s.split(': ', 1)[1], stackTrace)

78 if s.startswith('java.lang.IllegalArgumentException: '):

79 raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)

80 raise

81 return deco

IllegalArgumentException: "Error while instantiating 'org.apache.spark.sql.hive.HiveSessionStateBuilder':"

I'm not running any other Spark applications under my user. Also, if you could validate or give me feedback on the Spark session configuration, that would be greatly appreciated!

0

There are 0 answers