Windows 10, Python 3.9.7

I just installed PySpark and tried my first three lines of code, shown below:

import pyspark
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[1], line 4
      1 import pyspark
      2 from pyspark.sql import SparkSession
----> 4 spark = SparkSession.builder.appName('SparkByExamples.com').getOrCreate()

File ~\Anaconda3\envs\pyspark_env\Lib\site-packages\pyspark\sql\session.py:269, in SparkSession.Builder.getOrCreate(self)
    267     sparkConf.set(key, value)
    268 # This SparkContext may be an existing one.
--> 269 sc = SparkContext.getOrCreate(sparkConf)
    270 # Do not update `SparkConf` for existing `SparkContext`, as it's shared
    271 # by all sessions.
    272 session = SparkSession(sc, options=self._options)

File ~\Anaconda3\envs\pyspark_env\Lib\site-packages\pyspark\context.py:483, in SparkContext.getOrCreate(cls, conf)
    481 with SparkContext._lock:
    482     if SparkContext._active_spark_context is None:
--> 483         SparkContext(conf=conf or SparkConf())
    484     assert SparkContext._active_spark_context is not None
    485     return SparkContext._active_spark_context

File ~\Anaconda3\envs\pyspark_env\Lib\site-packages\pyspark\context.py:195, in SparkContext.__init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls, udf_profiler_cls)
    189 if gateway is not None and gateway.gateway_parameters.auth_token is None:
    190     raise ValueError(
    191         "You are trying to pass an insecure Py4j gateway to Spark. This"
    192         " is not allowed as it is a security risk."
    193     )
--> 195 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
    196 try:
    197     self._do_init(
    198         master,
    199         appName,
   (...)
    208         udf_profiler_cls,
    209     )

File ~\Anaconda3\envs\pyspark_env\Lib\site-packages\pyspark\context.py:417, in SparkContext._ensure_initialized(cls, instance, gateway, conf)
    415 with SparkContext._lock:
    416     if not SparkContext._gateway:
--> 417         SparkContext._gateway = gateway or launch_gateway(conf)
    418         SparkContext._jvm = SparkContext._gateway.jvm
    420     if instance:

File ~\Anaconda3\envs\pyspark_env\Lib\site-packages\pyspark\java_gateway.py:99, in launch_gateway(conf, popen_kwargs)
     96     proc = Popen(command, **popen_kwargs)
     97 else:
     98     # preexec_fn not supported on Windows
---> 99     proc = Popen(command, **popen_kwargs)
    101 # Wait for the file to appear, or for the process to exit, whichever happens first.
    102 while not proc.poll() and not os.path.isfile(conn_info_file):

File ~\Anaconda3\envs\pyspark_env\Lib\subprocess.py:1024, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)
   1020         if self.text_mode:
   1021             self.stderr = io.TextIOWrapper(self.stderr,
   1022                     encoding=encoding, errors=errors)
-> 1024     self._execute_child(args, executable, preexec_fn, close_fds,
   1025                         pass_fds, cwd, env,
   1026                         startupinfo, creationflags, shell,
   1027                         p2cread, p2cwrite,
   1028                         c2pread, c2pwrite,
   1029                         errread, errwrite,
   1030                         restore_signals,
   1031                         gid, gids, uid, umask,
   1032                         start_new_session, process_group)
   1033 except:
   1034     # Cleanup if the child failed starting.
   1035     for f in filter(None, (self.stdin, self.stdout, self.stderr)):

File ~\Anaconda3\envs\pyspark_env\Lib\subprocess.py:1493, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_gid, unused_gids, unused_uid, unused_umask, unused_start_new_session, unused_process_group)
   1491 # Start the process
   1492 try:
-> 1493     hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
   1494                              # no special security
   1495                              None, None,
   1496                              int(not close_fds),
   1497                              creationflags,
   1498                              env,
   1499                              cwd,
   1500                              startupinfo)
   1501 finally:
   1502     # Child is launched. Close the parent's copy of those pipe
   1503     # handles that only the child should have open.  You need
   (...)
   1506     # pipe will not close when the child process exits and the
   1507     # ReadFile will hang.
   1508     self._close_pipe_fds(p2cread, p2cwrite,
   1509                          c2pread, c2pwrite,
   1510                          errread, errwrite)

FileNotFoundError: [WinError 2] The system cannot find the file specified


I installed pyspark in another Anaconda environment but still received the same error.

I also tried the following code:

import os
# Print SPARK_HOME and the spark-submit.cmd path that PySpark's launch_gateway builds from it
print(os.environ.get("SPARK_HOME"))
print(os.path.join(os.environ.get("SPARK_HOME"), './bin/spark-submit.cmd'))

and got:

C:\Spark\spark-3.1.2-bin-hadoop2.7

C:\Spark\spark-3.1.2-bin-hadoop2.7\./bin/spark-submit.cmd
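
Since the traceback ends with Popen raising WinError 2, the file Windows cannot find is presumably either spark-submit.cmd itself or an executable it depends on (for example java.exe). A minimal diagnostic sketch along those lines (assuming SPARK_HOME and JAVA_HOME are the relevant environment variables on this machine) would be:

import os

# Hypothetical check (not part of the original failing code): verify that the
# launcher script PySpark will try to run exists, and that Java is reachable
# via JAVA_HOME.
spark_home = os.environ.get("SPARK_HOME")
print("SPARK_HOME:", spark_home)
if spark_home:
    spark_submit = os.path.join(spark_home, "bin", "spark-submit.cmd")
    print(spark_submit, "exists:", os.path.isfile(spark_submit))

java_home = os.environ.get("JAVA_HOME")
print("JAVA_HOME:", java_home)
if java_home:
    java_exe = os.path.join(java_home, "bin", "java.exe")
    print(java_exe, "exists:", os.path.isfile(java_exe))

If either check printed False (or None), that would be consistent with the WinError 2 coming out of CreateProcess.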
