Spark launcher handle not updating state on Standalone cluster mode

434 views Asked by At

I'm trying to programmatically submit Spark jobs using the Spark Launcher library in a Spring web application.

Everything works fine in yarn-client, yarn-cluster and standalone-client modes. However, in standalone-cluster mode, SparkAppHandle.getState() stays UNKNOWN forever. Any advice? Thanks.

Here is the code of the service:

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;
import org.springframework.stereotype.Service;


/**
 * Spring service that submits a Spark application through the Spark Launcher
 * library and waits for the returned handle to reach a final state.
 */
@Service
public class SparkServices {

    /**
     * Launches the Spark application and blocks until its handle reports a final state.
     *
     * <p>The handle state is polled every 10 seconds and printed to standard output.
     * If the handle never reaches a final state, this method never returns.
     *
     * @param master value handed to {@code SparkLauncher.setMaster}
     * @param mode   value handed to {@code SparkLauncher.setDeployMode}
     * @return the text {@code "finished with >>>"} followed by the final handle state
     * @throws Exception if starting the application fails or the waiting thread is interrupted
     */
    public String launchJob(String master, String mode) throws Exception {

        SparkAppHandle handle = new SparkLauncher()
                .setAppName("test1")
                .setSparkHome("/usr/local/spark")
                .setAppResource("hdfs://nn:9000/spark-application.jar")
                .setMainClass("my.App")
                .setMaster(master)
                .setDeployMode(mode)
                .setConf("spark.executor.instances", "2")
                .setConf("spark.driver.memory", "2g")
                .setConf("spark.driver.cores", "1")
                .setConf("spark.executor.memory", "2g")
                .setConf("spark.executor.cores", "1")
                .addAppArgs("hdfs://nn:9000/spark-project/files/")
                .setVerbose(true)
                .startApplication(new SparkAppHandle.Listener() {
                    @Override
                    public void stateChanged(SparkAppHandle sparkAppHandle) {
                        System.out.println("state >>> " + sparkAppHandle.getState());
                    }

                    @Override
                    public void infoChanged(SparkAppHandle sparkAppHandle) {
                        // This callback reports changes other than the state, so log the
                        // application id rather than repeating the state a second time.
                        System.out.println("info >>> " + sparkAppHandle.getAppId());
                    }
                });

        // Poll until the handle reports a terminal state; the listener above only logs.
        while (!handle.getState().isFinal()) {
            System.out.println("state >>> " + handle.getState());
            Thread.sleep(10000);
        }

        return "finished with >>>" + handle.getState();
    }
}

And here is the code of the controller:

import org.springframework.beans.factory.annotation.Autowired;

import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;


/**
 * REST controller exposing an endpoint that starts a Spark job via {@link SparkServices}.
 */
@RestController
public class TaskController {

    /** Service that performs the actual job submission; field-injected by Spring. */
    @Autowired
    private SparkServices jobLauncher;

    /**
     * Handles GET requests on {@code /sparkJobs/{master}/{mode}} by delegating to
     * {@link SparkServices#launchJob(String, String)}.
     *
     * @param master the {@code master} path segment, forwarded unchanged
     * @param mode   the {@code mode} path segment, forwarded unchanged
     * @return whatever string the service returns
     * @throws Exception if the service call throws
     */
    @GetMapping("/sparkJobs/{master}/{mode}")
    public String sparkJob(
            @PathVariable("master") String master,
            @PathVariable("mode") String mode) throws Exception {
        final String outcome = jobLauncher.launchJob(master, mode);
        return outcome;
    }
}
0

There are 0 answers