INTERNAL: Write rejected

115 views Asked by At

In some cases we get a "Write rejected" error on jobs when loading or joining data from BigQuery, even though the same job works fine with different tables of the same size.

Here is the detailed error message:

java.lang.RuntimeException: java.io.IOException: INTERNAL: Write rejected (writer id not found) when talking to tcp://localhost:12345 at 
com.google.common.base.Throwables.propagate(Throwables.java:160) at com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:232) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:195) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnContext.outputWindowedValue(DoFnRunner.java:310) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnProcessContext.output(DoFnRunner.java:478) at 
com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey$ConstructUnionTableFn.processElement(CoGroupByKey.java:185) Caused by: java.io.IOException: INTERNAL: Write rejected (writer id not found) when talking to tcp://localhost:12345 at 
com.google.cloud.dataflow.sdk.runners.worker.ApplianceShuffleWriter.write(Native Method) at 
com.google.cloud.dataflow.sdk.runners.worker.ChunkingShuffleEntryWriter.writeChunk(ChunkingShuffleEntryWriter.java:71) at 
com.google.cloud.dataflow.sdk.runners.worker.ChunkingShuffleEntryWriter.put(ChunkingShuffleEntryWriter.java:54) at
com.google.cloud.dataflow.sdk.runners.worker.ShuffleSink$ShuffleSinkWriter.add(ShuffleSink.java:227) at 
com.google.cloud.dataflow.sdk.runners.worker.ShuffleSink$ShuffleSinkWriter.add(ShuffleSink.java:154) at 
com.google.cloud.dataflow.sdk.util.common.worker.WriteOperation.process(WriteOperation.java:90) at 
com.google.cloud.dataflow.sdk.util.common.worker.OutputReceiver.process(OutputReceiver.java:147) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:230) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:195) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnContext.outputWindowedValue(DoFnRunner.java:310) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnProcessContext.output(DoFnRunner.java:478) at 
com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey$ConstructUnionTableFn.processElement(CoGroupByKey.java:185) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.invokeProcessElement(DoFnRunner.java:171) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.processElement(DoFnRunner.java:156) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn.processElement(NormalParDoFn.java:262) at 
com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation.process(ParDoOperation.java:52) at 
com.google.cloud.dataflow.sdk.util.common.worker.OutputReceiver.process(OutputReceiver.java:147) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:230) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:195) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnContext.outputWindowedValue(DoFnRunner.java:310) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnProcessContext.output(DoFnRunner.java:478) at 
com.dubsmash.analytics.functions.extracts.ExtractSessionFn.processElement(ExtractSessionFn.java:13) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.invokeProcessElement(DoFnRunner.java:171) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.processElement(DoFnRunner.java:156) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn.processElement(NormalParDoFn.java:262) at 
com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation.process(ParDoOperation.java:52) at 
com.google.cloud.dataflow.sdk.util.common.worker.OutputReceiver.process(OutputReceiver.java:147) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:230) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:195) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnContext.outputWindowedValue(DoFnRunner.java:310) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnProcessContext.output(DoFnRunner.java:478) at 
com.google.cloud.dataflow.sdk.transforms.Keys$1.processElement(Keys.java:65) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.invokeProcessElement(DoFnRunner.java:171) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.processElement(DoFnRunner.java:156) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn.processElement(NormalParDoFn.java:262) at 
com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation.process(ParDoOperation.java:52) at 
com.google.cloud.dataflow.sdk.util.common.worker.OutputReceiver.process(OutputReceiver.java:147) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:230) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:195) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnContext.outputWindowedValue(DoFnRunner.java:310) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnProcessContext.output(DoFnRunner.java:478) at 
com.google.cloud.dataflow.sdk.runners.worker.CombineValuesFn$ExtractOutputDoFn.processElement(CombineValuesFn.java:223) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.invokeProcessElement(DoFnRunner.java:171) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.processElement(DoFnRunner.java:156) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn.processElement(NormalParDoFn.java:262) at 
com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation.process(ParDoOperation.java:52) at 
com.google.cloud.dataflow.sdk.util.common.worker.OutputReceiver.process(OutputReceiver.java:147) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:230) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn$2.output(NormalParDoFn.java:195) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnContext.outputWindowedValue(DoFnRunner.java:310) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner$DoFnProcessContext.output(DoFnRunner.java:478) at 
com.google.cloud.dataflow.sdk.runners.worker.CombineValuesFn$MergeAccumulatorsDoFn.processElement(CombineValuesFn.java:201) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.invokeProcessElement(DoFnRunner.java:171) at 
com.google.cloud.dataflow.sdk.util.DoFnRunner.processElement(DoFnRunner.java:156) at 
com.google.cloud.dataflow.sdk.runners.worker.NormalParDoFn.processElement(NormalParDoFn.java:262) at 
com.google.cloud.dataflow.sdk.util.common.worker.ParDoOperation.process(ParDoOperation.java:52) at 
com.google.cloud.dataflow.sdk.util.common.worker.OutputReceiver.process(OutputReceiver.java:147) at 
com.google.cloud.dataflow.sdk.util.common.worker.ReadOperation.runReadLoop(ReadOperation.java:184) at 
com.google.cloud.dataflow.sdk.util.common.worker.ReadOperation.start(ReadOperation.java:121) at 
com.google.cloud.dataflow.sdk.util.common.worker.MapTaskExecutor.execute(MapTaskExecutor.java:66) at 
com.google.cloud.dataflow.sdk.runners.worker.DataflowWorker.doWork(DataflowWorker.java:130) at 
com.google.cloud.dataflow.sdk.runners.worker.DataflowWorker.getAndPerformWork(DataflowWorker.java:95) at 
com.google.cloud.dataflow.sdk.runners.worker.DataflowWorkerHarness$WorkerThread.call(DataflowWorkerHarness.java:139) at 
com.google.cloud.dataflow.sdk.runners.worker.DataflowWorkerHarness$WorkerThread.call(DataflowWorkerHarness.java:124) at 
java.util.concurrent.FutureTask.run(FutureTask.java:266) at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745)
1

There is 1 answer

2
Jeremy Lewi On

This exception should be a transient, non-fatal error. Your job should continue to execute and complete successfully.

Please let us know if your job doesn't complete successfully.