
org.apache.spark.SparkException: Job aborted. when saving data

import org.apache.spark.rdd.RDD
import spark.implicits._  // needed for .toDS on an RDD[String]

val res = spark.sql("""select Site_ID, Output_ID, Component_ID, Qty, UOM, Alt1_ID, Alt1_Qty, Alt1_Code, Alt2_ID, Alt2_Qty, Alt2_Code, Effective_Date, Expiration_Date, Type from stpo""")

// Render each row as one comma-separated line
val output1 = res.rdd.map(_.toSeq.map(_ + "").reduce(_ + "," + _))

// Prepend a single header line before writing
val header: RDD[String] = sc.parallelize(Array("Site_ID,Output_ID,Component_ID,Qty,UOM,Alt1_ID,Alt1_Qty,Alt1_Code,Alt2_ID,Alt2_Qty,Alt2_Code,Effective_Date,Expiration_Date,Type"))
val output = header.union(output1).toDS

output.coalesce(1).write.text("adls/BuDataLake/KOP3")

I am writing to text format with Spark on Databricks (Azure) and always get the following error:

org.apache.spark.SparkException: Job aborted.
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:224)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:187)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:110)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:108)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:128)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:150)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:138)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:190)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:187)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:138)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:108)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:108)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:683)
    at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:683)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:89)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:175)
    at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:84)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:126)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:683)

What could be a possible solution for this?
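For comparison, the same file can usually be produced with Spark's built-in CSV writer, which emits the header row itself and avoids the RDD round-trip and manual header union. This is only a sketch, assuming res is the DataFrame from the query above and that the adls/BuDataLake/KOP3 path is mounted and writable from the cluster:

    // Sketch: the CSV data source writes the header itself via option("header", "true").
    // Path and DataFrame name are taken from the question; adjust as needed.
    res.coalesce(1)
      .write
      .mode("overwrite")           // assumption: overwriting the target folder is acceptable
      .option("header", "true")
      .csv("adls/BuDataLake/KOP3")

Note also that "Job aborted." is only the top-level wrapper thrown by FileFormatWriter; the underlying failure (for example a storage permission or path problem, or a failed task) appears further down in the "Caused by:" section of the stack trace, so the full driver log is the place to look for the actual cause.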
