I wrote the two functions below:
def dataproc_first_job(self, task_id, app, job):
    return DataProcSparkOperator(
        task_id=task_id,
        dataproc_spark_jars=self.jar,
        cluster_name=self.cluster,
        main_class=self.main_name,
        dataproc_spark_properties={
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s' % (app, job)},
        trigger_rule=TriggerRule.ALL_DONE
    )
def dataproc_second_job(self, task_id, app, job, prefix, dataset):
    return DataProcSparkOperator(
        task_id=task_id,
        dataproc_spark_jars=self.jar,
        cluster_name=self.cluster,
        main_class=self.main_name,
        dataproc_spark_properties={
            'spark.driver.extraJavaOptions': '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s -DTrm.Options.prefix=%s -DTrm.Metadata.outputBase=%s' % (app, job, prefix, dataset)},
        trigger_rule=TriggerRule.ALL_DONE
    )
My aim is to refactor this Python code so that a single function replaces the two. I thought about using decorators, but I am not sure that is the best solution.
Any ideas on how to handle this, please?
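
For illustration, this is the kind of result I'm after: a single method where prefix and dataset are optional and the extraJavaOptions string is built from whichever arguments are supplied. It's only an untested sketch of the idea, not a working solution:

def dataproc_job(self, task_id, app, job, prefix=None, dataset=None):
    # Options shared by both jobs
    java_options = '-DAppConfig.appName=%s -DTrmRaw.Options.jobName=%s' % (app, job)
    # Append the extra options only when the second job's arguments are given
    if prefix is not None and dataset is not None:
        java_options += ' -DTrm.Options.prefix=%s -DTrm.Metadata.outputBase=%s' % (prefix, dataset)
    return DataProcSparkOperator(
        task_id=task_id,
        dataproc_spark_jars=self.jar,
        cluster_name=self.cluster,
        main_class=self.main_name,
        dataproc_spark_properties={'spark.driver.extraJavaOptions': java_options},
        trigger_rule=TriggerRule.ALL_DONE
    )

Is something like this reasonable, or would decorators (or some other pattern) be a better fit here?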