b) run $ python main.py
c) move the generated dags folder to the DAGs bucket in Composer, e.g.:
gsutil cp -r out gs://BUCKET_NAME/dags
-NOTE: the "number_of_operators_defined" variable in the configuration file
+NOTE: the "number_of_operators_defined" variable in the configuration file
(config.json) allows creating up to 5 different kinds of tasks,
none of which has complex functionality:
a) bash_operator_echo
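For reference, here is a hypothetical config.json covering every key that main() reads further down; the keys are taken from the code, but the values are illustrative assumptions, not defaults shipped with this repo:

import json

# Hypothetical example config; keys match the data[...] reads in main() below,
# values are made up but plausible for a small load test.
sample_config = {
    "number_of_dags_to_generate": 10,
    "min_number_of_task_in_dag": 5,
    "max_number_of_task_in_dag": 20,
    "task_min_time_in_sec": 1,
    "task_max_time_in_sec": 30,
    "percentage_of_job_in_parallel": 25,
    "number_of_operators_defined": 5,  # up to 5 task kinds, per the NOTE above
    "file_start_index": 0,
    "schedules": ["@daily", "@hourly"],
}

with open("config.json", "w") as f:
    json.dump(sample_config, f, indent=2)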
"""

import json
+import math
import random
+
import modules.initDag
import modules.operators
-import math


def get_config():
    """module to read configs"""
-    f = open('config.json', "r")
+    f = open("config.json", "r")
    data = json.loads(f.read())
    f.close()
    return data


def get_init_content(i):
-    """Initialise test DAG with headers """
+    """Initialise test DAG with headers"""
    modules.initDag.get_init_dag(i)

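An aside on step (c) of the header docstring: gsutil is the documented way to upload the generated out/ folder, but the same copy can be scripted with the google-cloud-storage client. A minimal sketch, not part of this repo, assuming that package is installed and BUCKET_NAME stands in for a real bucket:

import pathlib

from google.cloud import storage

# Upload every generated DAG file in out/ to the Composer dags/ folder.
client = storage.Client()
bucket = client.bucket("BUCKET_NAME")
for path in pathlib.Path("out").glob("*.py"):
    bucket.blob(f"dags/{path.name}").upload_from_filename(str(path))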
@@ -60,70 +61,74 @@ def get_task_dag(min_number_of_task_in_dag):
    file.close()
    return data

-# build the dags
+
+# build the dags
def main():
    """main function to create test DAGs"""
    # read config file
    data = get_config()

-    number_of_dags_to_generate = data['number_of_dags_to_generate']
-    min_number_of_task_in_dag = data['min_number_of_task_in_dag']
-    max_number_of_task_in_dag = data['max_number_of_task_in_dag']
-    task_min_time_in_sec = data['task_min_time_in_sec']
-    task_max_time_in_sec = data['task_max_time_in_sec']
-    percentage_of_job_in_parallel = data['percentage_of_job_in_parallel']
-    number_of_operators_defined = data['number_of_operators_defined']
-    file_index = data['file_start_index']
-    schedules = data['schedules']
+    number_of_dags_to_generate = data["number_of_dags_to_generate"]
+    min_number_of_task_in_dag = data["min_number_of_task_in_dag"]
+    max_number_of_task_in_dag = data["max_number_of_task_in_dag"]
+    task_min_time_in_sec = data["task_min_time_in_sec"]
+    task_max_time_in_sec = data["task_max_time_in_sec"]
+    percentage_of_job_in_parallel = data["percentage_of_job_in_parallel"]
+    number_of_operators_defined = data["number_of_operators_defined"]
+    file_index = data["file_start_index"]
+    schedules = data["schedules"]

    # creating DAG files
    for i in range(number_of_dags_to_generate):
        task_list = []
        dagf = open(f"out/dagFile_{file_index + i}.py", "w+")
        dagf.write(
            modules.initDag.get_init_dag(
-                file_index + i,
-                schedules[random.randrange(0,
-                                           len(schedules) - 1)]))
+                file_index + i, schedules[random.randrange(0, len(schedules) - 1)]
+            )
+        )
        dagf.write(modules.operators.start_task())
        dagf.write(modules.operators.stop_task())
        for task_index in range(
-                random.randrange(min_number_of_task_in_dag,
-                                 max_number_of_task_in_dag)):
+            random.randrange(min_number_of_task_in_dag, max_number_of_task_in_dag)
+        ):
            task_list.append("task_{index}".format(index=task_index))
-            if (task_index % number_of_operators_defined == 0):
+            if task_index % number_of_operators_defined == 0:
                dagf.write(modules.operators.bash_operator_echo(task_index))
-            elif (task_index % number_of_operators_defined == 1):
+            elif task_index % number_of_operators_defined == 1:
                dagf.write(
                    modules.operators.bash_operator_sleep(
                        task_index,
-                        random.randrange(task_min_time_in_sec,
-                                         task_max_time_in_sec)))
-            elif (task_index % number_of_operators_defined == 2):
+                        random.randrange(task_min_time_in_sec, task_max_time_in_sec),
+                    )
+                )
+            elif task_index % number_of_operators_defined == 2:
                dagf.write(
                    modules.operators.python_operator_task_sleep(
                        task_index,
-                        random.randrange(task_min_time_in_sec,
-                                         task_max_time_in_sec)))
-            elif (task_index % number_of_operators_defined == 3):
-                dagf.write(
-                    modules.operators.bash_operator_task_ping(task_index))
+                        random.randrange(task_min_time_in_sec, task_max_time_in_sec),
+                    )
+                )
+            elif task_index % number_of_operators_defined == 3:
+                dagf.write(modules.operators.bash_operator_task_ping(task_index))
            else:
-                dagf.write(
-                    modules.operators.python_operator_task_print(task_index))
-        no_tasks_in_parallel = math.ceil(percentage_of_job_in_parallel / 100 *
-                                         len(task_list))
+                dagf.write(modules.operators.python_operator_task_print(task_index))
+        no_tasks_in_parallel = math.ceil(
+            percentage_of_job_in_parallel / 100 * len(task_list)
+        )
        parallel_tasks = []
-        if (no_tasks_in_parallel > 1):
+        if no_tasks_in_parallel > 1:
            for parallel_task_index in range(no_tasks_in_parallel):
                parallel_tasks.append(task_list.pop())
-            task_list.insert(random.randrange(1,
-                                              len(task_list) - 2),
-                             "[{task}]".format(task=",".join(parallel_tasks)))
-        dagf.write("\n\tchain(start_task,{tasks},stop_task)".format(
-            tasks=",".join(task_list)))
+            task_list.insert(
+                random.randrange(1, len(task_list) - 2),
+                "[{task}]".format(task=",".join(parallel_tasks)),
+            )
+        dagf.write(
+            "\n\tchain(start_task,{tasks},stop_task)".format(tasks=",".join(task_list))
+        )
        dagf.close()


if __name__ == "__main__":
-    main()
+    main()
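Two pieces of main() are worth seeing in isolation. First, the task_index % number_of_operators_defined dispatch: with 5 operator kinds, consecutive task indices simply cycle through the branches above. A standalone sketch (operator names taken from the branches, printing only which kind each task would get):

# Illustration of the modulo dispatch in main(); no DAG files are written.
number_of_operators_defined = 5
operator_kinds = [
    "bash_operator_echo",          # remainder 0
    "bash_operator_sleep",         # remainder 1
    "python_operator_task_sleep",  # remainder 2
    "bash_operator_task_ping",     # remainder 3
    "python_operator_task_print",  # remainder 4 (the else branch)
]
for task_index in range(7):
    kind = operator_kinds[task_index % number_of_operators_defined]
    print(f"task_{task_index} -> {kind}")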
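Second, the parallel-grouping step: a sketch of the same math.ceil / pop / insert logic with a fixed-size task list instead of the randomly sized one, showing how a bracketed group ends up inside the chain(...) line that each generated DAG file receives:

import math
import random

percentage_of_job_in_parallel = 25
task_list = [f"task_{i}" for i in range(8)]

# ceil(25% of 8) = 2 tasks get grouped to run in parallel.
no_tasks_in_parallel = math.ceil(percentage_of_job_in_parallel / 100 * len(task_list))
parallel_tasks = []
if no_tasks_in_parallel > 1:
    for _ in range(no_tasks_in_parallel):
        parallel_tasks.append(task_list.pop())  # take tasks from the tail
    task_list.insert(
        random.randrange(1, len(task_list) - 2),  # keep the group away from the ends
        "[{task}]".format(task=",".join(parallel_tasks)),
    )
# e.g. chain(start_task,task_0,[task_7,task_6],task_1,...,stop_task)
print("chain(start_task,{tasks},stop_task)".format(tasks=",".join(task_list)))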