@@ -187,8 +187,36 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &stat
187187
188188 m_fb = factory::fb_factory<sopt::algorithm::ImagingForwardBackward<t_complex>>(
189189 factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data,
190- m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size (), state.range (3 ) + 1 , true , true ,
191- false , 1e-3 , 1e-2 , 50 , tf_model_path, nondiff_func_type::Denoiser);
190+ m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size (), state.range (3 ), true , true , false ,
191+ 1e-3 , 1e-2 , 50 , tf_model_path, nondiff_func_type::Denoiser);
192+
193+ // Benchmark the application of the algorithm
194+ while (state.KeepRunning ()) {
195+ auto start = std::chrono::high_resolution_clock::now ();
196+ auto result = (*m_fb)();
197+ auto end = std::chrono::high_resolution_clock::now ();
198+ std::cout << " Converged? " << result.good << " , niters = " << result.niters << std::endl;
199+ state.SetIterationTime (b_utilities::duration (start, end, m_world));
200+ }
201+ }
202+
203+ BENCHMARK_DEFINE_F (AlgoFixtureMPI, FbOnnxDistributeGrid)(benchmark::State &state) {
204+ // Create the algorithm - has to be done here to reset the internal state.
205+ // If done in the fixture, repeats would start at the solution and converge immediately.
206+
207+ // TODO: Wavelets are constructed but not used in the factory method
208+ auto const wavelets = factory::wavelet_operator_factory<Vector<t_complex>>(
209+ factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex);
210+
211+ t_real const beta = m_sigma * m_sigma;
212+ t_real const gamma = 0.0001 ;
213+
214+ std::string tf_model_path = purify::models_directory () + " /snr_15_model_dynamic.onnx" ;
215+
216+ m_fb = factory::fb_factory<sopt::algorithm::ImagingForwardBackward<t_complex>>(
217+ factory::algo_distribution::mpi_serial, m_measurements_distribute_grid, wavelets, m_uv_data,
218+ m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size (), state.range (3 ), true , true , false ,
219+ 1e-3 , 1e-2 , 50 , tf_model_path, nondiff_func_type::Denoiser);
192220
193221 // Benchmark the application of the algorithm
194222 while (state.KeepRunning ()) {
@@ -205,23 +233,42 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbOnnxDistributeImage)
205233 ->Args({128 , 10000 , 4 , 10 , 1 })
206234 ->Args({1024 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
207235 ->Args({1024 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
208- ->Args({1024 , static_cast <t_int>(1e8 ), 4 , 10 , 1 })
209- ->Args({1024 , static_cast <t_int>(1e9 ), 4 , 10 , 1 })
236+ ->Args({2048 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
237+ ->Args({2048 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
238+ ->Args({4096 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
239+ ->Args({4096 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
210240 ->UseManualTime()
211241 ->MinTime(60.0 )
212242 ->MinWarmUpTime(10.0 )
213243 ->Repetitions(3 ) // ->ReportAggregatesOnly(true)
214244 ->Unit(benchmark::kMillisecond );
215245
246+ BENCHMARK_REGISTER_F (AlgoFixtureMPI, FbOnnxDistributeGrid)
247+ // ->Apply(b_utilities::Arguments)
248+ ->Args({128 , 10000 , 4 , 10 , 1 })
249+ ->Args({1024 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
250+ ->Args({1024 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
251+ ->Args({2048 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
252+ ->Args({2048 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
253+ ->Args({4096 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
254+ ->Args({4096 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
255+ ->UseManualTime()
256+ ->MinTime(9.0 )
257+ ->MinWarmUpTime(1.0 )
258+ ->Repetitions(3 ) // ->ReportAggregatesOnly(true)
259+ ->Unit(benchmark::kMillisecond );
260+
216261#endif
217262
218263BENCHMARK_REGISTER_F (AlgoFixtureMPI, FbDistributeImage)
219264 // ->Apply(b_utilities::Arguments)
220265 ->Args({128 , 10000 , 4 , 10 , 1 })
221266 ->Args({1024 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
222267 ->Args({1024 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
223- ->Args({1024 , static_cast <t_int>(1e8 ), 4 , 10 , 1 })
224- ->Args({1024 , static_cast <t_int>(1e9 ), 4 , 10 , 1 })
268+ ->Args({2048 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
269+ ->Args({2048 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
270+ ->Args({4096 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
271+ ->Args({4096 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
225272 ->UseManualTime()
226273 ->MinTime(60.0 )
227274 ->MinWarmUpTime(10.0 )
@@ -233,8 +280,10 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeGrid)
233280 ->Args({128 , 10000 , 4 , 10 , 2 })
234281 ->Args({1024 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
235282 ->Args({1024 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
236- ->Args({1024 , static_cast <t_int>(1e8 ), 4 , 10 , 2 })
237- ->Args({1024 , static_cast <t_int>(1e9 ), 4 , 10 , 2 })
283+ ->Args({2048 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
284+ ->Args({2048 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
285+ ->Args({4096 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
286+ ->Args({4096 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
238287 ->UseManualTime()
239288 ->MinTime(60.0 )
240289 ->MinWarmUpTime(10.0 )
@@ -246,8 +295,10 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeImage)
246295 ->Args({128 , 10000 , 4 , 10 , 1 })
247296 ->Args({1024 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
248297 ->Args({1024 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
249- ->Args({1024 , static_cast <t_int>(1e8 ), 4 , 10 , 1 })
250- ->Args({1024 , static_cast <t_int>(1e9 ), 4 , 10 , 1 })
298+ ->Args({2048 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
299+ ->Args({2048 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
300+ ->Args({4096 , static_cast <t_int>(1e6 ), 4 , 10 , 1 })
301+ ->Args({4096 , static_cast <t_int>(1e7 ), 4 , 10 , 1 })
251302 ->UseManualTime()
252303 ->MinTime(120.0 )
253304 ->MinWarmUpTime(10.0 )
@@ -259,8 +310,10 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeGrid)
259310 ->Args({128 , 10000 , 4 , 10 , 2 })
260311 ->Args({1024 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
261312 ->Args({1024 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
262- ->Args({1024 , static_cast <t_int>(1e8 ), 4 , 10 , 2 })
263- ->Args({1024 , static_cast <t_int>(1e9 ), 4 , 10 , 2 })
313+ ->Args({2048 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
314+ ->Args({2048 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
315+ ->Args({4096 , static_cast <t_int>(1e6 ), 4 , 10 , 2 })
316+ ->Args({4096 , static_cast <t_int>(1e7 ), 4 , 10 , 2 })
264317 ->UseManualTime()
265318 ->MinTime(120.0 )
266319 ->MinWarmUpTime(10.0 )
0 commit comments