@@ -42,11 +42,14 @@ static int avx_component_register(void);
4242
4343#include <immintrin.h>
4444
45- static int has_intel_AVX512f_features (void )
45+ static uint32_t has_intel_AVX_features (void )
4646{
47- const unsigned long avx512_features = _FEATURE_AVX512F ;
47+ uint32_t flags = 0 ;
4848
49- return _may_i_use_cpu_feature ( avx512_features );
49+ flags |= _may_i_use_cpu_feature (_FEATURE_AVX512F ) ? OMPI_OP_AVX_HAS_AVX512_FLAG : 0 ;
50+ flags |= _may_i_use_cpu_feature (_FEATURE_AVX2 ) ? OMPI_OP_AVX_HAS_AVX2_FLAG : 0 ;
51+ flags |= _may_i_use_cpu_feature (_FEATURE_AVX ) ? OMPI_OP_AVX_HAS_AVX_FLAG : 0 ;
52+ return flags ;
5053}
5154#else /* non-Intel compiler */
5255#include <stdint.h>
@@ -73,22 +76,26 @@ static void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd)
7376#endif
7477}
7578
76- static int has_intel_AVX512f_features (void )
79+ static uint32_t has_intel_AVX_features (void )
7780{
78- uint32_t abcd [4 ];
79- //uint32_t avx2_mask = (1 << 5); // AVX2
80- uint32_t avx2f_mask = (1 << 16 ); // AVX2F
81+ const uint32_t avx2f_mask = (1 << 16 ); // AVX512F
82+ const uint32_t avx2_mask = (1 << 5 ); // AVX2
83+ const uint32_t avx_mask = (1 << 28 ); // AVX
84+ uint32_t flags = 0 , abcd [4 ];
8185
8286#if defined(__APPLE__ )
83- uint32_t osxsave_mask = (1 << 27 ); // OSX.
84- run_cpuid ( 1 , 0 , abcd );
85- // OS supports extended processor state management ?
86- if ( (abcd [2 ] & osxsave_mask ) != osxsave_mask )
87- return 0 ;
87+ uint32_t osxsave_mask = (1 << 27 ); // OSX.
88+ run_cpuid ( 1 , 0 , abcd );
89+ // OS supports extended processor state management ?
90+ if ( (abcd [2 ] & osxsave_mask ) != osxsave_mask )
91+ return 0 ;
8892#endif /* defined(__APPLE__) */
8993
90- run_cpuid ( 7 , 0 , abcd );
91- return ((abcd [1 ] & avx2f_mask ) == avx2f_mask );
94+ run_cpuid ( 7 , 0 , abcd );
95+ flags |= ((abcd [1 ] & avx2f_mask ) == avx2f_mask ) ? OMPI_OP_AVX_HAS_AVX512_FLAG : 0 ;
96+ flags |= ((abcd [1 ] & avx2_mask ) == avx2_mask ) ? OMPI_OP_AVX_HAS_AVX2_FLAG : 0 ;
97+ flags |= ((abcd [1 ] & avx_mask ) == avx_mask ) ? OMPI_OP_AVX_HAS_AVX_FLAG : 0 ;
98+ return flags ;
9299}
93100#endif /* non-Intel compiler */
94101
@@ -119,16 +126,14 @@ ompi_op_avx_component_t mca_op_avx_component = {
119126 */
120127static int avx_component_open (void )
121128{
122- /* A first level check to see if avx is even available in this
123- process. E.g., you may want to do a first-order check to see
124- if hardware is available. If so, return OMPI_SUCCESS. If not,
125- return anything other than OMPI_SUCCESS and the component will
126- silently be ignored.
127-
128- Note that if this function returns non-OMPI_SUCCESS, then this
129- component won't even be shown in ompi_info output (which is
130- probably not what you want).
131- */
129+ mca_op_avx_component .flags = has_intel_AVX_features ();
130+ /* A first level check to see what level of AVX is available on the
131+ * hardware.
132+ *
133+ * Note that if this function returns non-OMPI_SUCCESS, then this
134+ * component won't even be shown in ompi_info output (which is
135+ * probably not what you want).
136+ */
132137 return OMPI_SUCCESS ;
133138}
134139
@@ -153,16 +158,15 @@ static int avx_component_close(void)
153158static int
154159avx_component_register (void )
155160{
156- mca_op_avx_component .double_supported = true ;
161+ mca_op_avx_component .flags = has_intel_AVX_features () ;
157162 (void ) mca_base_component_var_register (& mca_op_avx_component .super .opc_version ,
158- "double_supported " ,
159- "Whether the double precision data types are supported or not " ,
160- MCA_BASE_VAR_TYPE_BOOL , NULL , 0 , 0 ,
163+ "avx_support " ,
164+ "What level of AVX support should be used (combination of AVX 0x01, AVX2 0x02, AVX512f 0x04) " ,
165+ MCA_BASE_VAR_TYPE_INT , NULL , 0 , 0 ,
161166 OPAL_INFO_LVL_9 ,
162- MCA_BASE_VAR_SCOPE_READONLY ,
163- & mca_op_avx_component .double_supported );
164-
165- return OMPI_SUCCESS ;
167+ MCA_BASE_VAR_SCOPE_CONSTANT ,
168+ & mca_op_avx_component .flags );
169+ return OMPI_SUCCESS ;
166170}
167171
168172/*
@@ -172,7 +176,7 @@ static int
172176avx_component_init_query (bool enable_progress_threads ,
173177 bool enable_mpi_thread_multiple )
174178{
175- if ( ! has_intel_AVX512f_features () )
179+ if ( 0 == mca_op_avx_component . flags )
176180 return OMPI_ERR_NOT_SUPPORTED ;
177181 return OMPI_SUCCESS ;
178182}
0 commit comments