Skip to content

Commit 075549f

Browse files
committed
Enable support for AVX2 and AVX.
On Intel processors detect at runtime the level of hardware support for vector instructions and use them accordingly. Signed-off-by: George Bosilca <[email protected]>
1 parent 37a96e9 commit 075549f

File tree

6 files changed

+822
-492
lines changed

6 files changed

+822
-492
lines changed

ompi/mca/op/avx/configure.m4

Lines changed: 85 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
2020
AC_CONFIG_FILES([ompi/mca/op/avx/Makefile])
2121

2222
op_avx_support=0
23+
op_avx2_support=0
24+
op_avx512_support=0
2325
OPAL_VAR_SCOPE_PUSH([op_avx_cflags_save])
2426

2527
AS_IF([test "$opal_cv_asm_arch" = "X86_64"],
@@ -34,11 +36,11 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
3436
__m512 vA, vB;
3537
_mm512_add_ps(vA, vB)
3638
]])],
37-
[op_avx_support=1
39+
[op_avx512_support=1
3840
AC_MSG_RESULT([yes])],
3941
[AC_MSG_RESULT([no])])
4042

41-
AS_IF([test $op_avx_support -eq 0],
43+
AS_IF([test $op_avx512_support -eq 0],
4244
[AC_MSG_CHECKING([for AVX512 support (with -march=skylake-avx512)])
4345
op_avx_cflags_save="$CFLAGS"
4446
CFLAGS="$CFLAGS -march=skylake-avx512"
@@ -48,18 +50,97 @@ AC_DEFUN([MCA_ompi_op_avx_CONFIG],[
4850
__m512 vA, vB;
4951
_mm512_add_ps(vA, vB)
5052
]])],
51-
[op_avx_support=1
53+
[op_avx512_support=1
5254
op_avx_CPPFLAGS="-march=skylake-avx512"
5355
AC_MSG_RESULT([yes])],
5456
[AC_MSG_RESULT([no])])
5557
CFLAGS="$op_avx_cflags_save"
5658
])
59+
#
60+
# No support for the AVX512 instruction set. Let's see if we can fall back
61+
# to an earlier instruction set (AVX2).
62+
#
63+
AS_IF([test $op_avx512_support -eq 0],
64+
[AC_MSG_CHECKING([for AVX2 support (no additional flags)])
65+
op_avx_cflags_save="$CFLAGS"
66+
AC_LINK_IFELSE(
67+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
68+
[[
69+
__m256 vA, vB;
70+
_mm256_add_ps(vA, vB)
71+
]])],
72+
[op_avx2_support=1
73+
AC_MSG_RESULT([yes])],
74+
[AC_MSG_RESULT([no])])
75+
CFLAGS="$op_avx_cflags_save"
76+
AS_IF([test $op_avx2_support -eq 0],
77+
[AC_MSG_CHECKING([for AVX2 support (with -mavx2)])
78+
op_avx_cflags_save="$CFLAGS"
79+
CFLAGS="$CFLAGS -mavx2"
80+
AC_LINK_IFELSE(
81+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
82+
[[
83+
__m256 vA, vB;
84+
_mm256_add_ps(vA, vB)
85+
]])],
86+
[op_avx2_support=1
87+
op_avx_CPPFLAGS="-mavx2"
88+
AC_MSG_RESULT([yes])],
89+
[AC_MSG_RESULT([no])])
90+
CFLAGS="$op_avx_cflags_save"
91+
])
92+
],
93+
[AC_MSG_NOTICE([Assume support for AVX2 (implied by support for AVX512)])
94+
op_avx2_support=1])
95+
#
96+
# No support for the AVX512 nor AVX2 instruction sets. Fall back
97+
# to an even earlier instruction set (AVX).
98+
#
99+
AS_IF([test $op_avx2_support -eq 0],
100+
[AC_MSG_CHECKING([for AVX support (no additional flags)])
101+
op_avx_cflags_save="$CFLAGS"
102+
AC_LINK_IFELSE(
103+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
104+
[[
105+
__m128 vA, vB;
106+
_mm128_add_ps(vA, vB)
107+
]])],
108+
[op_avx_support=1
109+
AC_MSG_RESULT([yes])],
110+
[AC_MSG_RESULT([no])])
111+
CFLAGS="$op_avx_cflags_save"
112+
AS_IF([test $op_avx_support -eq 0],
113+
[AC_MSG_CHECKING([for AVX support (with -mavx)])
114+
op_avx_cflags_save="$CFLAGS"
115+
CFLAGS="$CFLAGS -mavx"
116+
AC_LINK_IFELSE(
117+
[AC_LANG_PROGRAM([[#include <immintrin.h>]],
118+
[[
119+
__m128 vA, vB;
120+
_mm128_add_ps(vA, vB)
121+
]])],
122+
[op_avx_support=1
123+
op_avx_CPPFLAGS="-mavx"
124+
AC_MSG_RESULT([yes])],
125+
[AC_MSG_RESULT([no])])
126+
CFLAGS="$op_avx_cflags_save"
127+
])
128+
],
129+
[AC_MSG_NOTICE([Assume support for AVX (implied by support for AVX2)])
130+
op_avx_support=1])
131+
57132
AC_LANG_POP([C])
58133
])
59134

60135
AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX512],
136+
[$op_avx512_support],
137+
[Whetever AVX512 is supported in the current build])
138+
AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX2],
139+
[$op_avx2_support],
140+
[Whetever AVX2 is supported in the current build])
141+
AC_DEFINE_UNQUOTED([OMPI_MCA_OP_HAVE_AVX],
61142
[$op_avx_support],
62-
[Whetever AVX512 is supported in the current compilation context])
143+
[Whetever AVX is supported in the current build])
63144
AC_SUBST([op_avx_CPPFLAGS])
64145
OPAL_VAR_SCOPE_POP
65146
AS_IF([test $op_avx_support -eq 1],

ompi/mca/op/avx/op_avx.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121

2222
BEGIN_C_DECLS
2323

24+
#define OMPI_OP_AVX_HAS_AVX512_FLAG 0x00000004
25+
#define OMPI_OP_AVX_HAS_AVX2_FLAG 0x00000002
26+
#define OMPI_OP_AVX_HAS_AVX_FLAG 0x00000001
2427
/**
2528
* Derive a struct from the base op component struct, allowing us to
2629
* cache some component-specific information on our well-known
@@ -37,9 +40,7 @@ typedef struct {
3740
avxs; replace them with whatever is relevant for your
3841
component. */
3942

40-
/** A simple boolean indicating whether double precision is
41-
supported. */
42-
bool double_supported;
43+
uint32_t flags; /* AVX capabilities supported by the processor */
4344
} ompi_op_avx_component_t;
4445

4546
/**

ompi/mca/op/avx/op_avx_component.c

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,14 @@ static int avx_component_register(void);
4242

4343
#include <immintrin.h>
4444

45-
static int has_intel_AVX512f_features(void)
45+
static uint32_t has_intel_AVX_features(void)
4646
{
47-
const unsigned long avx512_features = _FEATURE_AVX512F;
47+
uint32_t flags = 0;
4848

49-
return _may_i_use_cpu_feature( avx512_features );
49+
flags |= _may_i_use_cpu_feature(_FEATURE_AVX512F) ? OMPI_OP_AVX_HAS_AVX512_FLAG : 0;
50+
flags |= _may_i_use_cpu_feature(_FEATURE_AVX2) ? OMPI_OP_AVX_HAS_AVX2_FLAG : 0;
51+
flags |= _may_i_use_cpu_feature(_FEATURE_AVX) ? OMPI_OP_AVX_HAS_AVX_FLAG : 0;
52+
return flags;
5053
}
5154
#else /* non-Intel compiler */
5255
#include <stdint.h>
@@ -73,22 +76,26 @@ static void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd)
7376
#endif
7477
}
7578

76-
static int has_intel_AVX512f_features(void)
79+
static uint32_t has_intel_AVX_features(void)
7780
{
78-
uint32_t abcd[4];
79-
//uint32_t avx2_mask = (1 << 5); // AVX2
80-
uint32_t avx2f_mask = (1 << 16); // AVX2F
81+
const uint32_t avx2f_mask = (1 << 16); // AVX512F
82+
const uint32_t avx2_mask = (1 << 5); // AVX2
83+
const uint32_t avx_mask = (1 << 28); // AVX
84+
uint32_t flags = 0, abcd[4];
8185

8286
#if defined(__APPLE__)
83-
uint32_t osxsave_mask = (1 << 27); // OSX.
84-
run_cpuid( 1, 0, abcd );
85-
// OS supports extended processor state management ?
86-
if ( (abcd[2] & osxsave_mask) != osxsave_mask )
87-
return 0;
87+
uint32_t osxsave_mask = (1 << 27); // OSX.
88+
run_cpuid( 1, 0, abcd );
89+
// OS supports extended processor state management ?
90+
if ( (abcd[2] & osxsave_mask) != osxsave_mask )
91+
return 0;
8892
#endif /* defined(__APPLE__) */
8993

90-
run_cpuid( 7, 0, abcd );
91-
return ((abcd[1] & avx2f_mask) == avx2f_mask);
94+
run_cpuid( 7, 0, abcd );
95+
flags |= ((abcd[1] & avx2f_mask) == avx2f_mask) ? OMPI_OP_AVX_HAS_AVX512_FLAG : 0;
96+
flags |= ((abcd[1] & avx2_mask) == avx2_mask) ? OMPI_OP_AVX_HAS_AVX2_FLAG : 0;
97+
flags |= ((abcd[1] & avx_mask) == avx_mask) ? OMPI_OP_AVX_HAS_AVX_FLAG : 0;
98+
return flags;
9299
}
93100
#endif /* non-Intel compiler */
94101

@@ -119,16 +126,14 @@ ompi_op_avx_component_t mca_op_avx_component = {
119126
*/
120127
static int avx_component_open(void)
121128
{
122-
/* A first level check to see if avx is even available in this
123-
process. E.g., you may want to do a first-order check to see
124-
if hardware is available. If so, return OMPI_SUCCESS. If not,
125-
return anything other than OMPI_SUCCESS and the component will
126-
silently be ignored.
127-
128-
Note that if this function returns non-OMPI_SUCCESS, then this
129-
component won't even be shown in ompi_info output (which is
130-
probably not what you want).
131-
*/
129+
mca_op_avx_component.flags = has_intel_AVX_features();
130+
/* A first level check to see what level of AVX is available on the
131+
* hardware.
132+
*
133+
* Note that if this function returns non-OMPI_SUCCESS, then this
134+
* component won't even be shown in ompi_info output (which is
135+
* probably not what you want).
136+
*/
132137
return OMPI_SUCCESS;
133138
}
134139

@@ -153,16 +158,15 @@ static int avx_component_close(void)
153158
static int
154159
avx_component_register(void)
155160
{
156-
mca_op_avx_component.double_supported = true;
161+
mca_op_avx_component.flags = has_intel_AVX_features();
157162
(void) mca_base_component_var_register(&mca_op_avx_component.super.opc_version,
158-
"double_supported",
159-
"Whether the double precision data types are supported or not",
160-
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
163+
"avx_support",
164+
"What level of AVX support should be used (combination of AVX 0x01, AVX2 0x02, AVX512f 0x04)",
165+
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
161166
OPAL_INFO_LVL_9,
162-
MCA_BASE_VAR_SCOPE_READONLY,
163-
&mca_op_avx_component.double_supported);
164-
165-
return OMPI_SUCCESS;
167+
MCA_BASE_VAR_SCOPE_CONSTANT,
168+
&mca_op_avx_component.flags);
169+
return OMPI_SUCCESS;
166170
}
167171

168172
/*
@@ -172,7 +176,7 @@ static int
172176
avx_component_init_query(bool enable_progress_threads,
173177
bool enable_mpi_thread_multiple)
174178
{
175-
if( !has_intel_AVX512f_features() )
179+
if( 0 == mca_op_avx_component.flags )
176180
return OMPI_ERR_NOT_SUPPORTED;
177181
return OMPI_SUCCESS;
178182
}

0 commit comments

Comments
 (0)