Skip to content

Commit f09790d

Browse files
committed
Merge branch 'avx512' into devel
* Implemented AVX-512 optimized direct_fft and reverse_fft functions.
2 parents 27f26e6 + f343fc7 commit f09790d

File tree

19 files changed

+1824
-228
lines changed

.cproject

+3-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
</option>
3131
<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="gnu.cpp.compiler.option.preprocessor.def.368930916" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
3232
<listOptionValue builtIn="false" value="LSP_TESTING"/>
33+
<listOptionValue builtIn="false" value="LSP_TRACE"/>
3334
<listOptionValue builtIn="false" value="LSP_IDE_DEBUG"/>
3435
</option>
3536
<option id="gnu.cpp.compiler.option.dialect.std.380024909" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" useByScannerDiscovery="true" value="gnu.cpp.compiler.dialect.c++11" valueType="enumerated"/>
@@ -45,6 +46,7 @@
4546
</option>
4647
<option IS_BUILTIN_EMPTY="false" IS_VALUE_EMPTY="false" id="gnu.c.compiler.option.preprocessor.def.symbols.1421051177" name="Defined symbols (-D)" superClass="gnu.c.compiler.option.preprocessor.def.symbols" useByScannerDiscovery="false" valueType="definedSymbols">
4748
<listOptionValue builtIn="false" value="LSP_TESTING"/>
49+
<listOptionValue builtIn="false" value="LSP_TRACE"/>
4850
<listOptionValue builtIn="false" value="LSP_IDE_DEBUG"/>
4951
</option>
5052
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2008435369" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
@@ -382,4 +384,4 @@
382384
</storageModule>
383385
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
384386
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
385-
</cproject>
387+
</cproject>

CHANGELOG

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*******************************************************************************
44

55
=== 1.0.29 ===
6+
* Implemented AVX-512 optimized direct_fft and reverse_fft functions.
67
* Added definition of MacOS dependencies in build scripts.
78

89
=== 1.0.28 ===

include/private/dsp/arch/aarch64/asimd/fft.h

+32-31
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2025 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2025 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -42,15 +42,15 @@ namespace lsp
4242
{
4343
if (rank == 2)
4444
{
45-
float s0_re = src_re[0] + src_re[1];
46-
float s1_re = src_re[0] - src_re[1];
47-
float s2_re = src_re[2] + src_re[3];
48-
float s3_re = src_re[2] - src_re[3];
45+
float s0_re = src_re[0] + src_re[2];
46+
float s1_re = src_re[0] - src_re[2];
47+
float s2_re = src_re[1] + src_re[3];
48+
float s3_re = src_re[1] - src_re[3];
4949

50-
float s0_im = src_im[0] + src_im[1];
51-
float s1_im = src_im[0] - src_im[1];
52-
float s2_im = src_im[2] + src_im[3];
53-
float s3_im = src_im[2] - src_im[3];
50+
float s0_im = src_im[0] + src_im[2];
51+
float s1_im = src_im[0] - src_im[2];
52+
float s2_im = src_im[1] + src_im[3];
53+
float s3_im = src_im[1] - src_im[3];
5454

5555
dst_re[0] = s0_re + s2_re;
5656
dst_re[1] = s1_re + s3_im;
@@ -104,25 +104,25 @@ namespace lsp
104104
{
105105
if (rank == 2)
106106
{
107-
float s0_re = src_re[0] + src_re[1];
108-
float s1_re = src_re[0] - src_re[1];
109-
float s2_re = src_re[2] + src_re[3];
110-
float s3_re = src_re[2] - src_re[3];
111-
112-
float s0_im = src_im[0] + src_im[1];
113-
float s1_im = src_im[0] - src_im[1];
114-
float s2_im = src_im[2] + src_im[3];
115-
float s3_im = src_im[2] - src_im[3];
116-
117-
dst_re[0] = (s0_re + s2_re)*0.25f;
118-
dst_re[1] = (s1_re - s3_im)*0.25f;
119-
dst_re[2] = (s0_re - s2_re)*0.25f;
120-
dst_re[3] = (s1_re + s3_im)*0.25f;
121-
122-
dst_im[0] = (s0_im + s2_im)*0.25f;
123-
dst_im[1] = (s1_im + s3_re)*0.25f;
124-
dst_im[2] = (s0_im - s2_im)*0.25f;
125-
dst_im[3] = (s1_im - s3_re)*0.25f;
107+
float s0_re = src_re[0] + src_re[2];
108+
float s1_re = src_re[0] - src_re[2];
109+
float s2_re = src_re[1] + src_re[3];
110+
float s3_re = src_re[1] - src_re[3];
111+
112+
float s0_im = src_im[0] + src_im[2];
113+
float s1_im = src_im[0] - src_im[2];
114+
float s2_im = src_im[1] + src_im[3];
115+
float s3_im = src_im[1] - src_im[3];
116+
117+
dst_re[0] = (s0_re + s2_re) * 0.25f;
118+
dst_re[1] = (s1_re - s3_im) * 0.25f;
119+
dst_re[2] = (s0_re - s2_re) * 0.25f;
120+
dst_re[3] = (s1_re + s3_im) * 0.25f;
121+
122+
dst_im[0] = (s0_im + s2_im) * 0.25f;
123+
dst_im[1] = (s1_im + s3_re) * 0.25f;
124+
dst_im[2] = (s0_im - s2_im) * 0.25f;
125+
dst_im[3] = (s1_im - s3_re) * 0.25f;
126126
}
127127
else if (rank == 1)
128128
{
@@ -160,7 +160,8 @@ namespace lsp
160160

161161
dsp::normalize_fft2(dst_re, dst_im, rank);
162162
}
163-
}
164-
}
163+
164+
} /* namespace asimd */
165+
} /* namespace lsp */
165166

166167
#endif /* PRIVATE_DSP_ARCH_AARCH64_ASIMD_FFT_H_ */

include/private/dsp/arch/arm/neon-d32/fft.h

+32-31
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2025 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2025 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -44,15 +44,15 @@ namespace lsp
4444
{
4545
if (rank == 2)
4646
{
47-
float s0_re = src_re[0] + src_re[1];
48-
float s1_re = src_re[0] - src_re[1];
49-
float s2_re = src_re[2] + src_re[3];
50-
float s3_re = src_re[2] - src_re[3];
47+
float s0_re = src_re[0] + src_re[2];
48+
float s1_re = src_re[0] - src_re[2];
49+
float s2_re = src_re[1] + src_re[3];
50+
float s3_re = src_re[1] - src_re[3];
5151

52-
float s0_im = src_im[0] + src_im[1];
53-
float s1_im = src_im[0] - src_im[1];
54-
float s2_im = src_im[2] + src_im[3];
55-
float s3_im = src_im[2] - src_im[3];
52+
float s0_im = src_im[0] + src_im[2];
53+
float s1_im = src_im[0] - src_im[2];
54+
float s2_im = src_im[1] + src_im[3];
55+
float s3_im = src_im[1] - src_im[3];
5656

5757
dst_re[0] = s0_re + s2_re;
5858
dst_re[1] = s1_re + s3_im;
@@ -98,25 +98,25 @@ namespace lsp
9898
{
9999
if (rank == 2)
100100
{
101-
float s0_re = src_re[0] + src_re[1];
102-
float s1_re = src_re[0] - src_re[1];
103-
float s2_re = src_re[2] + src_re[3];
104-
float s3_re = src_re[2] - src_re[3];
105-
106-
float s0_im = src_im[0] + src_im[1];
107-
float s1_im = src_im[0] - src_im[1];
108-
float s2_im = src_im[2] + src_im[3];
109-
float s3_im = src_im[2] - src_im[3];
110-
111-
dst_re[0] = (s0_re + s2_re)*0.25f;
112-
dst_re[1] = (s1_re - s3_im)*0.25f;
113-
dst_re[2] = (s0_re - s2_re)*0.25f;
114-
dst_re[3] = (s1_re + s3_im)*0.25f;
115-
116-
dst_im[0] = (s0_im + s2_im)*0.25f;
117-
dst_im[1] = (s1_im + s3_re)*0.25f;
118-
dst_im[2] = (s0_im - s2_im)*0.25f;
119-
dst_im[3] = (s1_im - s3_re)*0.25f;
101+
float s0_re = src_re[0] + src_re[2];
102+
float s1_re = src_re[0] - src_re[2];
103+
float s2_re = src_re[1] + src_re[3];
104+
float s3_re = src_re[1] - src_re[3];
105+
106+
float s0_im = src_im[0] + src_im[2];
107+
float s1_im = src_im[0] - src_im[2];
108+
float s2_im = src_im[1] + src_im[3];
109+
float s3_im = src_im[1] - src_im[3];
110+
111+
dst_re[0] = (s0_re + s2_re) * 0.25f;
112+
dst_re[1] = (s1_re - s3_im) * 0.25f;
113+
dst_re[2] = (s0_re - s2_re) * 0.25f;
114+
dst_re[3] = (s1_re + s3_im) * 0.25f;
115+
116+
dst_im[0] = (s0_im + s2_im) * 0.25f;
117+
dst_im[1] = (s1_im + s3_re) * 0.25f;
118+
dst_im[2] = (s0_im - s2_im) * 0.25f;
119+
dst_im[3] = (s1_im - s3_re) * 0.25f;
120120
}
121121
else if (rank == 1)
122122
{
@@ -256,7 +256,8 @@ namespace lsp
256256

257257
packed_unscramble_reverse(dst, rank);
258258
}
259-
}
260-
}
259+
260+
} /* namespace neon_d32 */
261+
} /* namespace lsp */
261262

262263
#endif /* PRIVATE_DSP_ARCH_ARM_NEON_D32_FFT_H_ */

include/private/dsp/arch/x86/avx/fft.h

+29-28
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2025 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2025 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -67,15 +67,15 @@ namespace lsp
6767
{
6868
if (rank == 2)
6969
{
70-
float s0_re = src_re[0] + src_re[1];
71-
float s1_re = src_re[0] - src_re[1];
72-
float s2_re = src_re[2] + src_re[3];
73-
float s3_re = src_re[2] - src_re[3];
70+
float s0_re = src_re[0] + src_re[2];
71+
float s1_re = src_re[0] - src_re[2];
72+
float s2_re = src_re[1] + src_re[3];
73+
float s3_re = src_re[1] - src_re[3];
7474

75-
float s0_im = src_im[0] + src_im[1];
76-
float s1_im = src_im[0] - src_im[1];
77-
float s2_im = src_im[2] + src_im[3];
78-
float s3_im = src_im[2] - src_im[3];
75+
float s0_im = src_im[0] + src_im[2];
76+
float s1_im = src_im[0] - src_im[2];
77+
float s2_im = src_im[1] + src_im[3];
78+
float s3_im = src_im[1] - src_im[3];
7979

8080
dst_re[0] = s0_re + s2_re;
8181
dst_re[1] = s1_re + s3_im;
@@ -109,25 +109,25 @@ namespace lsp
109109
{
110110
if (rank == 2)
111111
{
112-
float s0_re = src_re[0] + src_re[1];
113-
float s1_re = src_re[0] - src_re[1];
114-
float s2_re = src_re[2] + src_re[3];
115-
float s3_re = src_re[2] - src_re[3];
112+
float s0_re = src_re[0] + src_re[2];
113+
float s1_re = src_re[0] - src_re[2];
114+
float s2_re = src_re[1] + src_re[3];
115+
float s3_re = src_re[1] - src_re[3];
116116

117-
float s0_im = src_im[0] + src_im[1];
118-
float s1_im = src_im[0] - src_im[1];
119-
float s2_im = src_im[2] + src_im[3];
120-
float s3_im = src_im[2] - src_im[3];
117+
float s0_im = src_im[0] + src_im[2];
118+
float s1_im = src_im[0] - src_im[2];
119+
float s2_im = src_im[1] + src_im[3];
120+
float s3_im = src_im[1] - src_im[3];
121121

122-
dst_re[0] = s0_re + s2_re;
123-
dst_re[1] = s1_re + s3_im;
124-
dst_re[2] = s0_re - s2_re;
125-
dst_re[3] = s1_re - s3_im;
122+
dst_re[0] = (s0_re + s2_re) * 0.25f;
123+
dst_re[1] = (s1_re - s3_im) * 0.25f;
124+
dst_re[2] = (s0_re - s2_re) * 0.25f;
125+
dst_re[3] = (s1_re + s3_im) * 0.25f;
126126

127-
dst_im[0] = s0_im + s2_im;
128-
dst_im[1] = s1_im - s3_re;
129-
dst_im[2] = s0_im - s2_im;
130-
dst_im[3] = s1_im + s3_re;
127+
dst_im[0] = (s0_im + s2_im) * 0.25f;
128+
dst_im[1] = (s1_im + s3_re) * 0.25f;
129+
dst_im[2] = (s0_im - s2_im) * 0.25f;
130+
dst_im[3] = (s1_im - s3_re) * 0.25f;
131131
}
132132
else if (rank == 1)
133133
{
@@ -270,5 +270,6 @@ namespace lsp
270270

271271
dsp::normalize_fft2(dst_re, dst_im, rank);
272272
}
273-
}
274-
}
273+
274+
} /* namespace avx */
275+
} /* namespace lsp */

include/private/dsp/arch/x86/avx2/fft/normalize.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ namespace lsp
120120
"%xmm4", "%xmm5", "%xmm6", "%xmm7"
121121
);
122122
}
123-
}
124-
}
123+
} /* namespace avx2 */
124+
} /* namespace lsp */
125125

126126
#endif /* PRIVATE_DSP_ARCH_X86_AVX2_FFT_NORMALIZE_H_ */

0 commit comments

Comments
 (0)