Skip to content

Commit 7db5651

Browse files
committed
Release 1.0.28
* The DSP library now builds for Apple M1 chips and above on MacOS. * Implemented abs_max2, abs_min2, abs_max3 and abs_min3 functions. * Implemented sign_min, sign_max, sign_minmax functions. * Updated build scripts. * Updated module versions in dependencies.
2 parents cfe499f + 32362fa commit 7db5651

File tree

192 files changed

+11106
-1853
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

192 files changed

+11106
-1853
lines changed

CHANGELOG

+7
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
* RECENT CHANGES
33
*******************************************************************************
44

5+
=== 1.0.28 ===
6+
* The DSP library now builds for Apple M1 chips and above on MacOS.
7+
* Implemented abs_max2, abs_min2, abs_max3 and abs_min3 functions.
8+
* Implemented sign_min, sign_max, sign_minmax functions.
9+
* Updated build scripts.
10+
* Updated module versions in dependencies.
11+
512
=== 1.0.27 ===
613
* Updated build scripts.
714
* Updated module versions in dependencies.

README.md

+5-4
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ This library provides set of functions that perform SIMD-optimized
66
computing on several hardware architectures.
77

88
Currently supported set of SIMD extensions:
9-
* i586 architecture (32-bit): SSE, SSE2, SSE3, AVX, AVX2, FMA3 and AVX512;
10-
* x86_64 architecture (64-bit): SSE, SSE2, SSE3, AVX, AVX2, FMA3 and AVX512;
9+
* i586 architecture (32-bit): SSE, SSE2, SSE3, AVX, AVX2, FMA3 and partial support of AVX512;
10+
* x86_64 architecture (64-bit): SSE, SSE2, SSE3, AVX, AVX2, FMA3 and partial support of AVX512;
1111
* armv7 architecture (32-bit): NEON;
1212
* AArch64 architecture (64-bit): ASIMD.
1313

@@ -37,15 +37,16 @@ Current set of functions provided:
3737
The build and correct unit test execution have been confirmed for the following platforms:
3838
* FreeBSD
3939
* GNU/Linux
40+
* MacOS
4041
* OpenBSD
4142
* Windows 32-bit
4243
* Windows 64-bit
4344

4445
## Supported architectures
4546

4647
Support for the following hardware architectures has been implemented:
47-
* i386 (32-bit) - full support (AVX-512 on the way).
48-
* x86_64 (64-bit) - full support (AVX-512 on the way).
48+
* i386 (32-bit) - full support (SSE1-SSE3, AVX, AVX2, partial support for AVX-512).
49+
* x86_64 (64-bit) - full support (SSE1-SSE3, AVX, AVX2, partial support for AVX-512).
4950
* ARMv6A - full support.
5051
* ARMv7A - full support.
5152
* AArch64 - full support.

include/lsp-plug.in/dsp/common/pan.h

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2024 Vladimir Sadovnikov <[email protected]>
4+
*
5+
* This file is part of lsp-dsp-lib
6+
* Created on: 10 нояб. 2024 г.
7+
*
8+
* lsp-dsp-lib is free software: you can redistribute it and/or modify
9+
* it under the terms of the GNU Lesser General Public License as published by
10+
* the Free Software Foundation, either version 3 of the License, or
11+
* any later version.
12+
*
13+
* lsp-dsp-lib is distributed in the hope that it will be useful,
14+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
* GNU Lesser General Public License for more details.
17+
*
18+
* You should have received a copy of the GNU Lesser General Public License
19+
* along with lsp-dsp-lib. If not, see <https://www.gnu.org/licenses/>.
20+
*/
21+
22+
#ifndef LSP_PLUG_IN_DSP_COMMON_PAN_H_
23+
#define LSP_PLUG_IN_DSP_COMMON_PAN_H_
24+
25+
#include <lsp-plug.in/dsp/common/types.h>
26+
27+
LSP_DSP_LIB_BEGIN_NAMESPACE
28+
29+
#pragma pack(push, 1)
30+
31+
/**
32+
* Definition for the panorama calculation function (parallel form)
33+
*
34+
* @param dst destination buffer to store value
35+
* @param l left channel data
36+
* @param r right channel data
37+
* @param dfl default value if it is not possible to compute panorama
38+
* @param count number of samples to process
39+
*/
40+
typedef void (* LSP_DSP_LIB_TYPE(depan_t))(float *dst, const float *l, const float *r, float dfl, size_t count);
41+
42+
#pragma pack(pop)
43+
44+
LSP_DSP_LIB_END_NAMESPACE
45+
46+
/**
47+
* Calculate the linear pan law panorama position between left and right channels (parallel form):
48+
* pan = abs(R) / (abs(L) + abs(R))
49+
*
50+
* @param dst destination buffer to store value
51+
* @param l left channel data
52+
* @param r right channel data
53+
* @param dfl default value if it is not possible to compute panorama
54+
* @param count number of samples to process
55+
*/
56+
LSP_DSP_LIB_SYMBOL(void, depan_lin, float *dst, const float *l, const float *r, float dfl, size_t count);
57+
58+
/**
59+
* Calculate the equal power pan law (quadratic) panorama position between left and right channels (parallel form):
60+
* pan = R^2 / (L^2 + R^2)
61+
*
62+
* @param dst destination buffer to store value
63+
* @param l left channel data
64+
* @param r right channel data
65+
* @param dfl default value if it is not possible to compute panorama
66+
* @param count number of samples to process
67+
*/
68+
LSP_DSP_LIB_SYMBOL(void, depan_eqpow, float *dst, const float *l, const float *r, float dfl, size_t count);
69+
70+
#endif /* LSP_PLUG_IN_DSP_COMMON_PAN_H_ */

include/lsp-plug.in/dsp/common/pcomplex.h

+9
Original file line numberDiff line numberDiff line change
@@ -238,4 +238,13 @@ LSP_DSP_LIB_SYMBOL(void, pcomplex_r2c_div2, float *dst, const float *src, size_t
238238
*/
239239
LSP_DSP_LIB_SYMBOL(void, pcomplex_r2c_rdiv2, float *dst, const float *src, size_t count);
240240

241+
/** Compute complex correlation between two sources and store to the result array
242+
*
243+
* @param dst_corr array to store normalized correlation
244+
* @param src1 set of complex numbers
245+
* @param src2 set of complex numbers
246+
* @param count number of elements to process
247+
*/
248+
LSP_DSP_LIB_SYMBOL(void, pcomplex_corr, float *dst_corr, const float *src1, const float *src2, size_t count);
249+
241250
#endif /* LSP_PLUG_IN_DSP_COMMON_PCOMPLEX_H_ */

include/lsp-plug.in/dsp/common/pmath/abs_vv.h

+34
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,22 @@ LSP_DSP_LIB_SYMBOL(void, abs_div2, float *dst, const float *src, size_t count);
8787
*/
8888
LSP_DSP_LIB_SYMBOL(void, abs_rdiv2, float *dst, const float *src, size_t count);
8989

90+
/** Calculate absolute values: dst[i] = max(abs(src[i]), dst[i])
91+
*
92+
* @param dst destination vector
93+
* @param src source vector
94+
* @param count number of elements
95+
*/
96+
LSP_DSP_LIB_SYMBOL(void, abs_max2, float *dst, const float *src, size_t count);
97+
98+
/** Calculate absolute values: dst[i] = min(abs(src[i]), dst[i])
99+
*
100+
* @param dst destination vector
101+
* @param src source vector
102+
* @param count number of elements
103+
*/
104+
LSP_DSP_LIB_SYMBOL(void, abs_min2, float *dst, const float *src, size_t count);
105+
90106
/** Calculate absolute values: dst[i] = src1[i] + abs(src2[i])
91107
*
92108
* @param dst destination vector
@@ -141,4 +157,22 @@ LSP_DSP_LIB_SYMBOL(void, abs_div3, float *dst, const float *src1, const float *s
141157
*/
142158
LSP_DSP_LIB_SYMBOL(void, abs_rdiv3, float *dst, const float *src1, const float *src2, size_t count);
143159

160+
/** Calculate absolute values: dst[i] = max(src1[i], abs(src2[i]))
161+
*
162+
* @param dst destination vector
163+
* @param src1 source vector 1
164+
* @param src2 source vector 2
165+
* @param count number of elements
166+
*/
167+
LSP_DSP_LIB_SYMBOL(void, abs_max3, float *dst, const float *src1, const float *src2, size_t count);
168+
169+
/** Calculate absolute values: dst[i] = min(src1[i], abs(src2[i]))
170+
*
171+
* @param dst destination vector
172+
* @param src1 source vector 1
173+
* @param src2 source vector 2
174+
* @param count number of elements
175+
*/
176+
LSP_DSP_LIB_SYMBOL(void, abs_min3, float *dst, const float *src1, const float *src2, size_t count);
177+
144178
#endif /* LSP_PLUG_IN_DSP_COMMON_PMATH_ABS_VV_H_ */

include/lsp-plug.in/dsp/common/search/minmax.h

+34-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2020 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2020 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2024 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -48,6 +48,14 @@ LSP_DSP_LIB_SYMBOL(float, max, const float *src, size_t count);
4848
*/
4949
LSP_DSP_LIB_SYMBOL(float, abs_max, const float *src, size_t count);
5050

51+
/** Get maximum ignoring sign: result = src[i] : abs(src[i]) -> max
52+
*
53+
* @param src source array
54+
* @param count number of elements
55+
* @return result
56+
*/
57+
LSP_DSP_LIB_SYMBOL(float, sign_max, const float *src, size_t count);
58+
5159
/** Get absolute minimum: result = min { abs(src[i]) }
5260
*
5361
* @param src source array
@@ -56,9 +64,19 @@ LSP_DSP_LIB_SYMBOL(float, abs_max, const float *src, size_t count);
5664
*/
5765
LSP_DSP_LIB_SYMBOL(float, abs_min, const float *src, size_t count);
5866

67+
/** Get minimum ignoring sign: result = src[i] : abs(src[i]) -> min
68+
*
69+
* @param src source array
70+
* @param count number of elements
71+
* @return result
72+
*/
73+
LSP_DSP_LIB_SYMBOL(float, sign_min, const float *src, size_t count);
74+
5975
/** Calculate min { src }, max { src }
6076
*
6177
* @param src source vector
78+
* @param min pointer to store minimum value
79+
* @param max pointer to store maximum value
6280
* @param count number of elements
6381
* @note the function returns nothing: the results are stored via the min and max pointers
6482
*/
@@ -67,9 +85,23 @@ LSP_DSP_LIB_SYMBOL(void, minmax, const float *src, size_t count, float *min, flo
6785
/** Calculate min { abs(src) }, max { abs(src) }
6886
*
6987
* @param src source vector
88+
* @param min pointer to store minimum value
89+
* @param max pointer to store maximum value
7090
* @param count number of elements
7191
* @note the function returns nothing: the results are stored via the min and max pointers
7292
*/
7393
LSP_DSP_LIB_SYMBOL(void, abs_minmax, const float *src, size_t count, float *min, float *max);
7494

95+
/** Calculate:
96+
* min = src[i] : abs(src[i]) -> min,
97+
* max = src[i] : abs(src[i]) -> max
98+
*
99+
* @param src source vector
100+
* @param min pointer to store minimum value
101+
* @param max pointer to store maximum value
102+
* @param count number of elements
103+
* @note the function returns nothing: the results are stored via the min and max pointers
104+
*/
105+
LSP_DSP_LIB_SYMBOL(void, sign_minmax, const float *src, size_t count, float *min, float *max);
106+
75107
#endif /* LSP_PLUG_IN_DSP_COMMON_SEARCH_MINMAX_H_ */

include/lsp-plug.in/dsp/dsp.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
2-
* Copyright (C) 2023 Linux Studio Plugins Project <https://lsp-plug.in/>
3-
* (C) 2023 Vladimir Sadovnikov <[email protected]>
2+
* Copyright (C) 2024 Linux Studio Plugins Project <https://lsp-plug.in/>
3+
* (C) 2024 Vladimir Sadovnikov <[email protected]>
44
*
55
* This file is part of lsp-dsp-lib
66
* Created on: 31 мар. 2020 г.
@@ -58,6 +58,7 @@
5858
#include <lsp-plug.in/dsp/common/graphics.h>
5959
#include <lsp-plug.in/dsp/common/hmath.h>
6060
#include <lsp-plug.in/dsp/common/mix.h>
61+
#include <lsp-plug.in/dsp/common/pan.h>
6162
#include <lsp-plug.in/dsp/common/msmatrix.h>
6263
#include <lsp-plug.in/dsp/common/pcomplex.h>
6364
#include <lsp-plug.in/dsp/common/pmath.h>

include/lsp-plug.in/dsp/version.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
// Define version of headers
2626
#define LSP_DSP_LIB_MAJOR 1
2727
#define LSP_DSP_LIB_MINOR 0
28-
#define LSP_DSP_LIB_MICRO 27
28+
#define LSP_DSP_LIB_MICRO 28
2929

3030
#if defined(__WINDOWS__) || defined(__WIN32__) || defined(__WIN64__) || defined(_WIN64) || defined(_WIN32) || defined(__WINNT) || defined(__WINNT__)
3131
#define LSP_DSP_LIB_EXPORT_MODIFIER __declspec(dllexport)

include/private/dsp/arch/aarch64/asimd/correlation.h

+10-10
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ namespace lsp
139139

140140
static const float corr_const[] __lsp_aligned16 =
141141
{
142-
LSP_DSP_VEC8(1e-10f)
142+
LSP_DSP_VEC8(1e-18f)
143143
};
144144

145145
void corr_incr(dsp::correlation_t *corr, float *dst,
@@ -213,9 +213,9 @@ namespace lsp
213213
__ASM_EMIT("dup v0.4s, v9.s[3]") /* v0 = xv' = T[7] */
214214
__ASM_EMIT("dup v1.4s, v5.s[3]") /* v1 = xa' = BA[7] */
215215
__ASM_EMIT("dup v2.4s, v7.s[3]") /* v2 = xb' = BB[7] */
216-
__ASM_EMIT("ldp q14, q15, [%[CORR_CC]]") /* v14 = 1e-10, v15 = 1e-10 */
216+
__ASM_EMIT("ldp q14, q15, [%[CORR_CC]]") /* v14 = threshold, v15 = threshold */
217217

218-
__ASM_EMIT("fcmge v14.4s, v8.4s, v14.4s") /* v14 = T >= 1e-10 */
218+
__ASM_EMIT("fcmge v14.4s, v8.4s, v14.4s") /* v14 = T >= threshold */
219219
__ASM_EMIT("fcmge v15.4s, v9.4s, v15.4s")
220220
__ASM_EMIT("frsqrte v4.4s, v10.4s") /* v4 = x0 */
221221
__ASM_EMIT("frsqrte v5.4s, v11.4s")
@@ -233,7 +233,7 @@ namespace lsp
233233
__ASM_EMIT("fmul v11.4s, v5.4s, v13.4s")
234234
__ASM_EMIT("fmul v10.4s, v8.4s, v10.4s") /* v10 = T/svrtf(B) */
235235
__ASM_EMIT("fmul v11.4s, v9.4s, v11.4s")
236-
__ASM_EMIT("and v10.16b, v10.16b, v14.16b") /* v10 = (T >= 1e-10) ? T/svrt(B) : 0 */
236+
__ASM_EMIT("and v10.16b, v10.16b, v14.16b") /* v10 = (T >= threshold) ? T/svrt(B) : 0 */
237237
__ASM_EMIT("and v11.16b, v11.16b, v15.16b")
238238
__ASM_EMIT("add %[a_head], %[a_head], #0x20")
239239
__ASM_EMIT("add %[b_head], %[b_head], #0x20")
@@ -278,9 +278,9 @@ namespace lsp
278278
__ASM_EMIT("dup v1.4s, v4.s[3]") /* v1 = xa' = BA[7] */
279279
__ASM_EMIT("dup v2.4s, v6.s[3]") /* v2 = xb' = BB[7] */
280280
__ASM_EMIT("dup v0.4s, v8.s[3]") /* v0 = xv' = T[7] */
281-
__ASM_EMIT("ldr q14, [%[CORR_CC]]") /* v14 = 1e-10 */
281+
__ASM_EMIT("ldr q14, [%[CORR_CC]]") /* v14 = threshold */
282282

283-
__ASM_EMIT("fcmge v14.4s, v8.4s, v14.4s") /* v14 = T >= 1e-10 */
283+
__ASM_EMIT("fcmge v14.4s, v8.4s, v14.4s") /* v14 = T >= threshold */
284284
__ASM_EMIT("frsqrte v4.4s, v10.4s") /* v4 = x0 */
285285
__ASM_EMIT("fmul v6.4s, v4.4s, v10.4s") /* v6 = R * x0 */
286286
__ASM_EMIT("frsqrts v12.4s, v6.4s, v4.4s") /* v12 = (3 - R * x0 * x0) / 2 */
@@ -289,7 +289,7 @@ namespace lsp
289289
__ASM_EMIT("frsqrts v12.4s, v6.4s, v4.4s") /* v12 = (3 - R * x1 * x1) / 2 */
290290
__ASM_EMIT("fmul v10.4s, v4.4s, v12.4s") /* v10 = 1/svrtf(B) = x2 = x1 * (3 - R * x1 * x1) / 2 */
291291
__ASM_EMIT("fmul v10.4s, v8.4s, v10.4s") /* v10 = T/svrtf(B) */
292-
__ASM_EMIT("and v10.16b, v10.16b, v14.16b") /* v10 = (T >= 1e-10) ? T/svrt(B) : 0 */
292+
__ASM_EMIT("and v10.16b, v10.16b, v14.16b") /* v10 = (T >= threshold) ? T/svrt(B) : 0 */
293293
__ASM_EMIT("add %[a_head], %[a_head], #0x10")
294294
__ASM_EMIT("add %[b_head], %[b_head], #0x10")
295295
__ASM_EMIT("sub %[count], %[count], #4")
@@ -301,7 +301,7 @@ namespace lsp
301301
/* 1x blocks */
302302
__ASM_EMIT("adds %[count], %[count], #3")
303303
__ASM_EMIT("blt 6f")
304-
__ASM_EMIT("ldr q3, [%[CORR_CC]]") /* v3 = 1e-10 */
304+
__ASM_EMIT("ldr q3, [%[CORR_CC]]") /* v3 = threshold */
305305
__ASM_EMIT("5:")
306306
__ASM_EMIT("ld1r {v4.4s}, [%[a_head]]") /* v4 = ah0 */
307307
__ASM_EMIT("ld1r {v6.4s}, [%[b_head]]") /* v6 = bh0 */
@@ -319,7 +319,7 @@ namespace lsp
319319
__ASM_EMIT("fadd v0.4s, v12.4s, v0.4s") /* v0 = T = xv + DV */
320320
__ASM_EMIT("fmul v10.4s, v1.4s, v2.4s") /* v10 = B = BA * BB */
321321

322-
__ASM_EMIT("fcmge v14.4s, v0.4s, v3.4s") /* v14 = T >= 1e-10 */
322+
__ASM_EMIT("fcmge v14.4s, v0.4s, v3.4s") /* v14 = T >= threshold */
323323
__ASM_EMIT("frsqrte v4.4s, v10.4s") /* v4 = x0 */
324324
__ASM_EMIT("fmul v6.4s, v4.4s, v10.4s") /* v6 = R * x0 */
325325
__ASM_EMIT("frsqrts v12.4s, v6.4s, v4.4s") /* v12 = (3 - R * x0 * x0) / 2 */
@@ -328,7 +328,7 @@ namespace lsp
328328
__ASM_EMIT("frsqrts v12.4s, v6.4s, v4.4s") /* v12 = (3 - R * x1 * x1) / 2 */
329329
__ASM_EMIT("fmul v10.4s, v4.4s, v12.4s") /* v10 = 1/svrtf(B) = x2 = x1 * (3 - R * x1 * x1) / 2 */
330330
__ASM_EMIT("fmul v10.4s, v0.4s, v10.4s") /* v10 = T/svrtf(B) */
331-
__ASM_EMIT("and v10.16b, v10.16b, v14.16b") /* v10 = (T >= 1e-10) ? T/svrt(B) : 0 */
331+
__ASM_EMIT("and v10.16b, v10.16b, v14.16b") /* v10 = (T >= threshold) ? T/svrt(B) : 0 */
332332
__ASM_EMIT("add %[a_head], %[a_head], #0x04")
333333
__ASM_EMIT("add %[b_head], %[b_head], #0x04")
334334
__ASM_EMIT("subs %[count], %[count], #1")

0 commit comments

Comments
 (0)