Skip to content

Commit 1fa9e56

Browse files
authored
Merge pull request #952 from soyersoyer/reverbspeed
Mixing/reverb speed improvements
2 parents 81e66a7 + d5ddae6 commit 1fa9e56

File tree

5 files changed

+174
-103
lines changed

5 files changed

+174
-103
lines changed

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ OBJS = main.o kernel.o minidexed.o config.o userinterface.o uimenu.o \
1010
mididevice.o midikeyboard.o serialmididevice.o pckeyboard.o \
1111
sysexfileloader.o performanceconfig.o perftimer.o \
1212
effect_platervbstereo.o uibuttons.o midipin.o \
13-
arm_float_to_q23.o \
13+
arm_float_to_q23.o arm_scale_zip_f32.o \
1414
net/ftpdaemon.o net/ftpworker.o net/applemidi.o net/udpmidi.o net/mdnspublisher.o udpmididevice.o
1515

1616
OPTIMIZE = -O3

src/arm_scale_zip_f32.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#include "arm_scale_zip_f32.h"
2+
3+
/**
4+
Scale two vectors by a scalar and interleave (zip) the results. For floating-point data, the algorithm used is:
5+
6+
<pre>
7+
pDst[2*n] = pSrc1[n] * scale, pDst[2*n+1] = pSrc2[n] * scale, 0 <= n < blockSize.
8+
</pre>
9+
10+
*/
11+
12+
/**
13+
* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results.
14+
* @param[in] pSrc1 points to the input vector 1
15+
* @param[in] pSrc2 points to the input vector 2
16+
* @param[in] scale scale scalar
17+
* @param[out] pDst points to the output vector
18+
* @param[in] blockSize number of samples in each input vector (pDst receives 2 * blockSize samples)
19+
*/
20+
21+
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
22+
/* NEON variant: scales pSrc1 and pSrc2 by `scale` and writes the results
** interleaved to pDst (pSrc1 sample first, then pSrc2 sample). */
void arm_scale_zip_f32(
23+
const float32_t * pSrc1,
24+
const float32_t * pSrc2,
25+
float32_t scale,
26+
float32_t * pDst,
27+
uint32_t blockSize)
28+
{
29+
uint32_t blkCnt; /* Loop counter */
30+
31+
f32x2x2_t res;
32+
33+
/* Process 2 samples from each input per iteration (4 interleaved outputs) */
34+
blkCnt = blockSize >> 1U;
35+
36+
while (blkCnt > 0U)
37+
{
38+
res.val[0] = vmul_n_f32(vld1_f32(pSrc1), scale);
39+
res.val[1] = vmul_n_f32(vld1_f32(pSrc2), scale);
/* vst2_f32 stores the two result vectors element-interleaved */
40+
vst2_f32(pDst, res);
41+
42+
/* Increment pointers: 2 samples consumed per input, 4 samples written */
43+
pSrc1 += 2;
44+
pSrc2 += 2;
45+
pDst += 4;
46+
47+
/* Decrement the loop counter */
48+
blkCnt--;
49+
}
50+
51+
/* If the blockSize is not a multiple of 2, compute the remaining sample pair here.
52+
** No loop unrolling is used. */
53+
blkCnt = blockSize & 1;
54+
55+
while (blkCnt > 0U)
56+
{
57+
*pDst++ = *pSrc1++ * scale;
58+
*pDst++ = *pSrc2++ * scale;
59+
60+
/* Decrement the loop counter */
61+
blkCnt--;
62+
}
63+
}
64+
#else
65+
/* Scalar fallback (used when ARM_MATH_NEON_EXPERIMENTAL is not defined):
** scales pSrc1 and pSrc2 by `scale` and writes the results interleaved to pDst. */
void arm_scale_zip_f32(
66+
const float32_t * pSrc1,
67+
const float32_t * pSrc2,
68+
float32_t scale,
69+
float32_t * pDst,
70+
uint32_t blockSize)
71+
{
72+
uint32_t blkCnt; /* Loop counter */
73+
74+
/* One iteration per input sample pair; each iteration writes 2 outputs */
blkCnt = blockSize;
75+
76+
while (blkCnt > 0U)
77+
{
/* Scaled pSrc1 sample first, then the scaled pSrc2 sample */
78+
*pDst++ = *pSrc1++ * scale;
79+
*pDst++ = *pSrc2++ * scale;
80+
81+
/* Decrement the loop counter */
82+
blkCnt--;
83+
}
84+
}
85+
#endif

src/arm_scale_zip_f32.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#pragma once
2+
3+
#include "arm_math_types.h"
4+
5+
#ifdef __cplusplus
6+
extern "C"
7+
{
8+
#endif
9+
10+
/**
11+
* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results.
12+
* @param[in] pSrc1 points to the input vector 1
13+
* @param[in] pSrc2 points to the input vector 2
14+
* @param[in] scale scale scalar
15+
* @param[out] pDst points to the output vector
16+
* @param[in] blockSize number of samples in each input vector (pDst receives 2 * blockSize samples)
17+
*/
18+
void arm_scale_zip_f32(const float32_t * pSrc1, const float32_t * pSrc2, float32_t scale, float32_t * pDst, uint32_t blockSize);
19+
20+
#ifdef __cplusplus
21+
}
22+
#endif

src/effect_mixer.hpp

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -124,31 +124,10 @@ template <int NN> class AudioStereoMixer : public AudioMixer<NN>
124124
assert(in);
125125

126126
// left
127-
arm_scale_f32(in, panorama[channel][0], tmp, buffer_length);
128-
if(multiplier[channel]!=UNITY_GAIN)
129-
arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length);
130-
arm_add_f32(sumbufL, tmp, sumbufL, buffer_length);
131-
// right
132-
arm_scale_f32(in, panorama[channel][1], tmp, buffer_length);
133-
if(multiplier[channel]!=UNITY_GAIN)
134-
arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length);
135-
arm_add_f32(sumbufR, tmp, sumbufR, buffer_length);
136-
}
137-
138-
void doAddMix(uint8_t channel, float32_t* inL, float32_t* inR)
139-
{
140-
float32_t tmp[buffer_length];
141-
142-
assert(inL);
143-
assert(inR);
144-
145-
// left
146-
if(multiplier[channel]!=UNITY_GAIN)
147-
arm_scale_f32(inL,multiplier[channel],tmp,buffer_length);
127+
arm_scale_f32(in, panorama[channel][0] * multiplier[channel], tmp, buffer_length);
148128
arm_add_f32(sumbufL, tmp, sumbufL, buffer_length);
149129
// right
150-
if(multiplier[channel]!=UNITY_GAIN)
151-
arm_scale_f32(inR,multiplier[channel],tmp,buffer_length);
130+
arm_scale_f32(in, panorama[channel][1] * multiplier[channel], tmp, buffer_length);
152131
arm_add_f32(sumbufR, tmp, sumbufR, buffer_length);
153132
}
154133

@@ -168,6 +147,20 @@ template <int NN> class AudioStereoMixer : public AudioMixer<NN>
168147
arm_fill_f32(0.0f, sumbufR, buffer_length);
169148
}
170149

150+
// Hands out the mixer's own sum buffers instead of copying them:
// buffers[0] receives the sumbufL pointer, buffers[1] the sumbufR pointer.
void getBuffers(float32_t (*buffers[2]))
151+
{
152+
buffers[0] = sumbufL;
153+
buffers[1] = sumbufR;
154+
}
155+
156+
// Resets both sum buffers to 0.0f over buffer_length samples.
// A buffer whose pointer is null is skipped.
void zeroFill()
157+
{
158+
if(sumbufL)
159+
arm_fill_f32(0.0f, sumbufL, buffer_length);
160+
if(sumbufR)
161+
arm_fill_f32(0.0f, sumbufR, buffer_length);
162+
}
163+
171164
protected:
172165
using AudioMixer<NN>::sumbufL;
173166
using AudioMixer<NN>::multiplier;

src/minidexed.cpp

Lines changed: 50 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <stdio.h>
3131
#include <assert.h>
3232
#include "arm_float_to_q23.h"
33+
#include "arm_scale_zip_f32.h"
3334

3435
const char WLANFirmwarePath[] = "SD:firmware/";
3536
const char WLANConfigFile[] = "SD:wpa_supplicant.conf";
@@ -1354,33 +1355,20 @@ void CMiniDexed::ProcessSound (void)
13541355
float32_t tmp_float[nFrames*Channels];
13551356
int32_t tmp_int[nFrames*Channels];
13561357

1357-
if(nMasterVolume > 0.0)
1358+
// Convert dual float array (8 chan) to single int16 array (8 chan)
1359+
for(uint16_t i=0; i<nFrames;i++)
13581360
{
1359-
// Convert dual float array (8 chan) to single int16 array (8 chan)
1360-
for(uint16_t i=0; i<nFrames;i++)
1361+
// TGs will alternate on L/R channels for each output
1362+
// reading directly from the TG OutputLevel buffer with
1363+
// no additional processing.
1364+
for (uint8_t tg = 0; tg < Channels; tg++)
13611365
{
1362-
// TGs will alternate on L/R channels for each output
1363-
// reading directly from the TG OutputLevel buffer with
1364-
// no additional processing.
1365-
for (uint8_t tg = 0; tg < Channels; tg++)
1366-
{
1367-
if(nMasterVolume >0.0 && nMasterVolume <1.0)
1368-
{
1369-
tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume;
1370-
}
1371-
else if(nMasterVolume == 1.0)
1372-
{
1373-
tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i];
1374-
}
1375-
}
1366+
tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume;
13761367
}
1377-
arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels);
1378-
}
1379-
else
1380-
{
1381-
arm_fill_q31(0, tmp_int, nFrames*Channels);
13821368
}
13831369

1370+
arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels);
1371+
13841372
// Prevent PCM510x analog mute from kicking in
13851373
for (uint8_t tg = 0; tg < Channels; tg++)
13861374
{
@@ -1404,77 +1392,60 @@ void CMiniDexed::ProcessSound (void)
14041392
float32_t tmp_float[nFrames*2];
14051393
int32_t tmp_int[nFrames*2];
14061394

1407-
if(nMasterVolume > 0.0)
1408-
{
1409-
for (uint8_t i = 0; i < m_nToneGenerators; i++)
1410-
{
1411-
tg_mixer->doAddMix(i,m_OutputLevel[i]);
1412-
reverb_send_mixer->doAddMix(i,m_OutputLevel[i]);
1413-
}
1414-
// END TG mixing
1395+
// get the mix buffer of all TGs
1396+
float32_t *SampleBuffer[2];
1397+
tg_mixer->getBuffers(SampleBuffer);
14151398

1416-
// BEGIN create SampleBuffer for holding audio data
1417-
float32_t SampleBuffer[2][nFrames];
1418-
// END create SampleBuffer for holding audio data
1399+
tg_mixer->zeroFill();
14191400

1420-
// get the mix of all TGs
1421-
tg_mixer->getMix(SampleBuffer[indexL], SampleBuffer[indexR]);
1401+
for (uint8_t i = 0; i < m_nToneGenerators; i++)
1402+
{
1403+
tg_mixer->doAddMix(i,m_OutputLevel[i]);
1404+
}
1405+
// END TG mixing
14221406

1423-
// BEGIN adding reverb
1424-
if (m_nParameter[ParameterReverbEnable])
1425-
{
1426-
float32_t ReverbBuffer[2][nFrames];
1427-
float32_t ReverbSendBuffer[2][nFrames];
1407+
// BEGIN adding reverb
1408+
if (m_nParameter[ParameterReverbEnable])
1409+
{
1410+
float32_t ReverbBuffer[2][nFrames];
14281411

1429-
arm_fill_f32(0.0f, ReverbBuffer[indexL], nFrames);
1430-
arm_fill_f32(0.0f, ReverbBuffer[indexR], nFrames);
1431-
arm_fill_f32(0.0f, ReverbSendBuffer[indexR], nFrames);
1432-
arm_fill_f32(0.0f, ReverbSendBuffer[indexL], nFrames);
1412+
float32_t *ReverbSendBuffer[2];
1413+
reverb_send_mixer->getBuffers(ReverbSendBuffer);
14331414

1434-
m_ReverbSpinLock.Acquire ();
1415+
reverb_send_mixer->zeroFill();
14351416

1436-
reverb_send_mixer->getMix(ReverbSendBuffer[indexL], ReverbSendBuffer[indexR]);
1437-
reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames);
1417+
for (uint8_t i = 0; i < m_nToneGenerators; i++)
1418+
{
1419+
reverb_send_mixer->doAddMix(i,m_OutputLevel[i]);
1420+
}
14381421

1439-
// scale down and add left reverb buffer by reverb level
1440-
arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames);
1441-
arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames);
1442-
// scale down and add right reverb buffer by reverb level
1443-
arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames);
1444-
arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames);
1422+
m_ReverbSpinLock.Acquire ();
14451423

1446-
m_ReverbSpinLock.Release ();
1447-
}
1448-
// END adding reverb
1424+
reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames);
14491425

1450-
// swap stereo channels if needed prior to writing back out
1451-
if (m_bChannelsSwapped)
1452-
{
1453-
indexL=1;
1454-
indexR=0;
1455-
}
1426+
// scale down and add left reverb buffer by reverb level
1427+
arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames);
1428+
arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames);
1429+
// scale down and add right reverb buffer by reverb level
1430+
arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames);
1431+
arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames);
14561432

1457-
// Convert dual float array (left, right) to single int16 array (left/right)
1458-
for(uint16_t i=0; i<nFrames;i++)
1459-
{
1460-
if(nMasterVolume >0.0 && nMasterVolume <1.0)
1461-
{
1462-
tmp_float[i*2]=SampleBuffer[indexL][i] * nMasterVolume;
1463-
tmp_float[(i*2)+1]=SampleBuffer[indexR][i] * nMasterVolume;
1464-
}
1465-
else if(nMasterVolume == 1.0)
1466-
{
1467-
tmp_float[i*2]=SampleBuffer[indexL][i];
1468-
tmp_float[(i*2)+1]=SampleBuffer[indexR][i];
1469-
}
1470-
}
1471-
arm_float_to_q23(tmp_float,tmp_int,nFrames*2);
1433+
m_ReverbSpinLock.Release ();
14721434
}
1473-
else
1435+
// END adding reverb
1436+
1437+
// swap stereo channels if needed prior to writing back out
1438+
if (m_bChannelsSwapped)
14741439
{
1475-
arm_fill_q31(0, tmp_int, nFrames * 2);
1440+
indexL=1;
1441+
indexR=0;
14761442
}
14771443

1444+
// Convert dual float array (left, right) to a single interleaved float array (left/right), scaled by the master volume
1445+
arm_scale_zip_f32(SampleBuffer[indexL], SampleBuffer[indexR], nMasterVolume, tmp_float, nFrames);
1446+
1447+
arm_float_to_q23(tmp_float,tmp_int,nFrames*2);
1448+
14781449
// Prevent PCM510x analog mute from kicking in
14791450
if (tmp_int[nFrames * 2 - 1] == 0)
14801451
{

0 commit comments

Comments
 (0)