mirror of
https://github.com/dolphin-emu/dolphin.git
synced 2024-12-27 08:34:32 +01:00
1088 lines
34 KiB
C++
1088 lines
34 KiB
C++
///////////////////////////////////////////////////////////////////////////////
|
|
///
|
|
/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
|
|
/// while maintaining the original pitch by using a time domain WSOLA-like
|
|
/// method with several performance-increasing tweaks.
|
|
///
|
|
/// Notes : MMX optimized functions reside in a separate, platform-specific
|
|
/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'.
|
|
///
|
|
/// This source file contains OpenMP optimizations that allow speeding up the
|
|
/// cross-correlation algorithm by executing it in several threads / CPU cores
|
|
/// in parallel. See the following article link for more detailed discussion
|
|
/// about SoundTouch OpenMP optimizations:
|
|
/// http://www.softwarecoven.com/parallel-computing-in-embedded-mobile-devices
|
|
///
|
|
/// Author : Copyright (c) Olli Parviainen
|
|
/// Author e-mail : oparviai 'at' iki.fi
|
|
/// SoundTouch WWW: http://www.surina.net/soundtouch
|
|
///
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// License :
|
|
//
|
|
// SoundTouch audio processing library
|
|
// Copyright (c) Olli Parviainen
|
|
//
|
|
// This library is free software; you can redistribute it and/or
|
|
// modify it under the terms of the GNU Lesser General Public
|
|
// License as published by the Free Software Foundation; either
|
|
// version 2.1 of the License, or (at your option) any later version.
|
|
//
|
|
// This library is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
// Lesser General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Lesser General Public
|
|
// License along with this library; if not, write to the Free Software
|
|
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
//
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
#include <assert.h>
|
|
#include <math.h>
|
|
#include <float.h>
|
|
|
|
#include "STTypes.h"
|
|
#include "cpu_detect.h"
|
|
#include "TDStretch.h"
|
|
|
|
using namespace soundtouch;
|
|
|
|
// File-local two-argument maximum. Being a macro, it may evaluate each
// argument twice, so pass only side-effect-free expressions.
#define max(x, y) (((x) > (y)) ? (x) : (y))
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Implementation of the class 'TDStretch'
|
|
*
|
|
*****************************************************************************/
|
|
|
|
|
|
// Constructs a two-channel stretcher with automatic sequence / seek-window
// selection, quick seek disabled and tempo 1.0, then clears all buffers.
TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
{
    // No overlap buffer exists yet; it is allocated once setParameters()
    // below has worked out the overlap length.
    pMidBufferUnaligned = nullptr;
    pMidBuffer = nullptr;
    overlapLength = 0;

    channels = 2;
    bQuickSeek = false;

    bAutoSeekSetting = true;
    bAutoSeqSetting = true;

    // 'tempo' must hold a value before setParameters(), which passes it on
    // to setTempo().
    tempo = 1.0f;
    setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
    setTempo(1.0f);

    clear();
}
|
|
|
|
|
|
|
|
// Frees the overlap buffer. Only the unaligned pointer is deleted:
// 'pMidBuffer' is just an aligned address inside the same allocation.
TDStretch::~TDStretch()
{
    delete[] this->pMidBufferUnaligned;
}
|
|
|
|
|
|
|
|
// Sets routine control parameters. These control are certain time constants
|
|
// defining how the sound is stretched to the desired duration.
|
|
//
|
|
// 'sampleRate' = sample rate of the sound
|
|
// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
|
|
// 'seekwindowMS' = seeking window length for scanning the best overlapping
|
|
// position (default = 28 ms)
|
|
// 'overlapMS' = overlapping length (default = 12 ms)
|
|
|
|
void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
|
|
int aSeekWindowMS, int aOverlapMS)
|
|
{
|
|
// accept only positive parameter values - if zero or negative, use old values instead
|
|
if (aSampleRate > 0)
|
|
{
|
|
if (aSampleRate > 192000) ST_THROW_RT_ERROR("Error: Excessive samplerate");
|
|
this->sampleRate = aSampleRate;
|
|
}
|
|
|
|
if (aOverlapMS > 0) this->overlapMs = aOverlapMS;
|
|
|
|
if (aSequenceMS > 0)
|
|
{
|
|
this->sequenceMs = aSequenceMS;
|
|
bAutoSeqSetting = false;
|
|
}
|
|
else if (aSequenceMS == 0)
|
|
{
|
|
// if zero, use automatic setting
|
|
bAutoSeqSetting = true;
|
|
}
|
|
|
|
if (aSeekWindowMS > 0)
|
|
{
|
|
this->seekWindowMs = aSeekWindowMS;
|
|
bAutoSeekSetting = false;
|
|
}
|
|
else if (aSeekWindowMS == 0)
|
|
{
|
|
// if zero, use automatic setting
|
|
bAutoSeekSetting = true;
|
|
}
|
|
|
|
calcSeqParameters();
|
|
|
|
calculateOverlapLength(overlapMs);
|
|
|
|
// set tempo to recalculate 'sampleReq'
|
|
setTempo(tempo);
|
|
}
|
|
|
|
|
|
|
|
/// Get routine control parameters, see setParameters() function.
|
|
/// Any of the parameters to this function can be nullptr, in such case corresponding parameter
|
|
/// value isn't returned.
|
|
void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
|
|
{
|
|
if (pSampleRate)
|
|
{
|
|
*pSampleRate = sampleRate;
|
|
}
|
|
|
|
if (pSequenceMs)
|
|
{
|
|
*pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
|
|
}
|
|
|
|
if (pSeekWindowMs)
|
|
{
|
|
*pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
|
|
}
|
|
|
|
if (pOverlapMs)
|
|
{
|
|
*pOverlapMs = overlapMs;
|
|
}
|
|
}
|
|
|
|
|
|
// Overlaps samples in 'midBuffer' with the samples in 'pInput'
|
|
void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
|
|
{
|
|
int i;
|
|
SAMPLETYPE m1, m2;
|
|
|
|
m1 = (SAMPLETYPE)0;
|
|
m2 = (SAMPLETYPE)overlapLength;
|
|
|
|
for (i = 0; i < overlapLength ; i ++)
|
|
{
|
|
pOutput[i] = (pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength;
|
|
m1 += 1;
|
|
m2 -= 1;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void TDStretch::clearMidBuffer()
|
|
{
|
|
memset(pMidBuffer, 0, channels * sizeof(SAMPLETYPE) * overlapLength);
|
|
}
|
|
|
|
|
|
void TDStretch::clearInput()
|
|
{
|
|
inputBuffer.clear();
|
|
clearMidBuffer();
|
|
isBeginning = true;
|
|
maxnorm = 0;
|
|
maxnormf = 1e8;
|
|
skipFract = 0;
|
|
}
|
|
|
|
|
|
// Clears the sample buffers
|
|
void TDStretch::clear()
|
|
{
|
|
outputBuffer.clear();
|
|
clearInput();
|
|
}
|
|
|
|
|
|
|
|
// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
|
|
// to enable
|
|
void TDStretch::enableQuickSeek(bool enable)
|
|
{
|
|
bQuickSeek = enable;
|
|
}
|
|
|
|
|
|
// Returns nonzero if the quick seeking algorithm is enabled.
|
|
bool TDStretch::isQuickSeekEnabled() const
|
|
{
|
|
return bQuickSeek;
|
|
}
|
|
|
|
|
|
// Seeks for the optimal overlap-mixing position.
|
|
int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
|
|
{
|
|
if (bQuickSeek)
|
|
{
|
|
return seekBestOverlapPositionQuick(refPos);
|
|
}
|
|
else
|
|
{
|
|
return seekBestOverlapPositionFull(refPos);
|
|
}
|
|
}
|
|
|
|
|
|
// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
|
|
// of 'ovlPos'.
|
|
inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
|
|
{
|
|
#ifndef USE_MULTICH_ALWAYS
|
|
if (channels == 1)
|
|
{
|
|
// mono sound.
|
|
overlapMono(pOutput, pInput + ovlPos);
|
|
}
|
|
else if (channels == 2)
|
|
{
|
|
// stereo sound
|
|
overlapStereo(pOutput, pInput + 2 * ovlPos);
|
|
}
|
|
else
|
|
#endif // USE_MULTICH_ALWAYS
|
|
{
|
|
assert(channels > 0);
|
|
overlapMulti(pOutput, pInput + channels * ovlPos);
|
|
}
|
|
}
|
|
|
|
|
|
// Seeks for the optimal overlap-mixing position. The 'stereo' version of the
|
|
// routine
|
|
//
|
|
// The best position is determined as the position where the two overlapped
|
|
// sample sequences are 'most alike', in terms of the highest cross-correlation
|
|
// value over the overlapping period
|
|
int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
|
|
{
|
|
int bestOffs;
|
|
double bestCorr;
|
|
int i;
|
|
double norm;
|
|
|
|
bestCorr = -FLT_MAX;
|
|
bestOffs = 0;
|
|
|
|
// Scans for the best correlation value by testing each possible position
|
|
// over the permitted range.
|
|
bestCorr = calcCrossCorr(refPos, pMidBuffer, norm);
|
|
bestCorr = (bestCorr + 0.1) * 0.75;
|
|
|
|
#pragma omp parallel for
|
|
for (i = 1; i < seekLength; i ++)
|
|
{
|
|
double corr;
|
|
// Calculates correlation value for the mixing position corresponding to 'i'
|
|
#if defined(_OPENMP) || defined(ST_SIMD_AVOID_UNALIGNED)
|
|
// in parallel OpenMP mode, can't use norm accumulator version as parallel executor won't
|
|
// iterate the loop in sequential order
|
|
// in SIMD mode, avoid accumulator version to allow avoiding unaligned positions
|
|
corr = calcCrossCorr(refPos + channels * i, pMidBuffer, norm);
|
|
#else
|
|
// In non-parallel version call "calcCrossCorrAccumulate" that is otherwise same
|
|
// as "calcCrossCorr", but saves time by reusing & updating previously stored
|
|
// "norm" value
|
|
corr = calcCrossCorrAccumulate(refPos + channels * i, pMidBuffer, norm);
|
|
#endif
|
|
// heuristic rule to slightly favour values close to mid of the range
|
|
double tmp = (double)(2 * i - seekLength) / (double)seekLength;
|
|
corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
|
|
|
|
// Checks for the highest correlation value
|
|
if (corr > bestCorr)
|
|
{
|
|
// For optimal performance, enter critical section only in case that best value found.
|
|
// in such case repeat 'if' condition as it's possible that parallel execution may have
|
|
// updated the bestCorr value in the mean time
|
|
#pragma omp critical
|
|
if (corr > bestCorr)
|
|
{
|
|
bestCorr = corr;
|
|
bestOffs = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
|
adaptNormalizer();
|
|
#endif
|
|
|
|
// clear cross correlation routine state if necessary (is so e.g. in MMX routines).
|
|
clearCrossCorrState();
|
|
|
|
return bestOffs;
|
|
}
|
|
|
|
|
|
// Quick seek algorithm for improved runtime-performance: First roughly scans through the
|
|
// correlation area, and then scan surroundings of two best preliminary correlation candidates
|
|
// with improved precision
|
|
//
|
|
// Based on testing:
|
|
// - This algorithm gives on average 99% as good match as the full algorithm
|
|
// - this quick seek algorithm finds the best match on ~90% of cases
|
|
// - on those 10% of cases when this algorithm doesn't find best match,
|
|
// it still finds on average ~90% match vs. the best possible match
|
|
int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
|
|
{
|
|
#define _MIN(a, b) (((a) < (b)) ? (a) : (b))
|
|
#define SCANSTEP 16
|
|
#define SCANWIND 8
|
|
|
|
int bestOffs;
|
|
int i;
|
|
int bestOffs2;
|
|
float bestCorr, corr;
|
|
float bestCorr2;
|
|
double norm;
|
|
|
|
// note: 'float' types used in this function in case that the platform would need to use software-fp
|
|
|
|
bestCorr =
|
|
bestCorr2 = -FLT_MAX;
|
|
bestOffs =
|
|
bestOffs2 = SCANWIND;
|
|
|
|
// Scans for the best correlation value by testing each possible position
|
|
// over the permitted range. Look for two best matches on the first pass to
|
|
// increase possibility of ideal match.
|
|
//
|
|
// Begin from "SCANSTEP" instead of SCANWIND to make the calculation
|
|
// catch the 'middlepoint' of seekLength vector as that's the a-priori
|
|
// expected best match position
|
|
//
|
|
// Roughly:
|
|
// - 15% of cases find best result directly on the first round,
|
|
// - 75% cases find better match on 2nd round around the best match from 1st round
|
|
// - 10% cases find better match on 2nd round around the 2nd-best-match from 1st round
|
|
for (i = SCANSTEP; i < seekLength - SCANWIND - 1; i += SCANSTEP)
|
|
{
|
|
// Calculates correlation value for the mixing position corresponding
|
|
// to 'i'
|
|
corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
|
|
// heuristic rule to slightly favour values close to mid of the seek range
|
|
float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
|
|
corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
|
|
|
|
// Checks for the highest correlation value
|
|
if (corr > bestCorr)
|
|
{
|
|
// found new best match. keep the previous best as 2nd best match
|
|
bestCorr2 = bestCorr;
|
|
bestOffs2 = bestOffs;
|
|
bestCorr = corr;
|
|
bestOffs = i;
|
|
}
|
|
else if (corr > bestCorr2)
|
|
{
|
|
// not new best, but still new 2nd best match
|
|
bestCorr2 = corr;
|
|
bestOffs2 = i;
|
|
}
|
|
}
|
|
|
|
// Scans surroundings of the found best match with small stepping
|
|
int end = _MIN(bestOffs + SCANWIND + 1, seekLength);
|
|
for (i = bestOffs - SCANWIND; i < end; i++)
|
|
{
|
|
if (i == bestOffs) continue; // this offset already calculated, thus skip
|
|
|
|
// Calculates correlation value for the mixing position corresponding
|
|
// to 'i'
|
|
corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
|
|
// heuristic rule to slightly favour values close to mid of the range
|
|
float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
|
|
corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
|
|
|
|
// Checks for the highest correlation value
|
|
if (corr > bestCorr)
|
|
{
|
|
bestCorr = corr;
|
|
bestOffs = i;
|
|
}
|
|
}
|
|
|
|
// Scans surroundings of the 2nd best match with small stepping
|
|
end = _MIN(bestOffs2 + SCANWIND + 1, seekLength);
|
|
for (i = bestOffs2 - SCANWIND; i < end; i++)
|
|
{
|
|
if (i == bestOffs2) continue; // this offset already calculated, thus skip
|
|
|
|
// Calculates correlation value for the mixing position corresponding
|
|
// to 'i'
|
|
corr = (float)calcCrossCorr(refPos + channels*i, pMidBuffer, norm);
|
|
// heuristic rule to slightly favour values close to mid of the range
|
|
float tmp = (float)(2 * i - seekLength - 1) / (float)seekLength;
|
|
corr = ((corr + 0.1f) * (1.0f - 0.25f * tmp * tmp));
|
|
|
|
// Checks for the highest correlation value
|
|
if (corr > bestCorr)
|
|
{
|
|
bestCorr = corr;
|
|
bestOffs = i;
|
|
}
|
|
}
|
|
|
|
// clear cross correlation routine state if necessary (is so e.g. in MMX routines).
|
|
clearCrossCorrState();
|
|
|
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
|
adaptNormalizer();
|
|
#endif
|
|
|
|
return bestOffs;
|
|
}
|
|
|
|
|
|
|
|
|
|
/// For integer algorithm: adapt normalization factor divider with music so that
|
|
/// it'll not be pessimistically restrictive that can degrade quality on quieter sections
|
|
/// yet won't cause integer overflows either
|
|
void TDStretch::adaptNormalizer()
|
|
{
|
|
// Do not adapt normalizer over too silent sequences to avoid averaging filter depleting to
|
|
// too low values during pauses in music
|
|
if ((maxnorm > 1000) || (maxnormf > 40000000))
|
|
{
|
|
//norm averaging filter
|
|
maxnormf = 0.9f * maxnormf + 0.1f * (float)maxnorm;
|
|
|
|
if ((maxnorm > 800000000) && (overlapDividerBitsNorm < 16))
|
|
{
|
|
// large values, so increase divider
|
|
overlapDividerBitsNorm++;
|
|
if (maxnorm > 1600000000) overlapDividerBitsNorm++; // extra large value => extra increase
|
|
}
|
|
else if ((maxnormf < 1000000) && (overlapDividerBitsNorm > 0))
|
|
{
|
|
// extra small values, decrease divider
|
|
overlapDividerBitsNorm--;
|
|
}
|
|
}
|
|
|
|
maxnorm = 0;
|
|
}
|
|
|
|
|
|
/// clear cross correlation routine state if necessary
|
|
void TDStretch::clearCrossCorrState()
|
|
{
|
|
// default implementation is empty.
|
|
}
|
|
|
|
|
|
/// Calculates processing sequence length according to tempo setting
|
|
void TDStretch::calcSeqParameters()
|
|
{
|
|
// Adjust tempo param according to tempo, so that variating processing sequence length is used
|
|
// at various tempo settings, between the given low...top limits
|
|
#define AUTOSEQ_TEMPO_LOW 0.5 // auto setting low tempo range (-50%)
|
|
#define AUTOSEQ_TEMPO_TOP 2.0 // auto setting top tempo range (+100%)
|
|
|
|
// sequence-ms setting values at above low & top tempo
|
|
#define AUTOSEQ_AT_MIN 90.0
|
|
#define AUTOSEQ_AT_MAX 40.0
|
|
#define AUTOSEQ_K ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
|
|
#define AUTOSEQ_C (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
|
|
|
|
// seek-window-ms setting values at above low & top tempoq
|
|
#define AUTOSEEK_AT_MIN 20.0
|
|
#define AUTOSEEK_AT_MAX 15.0
|
|
#define AUTOSEEK_K ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
|
|
#define AUTOSEEK_C (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
|
|
|
|
#define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
|
|
|
|
double seq, seek;
|
|
|
|
if (bAutoSeqSetting)
|
|
{
|
|
seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
|
|
seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
|
|
sequenceMs = (int)(seq + 0.5);
|
|
}
|
|
|
|
if (bAutoSeekSetting)
|
|
{
|
|
seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
|
|
seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
|
|
seekWindowMs = (int)(seek + 0.5);
|
|
}
|
|
|
|
// Update seek window lengths
|
|
seekWindowLength = (sampleRate * sequenceMs) / 1000;
|
|
if (seekWindowLength < 2 * overlapLength)
|
|
{
|
|
seekWindowLength = 2 * overlapLength;
|
|
}
|
|
seekLength = (sampleRate * seekWindowMs) / 1000;
|
|
}
|
|
|
|
|
|
|
|
// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
|
|
// tempo, larger faster tempo.
|
|
void TDStretch::setTempo(double newTempo)
|
|
{
|
|
int intskip;
|
|
|
|
tempo = newTempo;
|
|
|
|
// Calculate new sequence duration
|
|
calcSeqParameters();
|
|
|
|
// Calculate ideal skip length (according to tempo value)
|
|
nominalSkip = tempo * (seekWindowLength - overlapLength);
|
|
intskip = (int)(nominalSkip + 0.5);
|
|
|
|
// Calculate how many samples are needed in the 'inputBuffer' to
|
|
// process another batch of samples
|
|
//sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
|
|
sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
|
|
}
|
|
|
|
|
|
|
|
// Sets the number of channels, 1 = mono, 2 = stereo
|
|
void TDStretch::setChannels(int numChannels)
|
|
{
|
|
if (!verifyNumberOfChannels(numChannels) ||
|
|
(channels == numChannels)) return;
|
|
|
|
channels = numChannels;
|
|
inputBuffer.setChannels(channels);
|
|
outputBuffer.setChannels(channels);
|
|
|
|
// re-init overlap/buffer
|
|
overlapLength=0;
|
|
setParameters(sampleRate);
|
|
}
|
|
|
|
|
|
// nominal tempo, no need for processing, just pass the samples through
|
|
// to outputBuffer
|
|
/*
|
|
void TDStretch::processNominalTempo()
|
|
{
|
|
assert(tempo == 1.0f);
|
|
|
|
if (bMidBufferDirty)
|
|
{
|
|
// If there are samples in pMidBuffer waiting for overlapping,
|
|
// do a single sliding overlapping with them in order to prevent a
|
|
// clicking distortion in the output sound
|
|
if (inputBuffer.numSamples() < overlapLength)
|
|
{
|
|
// wait until we've got overlapLength input samples
|
|
return;
|
|
}
|
|
// Mix the samples in the beginning of 'inputBuffer' with the
|
|
// samples in 'midBuffer' using sliding overlapping
|
|
overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
|
|
outputBuffer.putSamples(overlapLength);
|
|
inputBuffer.receiveSamples(overlapLength);
|
|
clearMidBuffer();
|
|
// now we've caught the nominal sample flow and may switch to
|
|
// bypass mode
|
|
}
|
|
|
|
// Simply bypass samples from input to output
|
|
outputBuffer.moveSamples(inputBuffer);
|
|
}
|
|
*/
|
|
|
|
|
|
// Processes as many processing frames of the samples 'inputBuffer', store
|
|
// the result into 'outputBuffer'
|
|
void TDStretch::processSamples()
|
|
{
|
|
int ovlSkip;
|
|
int offset = 0;
|
|
int temp;
|
|
|
|
/* Removed this small optimization - can introduce a click to sound when tempo setting
|
|
crosses the nominal value
|
|
if (tempo == 1.0f)
|
|
{
|
|
// tempo not changed from the original, so bypass the processing
|
|
processNominalTempo();
|
|
return;
|
|
}
|
|
*/
|
|
|
|
// Process samples as long as there are enough samples in 'inputBuffer'
|
|
// to form a processing frame.
|
|
while ((int)inputBuffer.numSamples() >= sampleReq)
|
|
{
|
|
if (isBeginning == false)
|
|
{
|
|
// apart from the very beginning of the track,
|
|
// scan for the best overlapping position & do overlap-add
|
|
offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
|
|
|
|
// Mix the samples in the 'inputBuffer' at position of 'offset' with the
|
|
// samples in 'midBuffer' using sliding overlapping
|
|
// ... first partially overlap with the end of the previous sequence
|
|
// (that's in 'midBuffer')
|
|
overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
|
|
outputBuffer.putSamples((uint)overlapLength);
|
|
offset += overlapLength;
|
|
}
|
|
else
|
|
{
|
|
// Adjust processing offset at beginning of track by not perform initial overlapping
|
|
// and compensating that in the 'input buffer skip' calculation
|
|
isBeginning = false;
|
|
int skip = (int)(tempo * overlapLength + 0.5 * seekLength + 0.5);
|
|
|
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
|
// in SIMD mode, round the skip amount to value corresponding to aligned memory address
|
|
if (channels == 1)
|
|
{
|
|
skip &= -4;
|
|
}
|
|
else if (channels == 2)
|
|
{
|
|
skip &= -2;
|
|
}
|
|
#endif
|
|
skipFract -= skip;
|
|
if (skipFract <= -nominalSkip)
|
|
{
|
|
skipFract = -nominalSkip;
|
|
}
|
|
}
|
|
|
|
// ... then copy sequence samples from 'inputBuffer' to output:
|
|
|
|
// crosscheck that we don't have buffer overflow...
|
|
if ((int)inputBuffer.numSamples() < (offset + seekWindowLength - overlapLength))
|
|
{
|
|
continue; // just in case, shouldn't really happen
|
|
}
|
|
|
|
// length of sequence
|
|
temp = (seekWindowLength - 2 * overlapLength);
|
|
outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * offset, (uint)temp);
|
|
|
|
// Copies the end of the current sequence from 'inputBuffer' to
|
|
// 'midBuffer' for being mixed with the beginning of the next
|
|
// processing sequence and so on
|
|
assert((offset + temp + overlapLength) <= (int)inputBuffer.numSamples());
|
|
memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp),
|
|
channels * sizeof(SAMPLETYPE) * overlapLength);
|
|
|
|
// Remove the processed samples from the input buffer. Update
|
|
// the difference between integer & nominal skip step to 'skipFract'
|
|
// in order to prevent the error from accumulating over time.
|
|
skipFract += nominalSkip; // real skip size
|
|
ovlSkip = (int)skipFract; // rounded to integer skip
|
|
skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip
|
|
inputBuffer.receiveSamples((uint)ovlSkip);
|
|
}
|
|
}
|
|
|
|
|
|
// Adds 'numsamples' pcs of samples from the 'samples' memory position into
|
|
// the input of the object.
|
|
void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
|
|
{
|
|
// Add the samples into the input buffer
|
|
inputBuffer.putSamples(samples, nSamples);
|
|
// Process the samples in input buffer
|
|
processSamples();
|
|
}
|
|
|
|
|
|
|
|
/// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
|
|
void TDStretch::acceptNewOverlapLength(int newOverlapLength)
|
|
{
|
|
int prevOvl;
|
|
|
|
assert(newOverlapLength >= 0);
|
|
prevOvl = overlapLength;
|
|
overlapLength = newOverlapLength;
|
|
|
|
if (overlapLength > prevOvl)
|
|
{
|
|
delete[] pMidBufferUnaligned;
|
|
|
|
pMidBufferUnaligned = new SAMPLETYPE[overlapLength * channels + 16 / sizeof(SAMPLETYPE)];
|
|
// ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency
|
|
pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned);
|
|
|
|
clearMidBuffer();
|
|
}
|
|
}
|
|
|
|
|
|
// Operator 'new' is overloaded so that it automatically creates a suitable instance
|
|
// depending on if we've a MMX/SSE/etc-capable CPU available or not.
|
|
void * TDStretch::operator new(size_t)
|
|
{
|
|
// Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
|
|
ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
|
|
return newInstance();
|
|
}
|
|
|
|
|
|
// Creates a TDStretch instance, choosing the MMX or SSE specialisation when
// it is compiled in and the CPU reports the matching extension, and the plain
// C++ implementation otherwise. The global '::new' is used because the
// class-level operator new is reserved for rejecting direct construction.
TDStretch * TDStretch::newInstance()
{
    const uint uExtensions = detectCPUextensions();
    (void)uExtensions;  // unused when neither MMX nor SSE support is compiled in

#ifdef SOUNDTOUCH_ALLOW_MMX
    // MMX routines available only with integer sample types
    if (uExtensions & SUPPORT_MMX)
    {
        return ::new TDStretchMMX;
    }
#endif // SOUNDTOUCH_ALLOW_MMX

#ifdef SOUNDTOUCH_ALLOW_SSE
    if (uExtensions & SUPPORT_SSE)
    {
        // SSE support
        return ::new TDStretchSSE;
    }
#endif // SOUNDTOUCH_ALLOW_SSE

    // ISA optimizations not supported, use plain C version
    return ::new TDStretch;
}
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Integer arithmetic specific algorithm implementations.
|
|
//
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef SOUNDTOUCH_INTEGER_SAMPLES
|
|
|
|
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
|
|
// version of the routine.
|
|
void TDStretch::overlapStereo(short *poutput, const short *input) const
|
|
{
|
|
int i;
|
|
short temp;
|
|
int cnt2;
|
|
|
|
for (i = 0; i < overlapLength ; i ++)
|
|
{
|
|
temp = (short)(overlapLength - i);
|
|
cnt2 = 2 * i;
|
|
poutput[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength;
|
|
poutput[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
|
|
}
|
|
}
|
|
|
|
|
|
// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Multi'
|
|
// version of the routine.
|
|
void TDStretch::overlapMulti(short *poutput, const short *input) const
|
|
{
|
|
short m1;
|
|
int i = 0;
|
|
|
|
for (m1 = 0; m1 < overlapLength; m1 ++)
|
|
{
|
|
short m2 = (short)(overlapLength - m1);
|
|
for (int c = 0; c < channels; c ++)
|
|
{
|
|
poutput[i] = (input[i] * m1 + pMidBuffer[i] * m2) / overlapLength;
|
|
i++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Calculates the x having the closest 2^x value for the given value, by
// rounding the base-2 logarithm of 'value'.
//
// Zero, negative and NaN inputs have no finite logarithm; converting the
// resulting infinity / NaN to int would be undefined, so 0 is returned for
// them instead.
static int _getClosest2Power(double value)
{
    if (!(value > 0.0))
    {
        return 0;
    }
    return (int)(log(value) / log(2.0) + 0.5);
}
|
|
|
|
|
|
/// Calculates overlap period length in samples.
|
|
/// Integer version rounds overlap length to closest power of 2
|
|
/// for a divide scaling operation.
|
|
void TDStretch::calculateOverlapLength(int aoverlapMs)
|
|
{
|
|
int newOvl;
|
|
|
|
assert(aoverlapMs >= 0);
|
|
|
|
// calculate overlap length so that it's power of 2 - thus it's easy to do
|
|
// integer division by right-shifting. Term "-1" at end is to account for
|
|
// the extra most significatnt bit left unused in result by signed multiplication
|
|
overlapDividerBitsPure = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1;
|
|
if (overlapDividerBitsPure > 9) overlapDividerBitsPure = 9;
|
|
if (overlapDividerBitsPure < 3) overlapDividerBitsPure = 3;
|
|
newOvl = (int)pow(2.0, (int)overlapDividerBitsPure + 1); // +1 => account for -1 above
|
|
|
|
acceptNewOverlapLength(newOvl);
|
|
|
|
overlapDividerBitsNorm = overlapDividerBitsPure;
|
|
|
|
// calculate sloping divider so that crosscorrelation operation won't
|
|
// overflow 32-bit register. Max. sum of the crosscorrelation sum without
|
|
// divider would be 2^30*(N^3-N)/3, where N = overlap length
|
|
slopingDivider = (newOvl * newOvl - 1) / 3;
|
|
}
|
|
|
|
|
|
double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare, double &norm)
|
|
{
|
|
long corr;
|
|
unsigned long lnorm;
|
|
int i;
|
|
|
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
|
// in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
|
|
if (((ulongptr)mixingPos) & 15) return -1e50;
|
|
#endif
|
|
|
|
// hint compiler autovectorization that loop length is divisible by 8
|
|
int ilength = (channels * overlapLength) & -8;
|
|
|
|
corr = lnorm = 0;
|
|
// Same routine for stereo and mono
|
|
for (i = 0; i < ilength; i += 2)
|
|
{
|
|
corr += (mixingPos[i] * compare[i] +
|
|
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
|
lnorm += (mixingPos[i] * mixingPos[i] +
|
|
mixingPos[i + 1] * mixingPos[i + 1]) >> overlapDividerBitsNorm;
|
|
// do intermediate scalings to avoid integer overflow
|
|
}
|
|
|
|
if (lnorm > maxnorm)
|
|
{
|
|
// modify 'maxnorm' inside critical section to avoid multi-access conflict if in OpenMP mode
|
|
#pragma omp critical
|
|
if (lnorm > maxnorm)
|
|
{
|
|
maxnorm = lnorm;
|
|
}
|
|
}
|
|
// Normalize result by dividing by sqrt(norm) - this step is easiest
|
|
// done using floating point operation
|
|
norm = (double)lnorm;
|
|
return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
|
|
}
|
|
|
|
|
|
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
|
|
double TDStretch::calcCrossCorrAccumulate(const short *mixingPos, const short *compare, double &norm)
|
|
{
|
|
long corr;
|
|
long lnorm;
|
|
int i;
|
|
|
|
// hint compiler autovectorization that loop length is divisible by 8
|
|
int ilength = (channels * overlapLength) & -8;
|
|
|
|
// cancel first normalizer tap from previous round
|
|
lnorm = 0;
|
|
for (i = 1; i <= channels; i ++)
|
|
{
|
|
lnorm -= (mixingPos[-i] * mixingPos[-i]) >> overlapDividerBitsNorm;
|
|
}
|
|
|
|
corr = 0;
|
|
// Same routine for stereo and mono.
|
|
for (i = 0; i < ilength; i += 2)
|
|
{
|
|
corr += (mixingPos[i] * compare[i] +
|
|
mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBitsNorm;
|
|
}
|
|
|
|
// update normalizer with last samples of this round
|
|
for (int j = 0; j < channels; j ++)
|
|
{
|
|
i --;
|
|
lnorm += (mixingPos[i] * mixingPos[i]) >> overlapDividerBitsNorm;
|
|
}
|
|
|
|
norm += (double)lnorm;
|
|
if (norm > maxnorm)
|
|
{
|
|
maxnorm = (unsigned long)norm;
|
|
}
|
|
|
|
// Normalize result by dividing by sqrt(norm) - this step is easiest
|
|
// done using floating point operation
|
|
return (double)corr / sqrt((norm < 1e-9) ? 1.0 : norm);
|
|
}
|
|
|
|
#endif // SOUNDTOUCH_INTEGER_SAMPLES
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Floating point arithmetic specific algorithm implementations.
|
|
//
|
|
|
|
#ifdef SOUNDTOUCH_FLOAT_SAMPLES
|
|
|
|
// Overlaps samples in 'midBuffer' with the samples in 'pInput'
|
|
void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
|
|
{
|
|
int i;
|
|
float fScale;
|
|
float f1;
|
|
float f2;
|
|
|
|
fScale = 1.0f / (float)overlapLength;
|
|
|
|
f1 = 0;
|
|
f2 = 1.0f;
|
|
|
|
for (i = 0; i < 2 * (int)overlapLength ; i += 2)
|
|
{
|
|
pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2;
|
|
pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2;
|
|
|
|
f1 += fScale;
|
|
f2 -= fScale;
|
|
}
|
|
}
|
|
|
|
|
|
// Overlaps samples in 'midBuffer' with the samples in 'input'.
|
|
void TDStretch::overlapMulti(float *pOutput, const float *pInput) const
|
|
{
|
|
int i;
|
|
float fScale;
|
|
float f1;
|
|
float f2;
|
|
|
|
fScale = 1.0f / (float)overlapLength;
|
|
|
|
f1 = 0;
|
|
f2 = 1.0f;
|
|
|
|
i=0;
|
|
for (int i2 = 0; i2 < overlapLength; i2 ++)
|
|
{
|
|
// note: Could optimize this slightly by taking into account that always channels > 2
|
|
for (int c = 0; c < channels; c ++)
|
|
{
|
|
pOutput[i] = pInput[i] * f1 + pMidBuffer[i] * f2;
|
|
i++;
|
|
}
|
|
f1 += fScale;
|
|
f2 -= fScale;
|
|
}
|
|
}
|
|
|
|
|
|
/// Calculates overlapInMsec period length in samples.
|
|
void TDStretch::calculateOverlapLength(int overlapInMsec)
|
|
{
|
|
int newOvl;
|
|
|
|
assert(overlapInMsec >= 0);
|
|
newOvl = (sampleRate * overlapInMsec) / 1000;
|
|
if (newOvl < 16) newOvl = 16;
|
|
|
|
// must be divisible by 8
|
|
newOvl -= newOvl % 8;
|
|
|
|
acceptNewOverlapLength(newOvl);
|
|
}
|
|
|
|
|
|
/// Calculate cross-correlation
|
|
double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare, double &anorm)
|
|
{
|
|
float corr;
|
|
float norm;
|
|
int i;
|
|
|
|
#ifdef ST_SIMD_AVOID_UNALIGNED
|
|
// in SIMD mode skip 'mixingPos' positions that aren't aligned to 16-byte boundary
|
|
if (((ulongptr)mixingPos) & 15) return -1e50;
|
|
#endif
|
|
|
|
// hint compiler autovectorization that loop length is divisible by 8
|
|
int ilength = (channels * overlapLength) & -8;
|
|
|
|
corr = norm = 0;
|
|
// Same routine for stereo and mono
|
|
for (i = 0; i < ilength; i ++)
|
|
{
|
|
corr += mixingPos[i] * compare[i];
|
|
norm += mixingPos[i] * mixingPos[i];
|
|
}
|
|
|
|
anorm = norm;
|
|
return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
|
|
}
|
|
|
|
|
|
/// Update cross-correlation by accumulating "norm" coefficient by previously calculated value
|
|
double TDStretch::calcCrossCorrAccumulate(const float *mixingPos, const float *compare, double &norm)
|
|
{
|
|
float corr;
|
|
int i;
|
|
|
|
corr = 0;
|
|
|
|
// cancel first normalizer tap from previous round
|
|
for (i = 1; i <= channels; i ++)
|
|
{
|
|
norm -= mixingPos[-i] * mixingPos[-i];
|
|
}
|
|
|
|
// hint compiler autovectorization that loop length is divisible by 8
|
|
int ilength = (channels * overlapLength) & -8;
|
|
|
|
// Same routine for stereo and mono
|
|
for (i = 0; i < ilength; i ++)
|
|
{
|
|
corr += mixingPos[i] * compare[i];
|
|
}
|
|
|
|
// update normalizer with last samples of this round
|
|
for (int j = 0; j < channels; j ++)
|
|
{
|
|
i --;
|
|
norm += mixingPos[i] * mixingPos[i];
|
|
}
|
|
|
|
return corr / sqrt((norm < 1e-9 ? 1.0 : norm));
|
|
}
|
|
|
|
|
|
#endif // SOUNDTOUCH_FLOAT_SAMPLES
|