260 lines
8.0 KiB
TypeScript
260 lines
8.0 KiB
TypeScript
/**
 * Copyright 2019 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
|
|
import * as DCT from 'dct';
|
|
import * as KissFFT from 'kissfft-js';
|
|
|
|
/** Default sampling rate, in Hz, used when building the mel filterbank. */
const SR = 16000;

/** Cache of periodic Hann windows, keyed by window length. */
const hannWindowMap: {[key: number]: number[]} = {};

/** Shared playback context; created lazily on the first playback request. */
let context: AudioContext;
|
|
|
|
/**
 * Audio feature-extraction helpers: Hann windowing, FFT, mel filterbank
 * construction/application, DCT, plus Web Audio playback and resampling.
 *
 * `createMelFilterbank` stores the bin range and the bin-to-channel mapping on
 * the instance; `applyFilterbank` reads them, so it must be called afterwards.
 */
export class AudioUtils {
  /** First FFT bin covered by the filterbank (set by createMelFilterbank). */
  startIndex = 0;
  /** Last FFT bin covered by the filterbank (set by createMelFilterbank). */
  endIndex = 0;
  /**
   * For each FFT bin, the mel channel whose downward slope the bin feeds.
   * -1 means "only the first channel's upward slope"; -2 marks an unused bin.
   */
  bandMapper: number[] = [];
  /**
   * Optional caller-supplied context. This class never assigns it; when set,
   * its sample rate is used as the default rate for playback.
   */
  context: AudioContext;

  constructor() {}

  /**
   * Gets the periodic Hann window of the given length. Windows are cached per
   * length, so repeated calls return the same array instance.
   * @param windowLength size of the Hann window
   * @returns the window coefficients
   */
  GetPeriodicHann(windowLength: number): number[] {
    if (!hannWindowMap[windowLength]) {
      const coefficients: number[] = [];
      for (let i = 0; i < windowLength; ++i) {
        coefficients[i] =
            0.5 - 0.5 * Math.cos((2 * Math.PI * i) / windowLength);
      }
      hannWindowMap[windowLength] = coefficients;
    }
    return hannWindowMap[windowLength];
  }

  /**
   * Calculates the FFT of a real signal. The signal is multiplied by a Hann
   * window and zero-padded up to the next power of two before transforming.
   * @param y time-domain samples (not modified)
   * @returns the transform produced by KissFFT, with element 1 moved to
   *     index fftSize and elements 1 and fftSize + 1 set to zero
   */
  fft(y: Float32Array) {
    const window = this.GetPeriodicHann(y.length);
    const fftSize = nextPowerOfTwo(y.length);

    // Typed arrays cannot grow, so the padded buffer is allocated at its final
    // size up front; everything past y.length stays zero.
    const padded = new Float32Array(fftSize);
    for (let i = 0; i < y.length; ++i) {
      padded[i] = y[i] * window[i];
    }

    const fftr = new KissFFT.FFTR(fftSize);
    const transform = fftr.forward(padded);
    // NOTE(review): transform is still written to after dispose(); confirm
    // that kissfft-js keeps the returned buffer valid once the FFTR is freed.
    fftr.dispose();
    transform[fftSize] = transform[1];
    transform[fftSize + 1] = 0;
    transform[1] = 0;
    return transform;
  }

  /**
   * Calculates the DCT encoding of a spectrogram frame.
   * @param y spectrogram data
   * @returns DCT-encoded data, scaled by sqrt(2 / y.length)
   */
  dct(y: Float32Array): Float32Array {
    const scale = Math.sqrt(2.0 / y.length);
    return DCT(y, scale);
  }

  /**
   * Given an interleaved complex array (y[2k] is real, y[2k + 1] is
   * imaginary), calculates the energy re^2 + im^2 of every pair.
   * @param y interleaved complex values
   * @returns energies; half the size of the input
   */
  fftEnergies(y: Float32Array): Float32Array {
    const pairCount = Math.floor(y.length / 2);
    const out = new Float32Array(pairCount);
    for (let k = 0; k < pairCount; k++) {
      const re = y[2 * k];
      const im = y[2 * k + 1];
      out[k] = re * re + im * im;
    }
    return out;
  }

  /**
   * Creates the mel filterbank weights for the given number of mel channels.
   * Also updates `startIndex`, `endIndex` and `bandMapper` on this instance.
   * @param fftSize FFT frequency bin count
   * @param [melCount] mel filterbank channel count
   * @param [lowHz] lower frequency limit of the filterbank
   * @param [highHz] upper frequency limit of the filterbank
   * @param [sr] sampling rate
   * @returns one weight per FFT bin: the share of that bin that goes to the
   *     channel in `bandMapper`; the remainder goes to the next channel
   */
  createMelFilterbank(
      fftSize: number, melCount = 40, lowHz = 20, highHz = 4000,
      sr = SR): Float32Array {
    const lowMel = this.hzToMel(lowHz);
    const highMel = this.hzToMel(highHz);

    // melCount + 1 linearly spaced mel values between lowMel (exclusive) and
    // highMel; the extra entry closes the last triangle.
    const melSpacing = (highMel - lowMel) / (melCount + 1);
    const mels: number[] = [];
    for (let i = 0; i < melCount + 1; ++i) {
      mels[i] = lowMel + (melSpacing * (i + 1));
    }

    // Always exclude DC; emulate HTK.
    const hzPerSbin = 0.5 * sr / (fftSize - 1);
    this.startIndex = Math.floor(1.5 + (lowHz / hzPerSbin));
    this.endIndex = Math.ceil(highHz / hzPerSbin);

    // bandMapper[bin] is the channel this bin contributes to on the right
    // (downward) side of its triangle; the bin therefore also contributes to
    // the left side of the next channel. The weight is the bin's relative
    // position between the two neighbouring mel centre frequencies.
    this.bandMapper = [];
    const weights = new Float32Array(fftSize);
    let channel = 0;
    for (let bin = 0; bin < fftSize; ++bin) {
      if (bin < this.startIndex || bin > this.endIndex) {
        this.bandMapper[bin] = -2;  // Unused Fourier coefficient; weight is 0.
        continue;
      }
      const melf = this.hzToMel(bin * hzPerSbin);
      while ((mels[channel] < melf) && (channel < melCount)) {
        ++channel;
      }
      const band = channel - 1;  // Can be == -1
      this.bandMapper[bin] = band;
      weights[bin] = band >= 0 ?
          (mels[band + 1] - melf) / (mels[band + 1] - mels[band]) :
          (mels[0] - melf) / (mels[0] - lowMel);
    }

    return weights;
  }

  /**
   * Applies a filterbank to FFT energies. Uses the `startIndex`, `endIndex`
   * and `bandMapper` stored by the last `createMelFilterbank` call.
   * @param fftEnergies energy per FFT bin; the square root is taken here
   * @param filterbank per-bin weights from `createMelFilterbank`
   * @param [melCount] mel filterbank channel count
   * @returns natural log of the accumulated magnitude for each of the
   *     melCount channels, floored at log(1e-12)
   */
  applyFilterbank(
      fftEnergies: Float32Array, filterbank: Float32Array,
      melCount = 40): Float32Array {
    const out = new Float32Array(melCount);
    for (let bin = this.startIndex; bin <= this.endIndex; bin++) {
      const specVal = Math.sqrt(fftEnergies[bin]);
      const weighted = specVal * filterbank[bin];
      const rightChannel = this.bandMapper[bin];
      if (rightChannel >= 0) {
        out[rightChannel] += weighted;  // Right side of triangle, downward slope
      }
      const leftChannel = rightChannel + 1;
      if (leftChannel < melCount) {
        out[leftChannel] += (specVal - weighted);  // Left side of triangle
      }
    }
    // Floor the values before taking the log so silent channels stay finite.
    for (let i = 0; i < out.length; ++i) {
      const val = out[i];
      out[i] = Math.log(val < 1e-12 ? 1e-12 : val);
    }
    return out;
  }

  /** Converts a frequency in Hz to the mel scale. */
  private hzToMel(hz: number) {
    return 1127.0 * Math.log(1.0 + hz / 700.0);
  }

  /**
   * Computes cepstral coefficients from mel filterbank energies by taking
   * their DCT.
   * @param melEnergies array of mel filterbank energies
   * @returns DCT of the energies
   */
  cepstrumFromEnergySpectrum(melEnergies: Float32Array) {
    return this.dct(melEnergies);
  }

  /**
   * Plays back mono audio data through the shared AudioContext, which is
   * created on first use.
   * @param buffer audio samples
   * @param [sampleRate] playback sample rate; defaults to the sample rate of
   *     `this.context` when the caller has set it, otherwise to that of the
   *     shared context
   */
  playbackArrayBuffer(buffer: Float32Array, sampleRate?: number) {
    if (!context) {
      context = new AudioContext();
    }
    if (!sampleRate) {
      // this.context is never assigned by this class, so fall back to the
      // shared context instead of dereferencing undefined.
      sampleRate =
          this.context ? this.context.sampleRate : context.sampleRate;
    }
    const audioBuffer = context.createBuffer(1, buffer.length, sampleRate);
    audioBuffer.getChannelData(0).set(buffer);

    const source = context.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(context.destination);
    source.start();
  }

  /**
   * Resamples Web Audio data to the target sample rate by rendering it
   * through a mono OfflineAudioContext.
   * @param audioBuffer audio data
   * @param targetSr target sample rate
   * @returns promise of the resampled audio; rejects if rendering fails
   */
  resampleWebAudio(audioBuffer: AudioBuffer, targetSr: number):
      Promise<AudioBuffer> {
    const sourceSr = audioBuffer.sampleRate;
    const lengthRes = audioBuffer.length * targetSr / sourceSr;
    const offlineCtx = new OfflineAudioContext(1, lengthRes, targetSr);

    return new Promise((resolve, reject) => {
      const bufferSource = offlineCtx.createBufferSource();
      bufferSource.buffer = audioBuffer;
      offlineCtx.oncomplete = (event) => {
        resolve(event.renderedBuffer);
      };
      bufferSource.connect(offlineCtx.destination);
      bufferSource.start();
      // Forward rendering failures so the caller's promise does not hang.
      offlineCtx.startRendering().catch(reject);
    });
  }
}
|
|
|
|
/**
 * Returns the smallest power of two that is greater than or equal to the
 * given number.
 *
 * Computed by repeated doubling rather than `1 << exponent`, because the shift
 * operator works on 32-bit integers and yields negative or wrapped results for
 * values above 2^30 and for fractional values below 0.5.
 * @param value the number to round up
 * @returns the next power of two; 1 for any value <= 1 and for NaN
 */
export function nextPowerOfTwo(value: number): number {
  let power = 1;
  while (power < value) {
    power *= 2;
  }
  return power;
}
|