I would like to detect features in audio files such as note positions and pitch. Essentia.js is a library which can do this. My code is:
const Essentia = require('essentia.js');
const fs = require('fs');
const glob = require('glob');
const path = require('path');
const wav = require('node-wav');
// One Essentia instance, constructed from the WASM backend, is reused for every file.
const essentia = new Essentia.Essentia(Essentia.EssentiaWASM);
// Glob pattern matching every .wav file at any depth below test/audio.
const audioDir = path.join('test', 'audio', '**', '*.wav');
const audioPaths = glob.globSync(audioDir);
const results = [];

// Analyze each matched file in turn and collect its note segmentation.
for (const audioPath of audioPaths) {
  console.log(`Analyzing ${audioPath}`);
  const decoded = wav.decode(fs.readFileSync(audioPath));
  // Only the first entry of channelData is analyzed; any further channels are ignored.
  const signal = essentia.arrayToVector(decoded.channelData[0]);
  // No parameters beyond the required inputs are passed to either algorithm.
  const { pitch } = essentia.PredominantPitchMelodia(signal);
  const notes = essentia.PitchContourSegmentation(pitch, signal);
  // Convert each output vector to a plain typed array before storing it.
  const durations = essentia.vectorToArray(notes.duration);
  const onsets = essentia.vectorToArray(notes.onset);
  const pitches = essentia.vectorToArray(notes.MIDIpitch);
  results.push({ audioPath, durations, onsets, pitches });
}

// Print one attribute at a time across all files so the same attribute
// can be compared side-by-side between files.
for (const attribute of ['durations', 'onsets', 'pitches']) {
  for (const result of results) {
    console.log(attribute, result.audioPath, result[attribute]);
  }
}
I'm running this code using these test files: https://github.com/kmturley/sfz-tools-core/tree/main/test/audio
However, I am getting inconsistent results: a different number of notes is detected for each file, even though the audio files contain the same number of notes. For example:
Durations
durations test/audio/velocity-sin.wav Float32Array(10) [
0.5224489569664001,
0.5369614362716675,
0.5369614362716675,
0.5369614362716675,
0.5369614362716675,
0.10158730298280716,
0.1160997748374939,
0.14222222566604614,
0.09868481010198593,
0.10739228874444962
]
durations test/audio/velocity-saw.wav Float32Array(7) [
0.5195465087890625,
0.528253972530365,
0.528253972530365,
0.528253972530365,
0.528253972530365,
0.12480725347995758,
0.15673469007015228
]
durations test/audio/velocity-piano.wav Float32Array(3) [
2.983764171600342,
2.002721071243286,
3.0040817260742188
]
Onsets
onsets test/audio/scale-square.wav Float32Array(12) [
0,
0.9839455485343933,
1.9824036359786987,
2.9808616638183594,
3.9851248264312744,
4.983582973480225,
5.604716777801514,
5.979138374328613,
6.65541934967041,
6.759909152984619,
6.977596282958984,
7.7380499839782715
]
onsets test/audio/scale-sin.wav Float32Array(11) [
0,
0.9839455485343933,
1.9824036359786987,
2.977959156036377,
3.982222318649292,
4.980680465698242,
5.784671306610107,
5.979138374328613,
6.977596282958984,
7.5290703773498535,
7.679999828338623
]
onsets test/audio/scale-saw.wav Float32Array(10) [
0,
0.9868480563163757,
1.9853061437606812,
2.983764171600342,
3.558458089828491,
3.689070224761963,
3.9851248264312744,
4.97777795791626,
5.982040882110596,
6.980498790740967
]
Pitches:
pitches test/audio/scale2-sin.wav Float32Array(21) [
60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 86, 72,
92, 90, 91, 88, 93, 93, 90
]
pitches test/audio/scale2-saw.wav Float32Array(19) [
60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 92,
91, 89, 92, 88, 92
]
pitches test/audio/scale2-piano.wav Float32Array(11) [
60, 61, 62, 63, 64,
65, 66, 67, 68, 69,
70
]
What is going on? Is this a bug or an issue with my implementation?
This demo has a more effective solution:
It uses two additional modules:
A simple demo:
I also had to modify the last lines of the modules to make them compatible with CommonJS:
./lib/onsets.module.js
./lib/polarFFT.module.js
When running this I get more consistent results: