I have a method that generates waveform data (using NAudio SampleProviders) from audio files, which had been working properly until now. Today we noticed that this method produces wrong results for some specific audio files. After examining them, I realized that the problematic files use MuLaw encoding, which I wasn't handling. Which sample provider should I use to properly obtain the sample values? (Note: my project targets .NET Core, so I can only use NAudio.Core.)
Here is the complete method:
/// <summary>
/// Decodes the given audio content, computes per-pixel min/max peak pairs for the
/// left and right channels, and returns the result serialized as snake_case JSON.
/// Returns an empty string if generation fails (the error is logged, not rethrown).
/// </summary>
/// <param name="format">Container format of <paramref name="content"/> (Wav or Mp3).</param>
/// <param name="content">Raw bytes of the audio file.</param>
/// <param name="duration">Audio duration, used to derive the samples-per-pixel resolution.</param>
public async Task<string> GenerateWaveformAsync(AudioFormats format, byte[] content, int duration)
{
    int samplesPerPixel = GetSamplesPerPixelFromConfig(duration);
    var resultJson = string.Empty;
    var waveformResult = new WaveformResult();
    var waveformPointCount = 0L;
    var waveformPointsAsShort = new List<short>();
    var waveformPointsAsFloat = new List<float>();
    try
    {
        _logger.LogInformation($"Waveform generation has been started.");
        using (var memoryStream = new MemoryStream(content))
        using (WaveStream waveReader = GetReaderStream(memoryStream, format))
        {
            ISampleProvider provider;
            switch (waveReader.WaveFormat.Encoding)
            {
                case WaveFormatEncoding.Pcm:
                    provider = new Pcm16BitToSampleProvider(waveReader);
                    break;
                case WaveFormatEncoding.IeeeFloat:
                    provider = new WaveToSampleProvider(waveReader);
                    break;
                case WaveFormatEncoding.MuLaw:
                    // FIX: mu-law is 8-bit companded audio. NAudio.Core ships the pure
                    // managed decoder NAudio.Codecs.MuLawDecoder (no ACM/MediaFoundation
                    // dependency), so wrap the stream in a small custom sample provider.
                    // (A-law files could be handled the same way with ALawDecoder.)
                    provider = new MuLawSampleProvider(waveReader);
                    break;
                default:
                    // NOTE(review): falling back to a PCM-16 interpretation for unknown
                    // encodings silently produces garbage for anything that isn't PCM-16;
                    // consider throwing here instead — confirm with callers.
                    provider = new Pcm16BitToSampleProvider(waveReader);
                    break;
            }
            waveformResult.Bits = waveReader.WaveFormat.BitsPerSample;
            waveformResult.Channels = waveReader.WaveFormat.Channels;
            waveformResult.SampleRate = waveReader.WaveFormat.SampleRate;
            waveformResult.SamplesPerPixel = samplesPerPixel;
            var channels = waveReader.WaveFormat.Channels;
            var leftChannelSamples = new List<float>();
            var rightChannelSamples = new List<float>();
            var buffer = new float[waveReader.WaveFormat.SampleRate];
            // ISampleProvider.Read returns the number of FLOAT SAMPLES read, not bytes,
            // so the counter is named accordingly (the old name `byteCountRead` was wrong).
            int samplesRead;
            // Interleave parity must be tracked across Read calls: if one read returns an
            // odd number of samples, a per-buffer `n % 2` would swap the channels mid-stream.
            long interleavedIndex = 0;
            do
            {
                samplesRead = provider.Read(buffer, 0, buffer.Length);
                for (var n = 0; n < samplesRead; n++)
                {
                    if (channels == 1)
                    {
                        // Mono: the stream is NOT interleaved. Splitting alternate samples
                        // into left/right (the old behavior) halves the waveform length and
                        // scrambles it; duplicate the single channel into both instead.
                        leftChannelSamples.Add(buffer[n]);
                        rightChannelSamples.Add(buffer[n]);
                    }
                    else if (interleavedIndex++ % 2 == 0)
                    {
                        leftChannelSamples.Add(buffer[n]);
                    }
                    else
                    {
                        rightChannelSamples.Add(buffer[n]);
                    }
                }
            }
            while (samplesRead > 0);
            var waveformPointCountDouble = (double)leftChannelSamples.Count / (double)samplesPerPixel;
            waveformPointCount = (long)Math.Ceiling(waveformPointCountDouble);
            var leftChannelPeaks = new List<PeakInfo>();
            var rightChannelPeaks = new List<PeakInfo>();
            for (var i = 0; i < waveformPointCount; i++)
            {
                // GetRange is O(frame) instead of the old Skip(i * n).Take(n), which
                // re-walked the whole list every iteration (accidental O(n^2)).
                var start = i * samplesPerPixel;
                var leftLength = Math.Min(samplesPerPixel, leftChannelSamples.Count - start);
                var rightLength = Math.Min(samplesPerPixel, rightChannelSamples.Count - start);
                var currentFrameLeftChannel = leftChannelSamples.GetRange(start, leftLength);
                leftChannelPeaks.Add(new PeakInfo(currentFrameLeftChannel.Min(), currentFrameLeftChannel.Max()));
                if (rightLength > 0)
                {
                    var currentFrameRightChannel = rightChannelSamples.GetRange(start, rightLength);
                    rightChannelPeaks.Add(new PeakInfo(currentFrameRightChannel.Min(), currentFrameRightChannel.Max()));
                }
                else
                {
                    // Defensive: a truncated right channel used to crash Min()/Max() on an
                    // empty frame; emit a silent peak pair instead of failing the whole job.
                    rightChannelPeaks.Add(new PeakInfo(0f, 0f));
                }
            }
            for (var i = 0; i < leftChannelPeaks.Count; i++)
            {
                waveformPointsAsFloat.Add(leftChannelPeaks[i].Min);
                waveformPointsAsFloat.Add(leftChannelPeaks[i].Max);
                waveformPointsAsFloat.Add(rightChannelPeaks[i].Min);
                waveformPointsAsFloat.Add(rightChannelPeaks[i].Max);
            }
            // Clamp before scaling: a full-scale sample (f == 1.0) would round to 32768,
            // which overflows when cast to short (wraps to -32768).
            waveformPointsAsFloat
                .ForEach(f => waveformPointsAsShort.Add((short)Math.Round(Math.Clamp(f, -1f, 1f) * short.MaxValue, 0)));
        }
        waveformResult.Length = waveformPointCount;
        waveformResult.Data = waveformPointsAsShort;
        var contractResolver = new DefaultContractResolver
        {
            NamingStrategy = new SnakeCaseNamingStrategy(),
        };
        resultJson = JsonConvert.SerializeObject(waveformResult, Formatting.Indented,
            new JsonSerializerSettings()
            {
                ReferenceLoopHandling = ReferenceLoopHandling.Ignore,
                ContractResolver = contractResolver,
            });
        _logger.LogInformation($"Waveform has been generated successfully.");
    }
    catch (Exception ex)
    {
        _logger.LogError($"An error has occurred while generating waveform. ({ex.Message})");
    }
    return resultJson;
}

/// <summary>
/// Converts an 8-bit mu-law (G.711) <see cref="WaveStream"/> into 32-bit IEEE float
/// samples using the managed <c>NAudio.Codecs.MuLawDecoder</c>, so it works with
/// NAudio.Core alone (no ACM / platform codec required). Mu-law stores exactly one
/// byte per sample, so bytes read == samples produced.
/// </summary>
private sealed class MuLawSampleProvider : ISampleProvider
{
    private readonly WaveStream _source;
    private byte[] _sourceBuffer;

    public MuLawSampleProvider(WaveStream source)
    {
        _source = source;
        WaveFormat = WaveFormat.CreateIeeeFloatWaveFormat(
            source.WaveFormat.SampleRate, source.WaveFormat.Channels);
    }

    /// <summary>Output format: IEEE float at the source's sample rate and channel count.</summary>
    public WaveFormat WaveFormat { get; }

    public int Read(float[] buffer, int offset, int count)
    {
        // Reuse (and grow on demand) a scratch buffer for the raw mu-law bytes.
        if (_sourceBuffer == null || _sourceBuffer.Length < count)
        {
            _sourceBuffer = new byte[count];
        }
        int bytesRead = _source.Read(_sourceBuffer, 0, count);
        for (int i = 0; i < bytesRead; i++)
        {
            // MuLawToLinearSample expands one companded byte to a linear 16-bit value;
            // divide by 32768 to normalize into the [-1, 1) float range.
            buffer[offset + i] = MuLawDecoder.MuLawToLinearSample(_sourceBuffer[i]) / 32768f;
        }
        return bytesRead;
    }
}
/// <summary>
/// Maps a container format to the matching NAudio reader over the given stream.
/// Throws for any format without a supported reader.
/// </summary>
private static WaveStream GetReaderStream(MemoryStream memoryStream, AudioFormats audioFormat)
{
    return audioFormat switch
    {
        AudioFormats.Wav => new WaveFileReader(memoryStream),
        AudioFormats.Mp3 => new Mp3FileReader(memoryStream),
        _ => throw new UnsupportedAudioFormatForWaveformGenerationException(),
    };
}