I'm struggling to figure out why my code for live audio feature extraction results in abnormally low amplitude values with little to no variation between frames.
I tried adjusting the sampling rate and frame size, which produced slightly different amplitude values but still with little to no variation (e.g. 0.0032 dB instead of 0.0028 dB).
I tried a different microphone input and tested both microphones; they both seem to function normally.
I also tried different approaches to calculating the amplitude (a plain numpy RMS and librosa's RMS feature), but that again gives a slightly different amplitude value that is still very low and shows no variation.
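For reference, the two amplitude calculations I compared look roughly like this (a minimal sketch on an illustrative frame of samples, not my exact code):

import numpy as np
import librosa

# Illustrative frame: 512 float32 samples in [-1.0, 1.0]
frame = np.random.uniform(-0.5, 0.5, 512).astype(np.float32)

# Approach 1: plain numpy RMS over the frame
rms_numpy = np.sqrt(np.mean(frame ** 2))

# Approach 2: librosa's frame-wise RMS, averaged across sub-frames
rms_librosa = float(np.mean(librosa.feature.rms(y=frame)))

print(rms_numpy, rms_librosa)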
This is my code:
import time
import numpy as np
import pandas as pd
import pyaudio
import scipy.signal
import librosa
import matplotlib.pyplot as plt
from python_speech_features import mfcc  # assuming mfcc comes from python_speech_features

# Parameters
sample_rate = 16000
frame_size = 512
# Initialize PyAudio
p = pyaudio.PyAudio()
# Open a stream for audio input
stream = p.open(format=pyaudio.paInt16,
                channels=1,  # Mono
                rate=sample_rate,
                input=True,
                frames_per_buffer=frame_size)
# Create an empty list to store data rows
data = []
rms_amplitudes_over_time = []
frame_count = 0
try:
    while True:
        # Read audio data from the microphone
        audio_data = np.frombuffer(stream.read(frame_size), dtype=np.int16)
        # Convert audio data to floating-point format
        audio_data = audio_data.astype(np.float32) / 32767.0
        # Calculate F0 (Fundamental Frequency)
        f0 = np.argmax(np.abs(scipy.signal.find_peaks(audio_data)[0]))
        # Calculate RMS Amplitude
        rms_amplitude = np.sqrt(np.mean(librosa.feature.rms(y=audio_data)))
        rms_amplitudes_over_time.append(rms_amplitude)
        # Calculate Shimmer
        peaks, _ = scipy.signal.find_peaks(audio_data)
        amplitude_peaks = np.max(audio_data[peaks]) - np.min(audio_data[peaks])
        shimmer = (amplitude_peaks / len(peaks)) * 100  # Calculate percentage shimmer
        # Calculate Jitter
        periods = np.diff(peaks) / sample_rate
        jitter = np.mean(np.abs(periods - np.mean(periods))) * 1000  # Calculate jitter in milliseconds
        # Extract MFCC features from the audio data
        nfft = 2048  # You can adjust this value as needed
        mfcc_features = mfcc(audio_data, sample_rate, numcep=15, nfft=nfft)
        frame_count += 1
        row = [frame_count, f0, rms_amplitude, shimmer, jitter] + [round(value, 2) for value in mfcc_features[0]]
        # Append the row to the data list
        data.append(row)
        # Print the current frame's data
        current_frame_data = [row]
        current_df = pd.DataFrame(current_frame_data,
                                  columns=["Frame", "F0 (Hz)", "RMS Amplitude", "Shimmer (%)", "Jitter (ms)"]
                                          + [f"MFCC {i}" for i in range(15)])
        print(current_df)
        # Sleep briefly to control the display rate (adjust as needed)
        time.sleep(1.0)
except KeyboardInterrupt:
    # Press Ctrl+C to stop the script
    pass
# Create a DataFrame from the collected data
columns = ["Frame", "F0 (Hz)", "RMS Amplitude (dB)", "Shimmer (%)", "Jitter (ms)"] + [f"MFCC {i}" for i in range(15)]
df = pd.DataFrame(data, columns=columns)
# Print the final DataFrame
print(df)
# Visualize RMS amplitude over time
plt.plot(rms_amplitudes_over_time)
plt.xlabel("Frame")
plt.ylabel("RMS Amplitude")
plt.title("RMS Amplitude Over Time")
plt.show()
# Cleanup and close the audio input stream
stream.stop_stream()
stream.close()
p.terminate()