Can't create an audio frame with from_ndarray

181 views Asked by At

I am trying to use PyAV to create an audio frame, and I have been stuck for a few days on an error that I can't fix.

This is my custom audio class:

 class CustomAudioTrack(MediaStreamTrack):
    """Audio track that records a short block of input on every recv() call.

    NOTE: as posted, recv() fails inside av.AudioFrame.from_ndarray with
    "ValueError: Unexpected numpy array shape (4800, 2)": the recorded array
    has one row per sample and one column per channel, which from_ndarray
    rejects for format='s16'.
    """
    kind = "audio"

    def __init__(self, rate=48000, channels=2):
        """Store the capture settings and reset the running sample counter.

        rate: sample rate in Hz passed to the recorder and set on each frame.
        channels: number of channels requested from the recorder.
        """
        super().__init__()
        self.rate = rate
        self.channels = channels
        # Running count of samples produced so far; used as the frame pts.
        self._timestamp = 0

    async def recv(self):
        """Record 0.1 s of audio and wrap it in an av.AudioFrame.

        Returns the frame with sample_rate, pts and time_base set.
        """
        # Number of frames to read for 0.1 second of sound
        frames = int(self.rate / 10)
        # presumably `sd` is the sounddevice module; the resulting array has
        # shape (frames, channels), as the reported error shows -- (4800, 2)
        # with the default arguments.
        data = sd.rec(frames, samplerate=self.rate,
                      channels=self.channels, dtype=np.int16)
        sd.wait()  # Wait until the recording is finished

        # Update the timestamps (pts is expressed in units of 1/rate seconds)
        self._timestamp += frames
        pts = self._timestamp
        time_base = fractions.Fraction(1, self.rate)

        # Create the audio frame -- this is the call that raises the
        # ValueError, because `data` is passed with shape (frames, channels).
        audio_frame = av.AudioFrame.from_ndarray(
            data, format='s16', layout='stereo')
        audio_frame.sample_rate = self.rate
        audio_frame.pts = pts
        audio_frame.time_base = time_base

        return audio_frame

My error is:

Traceback (most recent call last):
File "C:\Users\poppppp\AppData\Local\Programs\Python\Python311\Lib\site-packages\aiortc\rtcrtpsender.py", line 328, in _run_rtp
enc_frame = await self._next_encoded_frame(codec)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\poppppp\AppData\Local\Programs\Python\Python311\Lib\site-packages\aiortc\rtcrtpsender.py", line 270, in _next_encoded_frame
data = await self.__track.recv()
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\documents\project\pythonProject\webrtc_tutorial\sadda_python\offerer.py", line 44, in recv
audio_frame = av.AudioFrame.from_ndarray(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "av\audio\frame.pyx", line 125, in av.audio.frame.AudioFrame.from_ndarray
File "av\utils.pyx", line 79, in av.utils.check_ndarray_shape
ValueError: Unexpected numpy array shape (4800, 2)

Do you have an idea of how I can fix this?

1

There is 1 answer

2
Ismael Hadj On BEST ANSWER

I found a solution. The problem was that the NumPy array given to the from_ndarray function has to be formatted like this: [ [ 1] [ 1] [0] .... [ 1] [ 0] ]. I now use PyAudio to listen to the microphone, but the NumPy array it produces does not have the right shape, so I use data.reshape(-1, 1) to reshape it (and pass the transposed array, data.T, to from_ndarray). Here is the final custom class:

class CustomAudioTrack(MediaStreamTrack):
    """Audio track that streams microphone input captured with PyAudio.

    Each call to recv() reads FRAMES_PER_BUFFER samples per channel from the
    input device and returns them as a packed 16-bit av.AudioFrame whose pts
    counts samples (time_base is 1/rate).

    Args:
        rate: capture sample rate in Hz; also set on every frame.
        channels: number of input channels, 1 (mono) or 2 (stereo).

    Raises:
        ValueError: if ``channels`` has no matching entry in ``_LAYOUTS``.
    """

    kind = "audio"

    # Samples per channel delivered by each recv() call (20 ms at 48 kHz).
    FRAMES_PER_BUFFER = 960

    # PyAV channel-layout name for each supported channel count.
    _LAYOUTS = {1: "mono", 2: "stereo"}

    def __init__(self, rate=48000, channels=2):
        super().__init__()
        if channels not in self._LAYOUTS:
            raise ValueError(
                f"unsupported channel count {channels!r}; "
                f"expected one of {sorted(self._LAYOUTS)}")
        self.rate = rate
        self.channels = channels
        self._timestamp = 0

        # Initialise PyAudio and open the input stream with the requested
        # settings, so the device matches what is written on each frame.
        self.pa = pyaudio.PyAudio()
        self.stream = self.pa.open(format=pyaudio.paInt16,
                                   channels=self.channels,
                                   rate=self.rate,
                                   input=True,
                                   frames_per_buffer=self.FRAMES_PER_BUFFER)

    async def recv(self):
        """Read one buffer from the microphone and return it as an AudioFrame."""
        # Local import: this listing does not show a module-level import block.
        import asyncio

        # stream.read() blocks until the buffer is full; run it in a worker
        # thread so the event loop keeps servicing other coroutines.
        loop = asyncio.get_running_loop()
        raw = await loop.run_in_executor(
            None, self.stream.read, self.FRAMES_PER_BUFFER)

        # The bytes hold interleaved int16 samples. Packed formats such as
        # 's16' need a 2-D array with a single row, hence the column reshape
        # followed by the transpose below.
        data = np.frombuffer(raw, dtype=np.int16)
        data = data.reshape(-1, 1)

        # Prepare the data for PyAV
        audio_frame = av.AudioFrame.from_ndarray(
            data.T, format='s16', layout=self._LAYOUTS[self.channels])
        audio_frame.sample_rate = self.rate
        # pts is the position of the frame's first sample, so the first frame
        # starts at 0; advance the counter only after stamping the frame.
        audio_frame.pts = self._timestamp
        audio_frame.time_base = Fraction(1, self.rate)
        self._timestamp += self.FRAMES_PER_BUFFER

        return audio_frame

    def __del__(self):
        # __init__ may have failed before these attributes were assigned
        # (invalid channels, no input device), so look them up defensively.
        stream = getattr(self, "stream", None)
        if stream is not None:
            stream.stop_stream()
            stream.close()
        pa = getattr(self, "pa", None)
        if pa is not None:
            pa.terminate()