How does one use AVAudioConverter to convert from mono to stereo?


I'm trying to use AVAudioEngine instead of AVAudioPlayer because I need to do some per-packet processing as the audio plays, but before I can get that far, I need to convert the 16-bit, 8 kHz mono audio data to stereo so that AVAudioEngine will play it. Below is my (incomplete) attempt. I'm currently stuck on how to make AVAudioConverter do the mono-to-stereo conversion. If I don't use the AVAudioConverter, the iOS runtime complains that the input format doesn't match the output format. If I do use it (as below), the runtime doesn't complain, but the audio doesn't play back properly (likely because I'm not doing the mono-to-stereo conversion correctly). Any assistance is appreciated!

  private func loadAudioData(audioData: Data?) {
      // Load audio data into player

      guard let audio = audioData else {return}
      do {
          let inputAudioFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: Double(sampleRate), channels: 1, interleaved: false)
          let outputAudioFormat = self.audioEngine.mainMixerNode.outputFormat(forBus: 0)
          
          if inputAudioFormat != nil {
              let inputStreamDescription = inputAudioFormat?.streamDescription.pointee
              let outputStreamDescription = outputAudioFormat.streamDescription.pointee
              let count = UInt32(audio.count)
              if inputStreamDescription != nil && count > 0 {
                  if let ibpf = inputStreamDescription?.mBytesPerFrame {
                      let inputFrameCapacity = count / ibpf
                      let outputFrameCapacity = count / outputStreamDescription.mBytesPerFrame
                      self.pcmInputBuffer = AVAudioPCMBuffer(pcmFormat: inputAudioFormat!, frameCapacity: inputFrameCapacity)
                      self.pcmOutputBuffer = AVAudioPCMBuffer(pcmFormat: outputAudioFormat, frameCapacity: outputFrameCapacity)
          
                      if let input = self.pcmInputBuffer, let output = self.pcmOutputBuffer {
                          self.pcmConverter = AVAudioConverter(from: inputAudioFormat!, to: outputAudioFormat)
                          input.frameLength = input.frameCapacity
                      
                          let b = UnsafeMutableBufferPointer(start: input.int16ChannelData?[0], count: input.stride * Int(inputFrameCapacity))
                          let bytesCopied = audio.copyBytes(to: b)
                          assert(bytesCopied == count)
          
                          audioEngine.attach(playerNode)
                          audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: nil)
          
                          self.pcmConverter?.convert(to: output, error: nil) { packets, status in
                              status.pointee = .haveData
                            return self.pcmInputBuffer    // I know this is wrong, but I'm not sure how to do it correctly
                          }
                          try audioEngine.start()
                      }
                  }
              }
          }
      }
  }
1 Answer

Gordon Childs

Speculative, incorrect answer

How about pcmConverter?.channelMap = [0, 0]?
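
For reference, setting a channel map explicitly would look something like the sketch below. This is speculative (and, per the actual answer, turned out not to be needed); the two formats are placeholder values:

    import AVFoundation

    // Speculative sketch: explicitly map the single input channel (0) to
    // both stereo output channels. Formats are placeholders for illustration.
    let monoFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: 8000, channels: 1, interleaved: false)!
    let stereoFormat = AVAudioFormat(standardFormatWithSampleRate: 44100, channels: 2)!
    if let converter = AVAudioConverter(from: monoFormat, to: stereoFormat) {
        converter.channelMap = [0, 0] // left <- input ch 0, right <- input ch 0
    }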

Actual answer

You don't need the audio converter's channel map here, because a mono-to-stereo AVAudioConverter seems to duplicate the mono channel by default. The main problems were that outputFrameCapacity was wrong (it didn't account for the sample-rate conversion), and that you were reading mainMixerNode's outputFormat before calling audioEngine.prepare() or starting the engine, when the format isn't valid yet.
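
To make the capacity fix concrete: the output buffer has to hold the input frame count scaled by the ratio of the sample rates, not `count / mBytesPerFrame` of the output format. For example (assuming the mixer happens to be running at 44.1 kHz, which is common but device-dependent):

    import AVFoundation

    // 1 second of 8 kHz mono Int16 audio = 8000 frames (16000 bytes).
    let inputFrames: AVAudioFrameCount = 8000
    let inputRate = 8000.0
    let outputRate = 44100.0 // assumed mixer rate, for illustration only
    let outputFrames = AVAudioFrameCount(Double(inputFrames) * outputRate / inputRate)
    // outputFrames == 44100; the original byte-count-based calculation
    // would have produced only 4000 frames and truncated the audio.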

Assuming sampleRate = 8000, an amended solution looks like this:

private func loadAudioData(audioData: Data?) throws {
    // Load audio data into player
    
    guard let audio = audioData else {return}
    do {
        audioEngine.attach(playerNode)
        audioEngine.connect(playerNode, to: audioEngine.mainMixerNode, format: nil)
        audioEngine.prepare() // https://stackoverflow.com/a/70392017/22147
        
        let outputAudioFormat = self.audioEngine.mainMixerNode.outputFormat(forBus: 0)
        guard let inputAudioFormat = AVAudioFormat(commonFormat: .pcmFormatInt16, sampleRate: Double(sampleRate), channels: 1, interleaved: false) else { return }
        
        let inputStreamDescription = inputAudioFormat.streamDescription.pointee
        let outputStreamDescription = outputAudioFormat.streamDescription.pointee
        let count = UInt32(audio.count)
        if count > 0 {
            let ibpf = inputStreamDescription.mBytesPerFrame
            let inputFrameCapacity = count / ibpf
            let outputFrameCapacity = Float64(inputFrameCapacity) * outputStreamDescription.mSampleRate / inputStreamDescription.mSampleRate
            self.pcmInputBuffer = AVAudioPCMBuffer(pcmFormat: inputAudioFormat, frameCapacity: inputFrameCapacity)
            self.pcmOutputBuffer = AVAudioPCMBuffer(pcmFormat: outputAudioFormat, frameCapacity: AVAudioFrameCount(outputFrameCapacity))
            
            if let input = self.pcmInputBuffer, let output = self.pcmOutputBuffer {
                self.pcmConverter = AVAudioConverter(from: inputAudioFormat, to: outputAudioFormat)
                input.frameLength = input.frameCapacity
                
                let b = UnsafeMutableBufferPointer(start: input.int16ChannelData?[0], count: input.stride * Int(inputFrameCapacity))
                let bytesCopied = audio.copyBytes(to: b)
                assert(bytesCopied == count)
                
                var inputConsumed = false
                self.pcmConverter?.convert(to: output, error: nil) { packets, status in
                    // Supply the input buffer exactly once, then signal
                    // end-of-stream so the converter stops pulling data.
                    if inputConsumed {
                        status.pointee = .endOfStream
                        return nil
                    }
                    inputConsumed = true
                    status.pointee = .haveData
                    return input
                }
                try audioEngine.start()
                
                self.playerNode.scheduleBuffer(output, completionHandler: nil)
                self.playerNode.play()
            }
        }
    }
}
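
If you need this in more than one place, the one-shot conversion can be factored into a helper. This is just a sketch of the same pattern (the function name and structure are mine, not part of the answer above):

    import AVFoundation

    // One-shot conversion: feed `input` to the converter exactly once,
    // then report end-of-stream so it stops asking for more data.
    func convertOnce(_ input: AVAudioPCMBuffer, to outputFormat: AVAudioFormat) -> AVAudioPCMBuffer? {
        guard let converter = AVAudioConverter(from: input.format, to: outputFormat) else { return nil }
        // Size the output for the sample-rate ratio, as discussed above.
        let ratio = outputFormat.sampleRate / input.format.sampleRate
        let capacity = AVAudioFrameCount(Double(input.frameLength) * ratio)
        guard let output = AVAudioPCMBuffer(pcmFormat: outputFormat, frameCapacity: capacity) else { return nil }

        var consumed = false
        var conversionError: NSError?
        let status = converter.convert(to: output, error: &conversionError) { _, inputStatus in
            if consumed {
                inputStatus.pointee = .endOfStream
                return nil
            }
            consumed = true
            inputStatus.pointee = .haveData
            return input
        }
        return (status == .error || conversionError != nil) ? nil : output
    }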