Unable to transcode audio via Android MediaCodec API

1.7k views Asked by At

I'm trying to write a basic raw AAC data to a file, in hopes that I can use mp4parser to encapsulate it with a video track. For that, I need to encode any given audio file to that format. MediaCodec API is readily available since API 16, so I've decided to use that for the codec operation.

I'm not sure why not many resources are available online regarding this, possibly due to the complexity associated. Although, I've managed to learn that the fundamental approach should be:

Get sample data via MediaExtractor -> Enqueue decoder input buffer -> Dequeue output buffer and get the decoded data -> Enqueue encoder input buffer -> Dequeue encoder output buffer -> Write the encoded data to file.

private void transcodeFile(File source, File destination) throws IOException {
    FileInputStream inputStream = new FileInputStream(source);
    FileOutputStream outputStream = new FileOutputStream(destination);

    log("Transcoding file: " + source.getName());

    MediaExtractor extractor;
    MediaCodec encoder;
    MediaCodec decoder;

    ByteBuffer[] encoderInputBuffers;
    ByteBuffer[] encoderOutputBuffers;
    ByteBuffer[] decoderInputBuffers;
    ByteBuffer[] decoderOutputBuffers;

    int noOutputCounter = 0;
    int noOutputCounterLimit = 10;

    extractor = new MediaExtractor();
    extractor.setDataSource(inputStream.getFD());
    extractor.selectTrack(0);

    log(String.format("TRACKS #: %d", extractor.getTrackCount()));
    MediaFormat format = extractor.getTrackFormat(0);
    String mime = format.getString(MediaFormat.KEY_MIME);
    log(String.format("MIME TYPE: %s", mime));


    final String outputType = MediaFormat.MIMETYPE_AUDIO_AAC;
    encoder = MediaCodec.createEncoderByType(outputType);
    MediaFormat encFormat = MediaFormat.createAudioFormat(outputType, 44100, 2);
    encFormat.setInteger(MediaFormat.KEY_BIT_RATE, 64000);
    encoder.configure(encFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);

    decoder = MediaCodec.createDecoderByType(mime);
    decoder.configure(format, null, null, 0);

    encoder.start();
    decoder.start();

    encoderInputBuffers = encoder.getInputBuffers();
    encoderOutputBuffers = encoder.getOutputBuffers();

    decoderInputBuffers = decoder.getInputBuffers();
    decoderOutputBuffers = decoder.getOutputBuffers();

    int timeOutUs = 1000;
    long presentationTimeUs = 0;

    MediaCodec.BufferInfo info = new MediaCodec.BufferInfo();
    boolean inputEOS = false;
    boolean outputEOS = false;

    while(!outputEOS && noOutputCounter < noOutputCounterLimit) {
        noOutputCounter++;

        if(!inputEOS) {
            int decInputBufferIndex = decoder.dequeueInputBuffer(timeOutUs);
            log("decInputBufferIndex: " + decInputBufferIndex);
            if (decInputBufferIndex >= 0) {
                ByteBuffer dstBuffer = decoderInputBuffers[decInputBufferIndex];

                //Getting sample with MediaExtractor
                int sampleSize = extractor.readSampleData(dstBuffer, 0);
                if (sampleSize < 0) {
                    inputEOS = true;
                    log("Input EOS");
                    sampleSize = 0;
                } else {
                    presentationTimeUs = extractor.getSampleTime();
                }

                log("Input sample size: " + sampleSize);

                //Enqueue decoder input buffer
                decoder.queueInputBuffer(decInputBufferIndex, 0, sampleSize, presentationTimeUs, inputEOS ? MediaCodec.BUFFER_FLAG_END_OF_STREAM : 0);
                if (!inputEOS) extractor.advance();

            } else {
                log("decInputBufferIndex: " + decInputBufferIndex);
            }
        }

        //Dequeue decoder output buffer
        int res = decoder.dequeueOutputBuffer(info, timeOutUs);
        if(res >= 0) {
            if(info.size > 0) noOutputCounter = 0;

            int decOutputBufferIndex = res;
            log("decOutputBufferIndex: " + decOutputBufferIndex);

            ByteBuffer buffer = decoderOutputBuffers[decOutputBufferIndex];
            buffer.position(info.offset);
            buffer.limit(info.offset + info.size);

            final int size = buffer.limit();
            if(size > 0) {
                //audioTrack.write(buffer, buffer.limit(), AudioTrack.MODE_STATIC);

                int encInputBufferIndex = encoder.dequeueInputBuffer(-1);
                log("encInputBufferIndex: " + encInputBufferIndex);
                //fill the input buffer with the decoded data
                if(encInputBufferIndex >= 0) {
                    ByteBuffer dstBuffer = encoderInputBuffers[encInputBufferIndex];
                    dstBuffer.clear();
                    dstBuffer.put(buffer);

                    encoder.queueInputBuffer(encInputBufferIndex, 0, info.size, info.presentationTimeUs, 0);
                    int encOutputBufferIndex = encoder.dequeueOutputBuffer(info, timeOutUs);
                    if(encOutputBufferIndex >= 0) {
                        log("encOutputBufferIndex: " + encOutputBufferIndex);
                        ByteBuffer outBuffer = encoderOutputBuffers[encOutputBufferIndex];
                        byte[] out = new byte[outBuffer.remaining()];
                        outBuffer.get(out);
                        //write data to file
                        outputStream.write(out);
                    }
                }
            }
            decoder.releaseOutputBuffer(decOutputBufferIndex, false);
            if((info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
                outputEOS = true;
                log("Output EOS");
            }
        } else if (res == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
            decoderOutputBuffers = decoder.getOutputBuffers();
            log("Output buffers changed.");
        } else if (res == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
            log("Output format changed.");
        } else {
            log("Dequeued output buffer returned: " + res);
        }
    }

    log("Stopping..");
    releaseCodec(decoder);
    releaseCodec(encoder);
    inputStream.close();
    outputStream.close();

}

The output file is not valid for some reason. Why?

EDIT: Managed to fix an Exception, issue persists.

EDIT 2: I've prevented the buffer overflow by setting the buffer size to the bitrate in the encoder format settings. There are two issues currently: 1. After a very short interval, it gets stuck here, possibly waiting indefinitely.int encInputBufferIndex = dequeueInputBuffer(-1); 2. Decoding takes as long as the track is, why does this regard for the actual interval of samples?

EDIT 3: Testing with AudioTrack.write(), audio plays nice and fine, but this isn't intended and suggests that the decoding is taking place in sync to the media file being fed, this should take place as fast as possible to allow encoder to do its job quick. Changing the presentationTimeUs in decoder.queueInputBuffer() did nothing.

1

There are 1 answers

10
Kirill K On

You are on the right way, the missing part is muxing the encoded frames into valid MP4 file with MediaMuxer. There is a good (and only) example for that on bigflake. Most relevant examples for this matter are

You'll have to combine and simplify/modify them to work with audio instead of video. You'll need API 18 for the above

Edit: how I forward decoder buffer to encoder (more or less). I did not experience buffer overflows so far just hoping that sane implementation will have encoder and decoder buffers of the same capacity:

int decoderStatus = audioDecoder.dequeueOutputBuffer(info, TIMEOUT_USEC);
  if (decoderStatus >= 0) {
      // no output available yet
      if (VERBOSE) Log.d(TAG, "no output from audio decoder available");
...
   } else if (decoderStatus == MediaCodec.INFO_OUTPUT_BUFFERS_CHANGED) {
            audioDecoderOutputBuffers = audioDecoder.getOutputBuffers();
            if (VERBOSE) Log.d(TAG, "decoder output buffers changed (we don't care)");
    } else if (decoderStatus == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
            // expected before first buffer of data
            if (VERBOSE) {
                    MediaFormat newFormat = audioDecoder.getOutputFormat();
                    Log.d(TAG, "decoder output format changed: " + newFormat);
                }
    } else if (decoderStatus < 0) {
            Log.e(TAG, "unexpected result from decoder.dequeueOutputBuffer: "+decoderStatus);
            throw new RuntimeException("Issue with dencoding audio");
    } else { // decoderStatus >= 0
            if (VERBOSE) Log.d(TAG, "audio decoder produced buffer "
                                + decoderStatus + " (size=" + info.size + ")");

            if (info.size! = 0) {                           
                // Forward decoder buffer to encoder
                ByteBuffer decodedData = audioDecoderOutputBuffers[decoderStatus];
                decodedData.position(info.offset);
                decodedData.limit(info.offset + info.size);

                 // Possibly edit buffer data

                // Send it to the audio encoder.
                int encoderStatus = audioEncoder.dequeueInputBuffer(-1);
                if (encoderStatus < 0) {
                    throw new RuntimeException("Could not get input buffer for audio encoder!!!");
                }
            audioEncoderInputBuffers[encoderStatus].clear();
            audioEncoderInputBuffers[encoderStatus].put(decodedData);
         }
audioEncoder.queueInputBuffer(encoderStatus, 0, info.size, mAudioMediaTime, 0);
     if (VERBOSE) Log.d(TAG, "Submitted to AUDIO encoder frame, size=" + info.size + " time=" + mAudioMediaTime);
    }
 audioDecoder.releaseOutputBuffer(decoderStatus, false);