Encoding webm with ffmpeg and libvorbis does not work

1.8k views Asked by At

I am attempting to run a modified version of the ffmpeg muxing example which outputs vorbis encoded audio to a webm container.

The code works fine if I specify mp3 as the format, just not when I use vorbis

The code is similar to http://www.ffmpeg.org/doxygen/2.0/doc_2examples_2muxing_8c-example.html but with the video portions stripped out. I tested with video enabled and the example video was encoded properly, but with no audio.

ffmpeg is compiled with libvorbis and libvpx support.


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libswresample/swresample.h>

/* Upper bound that main() compares audio_time against before stopping the
 * write loop (presumably seconds, since audio_time is pts * time_base). */
#define STREAM_DURATION 200.0
/* Only referenced from a commented-out line in add_stream(); the encoder is
 * actually looked up with avcodec_find_encoder(). */
extern AVCodec ff_libvorbis_encoder;
/* NOTE(review): no visible code assigns this file-scope frame; it looks like
 * a leftover from the video half of the original example — confirm. */
static AVFrame *frame;
/*
 * Add one output stream to `oc` for the encoder identified by `codec_id`.
 *
 * The encoder that was found is stored in *codec. For audio encoders the
 * codec context is preset to planar float, 64 kb/s, 44.1 kHz, 2 channels.
 * The process exits with status 1 if the encoder is unknown or the stream
 * cannot be allocated.
 *
 * Returns the newly created stream.
 */
static AVStream *add_stream(AVFormatContext *oc, AVCodec **codec,
                            enum AVCodecID codec_id)
{
    AVCodec *encoder = avcodec_find_encoder(codec_id);
    AVCodecContext *enc_ctx;
    AVStream *stream;

    *codec = encoder;
    if (encoder == NULL) {
        fprintf(stderr, "Could not find encoder for '%s'\n",
                avcodec_get_name(codec_id));
        exit(1);
    }

    stream = avformat_new_stream(oc, encoder);
    if (stream == NULL) {
        fprintf(stderr, "Could not allocate stream\n");
        exit(1);
    }
    /* The stream was just appended, so its id is the last index. */
    stream->id = oc->nb_streams - 1;

    enc_ctx = stream->codec;
    if (encoder->type == AVMEDIA_TYPE_AUDIO) {
        enc_ctx->channels    = 2;
        enc_ctx->sample_rate = 44100;
        enc_ctx->bit_rate    = 64000;
        enc_ctx->sample_fmt  = AV_SAMPLE_FMT_FLTP;
    }

    /* Containers that keep codec headers out of band need this flag. */
    if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
        enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    return stream;
}

/* Test-tone generator state. tincr2 is the per-sample increase applied so the
 * frequency rises by 110 Hz per second (set in open_audio); t and tincr are
 * presumably the running phase and its per-sample step — their use sites are
 * truncated in this listing, confirm. */
static float t, tincr, tincr2;
/* Source sample planes: filled by get_audio_frame() as int16_t samples and
 * passed to swr_convert() as its input. */
static uint8_t **src_samples_data;
/* Line size reported by av_samples_alloc_array_and_samples() for the source buffer. */
static int       src_samples_linesize;
/* Samples generated per frame: 10000 for variable-frame-size encoders,
 * otherwise the encoder's frame_size. */
static int       src_nb_samples;
/* Capacity threshold (in samples) that write_audio_frame() compares
 * dst_nb_samples against before reallocating the destination buffer. */
static int max_dst_nb_samples;
/* Destination sample planes: output of swr_convert(), handed to the encoder. */
uint8_t **dst_samples_data;
/* Line size reported by av_samples_alloc() for the destination buffer. */
int       dst_samples_linesize;
/* Buffer size passed to avcodec_fill_audio_frame(), as computed by
 * av_samples_get_buffer_size(). */
int       dst_samples_size;
/* Resampler context; allocated in open_audio() and used in write_audio_frame()
 * only when non-NULL. */
struct SwrContext *swr_ctx = NULL;
/*
 * Open the audio encoder for `st` and set up everything write_audio_frame()
 * needs: the test-tone generator state, the S16 source buffer, the resampler
 * (when the encoder does not take S16 directly) and the destination buffer.
 *
 * oc    - output context (unused here, kept for interface compatibility)
 * codec - encoder returned by add_stream()
 * st    - stream whose codec context is opened
 *
 * Exits with status 1 on any failure.
 *
 * The listing this was taken from lost every span between a '<' and the next
 * '>' (error-check bodies, generator init, max_dst_nb_samples assignment);
 * those parts are restored following the FFmpeg 2.0 muxing example.
 */
static void open_audio(AVFormatContext *oc, AVCodec *codec, AVStream *st)
{
    AVCodecContext *c = st->codec;
    int ret;

    (void)oc;

    /* Audio timestamps are counted in samples; give the encoder a matching
     * time base if the caller did not set one, otherwise frame/packet pts
     * cannot be interpreted. */
    if (c->time_base.num <= 0 || c->time_base.den <= 0)
        c->time_base = (AVRational){ 1, c->sample_rate };

    /* open it */
    ret = avcodec_open2(c, codec, NULL);
    if (ret < 0) {
        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
        exit(1);
    }

    /* init signal generator */
    t     = 0;
    tincr = 2 * M_PI * 110.0 / c->sample_rate;
    /* increment frequency by 110 Hz per second */
    tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;

    src_nb_samples = c->codec->capabilities & CODEC_CAP_VARIABLE_FRAME_SIZE ?
        10000 : c->frame_size;

    /* The generator writes interleaved S16 (the format declared to the
     * resampler as "in_sample_fmt"), so size the source buffer for S16
     * rather than for the encoder's sample format. */
    ret = av_samples_alloc_array_and_samples(&src_samples_data, &src_samples_linesize, c->channels,
                                             src_nb_samples, AV_SAMPLE_FMT_S16, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate source samples\n");
        exit(1);
    }

    /* create resampler context */
    if (c->sample_fmt != AV_SAMPLE_FMT_S16) {
        swr_ctx = swr_alloc();
        if (!swr_ctx) {
            fprintf(stderr, "Could not allocate resampler context\n");
            exit(1);
        }
        /* set options */
        av_opt_set_int       (swr_ctx, "in_channel_count",   c->channels,       0);
        av_opt_set_int       (swr_ctx, "in_sample_rate",     c->sample_rate,    0);
        av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16, 0);
        av_opt_set_int       (swr_ctx, "out_channel_count",  c->channels,       0);
        av_opt_set_int       (swr_ctx, "out_sample_rate",    c->sample_rate,    0);
        /* Must agree with the format the destination buffer is sized for. */
        av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt",     c->sample_fmt,     0);
        /* initialize the resampling context */
        if ((ret = swr_init(swr_ctx)) < 0) {
            fprintf(stderr, "Failed to initialize the resampling context\n");
            exit(1);
        }
    }

    /* Input and output rates are equal, so a destination buffer of
     * src_nb_samples holds one converted frame; write_audio_frame() grows it
     * if the resampler's delay ever requires more. */
    max_dst_nb_samples = src_nb_samples;
    ret = av_samples_alloc_array_and_samples(&dst_samples_data, &dst_samples_linesize, c->channels,
                                             max_dst_nb_samples, c->sample_fmt, 0);
    if (ret < 0) {
        fprintf(stderr, "Could not allocate destination samples\n");
        exit(1);
    }
    dst_samples_size = av_samples_get_buffer_size(NULL, c->channels, max_dst_nb_samples,
                                                  c->sample_fmt, 0);
}

static void get_audio_frame(int16_t *samples, int frame_size, int nb_channels)
{
    int j, i, v;
    int16_t *q;
    q = samples;
    for (j = 0; j codec;
    get_audio_frame((int16_t *)src_samples_data[0], src_nb_samples, c->channels);
    /* convert samples from native format to destination codec format, using the resampler */
    if (swr_ctx) {
        /* compute destination number of samples */
        dst_nb_samples = av_rescale_rnd(swr_get_delay(swr_ctx, c->sample_rate) + src_nb_samples,
                                        c->sample_rate, c->sample_rate, AV_ROUND_UP);
        if (dst_nb_samples > max_dst_nb_samples) {
            av_free(dst_samples_data[0]);
            ret = av_samples_alloc(dst_samples_data, &dst_samples_linesize, c->channels,
                                   dst_nb_samples, c->sample_fmt, 0);
            if (ret channels, dst_nb_samples,
                                                          c->sample_fmt, 0);
        }
        /* convert to destination format */
        ret = swr_convert(swr_ctx,
                          dst_samples_data, dst_nb_samples,
                          (const uint8_t **)src_samples_data, src_nb_samples);
        if (ret nb_samples = dst_nb_samples;
    avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt,
                             dst_samples_data[0], dst_samples_size, 0);
    ret = avcodec_encode_audio2(c, &pkt, frame, &got_packet);
    if (ret index;
    /* Write the compressed frame to the media file. */
    ret = av_interleaved_write_frame(oc, &pkt);
    if (ret != 0) {
        fprintf(stderr, "Error while writing audio frame: %s\n",
                av_err2str(ret));
        exit(1);
    }
    avcodec_free_frame(&frame);
}

/*
 * Close the audio encoder of `st` and release the sample buffers and the
 * resampler used by the audio path.
 *
 * oc - output context (unused, kept for interface compatibility)
 * st - audio stream whose codec context is closed
 *
 * Beyond freeing the first plane of each buffer, this also frees the
 * plane-pointer arrays and the resampler context, and avoids a double free
 * when the destination plane pointer aliases the source plane (the
 * pass-through case with no resampler).
 */
static void close_audio(AVFormatContext *oc, AVStream *st)
{
    (void)oc;

    avcodec_close(st->codec);

    if (dst_samples_data) {
        /* Only free the destination samples if they are a separate buffer. */
        if (!src_samples_data || dst_samples_data[0] != src_samples_data[0])
            av_freep(&dst_samples_data[0]);
        av_freep(&dst_samples_data);
    }
    if (src_samples_data) {
        av_freep(&src_samples_data[0]);
        av_freep(&src_samples_data);
    }

    /* Accepts a pointer to a NULL context and resets swr_ctx to NULL. */
    swr_free(&swr_ctx);
}

int main(int argc, char *argv[]) {
  AVOutputFormat *fmt;
  AVFormatContext *oc;
  AVStream *audio_st;
  AVCodec *audio_codec;
  double audio_time, video_time;
  int ret = 0;
  const char *input = argv[1];
  const char *output = argv[2];

  av_register_all();
  avformat_alloc_output_context2(&oc, NULL, NULL, output);
  if(!oc) {
    printf("Could not alloc the output context");
    return 1;
  }

  fmt = oc->oformat;

  audio_st = NULL;
  if(fmt->audio_codec != AV_CODEC_ID_NONE) {
    audio_st = add_stream(oc, &audio_codec, fmt->audio_codec);
    printf("Started audio stream with codec %s\n", audio_codec->name);
  }

  if(audio_st) {
    open_audio(oc, audio_codec, audio_st);
  }
  av_dump_format(oc, 0, output, 1);

  if (!(fmt->flags & AVFMT_NOFILE)) {
    ret = avio_open(&oc->pb, output, AVIO_FLAG_WRITE);
    if (ret pts = 0;
  for (;;) {
    audio_time = audio_st ? audio_st->pts.val * av_q2d(audio_st->time_base) : 0.0;
    if ((!audio_st || audio_time >= STREAM_DURATION))
      break;
    write_audio_frame(oc, audio_st);
  }
  av_write_trailer(oc);
  if(audio_st)
    close_audio(oc, audio_st);
  if(!(fmt->flags & AVFMT_NOFILE))
    avio_close(oc->pb);
  avformat_free_context(oc);
  return 0;

}

compiled with

clang -o converter -lavcodec -lavformat -lavutil -lswresample -lvorbis converter.c

output


~/v/converter> ./converter test.wav test.webm
Started audio stream with codec libvorbis
Output #0, webm, to 'test.webm':
    Stream #0:0: Audio: vorbis (libvorbis), 44100 Hz, 2 channels, fltp, 64 kb/s
[libvorbis @ 0x7fdafb800600] 33 frames left in the queue on closing
1

There is 1 answer

2
AnthonyM On

So it turns out the answer is to properly set up the pts data for the audio stream.