How do I encode and decode audio with the Speex codec?

45 views Asked by At

I am encoding and decoding a 100 Hz PCM file using the Speex codec in C# WPF. The conditions are: 70 bytes (the size of 1 frame, generated every 20 ms) x 2; sample rate: 16 kHz; bits per sample: 16; raw data size per 20 ms: 640 bytes; Speex-encoded size per 20 ms: 70 bytes. However, when I encoded and decoded the file, converted the result to a WAV file, and listened to it, it sounded very different from the original file. I'm not sure what the problem is.

/// <summary>
/// P/Invoke wrapper around libspeex.dll plus helpers that encode a raw PCM file to
/// Speex frames, decode it back, and convert raw PCM to WAV.
/// </summary>
public class SpeexLib
{
    /// <summary>
    /// Managed mirror of the native SpeexBits structure. The field order and types
    /// follow the C declarations noted beside each field and must not be rearranged,
    /// because the struct is passed by reference to native code.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct SpeexBits
    {
        public IntPtr chars;     // char *chars;
        public int nbBits;       // int nbBits;
        public int charPtr;      // int charPtr;
        public int bitPtr;       // int bitPtr;
        public int owner;        // int owner;
        public int overflow;     // int overflow;
        public int buf_size;     // int buf_size;
        public int reserved1;    // int reserved1;
        public IntPtr reserved2; // void *reserved2;
    }

    // Name of the native library all imports below bind to.
    private const string LIBSPEEX = "libspeex.dll";

    // NOTE: libspeex exports ordinary C functions, which use the cdecl calling
    // convention. Declaring them as StdCall makes the callee/caller disagree about
    // who cleans the stack in 32-bit processes, so every import uses Cdecl.

    /// <summary>Creates an encoder for the given mode; returns the native encoder state.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern IntPtr speex_encoder_init(IntPtr mode);

    /// <summary>Releases an encoder state returned by <see cref="speex_encoder_init"/>.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern void speex_encoder_destroy(IntPtr state);

    /// <summary>Resets the bit buffer so a new frame can be written into it.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern void speex_bits_reset(ref SpeexBits bits);

    /// <summary>Encodes one frame of 16-bit integer samples into <paramref name="speexbits"/>.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern int speex_encode_int(IntPtr state, short[] input, ref SpeexBits speexbits);

    /// <summary>Copies the buffered bits into <paramref name="output"/> (at most <paramref name="maxBytes"/>); returns the byte count written.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern int speex_bits_write(ref SpeexBits bits, byte[] output, int maxBytes);

    /// <summary>Creates a decoder for the given mode; returns the native decoder state.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern IntPtr speex_decoder_init(IntPtr mode);

    /// <summary>Releases a decoder state returned by <see cref="speex_decoder_init"/>.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern void speex_decoder_destroy(IntPtr state);

    /// <summary>Decodes one frame from <paramref name="bits"/> into float samples.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern int speex_decode(IntPtr state, ref SpeexBits bits, float[] output);

    /// <summary>Initializes a bit buffer; pair with <see cref="speex_bits_destroy"/>.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern void speex_bits_init(out SpeexBits bits);

    /// <summary>Frees the resources held by a bit buffer.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern void speex_bits_destroy(ref SpeexBits bits);

    /// <summary>Looks up a mode descriptor by mode ID.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern IntPtr speex_lib_get_mode(int mode);

    /// <summary>Loads <paramref name="len"/> encoded bytes from <paramref name="chars"/> into the bit buffer.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern void speex_bits_read_from(ref SpeexBits bits, byte[] chars, int len);

    /// <summary>Initializes a bit buffer on top of caller-supplied memory.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern void speex_bits_init_buffer(ref SpeexBits bits, IntPtr buff, int buf_size);

    // Added section
    /// <summary>Gets or sets an encoder parameter identified by <paramref name="request"/>.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern int speex_encoder_ctl(IntPtr state, int request, ref int value);

    /// <summary>Gets or sets a decoder parameter identified by <paramref name="request"/>.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern int speex_decoder_ctl(IntPtr state, int request, ref int value);

    /// <summary>Points the bit buffer at caller-supplied memory.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern void speex_bits_set_bit_buffer(ref SpeexBits bits, IntPtr buff, int buf_size);

    /// <summary>Encodes one frame of float samples into <paramref name="bits"/>.</summary>
    [DllImport(LIBSPEEX, CallingConvention = CallingConvention.Cdecl)]
    public static extern int speex_encode(IntPtr state, float[] input, ref SpeexBits bits);

    /// <summary>
    /// Placeholder: the body is empty, so calling this method has no effect.
    /// </summary>
    public void initSpeexBits() { }

    // Speex mode IDs (SPEEX_MODEID_WB is the one handed to speex_lib_get_mode in this class).
    private const int SPEEX_MODEID_NB = 0;
    private const int SPEEX_MODEID_WB = 1;

    // Request IDs for the speex_*_ctl calls.
    private const int SPEEX_GET_FRAME_SIZE = 3;
    private const int SPEEX_SET_QUALITY = 4;
    private const int SPEEX_SET_BITRATE = 18;
    private const int SPEEX_SET_SAMPLING_RATE = 24; // request ID used to set the sample rate

    // Frame geometry.
    private const int FRAME_SIZE = 320;     // 20 ms of data (16 kHz sample rate)
    private const int COMPRESSED_SIZE = 70; // encoded size


    /// <summary>
    /// Encodes the raw 16-bit PCM file "원본2.pcm" (located next to the executing assembly)
    /// with the wideband Speex encoder and writes the result to "path_to_output_file.pcm".
    /// Each encoded frame is stored as a 4-byte length followed by that many payload bytes.
    /// </summary>
    /// <remarks>
    /// The final frame is zero-padded when the input does not contain a whole number of
    /// FRAME_SIZE-sample frames. A dangling odd byte at the end of the input is ignored.
    /// </remarks>
    /// <exception cref="InvalidOperationException">The native encoder could not be created.</exception>
    public void EncodePCMToSpeex2()
    {
        string exePath = Assembly.GetExecutingAssembly().Location;
        string exeDirectory = Path.GetDirectoryName(exePath);
        string inputFilePath = Path.Combine(exeDirectory, "원본2.pcm");
        string outputFilePath = Path.Combine(exeDirectory, "path_to_output_file.pcm");

        float[] input = new float[FRAME_SIZE];
        byte[] cbits = new byte[COMPRESSED_SIZE];

        // Create and configure the encoder.
        IntPtr state = speex_encoder_init(speex_lib_get_mode(SPEEX_MODEID_WB));
        if (state == IntPtr.Zero)
        {
            throw new InvalidOperationException("speex_encoder_init returned a null encoder state.");
        }

        SpeexBits bits = default(SpeexBits);
        bool bitsInitialized = false;
        try
        {
            int quality = 8; // quality setting
            speex_encoder_ctl(state, SPEEX_SET_QUALITY, ref quality);
            int sampleRate = 16000; // 16 kHz
            speex_encoder_ctl(state, SPEEX_SET_SAMPLING_RATE, ref sampleRate);

            speex_bits_init(out bits);
            bitsInitialized = true;

            using (var fin = new FileStream(inputFilePath, FileMode.Open, FileAccess.Read))
            using (var fout = new FileStream(outputFilePath, FileMode.Create))
            using (var bin = new BinaryReader(fin))
            using (var bout = new BinaryWriter(fout))
            {
                // Require a full 16-bit sample before starting a frame; comparing only
                // Position < Length would let ReadInt16 throw on a trailing odd byte.
                while (fin.Length - fin.Position >= sizeof(short))
                {
                    for (int i = 0; i < FRAME_SIZE; i++)
                    {
                        if (fin.Length - fin.Position >= sizeof(short))
                        {
                            input[i] = bin.ReadInt16();
                        }
                        else
                        {
                            input[i] = 0; // pad the last, partial frame with silence
                        }
                    }

                    speex_bits_reset(ref bits);
                    speex_encode(state, input, ref bits);
                    int nbBytes = speex_bits_write(ref bits, cbits, COMPRESSED_SIZE);

                    bout.Write(nbBytes);
                    bout.Write(cbits, 0, nbBytes);
                }
            }
        }
        finally
        {
            // Release native resources even when file I/O or encoding throws.
            if (bitsInitialized)
            {
                speex_bits_destroy(ref bits);
            }
            speex_encoder_destroy(state);
        }
    }

    /// <summary>
    /// Decodes the length-prefixed Speex frames stored in "path_to_output_file.pcm"
    /// (located next to the executing assembly) with the wideband decoder and writes the
    /// samples as 16-bit PCM to "path_to_output_file2.pcm".
    /// </summary>
    /// <exception cref="InvalidOperationException">The native decoder could not be created.</exception>
    /// <exception cref="InvalidDataException">
    /// A frame length is non-positive or exceeds the remaining file size, the payload is
    /// truncated, or the decoder reports an error for a frame.
    /// </exception>
    public void DecodeSpeexToPCM()
    {
        // Request ID that sets the decoder's perceptual enhancement flag. The value 1
        // used previously is the matching "get" request, which only reads the setting.
        const int SPEEX_SET_ENH = 0;

        string exePath = Assembly.GetExecutingAssembly().Location;
        string exeDirectory = Path.GetDirectoryName(exePath);
        string inputFilePath = Path.Combine(exeDirectory, "path_to_output_file.pcm");
        string outputFilePath = Path.Combine(exeDirectory, "path_to_output_file2.pcm");

        IntPtr state = speex_decoder_init(speex_lib_get_mode(SPEEX_MODEID_WB));
        if (state == IntPtr.Zero)
        {
            throw new InvalidOperationException("speex_decoder_init returned a null decoder state.");
        }

        SpeexBits bits = default(SpeexBits);
        bool bitsInitialized = false;
        try
        {
            int enh = 1; // enable sound-quality enhancement
            speex_decoder_ctl(state, SPEEX_SET_ENH, ref enh);

            speex_bits_init(out bits);
            bitsInitialized = true;

            // One frame of decoded samples; reused for every frame.
            float[] output = new float[FRAME_SIZE];

            using (var fin = new FileStream(inputFilePath, FileMode.Open, FileAccess.Read))
            using (var fout = new FileStream(outputFilePath, FileMode.Create))
            using (var bin = new BinaryReader(fin))
            using (var bout = new BinaryWriter(fout))
            {
                while (fin.Position < fin.Length)
                {
                    int nbBytes = bin.ReadInt32(); // number of encoded bytes in this frame
                    if (nbBytes <= 0 || nbBytes > fin.Length - fin.Position)
                    {
                        throw new InvalidDataException("Invalid Speex frame length: " + nbBytes);
                    }

                    byte[] cbits = bin.ReadBytes(nbBytes);
                    if (cbits.Length != nbBytes)
                    {
                        // Never hand native code a length larger than the managed buffer.
                        throw new InvalidDataException("Truncated Speex frame payload.");
                    }

                    speex_bits_read_from(ref bits, cbits, nbBytes);

                    int status = speex_decode(state, ref bits, output);
                    if (status != 0)
                    {
                        throw new InvalidDataException("speex_decode failed with code " + status);
                    }

                    foreach (float sample in output)
                    {
                        bout.Write(DecodedSampleToPcm16(sample));
                    }
                }
            }
        }
        finally
        {
            // Release native resources even when file I/O or decoding throws.
            if (bitsInitialized)
            {
                speex_bits_destroy(ref bits);
            }
            speex_decoder_destroy(state);
        }
    }

    // Rounds a decoded sample and saturates it to the 16-bit range, so that values
    // slightly outside that range do not wrap around when narrowed to short.
    private static short DecodedSampleToPcm16(float sample)
    {
        if (float.IsNaN(sample))
        {
            return 0;
        }

        double rounded = Math.Round(sample);
        if (rounded >= short.MaxValue)
        {
            return short.MaxValue;
        }
        if (rounded <= short.MinValue)
        {
            return short.MinValue;
        }
        return (short)rounded;
    }


    /// <summary>
    /// Wraps the raw PCM data in <paramref name="inputFile"/> in a WAV container and
    /// writes it to <paramref name="outputFile"/>.
    /// </summary>
    /// <param name="inputFile">Path of the headerless PCM file to read.</param>
    /// <param name="outputFile">Path of the WAV file to create.</param>
    /// <param name="sampleRate">Sample rate passed to the wave format.</param>
    /// <param name="channels">Channel count passed to the wave format.</param>
    /// <param name="bitsPerSample">Bits per sample passed to the wave format.</param>
    public void ConvertPcmToWav(string inputFile, string outputFile, int sampleRate, int channels, int bitsPerSample)
    {
        // The file stream gets its own using so it is closed even if constructing the
        // wave stream or the writer throws, and regardless of whether the wrapping
        // stream disposes its source.
        using (var source = File.OpenRead(inputFile))
        using (var reader = new RawSourceWaveStream(source, new WaveFormat(sampleRate, bitsPerSample, channels)))
        using (var writer = new WaveFileWriter(outputFile, reader.WaveFormat))
        {
            reader.CopyTo(writer);
        }
    }

    /// <summary>
    /// Converts float samples to 16-bit samples by scaling each value by
    /// <see cref="short.MaxValue"/>.
    /// </summary>
    /// <param name="floatArray">Samples to convert; values in [-1, 1] map onto the 16-bit range.</param>
    /// <returns>A new array with one 16-bit sample per input sample.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="floatArray"/> is null.</exception>
    private short[] ConvertFloatArrayToShortArray(float[] floatArray)
    {
        if (floatArray == null)
        {
            throw new ArgumentNullException(nameof(floatArray));
        }

        short[] shortArray = new short[floatArray.Length];
        for (int i = 0; i < floatArray.Length; i++)
        {
            float scaled = floatArray[i] * short.MaxValue;

            // Saturate instead of letting out-of-range values overflow the short cast.
            if (float.IsNaN(scaled))
            {
                shortArray[i] = 0;
            }
            else if (scaled >= short.MaxValue)
            {
                shortArray[i] = short.MaxValue;
            }
            else if (scaled <= short.MinValue)
            {
                shortArray[i] = short.MinValue;
            }
            else
            {
                shortArray[i] = (short)scaled;
            }
        }
        return shortArray;
    }
  

}

This is my code. Please help me.

I don't know if the encoding or decoding is wrong.

1

There are 1 answers

0
JonasH On BEST ANSWER

Speex is a lossy compression codec specifically designed to encode human speech. 100 Hz is fairly close to the lower limit of human hearing, and not inside the typical frequency range for speech. So a large distortion is expected, since you are apparently using a codec far outside its intended usage scenario. I would recommend either using a lossless encoder, or some lossy encoder better suited to your particular type of data.

I'm kind of surprised you get any output at all, some speech codecs apply a fairly narrow band pass filter that might have removed all of your signal.