I'm trying to implement speaker adaptation of a UBM using sidekit, and I got the following error:

Exception: show enroll/something.wav is not in the HDF5 file

I have two folders, "enroll" and "test", under the folder "feat"; they contain the features (.h5) for training and testing respectively. My enroll_idmap is generated from the training audio files (.wav) only.

The error above is raised during the execution of enroll_stat.accumulate_stat(…).

Could anyone tell me what this error means and how to fix it?

import sidekit
import os    
import numpy as np    
import h5py

# --- Experiment configuration ---------------------------------------------
nbThread = 4  # passed as num_thread to the sidekit calls further down
NUM_GUASSIANS = 64  # used to build the UBM / statistics / score file names
BASE_DIR = "./Database/sidekit_data"

# Enrollment id-map, read from <BASE_DIR>/task/enroll_idmap.h5.
enroll_idmap_path = os.path.join(BASE_DIR, "task", "enroll_idmap.h5")
enroll_idmap = sidekit.IdMap.read(enroll_idmap_path)

# UBM, read from <BASE_DIR>/ubm/ubm_<NUM_GUASSIANS>.h5 into an empty Mixture.
model_name = "ubm_{}.h5".format(NUM_GUASSIANS)
ubm_path = os.path.join(BASE_DIR, "ubm", model_name)
ubm = sidekit.Mixture()
ubm.read(ubm_path)

# Feature server used for both enrollment statistics and trial scoring.
# The file-name template is built from BASE_DIR with os.path.join so that it
# cannot contain stray whitespace: the previous literal had a run of spaces
# between "sidekit_data" and "/feat", which does not name a real directory.
# "{}" is the placeholder left in the template for sidekit to fill in
# (presumably with the show name — see the FeaturesServer documentation).
server_eval = sidekit.FeaturesServer(
    feature_filename_structure=os.path.join(BASE_DIR, "feat", "{}.h5"),
    sources=None,
    dataset_list=["vad", "energy", "cep", "fb"],
    feat_norm="cmvn",
    global_cmvn=None,
    dct_pca=False,
    dct_pca_config=None,
    sdc=False,
    sdc_config=None,
    delta=True,
    double_delta=True,
    delta_filter=None,
    context=None,
    traps_dct_nb=None,
    rasta=True,
    keep_all_features=True,
)

print("Compute the sufficient statistics")

# enroll_stat was used below without ever being created in this script.
# Build it from the enrollment id-map; passing the UBM lets sidekit size the
# statistics from the model (see the StatServer constructor documentation).
enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap, ubm=ubm)

# Accumulate statistics for every segment listed in the StatServer.
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=server_eval,
                            seg_indices=range(enroll_stat.segset.shape[0]),
                            num_thread=nbThread
                            )

# Save the statistics to <BASE_DIR>/stat/enroll_stat_<NUM_GUASSIANS>.h5.
filename = "enroll_stat_{}.h5".format(NUM_GUASSIANS)
enroll_stat.write(os.path.join(BASE_DIR, "stat", filename))

print("MAP adaptation of the speaker models")

# Derive the speaker models from the enrollment statistics and the UBM;
# r=3 is forwarded unchanged to sidekit's multi-session MAP adaptation.
enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm=ubm, r=3)

print("Compute trial scores")

# test_ndx was passed to gmm_scoring without being defined in this script.
# NOTE(review): the file name below mirrors the location of enroll_idmap.h5
# and is an assumption — point it at the actual trial index file.
test_ndx = sidekit.Ndx.read(os.path.join(BASE_DIR, "task", "test_ndx.h5"))

# Score the trials in test_ndx against the adapted speaker models.
scores_gmm_ubm = sidekit.gmm_scoring(ubm=ubm,
                                     enroll=enroll_sv,
                                     ndx=test_ndx,
                                     feature_server=server_eval,
                                     num_thread=nbThread
                                     )

# Save the scores to <BASE_DIR>/result/ubm_scores_<NUM_GUASSIANS>.h5.
filename = "ubm_scores_{}.h5".format(NUM_GUASSIANS)
scores_gmm_ubm.write(os.path.join(BASE_DIR, "result", filename))

# Write a human-readable dump of the score matrix: one "Wav:" entry per test
# segment, followed by the score of every enrolled speaker for that segment.
filename = "ubm_scores_explained_{}.txt".format(NUM_GUASSIANS)
modelset = list(scores_gmm_ubm.modelset)
segset = list(scores_gmm_ubm.segset)
scores = np.array(scores_gmm_ubm.scoremat)

# Mode "w" empties the file on open, which replaces the former
# open(..., "a") + truncate(0) pair; the with-block guarantees the file is
# closed even if a write fails part-way through.
with open(os.path.join(BASE_DIR, "result", filename), "w") as fout:
    for seg_idx, seg in enumerate(segset):
        fout.write("Wav: {}\n".format(seg))
        # scores is indexed [speaker, segment], matching the lookup below.
        for speaker_idx, speaker in enumerate(modelset):
            fout.write("\tSpeaker {}:\t{}\n".format(speaker, scores[speaker_idx, seg_idx]))
        fout.write("\n")

0 Answers