I am trying to run the code on the link Here is an example kernel where they use a pretrained VGG16 model as the encoder portion of a U-Net. on the line [t0_img], dm_img = next(train_gen) I get the error ValueError: could not convert string to float: 'eb91b1c659a0_12' .

what can I do to fix this?

"""Using a pretrained model to segment Here is an example kernel where we use a pretrained VGG16 model as the encoder portion of a U-Net and thus can benefit from the features already created in the model and only focus on learning the specific decoding features. The strategy was used with LinkNet by one of the top placers in the competition. I wanted to see how well it worked in particular comparing it to standard or non-pretrained approaches, the code is setup now for VGG16 but can be easily adapted to other problems"""

base_dir = r'E:\Python\carvana-image-masking-challenge\\'

all_img_df = pd.DataFrame(dict(path=glob(os.path.join(base_dir, 'train', '*.*'))))
all_img_df['key_id'] = all_img_df['path'].map(lambda x: splitext(os.path.basename(x))[0])
all_img_df['car_id'] = all_img_df['key_id'].map(lambda x: x.split('_')[0])
all_img_df['mask_path'] = all_img_df['path'].map(lambda x: x.replace('train', 'train_masks').replace('.jpg', '_mask.gif'))
all_img_df['exists'] = all_img_df['mask_path'].map(os.path.exists)
print(all_img_df['exists'].value_counts())
print(all_img_df.sample(3))


def read_diff_img(c_row):
    t0_img = imread(c_row['path'])[:, :, 0:3]
    cg_img = imread(c_row['mask_path'], as_gray=True)
    return t0_img, cg_img


def make_change_figure(c_row):
    a,c = read_diff_img(c_row)
    fig, (ax1, ax3) = plt.subplots(1, 2, figsize=(21, 7))
    ax1.imshow(a)
    ax1.set_title('Before')
    ax1.axis('off')
    d = skimage.measure.label(c)
    ax3.imshow(d, cmap='nipy_spectral_r')
    ax3.set_title('Changes')
    ax3.axis('off')
    return fig


_, t_row = next(all_img_df.sample(1).iterrows())
make_change_figure(t_row).savefig('overview.png', dpi=300)
a,c = read_diff_img(t_row)
plt.imshow(c, cmap='nipy_spectral_r')
plt.show()
print(a.shape, c.shape)

"""Training and Validation Split
Here we split based on scene so the model doesn't overfit the individual images"""

from sklearn.model_selection import train_test_split


def train_test_split_on_group(in_df, col_id, **kwargs):
    group_val = np.unique(in_df[col_id])
    train_ids, test_ids = train_test_split(group_val, **kwargs)
    return in_df[in_df[col_id].isin(train_ids)], in_df[in_df[col_id].isin(test_ids)]


train_df, valid_df = train_test_split_on_group(all_img_df, col_id='car_id', random_state=2018, test_size=0.2)
valid_df, test_df = train_test_split_on_group(valid_df, col_id='car_id', random_state=2018, test_size=0.5)
print(train_df.shape, 'training images')
print(valid_df.shape, 'validation images')
print(test_df.shape, 'test images')

# Augmenting Data

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import preprocess_input


dg_args = dict(featurewise_center=False
                  , samplewise_center=False
                  , rotation_range=5
                  , width_shift_range=0.01
                  , height_shift_range=0.01
                  , shear_range=0.01
                  , zoom_range=[0.9, 1.1]
                  , horizontal_flip=True
                  , vertical_flip=False # no upside down cars
                  , fill_mode = 'nearest'
                  , data_format = 'channels_last'
               , preprocessing_function = preprocess_input)
IMG_SIZE = (512, 512) # slightly smaller than vgg16 normally expects
default_batch_size = 8
core_idg = ImageDataGenerator(**dg_args)
mask_args = dg_args.copy()
mask_args['preprocessing_function'] = lambda x: x/255.0
mask_idg = ImageDataGenerator(**mask_args)


def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    # base_dir = E:\Python\carvana-image-masking-challenge\\train
    base_dir = os.path.dirname(in_df[path_col].values[0])
    print('## Ignore next message from keras, values are replaced anyways')
    df_gen = img_data_gen.flow_from_directory(base_dir, class_mode='sparse', **dflow_args)
    df_gen.filenames = in_df[path_col].values
    df_gen.classes = np.stack(in_df[y_col].values)
    df_gen.samples = in_df.shape[0]
    df_gen.n = in_df.shape[0]
    df_gen._set_index_array()
    df_gen.directory = '' # since we have the full path
    print('Reinserting dataframe: {} images'.format(in_df.shape[0]))
    return df_gen


def make_gen(img_gen, mask_gen, in_df, batch_size=default_batch_size, seed=None, shuffle=True):
    if seed is None:
        seed = np.random.choice(range(9999))
    flow_args = dict(target_size=IMG_SIZE, batch_size=batch_size, seed=seed, shuffle=shuffle, y_col='key_id')
    t0_gen = flow_from_dataframe(img_gen, in_df, path_col='path', color_mode='rgb', **flow_args)
    dm_gen = flow_from_dataframe(mask_gen, in_df, path_col='mask_path', color_mode='grayscale', **flow_args)
    for (t0_img, _), (dm_img, _) in zip(t0_gen, dm_gen):
        yield [t0_img], dm_img


train_gen = make_gen(core_idg, mask_idg, train_df)
valid_gen = make_gen(core_idg, mask_idg, valid_df, seed=0, shuffle=False)
test_gen = make_gen(core_idg, mask_idg, test_df, seed=0, shuffle=False, batch_size=2*default_batch_size)
[t0_img], dm_img = next(train_gen)
print(t0_img.shape, t0_img.max())
print(dm_img.shape, dm_img.max(), dm_img.mean())

0 Answers