Data cardinality is ambiguous: x sizes: 15, 15; y sizes: 30. Make sure all arrays contain the same number of samples

    def train_gan(self, epochs, batch_size=30, save_interval=50):
        print('Train')
        self.img_rows = 256
        self.img_cols = 256

        # Load and preprocess the dataset
        print('start train_gan')
        # Define the directory path where the images are located
        directory_path = '/content/drive/MyDrive/AllCroppedROIas265by265enhancedWithoutPeriodicNoise'

        # Initialize a list to store the images
        image_list = []

        # Loop through all the image files in the directory
        for filename in glob.glob(os.path.join(directory_path, '*')):
            # Open each image file
            im = Image.open(filename)
    
            # Resize the image to the desired dimensions (e.g., 256x256)
            img_cols, img_rows = 256, 256
            reim = im.resize((img_cols, img_rows), Image.LANCZOS)
    
            # Convert the image to a NumPy array and append it to the image list
            image_list.append(np.array(reim))

        # Check if all images have the same shape (256x256)
        image_shape = image_list[0].shape
        for img in image_list:
            if img.shape != image_shape:
                raise ValueError("All images in image_list must have the same shape.")

        # Convert the list of images to a stack (numpy array)
        image_stack = np.stack(image_list)
        X_train = image_stack
        X_train = (X_train.astype(np.float32) - 127.5) / 127.5  # Rescale -1 to 1
        print('reached X-train')
        half_batch = int(batch_size / 2)

        # Build the model and compile it
        self.build_gan()

        # Define the mapping of pathology values to integers
        pathology_to_int = {
            'BENIGN': 0,
            'BENIGN_WITH_CALLBACK': 0,
            'BENIGN_WITHOUT_CALLBACK': 0,
            'MALIGNANT': 1,
            'MALIGNANT_WITH_CALLBACK': 1,
            'MALIGNANT_WITHOUT_CALLBACK': 1,
        }

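        # NOTE: pathologies is assumed to be a list of per-image pathology strings
        # defined elsewhere in the class; it is not created in this method.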
        # Convert the pathologies to integer representations based on the mapping
        pathologies_int = np.array([pathology_to_int[pathology.upper()] for pathology in pathologies])

        for epoch in range(epochs):
            # Sample noise and labels for the discriminator
            noise_dis, sampled_labels_dis = self.generate_noise_and_labels(batch_size)

            # Train the discriminator
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs, labels = X_train[idx], pathologies_int[idx]
            # Resize the real images to the required size (256x256)
            imgs = np.array([np.array(Image.fromarray(img).resize((256, 256))) for img in imgs])
            # Reshape the images to have the required 4-dimensional shape
            imgs = np.expand_dims(imgs, axis=-1)  # Add channel dimension (channels=1 initially)

            # Sample noise and labels for the generator
            noise_gen, sampled_labels_gen = self.generate_noise_and_labels(half_batch)

            # Generate a half batch of new images
            gen_imgs = self.generator.predict([noise_gen, sampled_labels_gen])
            gen_imgs_single_channel = np.mean(gen_imgs, axis=-1, keepdims=True)  # Convert to single-channel

            # Train the discriminator (real classified as ones and generated as zeros)
            d_loss_real = self.discriminator.train_on_batch(imgs, np.ones((batch_size, 1)))
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs_single_channel, np.zeros((batch_size, 1)))
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Train the generator (wants discriminator to mistake images as real)
            noise_gen, sampled_labels_gen = self.generate_noise_and_labels(half_batch)
            valid = np.ones((batch_size, 1))
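            # Note: noise_gen and sampled_labels_gen hold half_batch (15) samples,
            # while valid has batch_size (30) rows, so train_on_batch reports
            # "Data cardinality is ambiguous: x sizes: 15, 15  y sizes: 30".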
            g_loss = self.combined.train_on_batch([noise_gen, sampled_labels_gen], valid)

            # Plot the progress
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100 * d_loss[1], g_loss))

            # If at save interval => save generated image samples
            if epoch % save_interval == 0:
                self.save_imgs(epoch, sampled_labels_gen)

I am trying to read 256x256 images and use a CGAN to generate synthetic images. In the code above, the arrays passed in the g_loss step end up with different sizes: I tried a half_batch of 15 and a batch_size of 30, but nothing worked, and I really don't understand how to solve this issue. Can someone kindly help me understand and fix it?
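For reference, here is a minimal sketch of how the loop body could keep every x/y pair the same length, assuming the usual convention of training the discriminator on half_batch real plus half_batch generated samples and the combined model on batch_size noise/label pairs. The names (generate_noise_and_labels, self.generator, self.discriminator, self.combined, pathologies_int) are taken from the code above; the exact input shapes depend on how build_gan is defined.

    # Inside the epoch loop: every x/y pair passed to train_on_batch must have
    # the same length along the first (sample) dimension.
    half_batch = batch_size // 2

    # Discriminator: half_batch real images labelled 1, half_batch fakes labelled 0.
    idx = np.random.randint(0, X_train.shape[0], half_batch)
    imgs = np.expand_dims(X_train[idx], axis=-1)             # (half_batch, 256, 256, 1)

    noise_dis, sampled_labels_dis = self.generate_noise_and_labels(half_batch)
    gen_imgs = self.generator.predict([noise_dis, sampled_labels_dis])
    gen_imgs = np.mean(gen_imgs, axis=-1, keepdims=True)     # (half_batch, 256, 256, 1)

    d_loss_real = self.discriminator.train_on_batch(imgs, np.ones((half_batch, 1)))
    d_loss_fake = self.discriminator.train_on_batch(gen_imgs, np.zeros((half_batch, 1)))
    d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

    # Generator (via the combined model): batch_size noise/label pairs paired
    # with batch_size "valid" targets, so x and y have the same cardinality.
    noise_gen, sampled_labels_gen = self.generate_noise_and_labels(batch_size)
    valid = np.ones((batch_size, 1))
    g_loss = self.combined.train_on_batch([noise_gen, sampled_labels_gen], valid)

The same kind of mismatch also exists in the posted d_loss_fake call, where gen_imgs_single_channel holds half_batch samples but the zero-label array has batch_size rows, so both places need matching sample counts.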
