Getting "TypeError: 'numpy.ndarray' object is not callable" from shuffle call for Fashion-MNIST


I have been trying to implement mini-batch optimisation on a CNN with SGD, but when I try the randomised sample selection using shuffle() I get the error in the title. The code is below. Could there be a problem with the imports, or something about a mismatch in data types that could lead to such an error? I have tried multiple solutions from this site, but they do not seem to work, or I am overlooking something.

import numpy as np
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
%matplotlib inline
from keras.datasets import fashion_mnist
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import tensorflow as tf 
import keras
from keras.models import Sequential,Input,Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from keras.layers.advanced_activations import LeakyReLU
import random
from random import shuffle

(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

n_feature = 2
n_class = 2
n_iter = 10


def make_network(n_hidden=100):
    # Initialize weights with Standard Normal random variables
    model = dict(
        W1=np.random.randn(n_feature, n_hidden),
        W2=np.random.randn(n_hidden, n_class)
    )

    return model


def softmax(x):
    return np.exp(x) / np.exp(x).sum()


def forward(x, model):
    # Input to hidden
    h = x @ model['W1']
    # ReLU non-linearity
    h[h < 0] = 0

    # Hidden to output
    prob = softmax(h @ model['W2'])

    return h, prob

def backward(model, xs, hs, errs):
    """xs, hs, errs contain all informations (input, hidden state, error) of all data in the minibatch"""
    # errs is the gradients of output layer for the minibatch
    dW2 = hs.T @ errs

    # Get gradient of hidden layer
    dh = errs @ model['W2'].T
    dh[hs <= 0] = 0

    dW1 = xs.T @ dh

    return dict(W1=dW1, W2=dW2)

def sgd(model, X_train, y_train, minibatch_size):
    for iter in range(n_iter):
        print('Iteration {}'.format(iter))

        # Randomize the order of the data points each epoch
        X_train, y_train = shuffle(X_train, y_train)

        for i in range(0, X_train.shape[0], minibatch_size):
            # Get pair of (X, y) of the current minibatch/chunk
            X_train_mini = X_train[i:i + minibatch_size]
            y_train_mini = y_train[i:i + minibatch_size]

            model = sgd_step(model, X_train_mini, y_train_mini)

    return model

def sgd_step(model, X_train, y_train):
    grad = get_minibatch_grad(model, X_train, y_train)
    model = model.copy()

    # Update every parameter in the network (W1 and W2) using its gradient
    for layer in grad:
        # Learning rate: 1e-4
        model[layer] += 1e-4 * grad[layer]

    return model

def get_minibatch_grad(model, X_train, y_train):
    xs, hs, errs = [], [], []

    for x, cls_idx in zip(X_train, y_train):
        h, y_pred = forward(x, model)

        # One-hot encode the true label
        y_true = np.zeros(n_class)
        y_true[int(cls_idx)] = 1.

        # Negative gradient of the output layer: for softmax cross-entropy,
        # dL/dlogits = y_pred - y_true, hence the += update in sgd_step
        err = y_true - y_pred

        # Accumulate the information from this minibatch
        # x: input
        # h: hidden state
        # err: gradient of output layer
        xs.append(x)
        hs.append(h)
        errs.append(err)

    # Backprop using the information gathered from the current minibatch
    return backward(model, np.array(xs), np.array(hs), np.array(errs))

minibatch_size = 50
n_experiment = 100

# Create placeholder to accumulate prediction accuracy
accs = np.zeros(n_experiment)

for k in range(n_experiment):
    # Reset model
    model = make_network()

    # Train the model
    model = sgd(model, X_train, y_train, minibatch_size)

    y_pred = np.zeros_like(y_test)

    for i, x in enumerate(X_test):
        # Predict the distribution of label
        _, prob = forward(x, model)
        # Get label by picking the most probable one
        y = np.argmax(prob)
        y_pred[i] = y

    # Compare the predictions with the true labels and take the percentage
    accs[k] = (y_pred == y_test).sum() / y_test.size

print('Mean accuracy: {}, std: {}'.format(accs.mean(), accs.std()))

1 Answer

Answered by w_sz:

random.shuffle() shuffles its first argument (a mutable sequence) in place. The optional second argument is not another array to shuffle: as the documentation describes, it must be a zero-argument function returning a random float in [0.0, 1.0) (this parameter was deprecated in Python 3.9 and removed in 3.11).

So what happens in your example is that shuffle() tries to call its second argument (y_train) as that random-number function while shuffling the first argument, and a NumPy array is not callable. Note also that shuffle() returns None, so the tuple assignment X_train, y_train = shuffle(...) would fail even with a single argument.
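
A minimal reproduction of the same failure (on Python versions before 3.11, where shuffle() still accepts the optional second parameter):

import random
import numpy as np

a = np.arange(10)
b = np.arange(10)

# shuffle() treats b as the random-number function and tries to call it
random.shuffle(a, b)  # TypeError: 'numpy.ndarray' object is not callable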

If you want to shuffle the inputs to the model, I would suggest doing that in a dataloader or using np.random.shuffle(). Note that np.random.shuffle() also works in place and shuffles only its single argument, so to keep X and y aligned you would concatenate the arrays, shuffle once, and split them again to preserve the pairing; a simpler variant of the same idea is sketched below.
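
As a sketch of that simpler variant, you can shuffle both arrays in unison with one shared index permutation instead of concatenating and splitting (shuffle_in_unison is just an illustrative helper name):

import numpy as np

def shuffle_in_unison(X, y):
    """Return copies of X and y shuffled into the same random row order."""
    assert len(X) == len(y)
    idx = np.random.permutation(len(X))  # one random ordering for both arrays
    return X[idx], y[idx]

X_train, y_train = shuffle_in_unison(X_train, y_train)

Alternatively, sklearn.utils.shuffle(X_train, y_train) returns shuffled copies of both arrays and matches the call already in your sgd() loop, so replacing from random import shuffle with from sklearn.utils import shuffle would also resolve the error.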