I am following Google's tensorflow example, but with a different dataset, and I am getting the above error message.

Here is Google's code link: https://colab.research.google.com/notebooks/mlcc/first_steps_with_tensor_flow.ipynb?utm_source=mlcc&utm_campaign=colab-external&utm_medium=referral&utm_content=firststeps-colab&hl=en

and here is the original dataset before adding an additional column: http://www.exploredata.net/Downloads/Baseball-Data-Set

I am running this all on an anaconda3 powershell prompt.

I have gone through to make sure I didn't skip any lines and added print statements to locate the issue. It seems it's with the def my_input_fn. code:

import numpy as np
import scipy 
from sklearn import datasets, metrics
import csv
import pandas as pd
import math
from matplotlib import cm, gridspec, pyplot as plt
from IPython import display
import tensorflow as tf
from tensorflow.python.data import Dataset

pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.3f}'.format

MLB2008_df = pd.read_csv(r'C:\Users\Alex\Downloads\MLB2008.csv', sep = ',', engine = 'python', header = None)

MLB2008_df.columns = MLB2008_df.iloc[0]
mlb = MLB2008_df.drop([0])
mlb['OBP'] = mlb['OBP'].astype(float)
mlb['SLG'] = mlb['SLG'].astype(float)
mlb['OPS'] = mlb['OBP'] + mlb['SLG']
mlb['SALARY'] = mlb['SALARY'].astype(float)
mlb = mlb.reindex(np.random.permutation(mlb.index))
mlb['SALARY'] /= 1000.0


#define feature
my_feature = mlb['OPS']
feature_columns = [tf.feature_column.numeric_column('OPS')]

#define label
targets = mlb['SALARY']

#gradient descent
myoptimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.0000001)
myoptimizer = tf.contrib.estimator.clip_gradients_by_norm(myoptimizer, 5.0)

#Configure model
linear_regressor = tf.estimator.LinearRegressor(feature_columns = feature_columns, optimizer = myoptimizer)

print ('here1')

def my_input_fn(features, targets, batch_size = 1, shuffle = True, num_epochs = None):
    '''Trains a linear regression model of one feature.

        features: pandas dataframe of features
        targets: pandas dataframe of targets
        batch_size: size of batches to be passed to the model
        shuffle: True or False, whether to shuffle the data
        num_epochs: number of epochs for which data should be repeated. None = repeat indefinitely
        Tuple of (features, labels) for next data batch

    #convert pandas data into a dict of np arrays
    features = {key:np.array(value) for key, value in dict(features).items()}

    #construct a dataset, and configure batching/repeating
    ds = Dataset.from_tensor_slices((features,targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    #shuffle data if specified   
    if shuffle:
        ds = ds.shuffle(buffer_size = 10000)

    #return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

_ = linear_regressor.train(
    input_fn = lambda:my_input_fn(my_feature, targets),
    steps = 100

ValueError: Unbatching a tensor is only supported for rank >= 1 This is the error and any help resolving would be appreciated!

0 Answers