I have started developing a web application using Django. The application predicts sales, and for that I use linear regression. Some of my variables are strings, so in order to train the model I convert all the string variables to int using the method handle_non_numerical_data(). The user of the application enters the string fields as strings.

method

def handle_non_numerical_data(df):
    """Replace every non-numeric column of *df* with integer codes.

    Each distinct value in a non-numeric column is assigned an integer,
    starting at 0, in order of first appearance in that column.  The same
    input therefore always produces the same codes (iterating a ``set`` of
    strings does not give that guarantee across interpreter runs).

    Columns whose dtype is any integer or floating-point type are left
    untouched.

    The value -> code mapping is not returned; a caller that needs to encode
    new values the same way later has to build and keep that mapping itself.

    Args:
        df: pandas DataFrame to encode.  It is modified in place.

    Returns:
        The same DataFrame object, with non-numeric columns replaced by
        integer codes.
    """
    for column in df.columns:
        series = df[column]

        # 'i', 'u', 'f' = signed int, unsigned int, float of any width.
        # Those columns are already usable by a numeric model.
        if series.dtype.kind in "iuf":
            continue

        values = series.tolist()

        # Number distinct values by first appearance so the result does not
        # depend on hash ordering.
        codes = {}
        for value in values:
            codes.setdefault(value, len(codes))

        df[column] = [codes[value] for value in values]

    return df

my model

    # Libraries
    import numpy as np
    import pandas as pd
    import pickle
    from matplotlib import pyplot as plt
    from sklearn import metrics
    from sklearn import model_selection
    #from sklearn import preprocessing
    from sklearn.cluster import KMeans
    from sklearn.linear_model import LinearRegression
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    #from sklearn.linear_model import Ridge
    from sklearn.externals import joblib



    # Load the dataset and replace every missing value with 0
    data = pd.read_csv('ml_code/ml_process/test.csv').fillna(0)


    def handle_non_numerical_data(df):
        """Replace every non-numeric column of *df* with integer codes.

        Each distinct value in a non-numeric column is assigned an integer,
        starting at 0, in order of first appearance in that column.  The
        same input therefore always produces the same codes (iterating a
        ``set`` of strings does not give that guarantee across interpreter
        runs).

        Columns whose dtype is any integer or floating-point type are left
        untouched.

        The value -> code mapping is not returned; a caller that needs to
        encode new values the same way later has to build and keep that
        mapping itself.

        Args:
            df: pandas DataFrame to encode.  It is modified in place.

        Returns:
            The same DataFrame object, with non-numeric columns replaced by
            integer codes.
        """
        for column in df.columns:
            series = df[column]

            # 'i', 'u', 'f' = signed int, unsigned int, float of any width.
            # Those columns are already usable by a numeric model.
            if series.dtype.kind in "iuf":
                continue

            values = series.tolist()

            # Number distinct values by first appearance so the result does
            # not depend on hash ordering.
            codes = {}
            for value in values:
                codes.setdefault(value, len(codes))

            df[column] = [codes[value] for value in values]

        return df


    # Encode every non-numeric column as integers, then switch to a plain
    # NumPy array so the columns can be sliced by position.
    data = handle_non_numerical_data(data)
    # DataFrame.as_matrix() is deprecated and was removed in pandas 1.0;
    # .values yields the same ndarray and works on every pandas version.
    data = data.values

    # X: matrix of explanatory variables (first nine columns)
    X = data[:, 0:9]
    # y: vector of the variable to predict (tenth column)
    y = data[:, 9]

    # Hold out 30% of the rows for evaluation; fixed seed for a repeatable split
    X2_train, X2_test, y2_train, y2_test = train_test_split(X, y, test_size=0.3, random_state=0)

    lreg = LinearRegression()
    lreg.fit(X2_train, y2_train)

    # NOTE: for a regressor, score() is the coefficient of determination (R^2),
    # even though the printed label says "Accuracy".
    print('Accuracy of linear regression on training set: {:.2f}'.format(lreg.score(X2_train, y2_train)))
    print('Accuracy of linear regression on test set: {:.2f}'.format(lreg.score(X2_test, y2_test)))

    # Serialize the fitted linear regression model to a bytes object
    linear_regression_model = pickle.dumps(lreg)

    # Store those pickled bytes with joblib. Because the file holds bytes and
    # not the estimator itself, a reader has to call joblib.load() and then
    # pickle.loads() on the result.
    joblib.dump(linear_regression_model, 'ml_code/linear_regression_model.pkl')

Code used for prediction:

import pickle

from sklearn.externals import joblib

# Read back whatever object joblib stored in the model file. The result is
# handed to pickle.loads() below, so it is expected to be pickled bytes.
linear_regression_model = joblib.load('ml_code/linear_regression_model.pkl')


# Rebuild the model object from the pickled bytes; get_prediction() uses it.
# NOTE(review): unpickling can execute arbitrary code, so only load a file
# that this project produced itself.
lreg = pickle.loads(linear_regression_model)


def get_prediction(magasin, numero_article, designation_article, moyen_de_ventes_par_jour, vente_2013,
                   vente_2014, ventes_2015, ventes_2016, ventes_2017
                   ):
    """Return the model's prediction for a single observation.

    The nine arguments are placed, in the order given, into one feature row
    that is passed to ``lreg.predict``.  Every value must already be numeric:
    a raw string such as ``'tea'`` makes ``predict`` raise
    ``ValueError: could not convert string to float``.

    Returns:
        Whatever ``lreg.predict`` returns for the one-row input.
    """
    feature_row = [
        magasin,
        numero_article,
        designation_article,
        moyen_de_ventes_par_jour,
        vente_2013,
        vente_2014,
        ventes_2015,
        ventes_2016,
        ventes_2017,
    ]
    # predict() expects a 2-D input, hence the single row wrapped in a list.
    return lreg.predict([feature_row])

but I get this error

    ValueError at /
        could not convert string to float: 'tea'
        Request Method: POST
        Request URL:    http://127.0.0.1:8000/
        Django Version: 2.2
        Exception Type: ValueError
        Exception Value:    
        could not convert string to float: 'tea'
        Exception Location: C:\Users\hp\AppData\Local\Programs\Python\Python36\dj\f\lib\site-packages\sklearn\utils\validation.py in check_array, line 448
        Python Executable:  C:\Users\hp\AppData\Local\Programs\Python\Python36\dj\f\Scripts\python.exe
        Python Version: 3.6.5
        Python Path:    
        ['C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\appweb pred',
         'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\Scripts\\python36.zip',
         'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\DLLs',
         'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\lib',
         'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\Scripts',
         'c:\\users\\hp\\appdata\\local\\programs\\python\\python36\\Lib',
         'c:\\users\\hp\\appdata\\local\\programs\\python\\python36\\DLLs',
         'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f',
         'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\lib\\site-packages']
        Server time:    Sat, 27 Apr 2019 03:32:44 +0000

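I think this happens because I converted all the string variables to integers when training the model, while the values entered by the user are still strings at prediction time.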

0 Answers