I am following the process shown on Wine Quality Prediction End-to-End ML Project on Krish Naik's YouTube channel to do a Flight Fare Prediction Project.
I run this cell of data transformation pipeline on 03_data_transformation.ipynb:
try:
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation_config()
data_transformation = DataTransformation(config=data_transformation_config)
# data_transformation.train_test_spliting()
# New Line
data_transformation.initiate_data_transformation()
except Exception as e:
raise e
I get this error:
TypeError: convert_to_minutes() takes 1 positional argument but 2 were given
Here is the traceback:
TypeError Traceback (most recent call last)
g:\Machine_Learning_Projects\iNeuron internship\Flight-Fare-Prediction-End-to-End-ML-Project\research\03_data_transformation.ipynb Cell 10 line 9
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=6'>7</a> data_transformation.initiate_data_transformation()
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=7'>8</a> except Exception as e:
----> <a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=8'>9</a> raise e
g:\Machine_Learning_Projects\iNeuron internship\Flight-Fare-Prediction-End-to-End-ML-Project\research\03_data_transformation.ipynb Cell 10 line 7
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=3'>4</a> data_transformation = DataTransformation(config=data_transformation_config)
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=4'>5</a> # data_transformation.train_test_spliting()
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=5'>6</a> # New Line
----> <a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=6'>7</a> data_transformation.initiate_data_transformation()
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=7'>8</a> except Exception as e:
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=8'>9</a> raise e
g:\Machine_Learning_Projects\iNeuron internship\Flight-Fare-Prediction-End-to-End-ML-Project\research\03_data_transformation.ipynb Cell 10 line 6
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=55'>56</a> df = pd.concat([df, df_airline, df_source, df_dest], axis = 1)
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=57'>58</a> ## handling duration column
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=58'>59</a> # df['duration'] = df['Duration'].apply(convert_to_minutes)
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=59'>60</a> # New Line Added
---> <a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=60'>61</a> df['duration'] = df['Duration'].apply(self.convert_to_minutes)
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=61'>62</a> upper_time_limit = df.duration.mean() + 1.5 * df.duration.std()
<a href='vscode-notebook-cell:/g%3A/Machine_Learning_Projects/iNeuron%20internship/Flight-Fare-Prediction-End-to-End-ML-Project/research/03_data_transformation.ipynb#X12sZmlsZQ%3D%3D?line=62'>63</a> df['duration'] = df['duration'].clip(upper = upper_time_limit)
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\pandas\core\series.py:4630, in Series.apply(self, func, convert_dtype, args, **kwargs)
4520 def apply(
4521 self,
4522 func: AggFuncType,
(...)
4525 **kwargs,
4526 ) -> DataFrame | Series:
4527 """
4528 Invoke function on values of Series.
4529
(...)
4628 dtype: float64
4629 """
-> 4630 return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\pandas\core\apply.py:1025, in SeriesApply.apply(self)
1022 return self.apply_str()
1024 # self.f is Callable
-> 1025 return self.apply_standard()
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\pandas\core\apply.py:1076, in SeriesApply.apply_standard(self)
1074 else:
1075 values = obj.astype(object)._values
-> 1076 mapped = lib.map_infer(
1077 values,
1078 f,
1079 convert=self.convert_dtype,
1080 )
1082 if len(mapped) and isinstance(mapped[0], ABCSeries):
1083 # GH#43986 Need to do list(mapped) in order to get treated as nested
1084 # See also GH#25959 regarding EA support
1085 return obj._constructor_expanddim(list(mapped), index=obj.index)
File c:\Users\2021\.conda\envs\flightfareprediction\lib\site-packages\pandas\_libs\lib.pyx:2834, in pandas._libs.lib.map_infer()
TypeError: convert_to_minutes() takes 1 positional argument but 2 were given
Here is the code of data transformation cell, which contains convert_to_minutes()
function.
class DataTransformation:
# New Function Added
# https://github.com/yash1314/Flight-Price-Prediction/blob/main/src/utils.py
def convert_to_minutes(duration):
try:
hours, minute = 0, 0
for i in duration.split():
if 'h' in i:
hours = int(i[:-1])
elif 'm' in i:
minute = int(i[:-1])
return hours * 60 + minute
except :
return None
def __init__(self, config: DataTransformationConfig):
self.config = config
## Note: You can add different data transformation techniques such as Scaler, PCA and all
#You can perform all kinds of EDA in ML cycle here before passing this data to the model
# I am only adding train_test_spliting cz this data is already cleaned up
# New Code Added Start
def initiate_data_transformation(self):
## reading the data
# df = pd.read_csv(self.config.data_path)
# New Line
df = pd.read_excel(self.config.data_path)
logger.info('Read data completed')
logger.info(f'df dataframe head: \n{df.head().to_string()}')
## dropping null values
df.dropna(inplace = True)
## Date of journey column transformation
df['journey_date'] = pd.to_datetime(df['Date_of_Journey'], format ="%d/%m/%Y").dt.day
df['journey_month'] = pd.to_datetime(df['Date_of_Journey'], format ="%d/%m/%Y").dt.month
## encoding total stops.
df.replace({'Total_Stops': {'non-stop' : 0, '1 stop': 1, '2 stops': 2, '3 stops': 3, '4 stops': 4}}, inplace = True)
## ecoding airline, source, and destination
df_airline = pd.get_dummies(df['Airline'], dtype=int)
df_source = pd.get_dummies(df['Source'], dtype=int)
df_dest = pd.get_dummies(df['Destination'], dtype=int)
## dropping first columns of each categorical variables.
df_airline.drop('Trujet', axis = 1, inplace = True)
df_source.drop('Banglore', axis = 1, inplace = True)
df_dest.drop('Banglore', axis = 1, inplace = True)
df = pd.concat([df, df_airline, df_source, df_dest], axis = 1)
## handling duration column
# df['duration'] = df['Duration'].apply(convert_to_minutes)
# New Line Added
df['duration'] = df['Duration'].apply(self.convert_to_minutes)
upper_time_limit = df.duration.mean() + 1.5 * df.duration.std()
df['duration'] = df['duration'].clip(upper = upper_time_limit)
## encodign duration column
bins = [0, 120, 360, 1440] # custom bin intervals for 'Short,' 'Medium,' and 'Long'
labels = ['Short', 'Medium', 'Long'] # creating labels for encoding
df['duration'] = pd.cut(df['duration'], bins=bins, labels=labels)
df.replace({'duration': {'Short':1, 'Medium':2, 'Long': 3}}, inplace = True)
## dropping the columns
cols_to_drop = cols_to_drop = ['Airline', 'Date_of_Journey', 'Source', 'Destination', 'Route', 'Dep_Time', 'Arrival_Time', 'Duration', 'Additional_Info', 'Delhi', 'Kolkata']
df.drop(cols_to_drop, axis = 1, inplace = True)
logger.info('df data transformation completed')
logger.info(f' transformed df data head: \n{df.head().to_string()}')
# df.to_csv(self.data_transformation_config.transformed_data_file_path, index = False, header= True)
# New Line
df.to_excel(self.data_transformation_config.transformed_data_file_path, index = False, header= True)
logger.info("transformed data is stored")
df.head(1)
## splitting the data into training and target data
X = df.drop('Price', axis = 1)
y = df['Price']
## accessing the feature importance.
select = ExtraTreesRegressor()
select.fit(X, y)
# plt.figure(figsize=(12, 8))
# fig_importances = pd.Series(select.feature_importances_, index=X.columns)
# fig_importances.nlargest(20).plot(kind='barh')
# ## specify the path to the "visuals" folder using os.path.join
# visuals_folder = 'visuals'
# if not os.path.exists(visuals_folder):
# os.makedirs(visuals_folder)
# ## save the plot in the visuals folder
# plt.savefig(os.path.join(visuals_folder, 'feature_importance_plot.png'))
# logger.info('feature imp figure saving is successful')
## further Splitting the data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42, shuffle = True)
logger.info('final splitting the data is successful')
## returning splitted data and data_path.
return (
X_train,
X_test,
y_train,
y_test,
self.data_transformation_config.transformed_data_file_path
)
Here is my file in GitHub.
My file encoding is UTF-8
Would you please help me to fix this issue?
You have
convert_to_minutes
located inside the class, but you don't have aself
parameter. It doesn't look like you NEED to be part of the class, but that's how you have it, and as such the quickest fix is just to make it: