I'm following Temporal-Fusion-Transformer (TFT) tutorial in the PytorchForecasting (https://pytorch-forecasting.readthedocs.io/en/stable/tutorials/stallion.html#Demand-forecasting-with-the-Temporal-Fusion-Transformer) to train TFT model with custom dataset to predict "booking" value based on several static/time-varying features for each Sold TO Party in each Region and Sub-Region.
When converting the dataframe into a PyTorch Forecasting TimeSeriesDataSet, I encountered the error: "TypeError: '<' not supported between instances of 'int' and 'str'." Does anyone know what's potential issue for this error?
The code is as follows:
max_prediction_length = 2
max_encoder_length = 8
training_cutoff = ctmdata["time_idx"].max() - max_prediction_length
target_value = "Booking"
key_idx = ["Region","Sub-Region","Sold TO Party Code"]
training = TimeSeriesDataSet(
ctmdata[lambda x: x.time_idx <= training_cutoff],
time_idx="time_idx",
target=target_value,
group_ids=key_idx,
min_encoder_length=max_encoder_length // 2, # keep encoder length long (as it is in the validation set)
max_encoder_length=max_encoder_length,
min_prediction_length=1,
max_prediction_length=max_prediction_length,
static_categoricals=["Region","Sub-Region","Sold TO Party Code","Customer Type","Customer Segment L1","Customer Segment L2"],
static_reals=[],
time_varying_known_categoricals=["Supply Chain Customer","Quarter"],
variable_groups={}, # group of categorical variables can be treated as one variable
time_varying_known_reals=["time_idx"],
time_varying_unknown_categoricals=["DW Customer"],
time_varying_unknown_reals=[
"Booking","yeojohnson_Booking","avg_booking_bySubRegion","avg_booking_byCTMSegL1",
"Billing","yeojohnson_Billing","avg_billing_bySubRegion","avg_billing_byCTMSegL1",
"MGP","MGP%","BB Ratio"
],
target_normalizer=GroupNormalizer(
groups=key_idx, transformation="softplus"
) # use softplus and normalize by group
)
The error message shows:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-119-9cc2c7eb9aa5> in <module>()
38 ],
39 target_normalizer=GroupNormalizer(
---> 40 groups=key_idx, transformation="softplus"
41 ) # use softplus and normalize by group
42 )
4 frames
/usr/local/lib/python3.7/dist-packages/pytorch_forecasting/data/timeseries.py in __init__(self, data, time_idx, target, group_ids, weight, max_encoder_length, min_encoder_length, min_prediction_idx, min_prediction_length, max_prediction_length, static_categoricals, static_reals, time_varying_known_categoricals, time_varying_known_reals, time_varying_unknown_categoricals, time_varying_unknown_reals, variable_groups, constant_fill_strategy, allow_missing_timesteps, lags, add_relative_time_idx, add_target_scales, add_encoder_length, target_normalizer, categorical_encoders, scalers, randomize_length, predict_mode)
432
433 # preprocess data
--> 434 data = self._preprocess_data(data)
435 for target in self.target_names:
436 assert target not in self.scalers, "Target normalizer is separate and not in scalers."
/usr/local/lib/python3.7/dist-packages/pytorch_forecasting/data/timeseries.py in _preprocess_data(self, data)
651 # use existing encoder - but a copy of it not too loose current encodings
652 encoder = deepcopy(self.categorical_encoders.get(group_name, NaNLabelEncoder()))
--> 653 self.categorical_encoders[group_name] = encoder.fit(data[name].to_numpy().reshape(-1), overwrite=False)
654 data[group_name] = self.transform_values(name, data[name], inverse=False, group_id=True)
655
/usr/local/lib/python3.7/dist-packages/pytorch_forecasting/data/encoders.py in fit(self, y, overwrite)
88
89 idx += offset
---> 90 for val in np.unique(y):
91 if val not in self.classes_:
92 self.classes_[val] = idx
<__array_function__ internals> in unique(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/numpy/lib/arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
270 ar = np.asanyarray(ar)
271 if axis is None:
--> 272 ret = _unique1d(ar, return_index, return_inverse, return_counts)
273 return _unpack_tuple(ret)
274
/usr/local/lib/python3.7/dist-packages/numpy/lib/arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
331 aux = ar[perm]
332 else:
--> 333 ar.sort()
334 aux = ar
335 mask = np.empty(aux.shape, dtype=np.bool_)
TypeError: '<' not supported between instances of 'int' and 'str'
Thanks for help!
You can check the values in the static_categoricals=["Region","Sub-Region","Sold TO Party Code","Customer Type","Customer Segment L1","Customer Segment L2"], maybe you are passing a value that have a str and this is trying to be compare with a int