I'm following the Temporal Fusion Transformer (TFT) tutorial in PyTorch Forecasting (https://pytorch-forecasting.readthedocs.io/en/stable/tutorials/stallion.html#Demand-forecasting-with-the-Temporal-Fusion-Transformer) to train a TFT model on a custom dataset. The goal is to predict the "booking" value from several static and time-varying features for each Sold TO Party in each Region and Sub-Region.

When converting the dataframe into a PyTorch Forecasting TimeSeriesDataSet, I get the error "TypeError: '<' not supported between instances of 'int' and 'str'". Does anyone know what might be causing this error?

The code is as follows:

# Build the training TimeSeriesDataSet from `ctmdata`, holding out the last
# `max_prediction_length` time steps of the data.
max_prediction_length = 2
max_encoder_length = 8
# Rows with time_idx above this cutoff are excluded from the training set.
training_cutoff = ctmdata["time_idx"].max() - max_prediction_length
target_value = "Booking"
# Columns that identify one time series; used both as group_ids and as the
# GroupNormalizer groups below.
# NOTE(review): the traceback shows the failure while an encoder is fitted on a
# group column (np.unique sorts the values). Presumably one of these columns
# holds a mix of int and str values -- verify their dtypes.
key_idx = ["Region","Sub-Region","Sold TO Party Code"]

training = TimeSeriesDataSet(
    # only rows up to (and including) the training cutoff
    ctmdata[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target=target_value,
    group_ids=key_idx,
    min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    # the three key_idx columns are also passed as static categorical features
    static_categoricals=["Region","Sub-Region","Sold TO Party Code","Customer Type","Customer Segment L1","Customer Segment L2"],
    static_reals=[],
    time_varying_known_categoricals=["Supply Chain Customer","Quarter"],
    variable_groups={},  # group of categorical variables can be treated as one variable
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_categoricals=["DW Customer"], 
    # the target column "Booking" is itself listed among the unknown reals
    time_varying_unknown_reals=[
        "Booking","yeojohnson_Booking","avg_booking_bySubRegion","avg_booking_byCTMSegL1",
        "Billing","yeojohnson_Billing","avg_billing_bySubRegion","avg_billing_byCTMSegL1",
        "MGP","MGP%","BB Ratio"
    ],
    target_normalizer=GroupNormalizer(
        groups=key_idx, transformation="softplus"
    )  # use softplus and normalize by group
)

The error message shows:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-119-9cc2c7eb9aa5> in <module>()
     38     ],
     39     target_normalizer=GroupNormalizer(
---> 40         groups=key_idx, transformation="softplus"
     41     )  # use softplus and normalize by group
     42 )

4 frames
/usr/local/lib/python3.7/dist-packages/pytorch_forecasting/data/timeseries.py in __init__(self, data, time_idx, target, group_ids, weight, max_encoder_length, min_encoder_length, min_prediction_idx, min_prediction_length, max_prediction_length, static_categoricals, static_reals, time_varying_known_categoricals, time_varying_known_reals, time_varying_unknown_categoricals, time_varying_unknown_reals, variable_groups, constant_fill_strategy, allow_missing_timesteps, lags, add_relative_time_idx, add_target_scales, add_encoder_length, target_normalizer, categorical_encoders, scalers, randomize_length, predict_mode)
    432 
    433         # preprocess data
--> 434         data = self._preprocess_data(data)
    435         for target in self.target_names:
    436             assert target not in self.scalers, "Target normalizer is separate and not in scalers."

/usr/local/lib/python3.7/dist-packages/pytorch_forecasting/data/timeseries.py in _preprocess_data(self, data)
    651             # use existing encoder - but a copy of it not too loose current encodings
    652             encoder = deepcopy(self.categorical_encoders.get(group_name, NaNLabelEncoder()))
--> 653             self.categorical_encoders[group_name] = encoder.fit(data[name].to_numpy().reshape(-1), overwrite=False)
    654             data[group_name] = self.transform_values(name, data[name], inverse=False, group_id=True)
    655 

/usr/local/lib/python3.7/dist-packages/pytorch_forecasting/data/encoders.py in fit(self, y, overwrite)
     88 
     89         idx += offset
---> 90         for val in np.unique(y):
     91             if val not in self.classes_:
     92                 self.classes_[val] = idx

<__array_function__ internals> in unique(*args, **kwargs)

/usr/local/lib/python3.7/dist-packages/numpy/lib/arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
    270     ar = np.asanyarray(ar)
    271     if axis is None:
--> 272         ret = _unique1d(ar, return_index, return_inverse, return_counts)
    273         return _unpack_tuple(ret)
    274 

/usr/local/lib/python3.7/dist-packages/numpy/lib/arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
    331         aux = ar[perm]
    332     else:
--> 333         ar.sort()
    334         aux = ar
    335     mask = np.empty(aux.shape, dtype=np.bool_)

TypeError: '<' not supported between instances of 'int' and 'str'

Thanks for your help!

1

There is 1 answer

0
Ronny F. On

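You can check the values in the columns listed in static_categoricals=["Region","Sub-Region","Sold TO Party Code","Customer Type","Customer Segment L1","Customer Segment L2"]. Maybe one of those columns contains both str and int values: the encoder sorts the unique values of the column, and a str cannot be compared with an int. For example, if "Sold TO Party Code" mixes numeric and text codes, converting the column to a single type (e.g. with .astype(str)) before building the dataset should avoid the error.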