I am able to build an LSTM that predicts stock prices from historical data using time steps. However, when I try to integrate it with a reinforcement learning model such as DQNAgent, so that user feedback on whether a prediction was good or bad can be used to retrain the model, I get multiple errors about input shape and dimensions. The input shape expected by the LSTM's fit() (with time_step = 100) is different from the input shape the DQNAgent works with, since it processes one observation at a time. I am using both models for the first time, so I need help resolving this. Below is the error:
ValueError: Error when checking input: expected lstm_input to have 3 dimensions, but got array with shape (1, 6)
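If it helps, here is a stripped-down sketch of what I believe the mismatch is (the shapes are the ones from my setup: 100 time steps and 2 features for the LSTM, while a single environment observation is a flat row of 6 values):

import numpy as np
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.models import Model

inp = Input(shape=(100, 2), name='lstm_input')   # 100 time steps, 2 features, as in my training setup
out = Dense(2, activation='linear')(LSTM(50)(inp))
toy_model = Model(inputs=inp, outputs=out)
toy_model.compile(loss='mean_squared_error', optimizer='adam')

toy_model.predict(np.zeros((1, 100, 2)))   # 3-D batch (batch, timesteps, features) -> works
toy_model.predict(np.zeros((1, 6)))        # 2-D single observation -> ValueError about 3 dimensions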
Code:
# Imports used (assuming keras-rl / keras-rl2 for the DQN parts; that is what I have installed)
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense
from tensorflow.keras.models import Model
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy

# Define function to create LSTM model
def create_lstm_model(input_shape):
    lstm_input = Input(shape=input_shape, name='lstm_input')
    lstm_layer = LSTM(50, return_sequences=True)(lstm_input)
    lstm_layer = Dropout(0.2)(lstm_layer)
    lstm_layer = LSTM(50, return_sequences=True)(lstm_layer)
    lstm_layer = Dropout(0.2)(lstm_layer)
    lstm_layer = LSTM(50)(lstm_layer)
    output = Dense(num_actions, activation='linear')(lstm_layer)
    model = Model(inputs=lstm_input, outputs=output)
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
# Define function to preprocess data and create dataset for LSTM
def preprocess_data(dataset, time_step=1):
    features = ['Open', 'Close']  # Specify the features to be used
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset[features])
    dataX, dataY = [], []
    for i in range(len(scaled_data) - time_step):
        a = scaled_data[i:(i + time_step), :]
        dataX.append(a)
        dataY.append(scaled_data[i + time_step, :])
    return np.array(dataX), np.array(dataY)
# Load and preprocess historical stock data
#idf = pd.read_csv('your_data.csv') # Replace 'your_data.csv' with your dataset file
x, y = preprocess_data(idf, time_step=100)
historical_data = idf
# Split data into training and testing sets
split_ratio = 0.9
split_index = int(len(x) * split_ratio)
x_train, x_test = x[:split_index], x[split_index:]
y_train, y_test = y[:split_index], y[split_index:]
# Train the LSTM model
input_shape_lstm = (100, 2) # Adjust input shape based on the number of features
num_actions = 2 # Example: 0 - No action, 1 - Retrain LSTM
lstm_model = create_lstm_model(input_shape_lstm)
lstm_model.fit(x_train, y_train, epochs=10, batch_size=32, verbose=1)
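# Sanity check added while debugging: preprocess_data returns x_train with shape
# (samples, 100, 2), which matches input_shape_lstm, so the fit() above runs fine.
print(x_train.shape, y_train.shape)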
# Define and initialize the environment
class StockPredictionEnvironment:
    def __init__(self, lstm_model, historical_data):
        self.lstm_model = lstm_model
        self.historical_data = historical_data
        self.current_step = 0

    def reset(self):
        self.current_step = 0
        return self.get_state()

    def step(self, action):
        next_state = self.get_state()
        reward = self.calculate_reward()
        done = self.is_done()
        return next_state, reward, done, {}

    def get_state(self):
        if self.current_step < len(self.historical_data):
            state = self.historical_data.iloc[self.current_step]
            self.current_step += 1
            return state.values.reshape(-1)  # Reshape state for compatibility with LSTM
        else:
            return None

    def calculate_reward(self):
        try:
            predicted_price = self.lstm_model.predict(np.array([self.historical_data.iloc[self.current_step]]))[0][0]
            actual_price = self.historical_data.iloc[self.current_step + 1][0]
            reward = actual_price - predicted_price
            return reward
        except KeyError as e:
            print(f"KeyError: {e}")
            return 0

    def is_done(self):
        return self.current_step >= len(self.historical_data)
env = StockPredictionEnvironment(lstm_model, idf)
# Define and initialize the DQN agent
input_shape_dqn = (1, 6) # Adjust input shape based on the number of features
model = create_lstm_model(input_shape_dqn) # Use the same LSTM model architecture
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=num_actions, nb_steps_warmup=10)
dqn.compile(optimizer='adam', metrics=['mae'])
# Train the DQN agent
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)
# Evaluate the DQN agent
scores = dqn.test(env, nb_episodes=10, visualize=False)
print('Average score:', np.mean(scores.history['episode_reward']))
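To narrow it down, I printed the shapes involved (a rough check; in my data, idf has 6 columns, which is where the (1, 6) in the error comes from):

print(lstm_model.input_shape)          # (None, 100, 2) - what the trained LSTM expects
print(model.input_shape)               # (None, 1, 6)   - the model handed to the DQNAgent
sample_state = env.reset()             # one flattened row of the dataframe
print(np.array([sample_state]).shape)  # (1, 6) - matches the shape reported in the error

How should I reshape or buffer the observations so that the LSTM trained on windows of 100 time steps can also be used with the DQNAgent, which sees one row at a time?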