The LSTM does not predict the Apple Stock Close column well
I am using an LSTM (Long Short-Term Memory) network to predict Apple stock closing prices from the 3 previous days. My problem is that the model underestimates the closing prices. A photo of the final result is given at the end of this post.
First of all, I have a very heavily left-skewed data set of Apple stock prices, as can be seen in the photo below. I used an LSTM model to predict the closing prices using the 3 previous days and a batch size of 10.
library(recipes)
library(keras)
library(dplyr)
library(ggplot2)

rec_obj <- recipe(Close ~ ., aapl) %>%
  step_sqrt(Close) %>%
  step_center(Close) %>%
  step_scale(Close) %>%
  prep()
aapl_normalized <- bake(rec_obj, aapl)  # 10372 observations
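As a quick sanity check (optional, not part of the original pipeline), the transformed Close should come out roughly centered at 0 with unit spread:
summary(aapl_normalized$Close)  # mean should be approximately 0 after centering/scaling
sd(aapl_normalized$Close)       # should be approximately 1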
# keep the center and scale for de-normalization later
center_history <- rec_obj$steps[[2]]$means["Close"]
center_history
scale_history <- rec_obj$steps[[3]]$sds["Close"]
scale_history
c(center = center_history, scale = scale_history)
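Since the recipe applies sqrt, then centering, then scaling, de-normalizing has to invert those steps in reverse order. A minimal helper (a hypothetical convenience, not in the original script) makes the inverse transform explicit; it is the same expression used in the prediction step further down:
# invert the recipe steps in reverse order: undo scaling, then centering, then sqrt
denormalize <- function(x, center, scale) {
  (x * scale + center)^2
}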
Train, validation, and test datasets
aapl_trn <- aapl_normalized[1:8500, ]
aapl_val <- aapl_normalized[8501:9401, ]    # 901 observations
aapl_test <- aapl_normalized[9402:10372, ]  # 971 observations
Reshaping the Data
n_inputs <- 3       # number of time steps fed to the RNN, e.g. use the first 3 days to predict the 4th
n_predictions <- 1  # predict a single day ahead
batch_size <- 10    # samples per batch; a larger batch trains faster -- a tunable parameter
Functions
build_windowed_matrix <- function(data, timesteps) {
  # slide a window of length `timesteps` (= n_inputs + n_predictions, here 3 + 1 = 4)
  # over the series; e.g. ~10K rows produce a ~10K x 4 matrix
  t(sapply(1:(length(data) - timesteps + 1), function(x)
    data[x:(x + timesteps - 1)]))
}
reshape_3D <- function(df) {
  # keras expects 3D input of shape (samples, timesteps, features);
  # univariate => features = 1; multivariate with n features => (samples, timesteps, n)
  dim(df) <- c(dim(df)[1], dim(df)[2], 1)
  df
}
get_x <- function(mtx, n_inputs, batch_size) {
  # the first n_inputs columns of each window are the predictors
  mtx <- mtx[, 1:n_inputs]
  # truncate so the row count is a multiple of batch_size
  mtx <- mtx[1:(nrow(mtx) %/% batch_size * batch_size), ]
  reshape_3D(mtx)
}
get_y <- function(mtx, n_inputs, n_predictions, batch_size) {
  # the remaining column(s) of each window are the target(s), also reshaped to 3D
  mtx <- mtx[, (n_inputs + 1):(n_inputs + n_predictions), drop = FALSE]
  mtx <- mtx[1:(nrow(mtx) %/% batch_size * batch_size), , drop = FALSE]
  if (n_predictions == 1) {
    dim(mtx) <- c(length(mtx), 1)
  }
  reshape_3D(mtx)
}
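For intuition, here is a tiny worked example (a toy vector, not part of the pipeline) of what build_windowed_matrix produces with windows of length n_inputs + n_predictions = 4. The first 3 columns of each row are the inputs and the last column is the target:
toy <- c(10, 11, 12, 13, 14, 15)
build_windowed_matrix(toy, 4)
#      [,1] [,2] [,3] [,4]
# [1,]   10   11   12   13
# [2,]   11   12   13   14
# [3,]   12   13   14   15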
Extract 'Close' Values
Extract the close values and discard the dates
trn <- aapl_trn %>% select(Close) %>% pull()  # into a plain numeric vector
val <- aapl_val %>% select(Close) %>% pull()
test <- aapl_test %>% select(Close) %>% pull()
Build matrices
using the functions defined above
trn_mtx <- build_windowed_matrix(trn, n_inputs + n_predictions)
val_mtx <- build_windowed_matrix(val, n_inputs + n_predictions)
test_mtx <- build_windowed_matrix(test, n_inputs + n_predictions)
X_train <- get_x(trn_mtx, n_inputs, batch_size)
Y_train <- get_y(trn_mtx, n_inputs, n_predictions, batch_size)
X_val <- get_x(val_mtx, n_inputs, batch_size)
Y_val <- get_y(val_mtx, n_inputs, n_predictions, batch_size)
X_test <- get_x(test_mtx, n_inputs, batch_size)
Y_test <- get_y(test_mtx, n_inputs, n_predictions, batch_size)  # Y_test holds the actual (normalized) closing values of the test set
head(X_test)
head(Y_test)
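If the windowing and truncation behave as intended, the shapes should work out as follows (8500 training points give 8497 windows, truncated to 8490 so the count is a multiple of the batch size):
dim(X_train)  # 8490 3 1
dim(Y_train)  # 8490 1 1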
I am using the LSTM model below:
1.) Build the first model (using only Close)
model <- keras_model_sequential()
model %>%
  layer_lstm(  # first LSTM layer, 32 units per cell
    units = 32,
    batch_input_shape = c(batch_size, n_inputs, 1),  # 1 feature (Close only)
    return_sequences = TRUE  # pass the full sequence on to the next LSTM layer
  ) %>%
  layer_lstm(  # second LSTM layer, 16 units per cell
    units = 16
  ) %>%
  layer_dense(units = 1)  # single regression output
model %>%
  compile(
    loss = 'mean_squared_error',
    optimizer = 'sgd',
    metrics = list('mean_squared_error')
  )
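As an optional check that the two LSTM layers are actually stacked, the layer list and parameter counts can be printed:
summary(model)  # should list lstm (32 units) -> lstm (16 units) -> dense (1 unit)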
# stopping criterion: if the network is no longer learning, halt training after 5 epochs with no improvement (monitors val_loss by default)
callbacks <- list(
  callback_early_stopping(patience = 5)
)
history <- model %>% fit(
x = X_train,
y = Y_train,
validation_data = list(X_val, Y_val),
batch_size = batch_size,
epochs = 100,
callbacks = callbacks
)
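The learning curves can then be inspected to see whether training and validation loss plateau together or diverge:
plot(history)  # training vs. validation loss per epoch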
Predictions using one feature
pred_test <- model %>%
  predict(X_test, batch_size = batch_size)
# de-normalize back to the original scale: undo scaling, centering, and the square root
pred_test <- (pred_test * scale_history + center_history)^2
actual_test <- (Y_test[, , 1] * scale_history + center_history)^2  # de-normalize the actual closing values the same way
mse_test <- mean((pred_test - actual_test)^2)  # compare predictions and actuals on the same (original) scale
Plot predictions vs actual
ggplot(aapl[(9402 + n_inputs):(9401 + n_inputs + dim(pred_test)[1]), ],
       aes(x = Date, y = Close, group = 1)) +
  geom_line() +
  scale_x_discrete(breaks = levels(aapl$Date)[floor(seq(1, nlevels(aapl$Date), length.out = 5))]) +
  geom_line(aes(y = pred_test), color = "blue") +
  labs(x = "Date", y = "Close Value", title = "Apple Stock")
As you can see from the graph, my model seems to underestimate the closing prices. How can I fix this? I tried hyperparameter tuning, but it didn't help. Is it because my data is so heavily left-skewed? How should I go about this?
Best regards
Topic: lstm prediction neural-network
Category: Data Science