I have collected a set of observations, and my goal is to perform Ordinary Kriging using the gstat
package to interpolate these observations onto a regular grid. I'm encountering difficulties in selecting the variogram parameter, and the Kriging variance values I'm obtaining are significantly larger than expected. While I anticipate errors in the 2 cm to 4 cm range at the grid nodes, the Kriging results show errors between 4.2 to 4.5 meters. I'm seeking assistance to identify and rectify any potential issues in my approach. I'm open to guidance and suggestions, whether it's related to the code or the underlying concepts. Below is my complete code for reference.
The figure of my actual experimental and fitted variogram is as shown below:
# Load the sf library for spatial data handling
library(sf)
library(gstat)
library(ggplot2)
library(rnaturalearth)
# Set the seed for reproducibility
set.seed(123)
world2 <- ne_coastline(scale = "medium", returnclass = "sf")
countries <- ne_countries(scale = "medium", returnclass = "sf")
# Create a dummy data frame
dummy_data <- data.frame(
lat = runif(1000, min = 21, max = 27), # Random latitude values between 21 and 27
lon = runif(1000, min = 23, max = 27), # Random longitude values between 23 and 27
height_m = runif(1000, min = 14, max = 28), # Random height values between 14 and 28 meters
SD_heights = runif(1000, min = 0.02, max = 0.04) # Random SD of the heights at observed locations meters
)
# Define the extent of the grid
lat_min <- 21
lat_max <- 27
long_min <- 23
long_max <- 27
# Create a sequence of latitude and longitude values
lat_values <- seq(lat_min, lat_max, by = 0.5)
long_values <- seq(long_min, long_max, by = 0.5)
# Create the grid points
grid_point <- expand.grid(lat = lat_values, lon = long_values)
# plot
ggplot() +
geom_sf(data = world2, size = 1, colour = "black") +
geom_sf(data= countries, color= "#E5E5E5", fill = "#E5E5E5") +
coord_sf(xlim = c(23, 27), ylim = c(21, 27)) +
geom_point(data= grid_point, aes(x = lon, y = lat), color ="red")+
geom_point(data= dummy_data, aes(x = lon, y = lat))
# Ordinary Kriging
# Compute the Empirical Variogram:
# Define the original CRS (Assuming WGS 84)
original_crs <- st_crs("+proj=longlat +datum=WGS84")
# Create a data frame with the original latitude and longitude data
data_df <- dummy_data
# Convert the data frame to a spatial data frame (sf object) with the original CRS
data_sf <- st_as_sf(data_df, coords = c("lon", "lat"), crs = original_crs)
# Define the target projection (e.g., UTM Zone 33N)
target_crs <- st_crs("+proj=utm +zone=33 +datum=WGS84 +units=m")
# Project the data to the target CRS
data_sf_projected <- st_transform(data_sf, target_crs)
coor_m <- st_coordinates(data_sf_projected) %>% as.data.frame()
# Detrend the data using linear regression
# Create a data frame with the heights and coordinates
data_combined <- data.frame(
h = data_df$height_m,
lon = coor_m$X,
lat = coor_m$Y
)
# Perform linear regression
lm_model <- lm(h ~ lon + lat, data = data_combined)
# Extract the coefficients a, b, and c
a <- coef(lm_model)[1]
b <- coef(lm_model)[2]
c <- coef(lm_model)[3]
detrended_h <- resid(lm_model)
# Add the detrended variable to the spatial data frame
data_sf_projected$detrended_h <- detrended_h
# Grid points for interpolation in the target CRS
# Create a data frame for the grid points in the target CRS
interp_sf <- st_as_sf(grid_point, coords = c("lon", "lat"), crs = original_crs)
# Reproject interp_sf to the target CRS
interp_sf_reprojected <- st_transform(interp_sf, crs = target_crs)
# Set a smaller cutoff value for the empirical variogram (adjust as needed)
cutoff_value <- 300000
search_radius <- 20000
# Compute the empirical variogram with the new cutoff
variogram_model_detrended <- variogram(detrended_h ~ 1, data = data_sf_projected, width = search_radius, cutoff = cutoff_value)
plot(variogram_model_detrended)
# Fit a variogram model to the empirical variogram
variogram_fit_detrended <- fit.variogram(variogram_model_detrended, model = vgm(psill = 17.3, model = "Exp", range = 250000, nugget = 16.2))
# Plot the empirical variogram
plot(variogram_model_detrended, plot.type = "cloud", main = "Empirical and Fitted Variograms")
# Create a data frame of the empirical variogram
empirical_variogram_df <- data.frame(dist = variogram_model_detrended$dist,
gamma = variogram_model_detrended$gamma)
preds = variogramLine(variogram_fit_detrended, maxdist = max(variogram_model_detrended$dist))
head(preds)
# Create the ggplot plot
ggplot() +
geom_point(data = empirical_variogram_df, aes(x = dist/1e3, y = gamma, color = "Empirical model"), alpha = 0.6) +
geom_line(data = preds, aes(x = dist/1e3, y = gamma, color ="Analytical model"))+
labs(title = "Empirical and Fitted Variograms",
x = "Distance [km]",
y = "Semivariance") +
theme_bw()+
scale_color_manual(name= "", breaks=c('Empirical model', 'Analytical model'),
values=c('Empirical model'='red', 'Analytical model'='blue'))+
guides(color = guide_legend(override.aes = list(shape = c(16, NA),
linetype = c(NA, 1)))) +
theme(#plot.title = element_text(color = "black", face="bold", size = 24),
plot.title = element_blank(),
plot.subtitle = element_blank(),
axis.title = element_text(color = "black", size = 11,
margin = margin(t = 0, r = 20, b = 0, l = 0)),
axis.text = element_text(color="black", size = 10),
# panel.background = element_blank(),
# panel.grid = element_blank(),
legend.title = element_text(color = "black",face="bold", size = 11),
legend.title.align = 0.5 ,
legend.text = element_text(color = "black", size = 10),
legend.key.height = unit(0.7, 'cm'),
#legend.key.width = unit(2.5, 'cm'),
legend.position = "bottom",
aspect.ratio = 1/2)
# Perform kriging
kriged_detrended <- krige(detrended_h ~ 1, locations = data_sf_projected, newdata = interp_sf_reprojected, vgm(psill = 17.3, model = "Exp", range = 250000, nugget = 16.2))
coor_kriged_detrended <- as.data.frame(st_coordinates(kriged_detrended))
kriged_detrended_df <- data.frame(
h = kriged_detrended$var1.pred,
h_var = kriged_detrended$var1.var,
lon = coor_kriged_detrended$X,
lat = coor_kriged_detrended$Y)
kriged_predicted_h <- kriged_detrended_df$h + (coef(lm_model)[1] + coef(lm_model)[2]* kriged_detrended_df$lon+ coef(lm_model)[3] * kriged_detrended_df$lat)
head(kriged_predicted_h)
summary(sqrt(kriged_detrended_df$h_var))