I am having an issue with the raster::predict function related generating probability predictions of Presence with a cforest object.
When I generate a classification output, I obtain a correct output - however, when adding type="prob" to obtain probabilities, the output is a strange banding that doesn't correspond to the classification output. I have attached two photos here: Correct output based on classification predict and Banded, incorrect output based on type="prob" predict that details these issues.
As you will notice, the checkerboard pattern on the output places random areas of high and low probability randomly, whereas the classification output creates blocks of presence (1) and absence (0) where they would be expected.
Below is a snippit of code, with training data and a segment of a raster brick used for prediction.
I would greatly appreciate if anyone has insight into how I can obtain valid probabilities using this function.
Am I missing something important in the predfun function?
Thank you in advance!
Here is the example data:
library(raster)
training = structure(list(
Presence = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
Elevation = c(3937.63541666689, 384.003472222217, 401.357638888879, 226.43749999999, 21.3055555555572, 305.399305555546, 38.3402777777742, 347.302083333335, 168.156250000001, 700.708333333328, 1034.2013888889, 1033.78125, 1426.99305555577, 912.874999999952, 665.854166666672, 657.187499999983, 1181.97916666667, 696.062499999948, 976.812500000002, 1017.98263888889),
Region = structure(c(3L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 4L, 4L, 3L, 3L, 1L, 4L, 2L, 4L, 4L), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
Protected = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("0","1"), class = "factor"),
Population = structure(c(17L, 3L, 4L, 1L, 13L, 2L, 5L, 2L, 9L, 13L, 6L, 5L, 5L, 7L, 8L, 14L, 1L, 13L, 7L, 1L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20"), class = "factor"),
Mean_Diunal_Range = c(126,151, 152, 153, 138, 125, 137, 158, 129, 137, 170, 172, 115,151, 150, 178, 149, 146, 158, 165),
Isothermality = c(31,49, 46, 49, 54, 51, 59, 48, 47, 62, 54, 54, 61, 37, 47, 63,61, 75, 55, 57),
Mean_Temp_Wettest_Q = c(-87, 338, 348, 135,236, 193, 305, 322, 247, 253, 249, 247, 210, 105, 313, 252,238, 267, 250, 255),
Mean_Temp_Driest_Q = c(64, 231, 253,230, 324, 194, 263, 261, 328, 236, 133, 130, 158, 315, 188,232, 173, 248, 166, 151),
Precip_Wettest_M = c(119, 30, 18,3, 16, 8, 22, 12, 11, 165, 62, 59, 71, 13, 11, 201, 85, 110,121,91),
Precip_Dryest_m = c(2, 0, 0, 0, 0, 0, 0, 0, 0,0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0),
Precip_Seasonality = c(81,133, 136, 65, 106, 65, 126, 85, 101, 110, 77, 76, 111, 85,111, 125, 116, 123, 106, 93),
Precip_Warmest_Q = c(19, 55,29, 0, 0, 6, 49, 23, 1, 152, 148, 146, 56, 0, 15, 143, 63,140, 174, 195),
Precip_Coldest_Q = c(201, 0, 1, 8, 14, 14,5, 6, 17, 0, 9, 10, 0, 29, 0, 0, 2, 0, 1, 2),
Agriculture_Date = c(9000,3000, 3000, 7200, 7600, 3500, 3500, 3500, 7600, 5000, 1000,1700, 1250, 9500, 4000, 2700, 1250, 4000, 1000, 1000),
distance_to_highways = c(0.697535828668298,3.21145962385929, 4.70553674201054, 1.26025341029057, 0.0306302671744576,1.29795751420045, 0.341335382947427, 2.01515485126633, 1.90935593837289,1.00778111672525, 0.0841242925270876, 0.0135315745860043,2.39038097221274, 1.49284290327056, 1.21019159581485, 5.71817373942967,1.64045219527117, 0.121375728043842, 0.535675418474612, 1.08581690073317)),
row.names = c(NA, -20L), class = c("data.table", "data.frame"))
##Create Prediction Raster Brick
## Create Data Frame for Raster Brick
data_segment <- structure(list(
Elevation = c(1187.9, 1173.5, 1158.2, 1143.3, 1125.6, 1112.4, 1232.7, 1203.3, 1176.7, 1156.8, 1138.9, 1140.8, 1249.9, 1216.9, 1193.2, 1171.4, 1153.5, 1157.6, 1261.5, 1233.2, 1208.2, 1185.6, 1175.5, 1184.2, 1289.1, 1256.4, 1240.7, 1212.2, 1208.1, 1220.9, 1304.6, 1297.2, 1286.6, 1249.2, 1231, 1256.1, 1341.5, 1329.3, 1328.5, 1291.8, 1250.2, 1288.6, 1379.2, 1322.8, 1305.6, 1293.7, 1234.9, 1260.5, 1354.5, 1274.5, 1257.7, 1281.1, 1230.8, 1196.3, 1288.5, 1237.4, 1221.5, 1241.6, 1212.7, 1162.3, 1199.4, 1185.4, 1194.5, 1216.1, 1207.4, 1149.8, 1150.6, 1157, 1176, 1176.1, 1155.6, 1138, 1145, 1131.9, 1121.1, 1116.3, 1109.4, 1116.2),
Region = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
Protected = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
Population = c(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15, 11, 11, 12, 12, 12, 15, 11, 11, 12, 12, 12, 15, 11, 11, 12, 12, 12, 16, 11, 11, 12, 12, 12, 16, 11, 11, 12, 12, 12, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12),
Mean_Diunal_Range = c(10.5, 10.5, 10.6, 10.7, 10.8, 10.9, 10.4, 10.5, 10.5, 10.5, 10.6, 10.6, 10.3, 10.4, 10.5, 10.5, 10.5, 10.4, 10.2, 10.3, 10.3, 10.4, 10.3, 10.4, 10.2, 10.2, 10.2, 10.2, 10.3, 10.3, 10.3, 10.2, 10.1, 10.2, 10.3, 10.3, 10.3, 10.2, 10.2, 10.2, 10.2, 10.1, 10.2, 10.3, 10.2, 10.1, 10.1, 10.1, 10.1, 10.2, 10.2, 10, 10, 10.1, 10.1, 10.1, 10, 9.9, 9.9, 10, 10, 10, 9.9, 9.9, 9.9, 9.9, 10, 9.9, 9.8, 9.9, 9.8, 9.8, 10.1, 10.1, 10, 9.8, 9.8, 9.7),
Isothermality = c(68.5, 68.7, 69.3, 69.4, 69.6, 69.7, 68.4, 68.5, 68.8, 68.7, 69.2, 69.2, 67.8, 68.1, 68.4, 68.6, 68.6, 68.3, 67.4, 67.4, 67.6, 67.9, 68, 68.1, 67.2, 66.8, 66.9, 67.3, 67.5, 67.3, 67.2, 66.4, 66.6, 66.9, 66.7, 67, 66.7, 66.7, 66.5, 66.4, 66.5, 66.2, 65.7, 66.3, 66, 65.7, 65.5, 65.7, 65.4, 65.1, 65.1, 65.2, 65.2, 65.3, 64.2, 64.5, 64.5, 64.1, 64.6, 64.7, 63.8, 64, 64.1, 63.7, 63.8, 64.2, 63.4, 63.7, 63.4, 63.6, 63.5, 63.7, 63.8, 64.1, 63.5, 63, 63.2, 63.1),
Mean_Temp_Wettest_Q = c(24.5, 24.5, 24.7, 24.8, 25, 25, 24.2, 24.4, 24.5, 24.7, 24.8, 24.8, 24.1, 24.3, 24.5, 24.6, 24.7, 24.8, 24, 24.2, 24.4, 24.5, 24.6, 24.7, 24, 24.1, 24.2, 24.4, 24.5, 24.5, 23.9, 24, 24, 24.3, 24.5, 24.4, 23.8, 23.8, 23.9, 24, 24.3, 24.1, 23.5, 23.9, 24, 24, 24.3, 24.3, 23.7, 24.1, 24.2, 24.1, 24.4, 24.6, 24, 24.4, 24.4, 24.3, 24.5, 24.8, 24.5, 24.6, 24.5, 24.5, 24.5, 24.9, 24.8, 24.8, 24.6, 24.7, 24.8, 24.9, 24.8, 25, 25, 25, 25, 25),
Mean_Temp_Driest_Q = c(22.4, 22.4, 22.5, 22.7, 22.8, 22.9, 22.1, 22.3, 22.4, 22.5, 22.7, 22.7, 22, 22.2, 22.4, 22.5, 22.6, 22.6, 21.9, 22.1, 22.3, 22.4, 22.5, 22.5, 21.8, 22, 22, 22.2, 22.3, 22.3, 21.7, 21.7, 21.8, 22.1, 22.2, 22.1, 21.5, 21.5, 21.6, 21.8, 22, 21.8, 21.2, 21.6, 21.7, 21.7, 22, 21.9, 21.3, 21.8, 21.9, 21.8, 22, 22.2, 21.6, 21.9, 22, 21.9, 22, 22.3, 22, 22.1, 22, 22, 22, 22.4, 22.2, 22.2, 22.1, 22.2, 22.2, 22.4, 22.3, 22.8, 22.4, 22.5, 22.5, 22.5),
Precip_Wettest_M = c(161, 160, 161, 161, 159, 158, 163, 162, 163, 163, 162, 161, 166, 165, 164, 164, 163, 163, 169, 167, 166, 166, 166, 166, 172, 169, 169, 168, 169, 169, 173, 172, 173, 171, 170, 170, 174, 176, 176, 174, 171, 173, 179, 175, 175, 175, 171, 173, 180, 176, 174, 176, 172, 171, 181, 177, 176, 177, 174, 170, 177, 176, 176, 177, 175, 170, 175, 175, 177, 178, 175, 174, 176, 175, 175, 175, 173, 174),
Precip_Dryest_m = c(6, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 7, 6, 7, 6, 6, 6, 7, 7, 7, 6, 6, 6, 8, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7),
Precip_Seasonality = c(100.8, 101.7, 103, 105.2, 105.9, 107.2, 99.8, 101.8, 103.4, 105.5, 106.3, 106.8, 101.1, 101.8, 103.4, 105.1, 106.2, 106.8, 100.8, 102, 103.1, 105.3, 106.6, 107.6, 101.4, 102.5, 103.8, 105.3, 106.9, 107.2, 100.8, 102.2, 102.7, 104.9, 106.3, 106.6, 100.1, 101.3, 102.6, 104.1, 105.7, 106.3, 98.8, 101.3, 101.9, 103.6, 104.8, 105.8, 99.2, 102, 102.6, 103.5, 104.5, 106.4, 100.1, 102.4, 103.8, 104.2, 105, 106.8, 100.7, 103, 103.6, 104.1, 104.7, 105.8, 101.2, 102.2, 103.9, 104.6, 105.6, 106.7, 101.3, 102.5, 103.4, 104.2, 105.5, 106.7),
Precip_Warmest_Q = c(97, 96, 94, 89, 86, 84, 100, 97, 94, 90, 87, 86, 100, 99, 95, 91, 88, 87, 102, 99, 96, 92, 89, 87, 102, 98, 96, 92, 91, 90, 105, 100, 98, 95, 92, 92, 106, 104, 101, 97, 93, 93, 109, 101, 101, 97, 92, 93, 108, 100, 97, 97, 93, 89, 105, 99, 96, 96, 93, 89, 101, 98, 96, 95, 94, 89, 98, 97, 96, 94, 91, 89, 99, 96, 94, 93, 90, 88),
Precip_Coldest_Q = c(23, 22, 21, 21, 21, 19, 25, 22, 22, 21, 21, 20, 24, 23, 21, 21, 21, 20, 25, 23, 22, 21, 21, 21, 25, 23, 22, 21, 21, 21, 25, 24, 25, 22, 21, 21, 27, 27, 25, 23, 22, 22, 30, 27, 26, 25, 24, 23, 30, 27, 26, 26, 24, 23, 30, 27, 26, 26, 25, 21, 28, 26, 26, 26, 25, 24, 28, 26, 26, 26, 25, 24, 28, 27, 26, 26, 24, 24),
Agriculture_Date = c(4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000),
distance_to_highways = c(0.1, 0, 0, 0.1, 0.1, 0.1, 0, 0, 0, 0.1, 0.1, 0.1, 0, 0, 0, 0.1, 0.1, 0.2, 0.1, 0, 0, 0.1, 0.1, 0.1, 0.1, 0, 0, 0.1, 0.1, 0.1, 0.1, 0, 0, 0, 0.1, 0.1, 0.1, 0, 0, 0, 0.1, 0.1, 0, 0, 0, 0, 0.1, 0.1, 0, 0, 0, 0.1, 0.1, 0.1, 0, 0, 0.1, 0.1, 0.1, 0.1, 0, 0, 0, 0.1, 0.1, 0.2, 0, 0, 0, 0.1, 0.1, 0.2, 0, 0, 0, 0.1, 0.1, 0.2)),
class = "data.frame", row.names = c(NA, -78L))
## Create RasterBrick and assign values from dataframe
segment_brick <- brick(nrow=13, ncol=6, xmn=39, xmx=39.25, ymn=3.833333, ymx=4.375,crs="+proj=longlat +datum=WGS84 +no_defs", nl=15)
values(segment_brick) <- as.matrix(data_segment)
Now the modeling:
library(party)
##Fit Cforest Model
cforest_example = party::cforest(Presence ~ ., data = training,
control = cforest_unbiased(ntree=10,mtry=3,trace=TRUE))
##Create factor object for raster predict
f = list(levels(training$Protected),levels(training$Region),levels(training$Population))
##Rename factor lists to match original data
names(f) <- c("Protected", "Region", "Population")
##create a wrapper function for Cforest predict in raster package
predfun <- function(m, d, ...) predict(m, newdata=d, ...)
##Predict Raster with no Probabilities - results in correct prediction
Attempt_Processed = raster::predict(segment_brick, cforest_example, OOB=TRUE, factors=f, fun=predfun,progress="text",na.rm=TRUE)
plot(Attempt_Processed)
##Predict Raster with Probabilities - results in odd banded image
Attempt_Processed_prob = raster::predict(segment_brick, cforest_example, OOB=TRUE, factors=f, fun=predfun,progress="text",na.rm=TRUE,type="prob")
plot(Attempt_Processed_prob)
The way to go about this is to have a look what the model's predict function returns
It returns a list, which gets
unlist
ed byraster::predict
and that explains the banding pattern
You can fix this with a prediction function like this
Which you can now use with the RasterBrick
As you only have 2 probabilities, you could simplify the function to return only the probability for one class (absence in this case):