I have 2 dataframes. The RP and the RP2. My goal is to fill the NAs of RP columns Country_1 and Country_2 based on the conditions below:
1.If a cell of RP has already value leave it as it is.
2.If it has NA then see the TreatmenArea and Subarea of that cell and find the same pair of TreatmenArea, Subarea in RP2 then:
a.multiply this value with the PFaverage column of RP2 and divide by 100.
b.if the relative cell in RP2 is NA as well then leave the value in RP as NA
RP<-structure(list(ProductFamily = c("PF_1", "PF_10", "PF_10", "PF_100",
"PF_100", "PF_101", "PF_102", "PF_102", "PF_102", "PF_102", "PF_103",
"PF_103", "PF_104", "PF_105", "PF_106", "PF_106", "PF_106", "PF_106",
"PF_107", "PF_108", "PF_109", "PF_11", "PF_110", "PF_110", "PF_111",
"PF_111", "PF_111", "PF_112", "PF_112", "PF_113", "PF_113", "PF_113",
"PF_114", "PF_115", "PF_115", "PF_116", "PF_117", "PF_118", "PF_119",
"PF_12", "PF_12", "PF_12", "PF_120", "PF_120", "PF_120", "PF_120",
"PF_120", "PF_120", "PF_121", "PF_122"), TreatmenArea = c("TA_7",
"TA_2", "TA_2", "TA_6", "TA_6", "TA_2", "TA_2", "TA_2", "TA_2",
"TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2",
"TA_2", "TA_2", "TA_2", "TA_2", "TA_7", "TA_2", "TA_2", "TA_7",
"TA_7", "TA_7", "TA_7", "TA_7", "TA_2", "TA_2", "TA_2", "TA_6",
"TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_5", "TA_5",
"TA_5", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_2", "TA_4",
"TA_4"), Subarea = c("SA_16", "SA_5", "SA_5", "SA_15", "SA_15",
"SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6", "SA_6",
"SA_17", "SA_6", "SA_6", "SA_22", "SA_22", "SA_22", "SA_22",
"SA_22", "SA_6", "SA_6", "SA_6", "SA_15", "SA_6", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_11", "SA_11", "SA_11", "SA_6", "SA_6",
"SA_6", "SA_6", "SA_6", "SA_6", "SA_10", "SA_10"), Country_1 = c(NA,
NA, NA, NA, NA, 92.6961421759861, NA, NA, NA, NA, 78.3001808318264,NA, NA, NA, 106.832963501416, 0.613496932515337, 104.21011973735,
NA, NA, NA, NA, NA, 99.5238622522423, NA, NA, NA, NA, NA, NA,
89.0343347639485, NA, NA, NA, NA, NA, NA, 101.231684009344, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Country_2 = c(NA,
NA, 161.55950752394, NA, NA, NA, NA, NA, 59.1346153846154, NA,
NA, NA, NA, 128.113063407181, 93.3812839543959, NA, NA, NA, 137.724550898204,
NA, NA, NA, NA, 90.1602849510241, 37.3939722071828, NA, NA, 40.9756097560976,
NA, NA, NA, 87.0095902353967, NA, NA, NA, 50.4591590140164, 92.1639413888299,
44.7601588756493, NA, NA, NA, NA, NA, 100.053835800808, NA, NA,
NA, NA, 136.420722135008, NA)), row.names = c(NA, 50L), class = "data.frame")
and the RP2
RP2<-structure(list(TreatmenArea = c("TA_1", "TA_1", "TA_1", "TA_1",
"TA_2", "TA_2", "TA_2", "TA_3", "TA_4", "TA_4", "TA_5", "TA_5",
"TA_5", "TA_6", "TA_6", "TA_7", "TA_7", "TA_7", "TA_7", "TA_7",
"TA_7", "TA_7", "TA_7", "TA_8", "TA_9"), Subarea = c("SA_1",
"SA_2", "SA_3", "SA_4", "SA_5", "SA_6", "SA_7", "SA_8", "SA_10",
"SA_9", "SA_11", "SA_12", "SA_13", "SA_14", "SA_15", "SA_16",
"SA_17", "SA_18", "SA_19", "SA_20", "SA_21", "SA_22", "SA_23",
"SA_24", "SA_25"), Country_1 = c(101.37519256645, 105.268942332558,
100.49933368058, 104.531597221684, NaN, 83.4404308144341, 86.2833044714836,
81.808967345926, 79.6786979951661, 77.6863475527052, NaN, 78.3001808318264,
112.499238782021, 113.526674294436, NaN, 108.350959378962, NaN,
NaN, 102.243471199266, NaN, 104.323270355678, NaN, NaN, NaN,
100), Country_2 = c(98.7267717862572, 83.9572019653478, 97.164068306148,
103.654771613923, 161.55950752394, 75.4091957339533, 96.5255996196344,
99.8317785594128, 88.1477193135348, NaN, NaN, NaN, 151.411687458963,
107.652477161141, NaN, NaN, NaN, NaN, 92.4695215620261, NaN,
88.9890571623243, 39.1847909816402, 87.0642912470953, NaN, NaN
), PFaverage = c(9.09293100169062, 16.2821052631579, 9.76688333333333,
13.4754047619048, 17.8741666666667, 9.42546567085954, 12.6145188492064,
12.4536666666667, 7.36169471153846, 13.1581818181818, 21.0866666666667,
9.58, 16.2525049019608, 11.552822039072, 12.6908333333333, 20.5489611111111,
11.99, 7.19134920634921, 8.51728472222222, 5.97, 12.2264183501684,6.37925, 16.1375, 11.9384615384615, 15.9185714285714)), class = c("data.frame"), row.names = c(NA, -25L))
If for example we check first row of Country_1 in RP we see that its NA and its Subarea is SA_16 so we move to RP2 and check that the relative value for Country_1 and Subarea SA_16 is 108.35. So the calculation is 108.35*20.54(PFaverage)/100.
Created on 2022-04-06 by the reprex package (v2.0.0)