How to prevent LS means analysis from producing NAs?

Question

How to prevent LS means analysis from producing NAs?

1.2k views Asked by wardah m At 20 November 2018 at 00:33

I am running a linear model regression analysis script and I am running emmeans (LS means) on my model, but I am getting a whole lot of NAs and I'm not sure why. Here is what I have run:

   setwd("C:/Users/wkmus/Desktop/R-Stuff")
    ### yeild-twt
    ASM_Data<-read.csv("ASM_FIELD_18_SUMM_wm.csv",header=TRUE, na.strings=".")
    head(ASM_Data)
    str(ASM_Data)
    ####"NA" values in table are labeled as "." colored orange
    ASM_Data$REP <- as.factor(ASM_Data$REP)
    head(ASM_Data$REP)
    ASM_Data$ENTRY_NO <-as.factor(ASM_Data$ENTRY_NO)
    head(ASM_Data$ENTRY_NO)
    ASM_Data$RANGE<-as.factor(ASM_Data$RANGE)
    head(ASM_Data$RANGE)
    ASM_Data$PLOT_ID<-as.factor(ASM_Data$PLOT_ID)
    head(ASM_Data$PLOT_ID)
    ASM_Data$PLOT<-as.factor(ASM_Data$PLOT)
    head(ASM_Data$PLOT)
    ASM_Data$ROW<-as.factor(ASM_Data$ROW)
    head(ASM_Data$ROW)
    ASM_Data$REP <- as.numeric(as.character(ASM_Data$REP))
    head(ASM_Data$REP)
    ASM_Data$TWT_g.li <- as.numeric(as.character(ASM_Data$TWT_g.li))
    ASM_Data$Yield_kg.ha <- as.numeric(as.character(ASM_Data$Yield_kg.ha))
    ASM_Data$PhysMat_Julian <- as.numeric(as.character(ASM_Data$PhysMat_Julian))
    ASM_Data$flowering <- as.numeric(as.character(ASM_Data$flowering))
    ASM_Data$height <- as.numeric(as.character(ASM_Data$height))
    ASM_Data$CLEAN.WT <- as.numeric(as.character(ASM_Data$CLEAN.WT))
    ASM_Data$GRAV.TEST.WEIGHT <-as.numeric(as.character(ASM_Data$GRAV.TEST.WEIGHT))
    str(ASM_Data)
    
    library(lme4)
    #library(lsmeans)
    library(emmeans)

Here is the data frame:

  > str(ASM_Data)
'data.frame':   270 obs. of  20 variables:
 $ TRIAL_ID         : Factor w/ 1 level "18ASM_OvOv": 1 1 1 1 1 1 1 1 1 1 ...
 $ PLOT_ID          : Factor w/ 270 levels "18ASM_OvOv_002",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ PLOT             : Factor w/ 270 levels "2","3","4","5",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ ROW              : Factor w/ 20 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ RANGE            : Factor w/ 15 levels "1","2","3","4",..: 2 3 4 5 6 7 8 9 10 12 ...
 $ REP              : num  1 1 1 1 1 1 1 1 1 1 ...
 $ MP               : int  1 1 1 1 1 1 1 1 1 1 ...
 $ SUB.PLOT         : Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 2 2 2 2 2 3 ...
 $ ENTRY_NO         : Factor w/ 139 levels "840","850","851",..: 116 82 87 134 77 120 34 62 48 136 ...
 $ height           : num  74 70 73 80 70 73 75 68 65 68 ...
 $ flowering        : num  133 133 134 134 133 131 133 137 134 132 ...
 $ CLEAN.WT         : num  1072 929 952 1149 1014 ...
 $ GRAV.TEST.WEIGHT : num  349 309 332 340 325 ...
 $ TWT_g.li         : num  699 618 663 681 650 684 673 641 585 646 ...
 $ Yield_kg.ha      : num  2073 1797 1841 2222 1961 ...
 $ Chaff.Color      : Factor w/ 3 levels "Bronze","Mixed",..: 1 3 3 1 1 1 1 3 1 3 ...
 $ CHAFF_COLOR_SCALE: int  2 1 1 2 2 2 2 1 2 1 ...
 $ PhysMat          : Factor w/ 3 levels "6/12/2018","6/13/2018",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ PhysMat_Julian   : num  163 163 163 163 163 163 163 163 163 163 ...
 $ PEDIGREE         : Factor w/ 1 level "OVERLEY/OVERLAND": 1 1 1 1 1 1 1 1 1 1 ...

This is the head of ASM Data:

 head(ASM_Data)
    `TRIAL_ID        PLOT_ID PLOT ROW RANGE REP MP SUB.PLOT ENTRY_NO height flowering CLEAN.WT GRAV.TEST.WEIGHT TWT_g.li`
    1 18ASM_OvOv 18ASM_OvOv_002    2   1     2   1  1        A      965     74       133   1071.5           349.37      699
    2 18ASM_OvOv 18ASM_OvOv_003    3   1     3   1  1        A      931     70       133    928.8           309.13      618
    3 18ASM_OvOv 18ASM_OvOv_004    4   1     4   1  1        A      936     73       134    951.8           331.70      663
    4 18ASM_OvOv 18ASM_OvOv_005    5   1     5   1  1        A      983     80       134   1148.6           340.47      681
    5 18ASM_OvOv 18ASM_OvOv_006    6   1     6   1  1        B      926     70       133   1014.0           324.95      650
    6 18ASM_OvOv 18ASM_OvOv_007    7   1     7   1  1        B      969     73       131   1076.6           342.09      684
      Yield_kg.ha Chaff.Color CHAFF_COLOR_SCALE   PhysMat PhysMat_Julian         PEDIGREE
    1        2073      Bronze                 2 6/12/2018            163 OVERLEY/OVERLAND
    2        1797       White                 1 6/12/2018            163 OVERLEY/OVERLAND
    3        1841       White                 1 6/12/2018            163 OVERLEY/OVERLAND
    4        2222      Bronze                 2 6/12/2018            163 OVERLEY/OVERLAND
    5        1961      Bronze                 2 6/12/2018            163 OVERLEY/OVERLAND
    6        2082      Bronze                 2 6/12/2018            163 OVERLEY/OVERLAND

I am looking at a linear model dealing with test weight.

This is what I ran:

ASM_Data$TWT_g.li <- as.numeric(as.character((ASM_Data$TWT_g.li)))
head(ASM_Data$TWT_g.li)

ASM_YIELD_1 <- lm(TWT_g.li~ENTRY_NO + REP + SUB.BLOCK, data=ASM_Data)
anova(ASM_YIELD_1)
summary(ASM_YIELD_1)
emmeans(ASM_YIELD_1, "ENTRY_NO") ###########ADJ. MEANS

I get an output for anova

anova(ASM_YIELD_1)
Analysis of Variance Table

Response: TWT_g.li
           Df Sum Sq Mean Sq  F value  Pr(>F)    
ENTRY_NO  138 217949    1579   7.0339 < 2e-16 ***
REP         1  66410   66410 295.7683 < 2e-16 ***
SUB.BLOCK   4   1917     479   2.1348 0.08035 .  
Residuals 125  28067     225                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

but for emmeans I get something like this:

ENTRY_NO emmean SE df asymp.LCL asymp.UCL
 840      nonEst NA NA        NA        NA
 850      nonEst NA NA        NA        NA
 851      nonEst NA NA        NA        NA
 852      nonEst NA NA        NA        NA
 853      nonEst NA NA        NA        NA
 854      nonEst NA NA        NA        NA
 855      nonEst NA NA        NA        NA
 857      nonEst NA NA        NA        NA
 858      nonEst NA NA        NA        NA
 859      nonEst NA NA        NA        NA

I do have outliers in my data, which are indicated by a "." in my data, but that's the only thing I can think of that is off.

When I run with(ASM_Data, table(ENTRY_NO, REP, SUB.BLOCK))

this is what I have:

 with(ASM_Data, table(ENTRY_NO,REP,SUB.BLOCK))
, , SUB.BLOCK = A

        REP
ENTRY_NO 1 2
     840 0 0
     850 0 0
     851 0 0
     852 0 0
     853 0 0
     854 0 0
     855 0 0
     857 0 0
     858 0 0
     859 0 0
     860 0 0
     861 0 0
     862 0 0
     863 1 0
     864 0 0
     865 1 0
     866 1 0
     867 0 0
     868 0 0
     869 1 0
     870 1 0
     871 0 0
     872 0 0
     873 0 0
     874 0 0
     875 0 0
     876 0 0
     877 0 0
     878 0 0
     879 1 0
     880 0 0
     881 0 0
     882 0 0
     883 0 0
     884 0 0
     885 1 0
     886 0 0
     887 1 0
     888 1 0
     889 1 0
     890 0 0
     891 1 0
     892 0 0
     893 0 0
     894 0 0
     895 0 0
     896 1 0
     897 0 0
     898 0 0
     899 0 0
     900 1 0
     901 1 0
     902 0 0
     903 0 0
     904 1 0
     905 1 0
     906 0 0
     907 1 0
     908 1 0
     909 0 0
     910 0 0
     911 0 0
     912 0 0
     913 0 0
     914 0 0
     915 0 0
     916 1 0
     917 0 0
     918 0 0
     919 1 0
     920 0 0
     921 0 0
     922 0 0
     923 1 0
     924 0 0
     925 0 0
     926 0 0
     927 1 0
     928 0 0
     929 0 0
     930 0 0
     931 1 0
     932 0 0
     933 0 0
     934 0 0
     935 0 0
     936 1 0
     937 0 0
     938 1 0
     939 1 0
     940 0 0
     941 1 0
     942 0 0
     943 1 0
     944 0 0
     945 0 0
     946 0 0
     947 0 0
     948 1 0
     949 0 0
     950 1 0
     951 0 0
     952 0 0
     953 0 0
     954 0 0
     955 1 0
     956 1 0
     957 1 0
     958 1 0
     959 0 0
     960 0 0
     961 0 0
     962 0 0
     963 0 0
     964 0 0
     965 1 0
     966 0 0
     967 1 0
     968 0 0
     969 0 0
     970 1 0
     971 0 0
     972 0 0
     973 0 0
     974 1 0
     975 0 0
     976 0 0
     977 0 0
     978 1 0
     979 0 0
     980 0 0
     981 0 0
     982 0 0
     983 1 0
     984 1 0
     985 0 0
     986 1 0
     987 3 0
     988 0 0

, , SUB.BLOCK = B

        REP
ENTRY_NO 1 2
     840 0 0
     850 0 0
     851 0 0
     852 0 0
     853 1 0
     854 0 0
     855 0 0
     857 0 0
     858 0 0
     859 0 0
     860 0 0
     861 1 0
     862 0 0
     863 0 0
     864 0 0
     865 0 0
     866 0 0
     867 0 0
     868 0 0
     869 0 0
     870 0 0
     871 1 0
     872 0 0
     873 0 0
     874 0 0
     875 0 0
     876 1 0
     877 1 0
     878 1 0
     879 0 0
     880 1 0
     881 0 0
     882 1 0
     883 1 0
     884 1 0
     885 0 0
     886 0 0
     887 0 0
     888 0 0
     889 0 0
     890 1 0
     891 0 0
     892 1 0
     893 1 0
     894 1 0
     895 1 0
     896 0 0
     897 1 0
     898 0 0
     899 0 0
     900 0 0
     901 0 0
     902 1 0
     903 0 0
     904 0 0
     905 0 0
     906 0 0
     907 0 0
     908 0 0
     909 1 0
     910 0 0
     911 1 0
     912 0 0
     913 1 0
     914 0 0
     915 0 0
     916 0 0
     917 0 0
     918 0 0
     919 0 0
     920 1 0
     921 1 0
     922 0 0
     923 0 0
     924 0 0
     925 1 0
     926 1 0
     927 0 0
     928 0 0
     929 0 0
     930 1 0
     931 0 0
     932 1 0
     933 0 0
     934 1 0
     935 0 0
     936 0 0
     937 1 0
     938 0 0
     939 0 0
     940 1 0
     941 0 0
     942 0 0
     943 0 0
     944 0 0
     945 1 0
     946 0 0
     947 1 0
     948 0 0
     949 0 0
     950 0 0
     951 1 0
     952 0 0
     953 0 0
     954 1 0
     955 0 0
     956 0 0
     957 0 0
     958 0 0
     959 1 0
     960 0 0
     961 0 0
     962 1 0
     963 0 0
     964 0 0
     965 0 0
     966 0 0
     967 0 0
     968 0 0
     969 1 0
     970 0 0
     971 0 0
     972 0 0
     973 0 0
     974 0 0
     975 0 0
     976 1 0
     977 1 0
     978 0 0
     979 0 0
     980 0 0
     981 1 0
     982 1 0
     983 0 0
     984 0 0
     985 3 0
     986 0 0
     987 1 0
     988 1 0

, , SUB.BLOCK = C

        REP
ENTRY_NO 1 2
     840 0 0
     850 0 0
     851 0 0
     852 0 0
     853 0 0
     854 0 0
     855 0 0
     857 1 0
     858 0 0
     859 1 0
     860 0 0
     861 0 0
     862 1 0
     863 0 0
     864 0 0
     865 0 0
     866 0 0
     867 0 0
     868 0 0
     869 0 0
     870 0 0
     871 0 0
     872 1 0
     873 0 0
     874 0 0
     875 0 0
     876 0 0
     877 0 0
     878 0 0
     879 0 0
     880 0 0
     881 1 0
     882 0 0
     883 0 0
     884 0 0
     885 0 0
     886 1 0
     887 0 0
     888 0 0
     889 0 0
     890 0 0
     891 0 0
     892 0 0
     893 0 0
     894 0 0
     895 0 0
     896 0 0
     897 0 0
     898 1 0
     899 1 0
     900 0 0
     901 0 0
     902 0 0
     903 1 0
     904 0 0
     905 0 0
     906 1 0
     907 0 0
     908 0 0
     909 0 0
     910 1 0
     911 0 0
     912 1 0
     913 0 0
     914 1 0
     915 1 0
     916 0 0
     917 1 0
     918 1 0
     919 0 0
     920 0 0
     921 0 0
     922 1 0
     923 0 0
     924 1 0
     925 0 0
     926 0 0
     927 0 0
     928 1 0
     929 1 0
     930 0 0
     931 0 0
     932 0 0
     933 1 0
     934 0 0
     935 1 0
     936 0 0
     937 0 0
     938 0 0
     939 0 0
     940 0 0
     941 0 0
     942 1 0
     943 0 0
     944 1 0
     945 0 0
     946 1 0
     947 0 0
     948 0 0
     949 1 0
     950 0 0
     951 0 0
     952 1 0
     953 1 0
     954 0 0
     955 0 0
     956 0 0
     957 0 0
     958 0 0
     959 0 0
     960 1 0
     961 1 0
     962 0 0
     963 1 0
     964 1 0
     965 0 0
     966 1 0
     967 0 0
     968 1 0
     969 0 0
     970 0 0
     971 1 0
     972 1 0
     973 1 0
     974 0 0
     975 1 0
     976 0 0
     977 0 0
     978 1 0
     979 2 0
     980 0 0
     981 0 0
     982 0 0
     983 0 0
     984 0 0
     985 1 0
     986 3 0
     987 0 0
     988 0 0

, , SUB.BLOCK = D

        REP
ENTRY_NO 1 2
     840 0 0
     850 0 0
     851 0 0
     852 0 1
     853 0 0
     854 0 0
     855 0 0
     857 0 0
     858 0 1
     859 0 0
     860 0 1
     861 0 0
     862 0 0
     863 0 0
     864 0 1
     865 0 0
     866 0 0
     867 0 0
     868 0 0
     869 0 0
     870 0 0
     871 0 0
     872 0 0
     873 0 0
     874 0 0
     875 0 1
     876 0 0
     877 0 0
     878 0 1
     879 0 0
     880 0 1
     881 0 1
     882 0 1
     883 0 1
     884 0 1
     885 0 0
     886 0 0
     887 0 0
     888 0 0
     889 0 0
     890 0 0
     891 0 0
     892 0 1
     893 0 0
     894 0 0
     895 0 0
     896 0 0
     897 0 1
     898 0 0
     899 0 1
     900 0 0
     901 0 0
     902 0 1
     903 0 0
     904 0 0
     905 0 0
     906 0 0
     907 0 0
     908 0 0
     909 0 0
     910 0 0
     911 0 0
     912 0 0
     913 0 1
     914 0 1
     915 0 1
     916 0 0
     917 0 1
     918 0 1
     919 0 0
     920 0 0
     921 0 1
     922 0 1
     923 0 0
     924 0 0
     925 0 0
     926 0 0
     927 0 0
     928 0 0
     929 0 1
     930 0 1
     931 0 0
     932 0 0

Can someone please give me an idea of what is going wrong??

Thanks !

Original Q&A

There are 4 answers

Russ Lenth On 21 November 2018 at 04:57

EMMs are obtained by averaging predictions over 2 reps and 5 blocks (or maybe more?). Look at

coef(ASM_YIELD_1)

If any of the rep or block effects are NA, then you can’t estimate all of the rep or block effects, and that makes the average of them non-estimable.

You can see exactly which factor combinations are non-estimable by doing:

summary(ref_grid(ASM_YIELD_1))

addendum

Here is a reformatting of the tables I requested in comments:

ENTRY   ---------- BLOCK -------------
 NO      A        B        C        D

 840    0 0      0 0      0 0      0 0
 850    0 0      0 0      0 0      0 0
 851    0 0      0 0      0 0      0 0
 852    0 0      0 0      0 0      0 1
 853    0 0      1 0      0 0      0 0
 854    0 0      0 0      0 0      0 0
 855    0 0      0 0      0 0      0 0
 857    0 0      0 0      1 0      0 0
 858    0 0      0 0      0 0      0 1
 859    0 0      0 0      1 0      0 0
    ... etc ...

This is extremely sparse data. I think there are two more blocks not shown. But I see very few instances where a given ENTRY_NO is observed in more than one rep or block. So I think it is seriously over-fitting to try to account for rep or block effects in this model.

MAYBE omitting REP from the model will make it work. MAYBE re-fitting the model with factor(REP) in place of REP will enable emmeans to detect a nesting structure. Otherwise, there's some really subtle dependence in the blocking structure and treatments, and I don't know what to suggest.

jay.sf On 24 July 2023 at 17:51

emmeans can be picky when variables have been dropped from the model due to rank deficiencies. They are actually not estimated, but they still live in the data stored in the fit object that emmeans is using, and will in a way poison it.

The remedy is to recalculate the model, while eliminating the bad variables from the model, i.e. from the formula.

Consider the following example for lme4::lmer, where we are interested in the interaction st:d1.

fo <- y ~ st*(d1*d5*d6 + d2 + d3 + d4) + a1 + x + (1 | id) + (1 | trial)

mm1 <- lme4::lmer(fo, dat, REML=FALSE)
# fixed-effect model matrix is rank deficient so dropping 4 columns / coefficients
# boundary (singular) fit: see help('isSingular')

Four variables were dropped, and emmeans throws NAs.

emmeans(mm1, ~ st*d1, nuisance='x')
 # st d1 emmean    SE  df lower.CL upper.CL
 #  0  0   2.17 0.190 122     1.79     2.54
 #  1  0   2.25 0.294 241     1.67     2.83
 #  0  1 nonEst    NA  NA       NA       NA
 #  1  1 nonEst    NA  NA       NA       NA

Now, we just build a new reduced formula based on the names of the actual fitted coefficients, maybe something like the following using fixef and ranef and refit the model.

(fo_new <- reformulate(c(names(lme4::fixef(mm1))[-1], 
                        sprintf('(1 | %s)', names(lme4::ranef(mm1)))), 'y'))
# y ~ st + d1 + d5 + d6 + d2 + d3 + d4 + a1 + x + d1:d6 + d5:d6 + 
#     st:d1 + st:d5 + st:d6 + st:d2 + st:d3 + st:d4 + st:d1:d6 + 
#     st:d5:d6 + (1 | id) + (1 | trial)

mm2 <- lmerTest::lmer(fo_new, dat, REML=FALSE)
# boundary (singular) fit: see help('isSingular')

stopifnot(all.equal(mm1, mm2, check.attributes=FALSE))

emmeans(mm2, ~ st*d1, nuisance='x')
#  st d1 emmean    SE  df lower.CL upper.CL
#   0  0   2.17 0.190 122     1.79     2.54
#   1  0   2.25 0.294 241     1.67     2.83
#   0  1   1.95 0.276 109     1.40     2.49
#   1  1   2.05 0.405 192     1.25     2.85

Now, emmeans is happy! :-)

Data:

dat <- structure(list(id = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 
4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 
9L, 10L, 10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 13L, 13L, 
14L, 14L, 14L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 17L, 17L, 18L, 
18L, 18L, 19L, 19L, 19L, 20L, 20L, 20L, 21L, 21L, 21L, 22L, 22L, 
22L, 23L, 23L, 23L, 24L, 24L, 24L, 25L, 25L, 25L, 26L, 26L, 26L, 
27L, 27L, 27L, 28L, 28L, 28L, 29L, 29L, 29L, 30L, 30L, 30L, 31L, 
31L, 31L, 32L, 32L, 32L, 33L, 33L, 33L, 34L, 34L, 34L, 35L, 35L, 
35L, 36L, 36L, 36L, 37L, 37L, 37L, 38L, 38L, 38L, 39L, 39L, 39L, 
40L, 40L, 40L, 41L, 41L, 41L, 42L, 42L, 42L, 43L, 43L, 43L, 44L, 
44L, 44L, 45L, 45L, 45L, 46L, 46L, 46L, 47L, 47L, 47L, 48L, 48L, 
48L, 49L, 49L, 49L, 50L, 50L, 50L), trial = c(1L, 2L, 3L, 1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 
2L, 3L), y = c(1.01837990914119, 1.72281053171597, 1.16038933631774, 
1.84741932777425, 2.15377758211187, 1.44708378268237, 1.32050688356017, 
1.1999393743433, 0.832090772863633, 2.28263010547248, 2.06830807951508, 
3.50064060945826, 2.42965506902417, 1.03344739854432, 2.05249031229156, 
0.624779485406468, 1.34116637631539, 1.69179421074866, 2.27044714140095, 
2.06580159558889, 2.04603679365979, 0.901719165871983, 2.00970305604673, 
2.93104632786378, 1.24723492562883, 2.2271756343974, 1.89879628580628, 
1.83117598856832, 1.70053079532615, 0.626989628785855, 1.984818811227, 
2.50882095372023, 1.27518728336047, 2.24074403417244, 2.14151176453018, 
3.35995098569363, 1.67432260454106, 1.98175867928662, 1.50605033454511, 
2.09774834852059, 1.55073131078313, 1.50766945430263, 2.5248624762036, 
2.57990655332647, 3.42721524313865, 1.68378025917067, 1.47995712419544, 
1.40645873307514, 1.88003440125559, 1.75934122735018, 1.74337693067683, 
1.59747338520177, 1.87679172340406, 1.44208371784719, 2.40275743710848, 
1.5953665217543, 1.55694381354539, 1.64035264679494, 2.48353008255997, 
2.96289269075876, 2.42962806107927, 2.48371741479921, 2.7316101041616, 
2.63939066505726, 2.53641034022768, 2.68671573577324, 1.14285030411344, 
1.20831594497024, 1.27194419545619, 2.09603774330159, 2.23081630563127, 
2.1347438360067, 1.57698407162643, 3.39478557588238, 2.96186354727387, 
2.58239199435352, 2.44241126873145, 2.40546940767434, 1.8969624697088, 
1.2204109755261, 2.20827256882076, 2.3955852344866, 2.34308978876196, 
2.69400711592567, 2.55368414291743, 2.87307348748506, 2.81160389034124, 
2.2251307233081, 2.64579766646207, 2.59041510869758, 1.20044150410878, 
2.5341592223955, 2.11832088659712, 1.37165442217995, 2.48946142326411, 
1.54165246908483, 1.75713292575393, 1.21135423153261, 2.27906786981473, 
0.581949400859875, 2.55432238775076, 0.710437338297285, 1.33730281943567, 
2.81473012051573, 2.38333136367104, 1.69657968891281, 1.55813806156551, 
2.37063978439338, 1.51354489173799, 1.42676809793259, 2.17830007077276, 
2.41163222093621, 3.19393552098512, 2.2541858233295, 1.49809418369717, 
1.12314590398162, 1.97842147508821, 1.14419489276718, 2.29833044665594, 
2.27649799747862, 2.70221763225851, 2.36443763286132, 1.96428137039684, 
2.13762019810797, 1.99990443254389, 2.11636116477743, 1.83950578665097, 
2.55374771744304, 2.1427802527118, 2.64727022373539, 1.96764061562098, 
2.60351192397739, 2.04165177429036, 1.47050617438957, 1.38831574244936, 
0.650957101476655, 1.16840843162526, 2.58234556181071, 1.67146970510712, 
1.96595469079416, 1.72075555640435, 0.889489197804286, 2.14802605910079, 
1.55781298621079, 0.884775340440683, 1.5837912340036, 2.27268991271172, 
1.90433670807436, 2.97569071967808, 2.67673634576096), st = c(0L, 
0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 
1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 
0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 
1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 
0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 
1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 
0L, 1L, 0L, 0L, 1L), d1 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), d2 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 
1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 
1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 
0L, 0L, 1L, 1L, 1L), d3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), d4 = c(0L, 
0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L), d5 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), d6 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L), a1 = c(0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 
0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 
1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 
1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 
0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 
1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 
1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 
0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 
1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 
1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L), x = c(21.693378534168, 
21.693378534168, 21.693378534168, 27.5635733655654, 27.5635733655654, 
27.5635733655654, 21.0235442230478, 21.0235442230478, 21.0235442230478, 
37.3397954637185, 37.3397954637185, 37.3397954637185, 36.9168151924387, 
36.9168151924387, 36.9168151924387, 19.1212821025401, 19.1212821025401, 
19.1212821025401, 32.3830615049228, 32.3830615049228, 32.3830615049228, 
23.3126252391376, 23.3126252391376, 23.3126252391376, 24.4104024292901, 
24.4104024292901, 24.4104024292901, 21.1630470496602, 21.1630470496602, 
21.1630470496602, 26.090356990695, 26.090356990695, 26.090356990695, 
36.2644856846891, 36.2644856846891, 36.2644856846891, 26.431828352157, 
26.431828352157, 26.431828352157, 26.3781595965847, 26.3781595965847, 
26.3781595965847, 39.3826594613492, 39.3826594613492, 39.3826594613492, 
18.0264230361208, 18.0264230361208, 18.0264230361208, 20.3397673121653, 
20.3397673121653, 20.3397673121653, 30.3645347533748, 30.3645347533748, 
30.3645347533748, 18.3756379187107, 18.3756379187107, 18.3756379187107, 
31.2602149215527, 31.2602149215527, 31.2602149215527, 29.8046363908798, 
29.8046363908798, 29.8046363908798, 35.0474634887651, 35.0474634887651, 
35.0474634887651, 20.7696465491317, 20.7696465491317, 20.7696465491317, 
26.4675482031889, 26.4675482031889, 26.4675482031889, 39.5598319787532, 
39.5598319787532, 39.5598319787532, 33.5386927942745, 33.5386927942745, 
33.5386927942745, 26.0694283158518, 26.0694283158518, 26.0694283158518, 
27.0705276266672, 27.0705276266672, 27.0705276266672, 38.1853067930788, 
38.1853067930788, 38.1853067930788, 37.9851826457307, 37.9851826457307, 
37.9851826457307, 33.9107203702442, 33.9107203702442, 33.9107203702442, 
20.2714434023947, 20.2714434023947, 20.2714434023947, 26.8274163762107, 
26.8274163762107, 26.8274163762107, 21.5259600100107, 21.5259600100107, 
21.5259600100107, 34.1031798538752, 34.1031798538752, 34.1031798538752, 
19.291430445388, 19.291430445388, 19.291430445388, 23.096094601322, 
23.096094601322, 23.096094601322, 35.3244720133953, 35.3244720133953, 
35.3244720133953, 21.0157737894915, 21.0157737894915, 21.0157737894915, 
34.0905706076883, 34.0905706076883, 34.0905706076883, 34.4306187503971, 
34.4306187503971, 34.4306187503971, 27.0954017587937, 27.0954017587937, 
27.0954017587937, 36.7278028610162, 36.7278028610162, 36.7278028610162, 
30.4951679217629, 30.4951679217629, 30.4951679217629, 20.2830193424597, 
20.2830193424597, 20.2830193424597, 21.5118714510463, 21.5118714510463, 
21.5118714510463, 29.4289674721658, 29.4289674721658, 29.4289674721658, 
22.2672095233575, 22.2672095233575, 22.2672095233575, 25.2977789915167, 
25.2977789915167, 25.2977789915167, 37.4492049706168, 37.4492049706168, 
37.4492049706168)), class = "data.frame", row.names = c(1L, 2L, 
3L, 4L, 6L, 5L, 8L, 9L, 7L, 11L, 10L, 12L, 14L, 13L, 15L, 16L, 
18L, 17L, 19L, 21L, 20L, 24L, 22L, 23L, 26L, 25L, 27L, 28L, 29L, 
30L, 32L, 33L, 31L, 36L, 35L, 34L, 39L, 38L, 37L, 41L, 42L, 40L, 
43L, 45L, 44L, 47L, 48L, 46L, 51L, 50L, 49L, 53L, 52L, 54L, 55L, 
57L, 56L, 58L, 60L, 59L, 63L, 62L, 61L, 66L, 64L, 65L, 69L, 68L, 
67L, 72L, 71L, 70L, 75L, 74L, 73L, 78L, 77L, 76L, 81L, 80L, 79L, 
84L, 83L, 82L, 86L, 87L, 85L, 90L, 89L, 88L, 92L, 91L, 93L, 95L, 
94L, 96L, 97L, 99L, 98L, 102L, 101L, 100L, 105L, 103L, 104L, 
107L, 106L, 108L, 111L, 109L, 110L, 114L, 113L, 112L, 117L, 116L, 
115L, 120L, 118L, 119L, 121L, 122L, 123L, 124L, 126L, 125L, 128L, 
129L, 127L, 131L, 130L, 132L, 134L, 133L, 135L, 136L, 138L, 137L, 
139L, 141L, 140L, 144L, 142L, 143L, 146L, 145L, 147L, 148L, 149L, 
150L))

Stéphane Laurent On 02 February 2024 at 16:29

There are non-estimable marginal means in your model. What you can do is simply filter out the corresponding rows of the marginal means table:

library(emmeans)
# create a linear model with some non-estimable parameters (fit2)
fit <- lm(breaks ~ wool*tension, data = warpbreaks)
fit2 <- update(fit, subset = -(37:48))
# emmeans
emm <- emmeans(fit2, ~ wool*tension)
summ <- summary(emm)
# remove the rows corresponding to non-estimable parameters
emm[which(!is.na(summ$emmean))]

Here is an example with some pairwise comparisons:

ps <- pairs(emmeans(fit2, ~ wool*tension))
summps <- summary(ps)
ps[which(!is.na(summps$estimate))]

**Russ Lenth** · Accepted Answer · 2018-11-22T17:41:01+00:00

I've been able to create a situation like this. Consider this dataset:

> junk
   trt rep blk           y
1    A   1   1 -1.17415687
2    B   1   1 -0.20084854
3    C   1   1  0.64797806
4    A   1   2 -1.69371434
5    B   1   2 -0.35835442
6    C   1   2  1.35718782
7    A   1   3  0.20510482
8    B   1   3  1.00857651
9    C   1   3 -0.20553167
10   A   2   4  0.31261523
11   B   2   4  0.47989115
12   C   2   4  1.27574085
13   A   2   5 -0.79209520
14   B   2   5  1.07151315
15   C   2   5 -0.04222769
16   A   2   6 -0.80571767
17   B   2   6  0.80442988
18   C   2   6  1.73526561

This has 6 complete blocks, separately labeled with 3 blocks per rep. Not obvious, but true, is that rep is a numeric variable having values 1 and 2, while blk is a factor having 6 levels 1 -- 6:

> sapply(junk, class)
      trt       rep       blk         y 
 "factor" "numeric"  "factor" "numeric"

With this complete dataset, I have no problem obtaining EMMs for modeling situations parallel to what was used in the original posting. However, if I use only a subset of these data, it is different. Consider:

> samp
[1]  1  2  3  5  8 11 13 15 16

> junk.lm = lm(y ~ trt + rep + blk, data = junk, subset = samp)
> emmeans(junk.lm, "trt")
 trt emmean SE df asymp.LCL asymp.UCL
 A   nonEst NA NA        NA        NA
 B   nonEst NA NA        NA        NA
 C   nonEst NA NA        NA        NA

Results are averaged over the levels of: blk 
Confidence level used: 0.95

Again, recall that rep is numeric in this model. If instead, I make rep a factor:

> junk.lmf = lm(y ~ trt + factor(rep) + blk, data = junk, subset = samp)
> emmeans(junk.lmf, "trt")
NOTE: A nesting structure was detected in the fitted model:
    blk %in% rep
If this is incorrect, re-run or update with `nesting` specified
 trt     emmean        SE df  lower.CL upper.CL
 A   -0.6262635 0.4707099  1 -6.607200 5.354673
 B    0.0789780 0.3546191  1 -4.426885 4.584841
 C    0.6597377 0.5191092  1 -5.936170 7.255646

Results are averaged over the levels of: blk, rep 
Confidence level used: 0.95

We get non-NA estimates, in part because emmeans is able to detect the fact that blk is nested in rep, and thus performs the EMM computations separately in each rep. Note in the annotations in this last output that averaging is done over the 2 reps and 6 blocks; whereas in junk.lm averaging is done only over blocks, while rep, a numeric variable, is set at its average. Compare the reference grids for the two models:

> ref_grid(junk.lm)
'emmGrid' object with variables:
    trt = A, B, C
    rep = 1.4444
    blk = 1, 2, 3, 4, 5, 6

> ref_grid(junk.lmf)
'emmGrid' object with variables:
    trt = A, B, C
    rep = 1, 2
    blk = 1, 2, 3, 4, 5, 6
Nesting structure:  blk %in% rep

An additional option is to avoid the nesting issue by simply omitting rep from the model:

> junk.lm.norep = lm(y ~ trt + blk, data = junk, subset = samp)
> emmeans(junk.lm.norep, "trt")
 trt     emmean        SE df  lower.CL upper.CL
 A   -0.6262635 0.4707099  1 -6.607200 5.354673
 B    0.0789780 0.3546191  1 -4.426885 4.584841
 C    0.6597377 0.5191092  1 -5.936170 7.255646

Results are averaged over the levels of: blk 
Confidence level used: 0.95

Note that exactly the same results are produced. The reason is that the levels of blk already predict the levels of rep, so there is no need for rep to be in the model.

In summary:

The situation is due in part to the fact that there are missing data
and in part because rep was in the model as a numeric predictor rather than a factor.
In your situation, I suggest re-fitting the model with factor(REP) instead of REP as a numeric predictor. This may be enough to produce estimates.
If, indeed, as in my example, the SUB.BLOCK levels predict the REP levels, just leave REP out of the model altogether.

TechQA.

How to prevent LS means analysis from producing NAs?

There are 4 answers

addendum

Related Questions in R

Related Questions in LINEAR-REGRESSION

Related Questions in LME4

Related Questions in EMMEANS

Related Questions in LSMEANS

Popular Questions

Popular Tags

Trending Questions