# Robust_statistic.R
library(pdfCluster)
library(cluster)
library(fpc)
library(prabclus)
library(smacof)
library(tidyverse)
library(dbscan)
library(factoextra) # for silhouette plots
library(mclust)
library(robustHD) # MAD std
# install.packages("C:/Users/loren/Desktop/EMMIXskew_1.0.3.tar.gz",repos=NULL,type="source")
library(EMMIXskew)
library(poLCA)
library(nomclust)
library(fda) # Functional data analysis
library(funFEM)
library(robustbase)
library(mixtools)
library(tclust)
set.seed(123456)
# INTRODUCTION
par(mfrow=c(1,1))
setwd("C:/Users/loren/Desktop/Henning_Exam/Data")
dortmund <- read.table("Dortmund_msbd.dat",header=TRUE,row.names=1)
mean(dortmund$birthdeath) # -0.2316692 Sample mean estimator Tn = 1/n * sum(i=1 to n){xi}
sum(dortmund$birthdeath>-0.2317) # 165 obs are over the Sample mean!
boxplot(dortmund$birthdeath,main="dortmund$birthdeath")
boxplot(dortmund$birthdeath,main="dortmund$birthdeath",ylim=c(-0.9,0.135))
abline(mean(dortmund$birthdeath),0,col=2)
which.min(dortmund$birthdeath) # obs 133 is an outlier
mean(dortmund$birthdeath[-133]) # -0.03481514 Sample mean estimator obtained after
# removing the outlier
# KEY PROBLEM : a single outlier can drive our estimate arbitrarily far away.
# SOLUTION : robust methods can accommodate outliers without giving them too much
# influence.
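# A tiny illustration (simulated data, not the Dortmund data) of the point above: one
# extreme value can move the sample mean arbitrarily far, while the median barely changes.
x_demo <- c(rnorm(20), 1000)
c(mean = mean(x_demo), median = median(x_demo))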
# ROBUSTNESS THEORY AND ROBUST ESTIMATORS
# IMPORTANT
# BASIC IDEA behind the ROBUST THEORY:
# The basic idea underlying robust theory is to assume that most of the data come from a
# (conditionally) normal distribution, while the remaining fraction comes from some
# arbitrary distribution: these are the outliers.
# The goal of a robust method is to estimate the parameters of this conditional
# normal distribution without giving the outliers too much influence.
# NB -> The breakdown point (BP) determines how large a fraction of outliers an
# estimator can handle!
# M - ESTIMATORS (Robust estimators) (DEEPER EXPLANATION AT THE END)
# HUBER M - ESTIMATOR: an M-estimator for the location (i.e., it replaces the sample mean
# or the median), but it requires a scale s (e.g., for N(mu, sigma^2), sigma is the scale
# parameter).
# It behaves like the sample mean when |u| <= gamma (i.e., when there is little outlier
# influence), thus exploiting the efficiency of the sample mean estimator
# (where u = y - t, i.e., the difference between observed values and the estimate, and gamma
# is a kind of outlier influence threshold: a larger |u| indicates stronger outlier influence),
# while when |u| > gamma it behaves like the median, exploiting its bounded influence
# function and high BP, i.e., robustness.
# NB both the sample mean and the median are location estimators:
# mean --> best EFFICIENCY, low ROBUSTNESS (BP = 0),
# median --> high ROBUSTNESS, low EFFICIENCY.
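# A minimal sketch (huber_psi is an illustrative name, not part of the original script) of
# the Huber psi function: linear (mean-like) for |u| <= k, constant (median-like, bounded
# influence) for |u| > k.
huber_psi <- function(u, k = 1.5) ifelse(abs(u) <= k, u, k * sign(u))
u_grid <- seq(-5, 5, length.out = 200)
plot(u_grid, huber_psi(u_grid), type = "l", xlab = "u", ylab = "psi(u)",
     main = "Huber psi, k = 1.5")
abline(v = c(-1.5, 1.5), lty = 3)   # the threshold beyond which influence is bounded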
# Rec
mean(dortmund$birthdeath) # -0.2316692
median(dortmund$birthdeath) # -0.01918741
sd(dortmund$birthdeath) # sample standard deviation (non-robust scale estimator)
# MAD : robust scale estimator, used to obtain the scale s needed by the HUBER M - ESTIMATOR
mad(dortmund$birthdeath, constant = 1.4826) # 0.04708085
# constant = 1.4826 (default) --> consistency factor: it makes the MAD consistent for the
# standard deviation under the normal model, i.e., MAD(Y1, ..., Yn) ->p sigma (convergence
# of the estimator to the true scale).
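# Quick check of the consistency factor on simulated (approximately normal) data, not the
# Dortmund data: with constant 1.4826 the MAD is close to the standard deviation.
z <- rnorm(1e5, sd = 2)
c(mad = mad(z), sd = sd(z))   # both should be close to 2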
# HUBER M - ESTIMATOR
HM <- huberM(dortmund$birthdeath,k=1.5)
# k = 1.5 (default) --> tuning parameter: approximately 95% efficiency under the normal
# model while preserving robustness
HM$mu # -0.02259647
HM$s # 0.04708085 # Uses the MAD by default and gives it out.
# ( = mad(dortmund$birthdeath, constant = 1.4826) )
HM$it # 8 # Number of iterations of algorithm
HM$SE # NA # If se=TRUE, huberM estimates its own standard error, can be used for tests
# and confidence intervals
boxplot(dortmund$birthdeath,main="dortmund$birthdeath")
boxplot(dortmund$birthdeath,main="dortmund$birthdeath",ylim=c(-0.9,0.135))
abline(mean(dortmund$birthdeath),0,col= 1) # Sample mean (black)
abline(HM$mu,0,col= 2) # HUBER M - ESTIMATOR (red)
abline(median(dortmund$birthdeath),0,col= 3) # it behaves like the median (green)
# ROBUST MULTIVARIATE LOCATION AND COVARIANCE ESTIMATOR MCD
pairs(dortmund) # Multivariate(p) --> before by consider only $birthdeath
# univariate(p = 1)
# Minimum covariance determinant estimator MCD
# Situation : we need to estimate the location and the covariance matrix of
# Y1, ..., Yn iid with Y1 ~ Np(mu, capitalSigma) (Capital sigma since we are in the
# multivariate case) that are potentially violated by outliers.
# Solution : MCD i.e a robust multivariate estimator for location(mu) and
# covariance matrix(capitalSigma)
# 1) Given n/2 < h < n, we need to find the h (> n/2) observations Y* whose classical
# covariance matrix has the lowest possible determinant.
# 2) The RAW Tmcd location estimator is then:
# Tmcd(Y1, ..., Yn) = (1/h) * sum(i=1 to h){Y*i} (sample mean of the h selected Y*)
# 2.1) While the RAW Smcd covariance matrix estimator is:
# Smcd(Y1, ..., Yn) = c * Cov(Y*1, ..., Y*h), i.e.,
# c * the classical sample covariance matrix of the h selected Y*, where
# c > 0 (consistency factor) is chosen for consistency under the conditionally
# normal ~ Np(mu, capitalSigma) distribution, depending on h (number of chosen obs)
# and p (number of variables).
# IMPORTANT
# RECALL THE BASIC IDEA BEHIND ROBUST TECHNIQUES: some of the data (here the h observations)
# are assumed to be conditionally normally distributed, while the remaining fraction
# (here the n-h) comes from some arbitrary distribution (i.e., the outliers).
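# A brute-force illustration of the raw MCD definition above on a tiny simulated data set
# (hypothetical example; covMcd() below uses a much faster algorithm and also applies the
# consistency factor c and a reweighting step).
toy <- cbind(rnorm(12), rnorm(12))
toy[12, ] <- c(8, 8)                               # one clear outlier
h_toy <- 7                                         # h > n/2
subs <- combn(nrow(toy), h_toy)                    # all h-subsets of the 12 observations
dets <- apply(subs, 2, function(idx) det(cov(toy[idx, ])))
best <- subs[, which.min(dets)]                    # subset with smallest covariance determinant
colMeans(toy[best, ])                              # raw Tmcd (sample mean of the h selected obs)
cov(toy[best, ])                                   # raw Smcd up to the consistency factor c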
cd <- cov(dortmund) # sample covariance estimate (classical covariance matrix)
mcdd <- covMcd(dortmund) # MCD
help("covMcd")
# The MCD method looks for the h (> n/2) (h = h(alpha,n,p) = h.alpha.n(alpha,n,p))
# observations (out of n) whose classical covariance matrix has the lowest possible
# determinant.
# The raw MCD estimate of location is then the average of these h points, whereas
# the raw MCD estimate of scatter is their covariance matrix, multiplied by a consistency
# factor (.MCDcons(p, h/n)) to make it consistent at the normal model
# (~ Np(mu, capitalSigma))and unbiased at small samples.
# h is specified through parameter alpha with default 0.5,
# h is chosen so that h/n is approximately equal to alpha, with automatic rounding.
# BP is about equal to 1 - alpha (for n to infinity).
# IMPORTANT
# NB setting a higher alpha, e.g. alpha = 0.75 (where h is chosen so that h/n = alpha),
# implies a higher h, i.e., we are a bit more lenient towards potentially outlying
# observations, since we select a larger number of observations to treat as "good" ones
# (and the BP drops to about 1 - alpha).
# NB h/n is a proportion, so increasing alpha increases h.
mcdd75 <- covMcd(dortmund,alpha=0.75)
# MCD(alpha=0.75) is some kind of compromise between the classical mean/cov-matrix
# and the MCD with alpha = 0.5, but results are not always in between.
colMeans(dortmund) # sample mean for each variable
mcdd$center # Tmcd with alpha = 0.5 for each variable
mcdd75$center # Tmcd with alpha = 0.75 for each variable
# Estimated covariance matrix is in component cov:
str(mcdd$cov)
# The covMcd output also has components raw.cov and raw.center.
# These give the MCD as defined above (slides), whereas cov and center give somewhat
# improved (reweighted) estimates of the covariance matrix (capitalSigma) and the
# location (mu) of Y ~ Np(mu, capitalSigma), with better efficiency but the same
# robustness.
# These are really not very different here:
str(mcdd$raw.cov)
mcdd$raw.center
mcdd$center
# Visualize differences on 2 variables:
# Used for showing the ellipses defined by the cov-matrices
# Looking at variables 2 and 4.
v1 <- 2
v2 <- 4
plot(dortmund[,c(v1,v2)])
ellipse(colMeans(dortmund[,c(v1,v2)]),cd[c(v1,v2),c(v1,v2)],alpha=0.01) # Sample
# covariance estimate (classical covariance matrix) --> alpha=0.01 here means that
# if data are normally distributed, 99% are expected to be inside the ellipse.
ellipse(mcdd$center[c(v1,v2)],mcdd$cov[c(v1,v2),c(v1,v2)],col=2,alpha=0.01) # Smcd
# with alpha = 0.5
ellipse(mcdd75$center[c(v1,v2)],mcdd75$cov[c(v1,v2),c(v1,v2)],col=4,alpha=0.01)
# Smcd with alpha= 0.75
text(3,10.5,"standard cov")
text(4.2,7.5,"MCD h/n=0.75",col=4)
text(3.7,6,"MCD h/n=0.5",col=2)
# NOTES ABOUT CORRELATIONS
# Sample correlation:
cor(dortmund[,v1],dortmund[,v2])
# The following computes "robust correlation" for cov-entries:
mcdd$cov[v1,v2]/sqrt(mcdd$cov[v1,v1]*mcdd$cov[v2,v2])
mcdd75$cov[v1,v2]/sqrt(mcdd75$cov[v1,v1]*mcdd75$cov[v2,v2])
# NOTICE
# NB Classical mean and cov-matrix don't differ whether computed on 2 variables,
# or on 7 variables with values for 2 variables extracted.
# This does not hold for MCD! (Use computations from 7 variables here.)
#i.e.,
mcdd751 <- covMcd(dortmund[,c(2,4)],alpha=0.75) # computed on 2 var
mcdd751$cov
mcdd75$cov[,c(2,4)] # computed on 7 var (before) + extraction of 2 var
# mcdd751$cov != mcdd75$cov[,c(2,4)]
# IMPORTANT REC SUMMARY
# NB setting a higher alpha, e.g. alpha = 0.75 (where h is chosen so that h/n = alpha),
# implies a higher h, i.e., we are a bit more lenient towards potentially outlying
# observations, since we select a larger number of observations to treat as "good" ones.
# NB h/n is a proportion, so increasing alpha increases h.
# RECALL THE BASIC IDEA BEHIND ROBUST TECHNIQUES: some of the data (here the h observations)
# are assumed to be conditionally normally distributed, while the remaining fraction
# (here the n-h) comes from some arbitrary distribution (i.e., the outliers).
# OUTLIER IDENTIFICATION
# In standard situations, outliers can be identified by looking at the distances of the
# observations from the mean or multivariate mean (if p >= 2), or from their robust
# counterparts.
# (In regression we use residuals, i.e., the differences between the true values Y and
# the fitted regression line Yhat.)
# REC the distance of an observation from the mean aggregates the differences over all
# variables, so it is a single number (observation vs. mean -> differences of each variable
# from the corresponding variable mean, i.e., the multivariate mean). For the Mahalanobis
# distance (MD): the distance between the observation and an "overall centroid", i.e., a
# central point in the multivariate space given by the vector of all variable means, a
# kind of overall mean for multivariate data.
# NB LOOK AT LINK TO THE MD DISTANCE IN THE DISTANCE_BASED_METHOD SCRIPT FOR A
# DEEPER EXPLANATION.
# Under the normal distribution, the squared Mahalanobis distances of the observations
# from the mean are chi-squared(p)-distributed, and a distance larger than a high
# quantile (say 0.99 - this is a subjective choice) can identify an outlier.
# It is much better to identify outliers from robust distances; the MCD object
# contains the squared robust Mahalanobis distances (a kind of robust version of the
# traditional leverage obtained from the MD, see slides).
# --> High values of these robust distances (sqrt(mcdd$mah)) between the observations and
# the "overall centroid", computed from the robust location estimate (Tmcd) and the robust
# covariance matrix estimate (Smcd), may identify outliers.
# NB The leverage is obtained from the MD based on the sample mean and sample covariance
# estimates; if those are replaced by robust estimates (i.e., Tmcd and Smcd) we obtain the
# robust Mahalanobis distances.
# FOR EXAM: Since it is better to identify outliers from robust distances (masking issue),
# the MCD object contains the squared Mahalanobis distances computed with Tmcd as location
# and Smcd as covariance matrix (instead of the sample estimates).
# High values of these distances for an observation may identify it as an outlier.
# covMcd()$mah --> (squared) robust Mahalanobis distances between the observations and the
# "overall centroid", computed using the robust location estimate (Tmcd) and the robust
# covariance matrix estimate (Smcd).
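# Small sketch (assumption: covMcd()$mah stores the squared distances w.r.t. center and cov):
# the robust distances can equivalently be computed with mahalanobis() at the MCD estimates.
rd2 <- mahalanobis(dortmund, mcdd$center, mcdd$cov)     # squared robust distances
all.equal(unname(rd2), unname(mcdd$mah))                # should be (nearly) identical
rownames(dortmund)[rd2 > qchisq(0.99, ncol(dortmund))]  # flagged outliers at the 0.99 cut-off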
# PLOT FOR OUTLIER DETECTION WITH THE MCD ESTIMATOR
# REC : mcdd <- covMcd(dortmund), 1:170 n of obs
plot(1:170,sqrt(mcdd$mah),type="n",xlab="Observation",
ylab="Squared robust Mahalanobis distance")
text(1:170,sqrt(mcdd$mah),rownames(dortmund),cex=0.7)
abline(sqrt(qchisq(0.99,7)),0,col=2) # qchisq(0.99,7) --> 0.99 high quantile, 7 p
# n of variables
# Should look at smaller values to see more precisely what's going on:
# (i.e we need to change YLIM)
plot(1:170,sqrt(mcdd$mah),type="n",ylim=c(0,30),xlab="Observation",
ylab="Squared robust Mahalanobis distance")
text(1:170,sqrt(mcdd$mah),rownames(dortmund),cex=0.7)
abline(sqrt(qchisq(0.99,7)),0,col=2)
# Compare the simple sqrt Mahalanobis distances with the sqrt robust Mahalanobis
# distances in the task of OUTLIER DETECTION.
# (Always look at the YLIM values)
plot(sqrt(mahalanobis(dortmund,colMeans(dortmund),cd)),sqrt(mcdd$mah),type="n",
xlim=c(0,10),ylim=c(0,30),xlab="Standard Mahalanobis distance",
ylab="Robust Mahalanobis distance")
text(sqrt(mahalanobis(dortmund,colMeans(dortmund),cd)),sqrt(mcdd$mah),
rownames(dortmund),cex=0.7)
abline(sqrt(qchisq(0.99,7)),0,col=2)
abline(v=sqrt(qchisq(0.99,7)),col=2)
# HOW TO INTERPRET THESE PLOTS? --> The horizontal red line is the threshold on the y
# axis, i.e., if the y axis shows the robust Mahalanobis distances, then all the obs
# above the horizontal red line are identified as outliers.
# The vertical red line is the threshold on the x axis: all the obs to the
# right of the vertical red line are identified as outliers.
# Compare with alpha=0.75
plot(sqrt(mcdd75$mah),sqrt(mcdd$mah),xlim=c(0,10),ylim=c(0,30),
xlab="Robust Mahalanobis distance (alpha=0.75)",
ylab="Robust Mahalanobis distance (alpha=0.5)")
abline(sqrt(qchisq(0.99,7)),0,col=2)
abline(v=sqrt(qchisq(0.99,7)),col=2)
# NB setting a higher alpha, e.g. alpha = 0.75 (where h is chosen so that h/n = alpha),
# implies a higher h, i.e., we are a bit more lenient towards potentially outlying
# observations, since we select a larger number of observations to treat as "good"
# ones (Y*).
# NB h/n is a proportion, so increasing alpha increases h.
# (in this case alpha = 0.5 and alpha = 0.75 give almost identical results.)
# The MCD estimator for location and covariance identifies far more outliers than the
# "classical" estimators (with "classical" estimators some outliers are "masked" by the
# largest ones, i.e., a "far" outlier can cause a "closer" outlier to be treated as a
# "normal" observation, and vice versa --> this phenomenon is called Masking).
#IMPORTANT
# NB With the MCD we only identify the "outliers" that deviate from an assumed Gaussian
# distributional shape (and we avoid masking).
# RECALL THE BASIC IDEA BEHIND ROBUST TECHNIQUES: some of the data (here the h observations)
# are assumed to be CONDITIONALLY NORMALLY DISTRIBUTED, while the remaining fraction
# (here the n-h) comes from some arbitrary distribution (i.e., the outliers).
# IMPORTANT
# This does not mean that they are all meaningfully different from the main core of
# observations!
# Such outliers can be faulty measurements, correct but special, or just the result of
# non-normality of the process! -->
# In ROBUST REGRESSION we can DISTINGUISH BETWEEN OUTLIERS and GOOD or BAD LEVERAGE POINTS,
# since we have a regression task as reference.
# ROBUST REGRESSION
# CLASSICAL LINEAR REGRESSION
set.seed(3355111)
x1 <- runif(100)
x2 <- x1+runif(100,-0.3,0.3)
x3 <- -x1+rnorm(100,sd=0.5)
y <- 1*x1-1*x2+3*x3+rnorm(100,sd=0.5)
regdata1 <- as.data.frame(cbind(y,x1,x2,x3))
pairs(regdata1)
lmrd1 <- lm(y~x1+x2+x3,data=regdata1)
summary(lmrd1)
# NB R^2 --> tells us how well the regression model fits the observed data.
par(mfrow=c(2,2))
plot(lmrd1)
# CHECKING THE MODEL ASSUMPTIONS i.e Check if the model works well for the data:
# A) Linearity -> linear relationship between the dependent variable (Y) and the
# independent variables (X).
# B) Normality -> residuals should be approximately normally distributed; i.e., if the
# residuals show no anomalies and look normal, our predictions are close to the true
# values (since the residuals are the differences between the true Y and the fitted
# regression line Yhat = X*Bhat, i.e., our model).
# NB OUTLIERS and strong skewness are problems for normality; many other violations
# of normality are not (with large samples we can rely on the CLT).
# C) Homoskedasticity -> residuals should have constant variance. Strong
# violations affect inference, weak heteroscedasticity can often be tolerated.
# NB linked to the point above, heteroskedastic residuals show anomalous / more variable
# patterns, which is bad because it means the model does not predict equally well everywhere.
# D) Independence -> the errors (observations) should be independent. Hard
# to diagnose, may affect inference.
# Visible issues with skewness or heteroscedasticity may point to transformations.
help("plot.lm")
# Six plots (selectable by which) are currently available:
# 1) Plot of residuals against fitted values
# 2) Scale-Location plot of sqrt(| residuals |) against fitted values
# 3) Normal Q-Q plot,
# 4) Plot of Cook's distances versus row labels
# 5) Plot of residuals against leverages (with cook's distance also)
# 6) Plot of Cook's distances against leverage/(1-leverage).
# NB By default: 1,2,3 and 5 are provided and here will be our main focus.
# IMPORTANT
# The numbers next to some points (their row numbers) in each plot indicate
# EXTREME VALUES, i.e., POTENTIALLY PROBLEMATIC OBSERVATIONS according
# to the criterion of each plot (plots 1, 2, 3 and 5); the observations are identified
# by their row numbers in the data set.
# 1) Residuals(y.axis) against fitted values(x.axis) (for checking the Linearity
# assumption):
# This plot shows whether the residuals have non-linear patterns.
# If a non-linear relationship between Y and the Xs is not captured by the model, it will
# show up as a pattern in the residuals in this plot.
# If you find equally spread residuals around a horizontal line, without distinct
# patterns, that is a good indication that there is no unmodelled non-linear
# relationship, or that we have captured it with our fitted model if it was present.
# 2) Normal Q-Q plot: std. residuals (y axis) vs theoretical quantiles of the normal
# distribution (x axis) (for checking the Normality assumption):
# This plot shows whether the residuals are normally distributed.
# It's good if the residuals lie well on the straight dashed line.
# 3) Scale-Location plot: sqrt(|std. residuals|) (y axis) vs fitted values (x axis)
# (for checking the Homoskedasticity assumption):
# This plot shows whether the residuals are spread equally along the range of predictors.
# It's good if you see a horizontal line with equally (randomly) spread points.
# 4) std.Residuals(y.axis) vs Leverage(x.axis) (for checking the presence of
# INFLUENCE-OUTLIERS):
# NB the leverage of observation i is the i-th diagonal element h_ii of the hat matrix,
# which is related to the Mahalanobis distance of x_i from the mean of the predictors
# (see robust slides).
# This plot helps us to find influential cases.
# Not all outliers are influential in linear regression analysis, they might not
# be influential to determine our regression line i.e., our model.
# On the other hand, they could be extreme cases against a regression line and can
# alter the results if we exclude them from the analysis.
# We watch out for influential outliers at the upper right corner or at the lower
# right corner. Those spots are the places where cases can be influential against
# a regression line(our model).
# Specifically we look for cases outside the dashed line i.e the Cook's distance.
# When cases lie outside the Cook's distance lines (meaning they have high
# Cook's distance scores), they are influential for the regression results,
# i.e., those observations are influential outliers.
# NB it is more reliable to use the diagnostic plots from a ROBUST REGRESSION (explained
# later) because of the "Masking" issue.
# FROM HERE WE CAN SAY --> PROBABLY A BAD LEVERAGE POINT, but it is more reliable to also
# check the robust diagnostic plots (due to masking).
# NB removing an influential outlier can have a big impact on the slopes (betas) and
# R^2 of the model in classical linear regression (lm), while in robust regression
# (lmrob) they change much less (robustness property).
# NB Thus adding or removing outliers and "noise" changes lm much more than lmrob.
# Influential outliers can be simple outliers (with respect to the Y scale) or leverage
# points (with respect to the X scale).
# Leverage points can be: Good leverage points (which do not violate the model
# assumptions) or Bad leverage points.
# DIFF BTW GOOD AND BAD LEVERAGE POINTS and OUTLIERS --> see word doc.
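# A small sketch (standard R diagnostics, not part of the original script) of the
# quantities behind the residuals-vs-leverage plot for lmrd1:
lev  <- hatvalues(lmrd1)                 # leverages h_ii (diagonal of the hat matrix)
cook <- cooks.distance(lmrd1)            # Cook's distances (influence on the fit)
head(sort(cook, decreasing = TRUE))      # most influential observations
plot(lev, rstandard(lmrd1), xlab = "Leverage", ylab = "Standardized residuals")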
# FOR EXAM COMMENTS:
# The four plots show potentially problematic cases, indicated by the row numbers
# of the observations.
# If some cases are identified across all four plots, you might want to take a
# close look at them individually.
# Is there anything special for the subject?
# Or could it be a simply errors in data entry?
# So, what does having patterns in residuals mean to your research?
# It's not just a go-or-stop sign. It tells you about your model and data.
# Your current model might not be the best way to understand your data if there is
# still a lot of structure left in it, i.e., uncaptured information such as non-linearity
# and so on...
# In that case, you may want to go back to your theory and hypotheses.
# Is it really a linear relationship between the predictors and the outcome?
# You may want to include a quadratic term, for example. A log transformation
# may better represent the phenomena that you'd like to model.
# Or, is there any important variable that you left out from your model?
# Other variables that we didn't include (e.g., age or gender) may play an important
# role in your model and data.
# Or, maybe, our data were systematically biased when collecting and we may want
# to redesign the data collection methods.
# EXAMPLES
# Add a regression outlier (with respect to the scale of Y) --> c(-6,0.05,-0.1,0.9)
# (4 values, i.e., y, x1, x2, x3):
regdata2 <- rbind(regdata1,c(-6,0.05,-0.1,0.9))
pairs(regdata2)
lmrd2 <- lm(y~x1+x2+x3,data=regdata2)
summary(lmrd2) # Different summary statistics: lower R^2 and different regression
# coefficients with respect to lmrd1.
par(mfrow=c(2,2))
plot(lmrd2)
# Add a Bad leverage point (with respect to the scale of X) --> c(-4,20,-0.1,0.9)
regdata3 <- rbind(regdata1,c(-4,20,-0.1,0.9))
pairs(regdata3)
lmrd3 <- lm(y~x1+x2+x3,data=regdata3)
summary(lmrd3)
par(mfrow=c(2,2))
plot(lmrd3)
# Add a Good leverage point (with respect to the scale of X) --> c(22,20,-0.1,0.9)
regdata4 <- rbind(regdata1,c(22,20,-0.1,0.9))
pairs(regdata4)
lmrd4 <- lm(y~x1+x2+x3,data=regdata4)
summary(lmrd4)
par(mfrow=c(2,2))
plot(lmrd4)
# Example for "masking":
data(starsCYG,package="robustbase")
light.lm<-lm(log.light~log.Te, starsCYG)
plot(starsCYG)
abline(light.lm$coef[1],light.lm$coef[2])
par(mfrow=c(2,2))
plot(light.lm)
par(mfrow=c(1,1))
plot(light.lm, which = 5)
# Masking means that several outliers can "mask" each other, i.e., outliers can stop
# each other from appearing as outliers in standard diagnostic methods such as Cook's
# statistic or leverage/Mahalanobis distance.
# Standard diagnostic methods for checking a model assumption normally require all
# other model assumptions to be fulfilled.
# Cook's distance can find an outlier assuming it's the only one(thus masking will
# affect this).
# If the relation between x and Y is nonlinear or heteroscedastic, the normal
# QQ-plot is worthless because it aggregates residuals over different x where in
# fact they have different distributions.
# KEY IMPORTANT
# NB MASKING is the KEY PROBLEM with outliers in classical regression methods (and in
# all other non-robust identification methods --> see MCD); thus, as said before, it is
# better to look at the diagnostic plots from a robust regression.
# ROBUST REGRESSION ESTIMATORS
# Standard robust regression still assumes i.i.d. and linearity, but under
# heteroscedasticity and non-linearity we can sometimes fit the majority of the data
# well by a homoscedastic linear model.
# IMPORTANT
# Diagnostics from robust regression are generally more reliable than traditional
# diagnostics since they can deal with the masking issue.
# Traditional diagnostics are not that bad: like robust diagnostics, they will show
# many problems, but with classical regression these problems then need to be dealt
# with, whereas robust estimation deals with outliers (including the masking issue)
# and non-normality automatically; thus it also makes sense to look at the diagnostics
# from robust regression estimators.
# MM - estimator
# FOR EXAM COMMENT
# The basic idea underlying the robust linear model (lmrob) is that some
# (1-alpha > 0.5) of the data is distributed conditionally normal and the remaining
# fraction (alpha) comes from some arbitrary distribution (i.e., the outliers).
# The goal of a robust method is to estimate the parameters (beta and sigma^2) of
# this conditional normal distribution without giving the outliers too much influence.
# If the bulk of the data (i.e, the good data) is not distributed conditionally
# normal then a linear model is not appropriate regardless of whether it is fit
# robustly or not. Of course you can still use all of the standard linear modeling
# tricks. For instance a log transformation of the response sometimes helps with
# heteroskedasticity.
help("lmrob") # Computes an MM-type regression estimator.
# It uses a bi-square redescending score function, and it returns a highly robust
# and highly efficient estimator (with 50% breakdown point and 95% asymptotic
# efficiency for normal errors)
# REC : Add a Bad leverage point --> c(-4,20,-0.1,0.9)
# regdata3 <- rbind(regdata1,c(-4,20,-0.1,0.9))
lm3mm <- lmrob(y~x1+x2+x3,data=regdata3)
par(mfrow=c(2,3))
help("plot.lmrob")
plot(lm3mm)
plot(1:101,lm3mm$rweights) # lm3mm$rweights --> wi (from the slides): additional
# diagnostic plot (the wi down-weight observations with large residuals, while
# wi = 0 identifies outliers)
par(mfrow=c(1,1))
plot(lm3mm, which = 1) # RUN IT IN ORDER TO UNDERSTAND THE INTERPRETATION
# 1) Standardised residuals vs. robust distances : The plot divides the value range
# in four regions marked by lines and coloured points:
# (1) Regular --> cloud of black points (bulk of the data)
# (2) Outlier --> upper left (empty) over/under (1) --> identify outliers
# (3) Leverage --> front of (1) long rectangular part --> identify GOOD leverage
# points(empty)
# (4) Outlier and Leverage --> upper and lower of (3)--> identify BAD leverage points.
# NB Points in the fourth category are of major importance as they are outliers
# points that influence the model fit if they are removed from the estimation.
plot(lm3mm, which = 2)
# 2) Normal QQ -> is a normal QQ plot (same interpretation)
plot(lm3mm, which = 3)
# 3) Response vs Fitted values --> a cross-validation-style plot comparing y (response)
# with yhat (fitted values).
# NB a robust estimator will not fit all points well, or make them all look normal!
# It will fit the outliers even worse than LS (least squares, i.e., classical regression),
# because it weights them out of the fit.
# This can be a problem for cross-validation.
# Observation 101 does not match in this plot because robust regression assigns it a
# robustness weight of (almost) 0.
plot(lm3mm, which = 4)
# 4) Residuals vs fitted values --> check the linearity as before (same interpretation)
# MOREOVER...
# Residuals vs fitted -> The plot is used to detect non-linearity, unequal error
# variances, and outliers.
# We plot it together with the robustness weights in order to see which observations are
# treated as outliers by the estimator, i.e., where the estimator sets the robustness
# weights to 0.
plot(lm3mm, which = 5)
# 5) Sqrt of abs Residuals vs Fitted values (scale-Location plot) --> check the
# homoskedasticity as before (same interpretation)
# REC
# Robust diagnostics and robust estimation deals with outliers/non-normality
# automatically i.e., identify that they are not violated in general and the influence
# of outliers by re-weighting them.
# The initial estimator (default, equivalent to init = "S") is an S-estimator.
# The method argument takes a string that specifies the chain of estimates to be
# calculated, i.e., the default method = "MM" (or "SM") means: S initial estimator for
# the scale, followed by an M-estimator.
# The MM-estimator takes its BP from the initial S-estimator (rho1, tuning constant c1)
# and its efficiency from the M-step (rho2, tuning constant c2).
# IDEA: rho2 governs the use of the data information (efficiency), while the rho1 S-scale
# governs the outlier tolerance (robustness).
# Default: c1 = 1.548 yields BP ~ 1/2, c2 = 4.685 yields 95% efficiency.
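# A hedged sketch (bisq_psi is an illustrative name, not a robustbase function) of the
# Tukey bisquare (redescending) psi used by lmrob, with the two default tuning constants
# quoted above: c1 = 1.548 (S-step, high BP) and c2 = 4.685 (M-step, ~95% efficiency).
bisq_psi <- function(u, c) ifelse(abs(u) <= c, u * (1 - (u / c)^2)^2, 0)
u_grid <- seq(-8, 8, length.out = 400)
plot(u_grid, bisq_psi(u_grid, 4.685), type = "l", xlab = "u", ylab = "psi(u)",
     main = "Bisquare psi: c1 = 1.548 (S) vs c2 = 4.685 (M)")
lines(u_grid, bisq_psi(u_grid, 1.548), col = 2)
legend("topright", legend = c("c2 = 4.685", "c1 = 1.548"), col = 1:2, lty = 1)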
a <- summary(lmrd1) # Regression with traditional estimators (LS)
b <- summary(lm3mm) # Regression with ROBUST ESTIMATORS (MM)
# NB
# The output from robust regression is very similar to that from linear regression.
# The only exception is that the observations are weighted based on how deviant they
# are, and robust standard errors are computed for the model estimates.
a$coefficients
b$coefficients
a$r.squared # 0.9201405
b$r.squared # 0.9173593
which(lm3mm$rweights == 0) # obs 101 is an outlier
regdata3[101,]# Bad leverage point --> c(-4,20,-0.1,0.9)
# FOR EXAM COMMENTS
# NB : A robust estimator will not fit all points well, or make all look like normal!
# It will fit the outliers even worse than LS, because it weights them out for fitting.
# This can be a problem for cross-validation; the squared loss is strongly dominated by
# the biggest misfits. Squared loss is itself non-robust!
# Better to evaluate with the L1/absolute loss or another robust loss when attempting to
# fit the "good majority" well.
# EXAMPLES
# REC : regdata1 <- as.data.frame(cbind(y,x1,x2,x3)) --> Data with no outliers/
# influencial cases
par(ask=F)
lm1mm <- lmrob(y~x1+x2+x3,data=regdata1)
par(mfrow=c(2,3))
plot(lm1mm)
plot(1:100,lm1mm$rweights)
par(mfrow=c(1,1))
plot(lm1mm, which = 1) # LOOK THE DIFFERENCE IDENTIFICATIONS AMONG OUTLIERS AND
# LEVERAGE POINTS IN THIS PLOT
summary(lm1mm)
which(lm1mm$rweights == 0)
# REC : Add a regression outlier (with respect to the scale of Y) --> c(-6,0.05,-0.1,0.9)
# regdata2 <- rbind(regdata1,c(-6,0.05,-0.1,0.9))
lm2mm <- lmrob(y~x1+x2+x3,data=regdata2)
par(mfrow=c(2,3))
plot(lm2mm)
plot(1:101,lm2mm$rweights)
par(mfrow=c(1,1))
plot(lm2mm, which = 1)# LOOK THE DIFFERENCE IDENTIFICATIONS AMONG OUTLIERS AND
# LEVERAGE POINTS IN THIS PLOT
summary(lm2mm)
which(lm2mm$rweights == 0)
# REC : Add a Bad leverage point (with respect to the scale of X) --> c(-4,20,-0.1,0.9)
# regdata3 <- rbind(regdata1,c(-4,20,-0.1,0.9))
lm3mm <- lmrob(y~x1+x2+x3,data=regdata3)
par(mfrow=c(2,3))
plot(lm3mm)
plot(1:101,lm3mm$rweights)
par(mfrow=c(1,1))
plot(lm3mm, which = 1)# LOOK THE DIFFERENCE IDENTIFICATIONS AMONG OUTLIERS AND
# LEVERAGE POINTS IN THIS PLOT
summary(lm3mm)
which(lm3mm$rweights == 0)
# REC : Add a Good leverage point (with respect to the scale of X) --> c(22,20,-0.1,0.9)
# regdata4 <- rbind(regdata1,c(22,20,-0.1,0.9))
lm4mm <- lmrob(y~x1+x2+x3,data=regdata4)
par(mfrow=c(2,3))
plot(lm4mm)
par(mfrow=c(1,1))
plot(lm4mm, which = 1)# LOOK THE DIFFERENCE IDENTIFICATIONS AMONG OUTLIERS AND
# LEVERAGE POINTS IN THIS PLOT
plot(1:101,lm4mm$rweights)
summary(lm4mm)
which(lm4mm$rweights == 0) # none: the good leverage point is not weighted out
# REC : masking example
# MM -estimator
# 4 observations c(11,20,30,34) are outliers with |weight| = 0
light.mm<-lmrob(log.light~log.Te, starsCYG)
summary(light.mm)
par(mfrow=c(2,3))
plot(light.mm)
par(mfrow=c(1,2))
plot(light.mm, which = 1, main = "MASKING WITH LMROB") # MASKING WITH LMROB
plot(light.lm, which = 5,main = "MASKING WITH LM") # MASKING WITH LM
# NB We can see that the typical behaviour of the fit in the presence of masking is
# an UP AND DOWN FITTED LINE!
# NB lmrob is able to detect the masking!
plot(1:47,light.mm$rweights,xlab="Obs. number",ylab="Robustness weights")
which(light.mm$rweights == 0) # --> # 4 observations c(11,20,30,34) are outliers
# with |weight| = 0
# IMPORTANT
# NB To sum up, MM ROBUST REGRESSION differs from the classical fit mainly because it
# reweights potentially influential observations in order to obtain a good fit of the
# bulk of the data. It removes from the fit (by setting their weights to 0) only
# OUTLIERS and BAD LEVERAGE POINTS.
# Moreover it can recognise and deal with the issue of MASKING (again by adjusting the
# weights of the responsible observations).
# In this way it follows the BASIC IDEA behind ROBUST THEORY:
# we assume that some of the data are conditionally normally distributed, while the
# remaining fraction comes from some arbitrary distribution: these are the outliers.
# The goal of a robust method is to estimate the parameters of this conditional
# normal distribution without giving the outliers too much influence.
# S-estimator
light.s<-lmrob(log.light~log.Te, method="S",starsCYG)
par(mfrow=c(2,3))
plot(light.s)
par(mfrow=c(1,2))
plot(light.mm, which = 1, main = "LMROB - MM ESTIMATOR")
plot(light.s, which = 1, main = "LMROB - S ESTIMATOR")
# NB As we can see, the S-estimator is less tolerant of outliers but also less
# efficient, since by excluding and down-weighting too many observations we lose
# information, i.e., efficiency.
# This also reflects its role of governing the robustness within the MM-estimator.
par(mfrow=c(1,1))
plot(1:47,light.s$rweights,xlab="Obs. number",ylab="Robustness weights")
which(light.s$rweights == 0)
# 7 observations c(7,9,11,18,20,30,34) are outliers with |weight| = 0
# (in the MM only 4 are treated as outliers/ bad leverage points)
# The S-estimator is less tolerant of outliers (but also less efficient).
# NB
# We can extract the S-estimator directly from the lmrob object, since it is used
# for its initialization.
light.mm$init.S
par(mfrow=c(1,2))
plot(1:47,light.mm$init.S$rweights,xlab="Observation number",ylab="Weight",
main="S-estimator, robustness weights",type="n")
text(1:47,light.mm$init.S$rweights,rownames(starsCYG),cex=0.7)
plot(light.mm$init.S$fitted,light.mm$init.S$residuals,xlab="Fitted values",
ylab="Residuals",main="S-estimator, residuals vs. fitted")
# Residuals vs fitted -> The plot is used to detect non-linearity, unequal error
# variances, and outliers.
# We plot it together with the robustness weights in order to see which observations are
# treated as outliers by the estimator, i.e., where the estimator sets the robustness
# weights to 0.
# NOTICE THAT WE CANNOT OBTAIN THE FULL DIAGNOSTICS HERE, SINCE WE ARE NOT USING THE
# ESTIMATOR TO FIT A ROBUST REGRESSION AS BEFORE, I.E.,
# lmrob(log.light~log.Te, method="S",starsCYG);
# WE ARE JUST USING IT TO PROVIDE THE SCALE FOR THE M-ESTIMATOR USED AFTERWARDS IN THE MM
# ROBUST REGRESSION FIT, I.E., light.mm<-lmrob(log.light~log.Te, starsCYG), WHERE
# THE DEFAULT IS method="MM".
# COMPARING ROBUST ESTIMATORS (COMMENTS FOR EXAM)
# --> WHEN YOU MAKE COMMENTS FIRST HAVE A LOOK AT THE SUMMARY TABLE IN THE SLIDES!!!
# IN ORDER:
# M-ESTIMATOR (HUBER M) --> M-estimation is a generalization of maximum likelihood
# estimation (i.e., ML is a special case of M-estimators) in the context of location
# models (i.e., estimation of the mean).
# Its introduction was motivated by robust statistics: we look for a more general
# location estimator t that minimizes rho, a loss function with certain properties
# evaluated at each observation yi --> i.e., we look for the t that minimizes the sum
# of the losses over i = 1 to n:
# --> Tn = argmin(t){ sum(i=1 to n)[ rho(yi, t) ] }
# NB rho measures the loss of estimating yi by t -> thus we look for the best estimator t
# by minimizing a kind of "residual loss function" rho(yi, t).
# NB this is a generalization since rho can be chosen among different functions, i.e.,
# the function rho, or its derivative psi, can be chosen so as to give the estimator
# desirable properties (in terms of bias and efficiency) when the data really come from
# the assumed distribution, i.e., with no outliers.
# (RECALL THE BASIC IDEA OF ROBUST THEORY --> split of the data into a "good" part Y*
# and outliers.)
# NB e.g., the traditional LS or ML estimator is obtained by setting rho = -log f(yi, t).
# Now recall that we are in the field of location estimation, where both the mean
# and the median are estimators of location.
# Basically the M-estimator behaves like the mean when the outlier influence is below a
# certain threshold (in the slides |u| = |yi - t| <= gamma), exploiting the efficiency of
# the mean rather than its (lack of) robustness; vice versa, when |u| > gamma it behaves
# like the median, exploiting the robustness of the median rather than high efficiency.
# NB Threshold --> we use these estimators when we suspect outliers in the data, so a
# large difference u = yi - t is attributed to outliers; this is why gamma is called the
# "outlier influence threshold".
# Also, this is why we introduced a (more flexible) generalization of maximum likelihood
# estimation, in order to be able to "switch" between loss functions.
# The M-estimator is nearly as efficient as OLS.
# A weakness of the M-estimation method is that it does not take the whole data
# distribution into account: it only uses median-like down-weighting to remove or reduce
# the influence of outliers in the residuals, which is why it is not robust against high
# leverage points (see the comparison below).
# NB In most practical cases, M-estimators are of psi-type, where the psi function
# (the derivative of rho) replaces rho: it is often simpler to differentiate with respect
# to Tn and solve for the root of the derivative. When this differentiation is possible,
# the M-estimator is said to be of psi-type; otherwise it is said to be of rho-type.
# (LOOK AT THE SLIDES FOR A DEEPER EXPLANATION)
# There is also an M-estimator for the scale that follows the same principle stated
# before for the location.
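# A hedged numerical sketch of the argmin definition above for the location case, using
# the Huber rho with the MAD as fixed scale (huber_rho / m_location are illustrative
# names, not from the original script):
huber_rho <- function(u, k = 1.5) ifelse(abs(u) <= k, u^2 / 2, k * abs(u) - k^2 / 2)
m_location <- function(y, k = 1.5) {
  s <- mad(y)                                        # robust scale, as in the Huber M part
  optimize(function(t) sum(huber_rho((y - t) / s, k)), range(y))$minimum
}
m_location(dortmund$birthdeath)                      # should be close to huberM()$mu above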
# S-estimator --> the S-estimator minimizes an M-estimator of scale, since it is based
# on the residual scale of the M-estimation methodology.
# It has an efficiency of only about 25% if rho is chosen to give BP ~ 1/2.
# It has the same type of asymptotic properties as M-estimators, but it can also handle
# up to 50% of outliers in the data.
# This method uses a robust residual scale estimate to overcome the weakness of the
# median-like behaviour used in the M-estimator.
# S-estimators are more robust than M-estimators, because S-estimators have
# smaller asymptotic bias and smaller asymptotic variance under contaminated data.
# MM-estimator --> MM estimation combines a high-breakdown estimation method with an
# efficient estimation method; it was the first estimator with both a high breakdown
# point and high efficiency under normal errors.
# The key difference with the Huber estimator lies in the definition of the weight
# function, which is now based on a redescending score function, as in the S-estimator.
# The main IDEA is to combine an M-regression estimator with an S-estimator for the
# scale (hence the S initialization), where the efficiency comes from the M-regression
# estimator and the robustness from the S-estimator of scale.
# The MM-estimator has the same BP as the S-estimator used to initialize it.
# COMPARISONS + COMMENTS (LOOK AT THE SUMMARY TABLE)
# MM is more efficient than the S method and performs better under outlier-generating
# error distributions.
# The Huber M-estimator is not robust with respect to high leverage points, so it
# should be used in situations where high leverage points do not occur.
# SUMMARY --> MM gives the best trade-off between asymptotic efficiency and robustness
# (S: very robust but less efficient; M: very efficient but less robust).
# M does not set the weight of bad leverage points to 0 (only of outliers).
# S sets weights to 0 (via its psi, see slides) for bad leverage points and outliers,
# but it is only about 25% asymptotically efficient.
# MM is often the best choice.
# Ex 9.4 + Ex 4 mock exam (OTHER PART IN ANOTHER SCRIPT)
# Ex 9.4
# Aim : Investigate the sensitivity of the two estimators to outliers.
# (a) There is an obvious outlier in the data set. Create a new data set identical to the
# original one, but with this outlier removed.
# Run both lm and lmrob on the new data set, and compare the results to the results
# of the same regression method on the original data (i.e., new lm-result to original
# lm-result, new lmrob-result to original lmrob-result)
# by (i) commenting on how t-test results of the variables have changed qualitatively
# (significances and positive or negative direction of influence)
# and (ii) computing the Euclidean distance between the vectors of estimators of
# the regression coefficients ignoring the intercept (as this value is not comparable
# to the others).
# These are available as the coef component of both the lm and lmrob output.
# Comment on what the results mean regarding the sensitivity of the two regressions.
library(robustbase)
unicef <- read.table("unicef97.dat",header=TRUE)
# LS - estimator
uniceflm <- lm(Child.Mortality~Literacy.Fem+Literacy.Ad+Drinking.Water+
Polio.Vacc+Tetanus.Vacc.Preg+Urban.Pop+Foreign.Aid, data=unicef)
# Summary
summary(uniceflm)
# We have 4 significant regressors: Literacy.Fem, Drinking.Water, Polio.Vacc and Urban.Pop.
# Multiple R-squared: 0.7587 --> statistical measure of how close the data are to
# the fitted regression line
# Residuals vs fitted (NB: y vs x/ std residuals = residuals)
plot(uniceflm, which = 1) # LINEARITY --> Relation still well approximated by linearity.
# Residuals vs Normal QQ(normal theoretical quantiles)
plot(uniceflm, which = 2) # NORMALITY --> Overall the residuals can be approximated with a
# N distribution since most of the points are aligned. We have issues with SaoTP,
# Angol and Niger observations. They will have to be specifically analyzed later.
# Scale-Location ( sqrt std residuals vs fitted values)
plot(uniceflm, which = 3) # HOMOSKEDASTICITY --> there seems to be an issue
# with homoskedasticity; however, weak heteroscedasticity can often be tolerated.
# Residuals vs leverage
plot(uniceflm, which = 5) # FIND INFLUENTIAL CASES --> Not all outliers and
# leverage points are influential in linear regression analysis.
# For example, Niger is an outlier since it shows a high residual value, but it does
# not have a significant impact on the fit. The two points far to the right look like
# good leverage points, as they are not influential. We look for cases outside
# the dashed line (Cook's distance) that can alter the results if we exclude them from
# the analysis.
# This plot helps us to find influential cases.
# Not all outliers are influential in linear regression analysis, they might not
# be influential to determine a regression line.
# On the other hand, they could be extreme cases against a regression line and
# can alter the results if we exclude them from analysis.
# We watch out for outlying values at the upper right corner or at the lower right
# corner. Those spots are the places where cases can be influential against a
# regression line, we look for cases outside the dashed line i.e the Cook's distance.
# When cases are outside of the Cook's distance (meaning they have high Cook's distance
# scores), the cases are influential to the regression results.
# MM - estimator
unicefmm <- lmrob(Child.Mortality~Literacy.Fem+Literacy.Ad+Drinking.Water+
Polio.Vacc+Tetanus.Vacc.Preg+Urban.Pop+Foreign.Aid,
data=unicef)
# Summary
summary(unicefmm)
# Std residuals vs Robust distance
plot(unicefmm,which=1)
# Residuals vs Normal QQ
plot(unicefmm,which=2)
# Robustness weights
plot(1:121,unicefmm$rweights,xlab="Observation number",ylab="Weight",
main="MM-estimator, robustness weights",type="n")
text(1:121,unicefmm$rweights,rownames(unicef),cex=0.7)
# Residuals vs. fitted
plot(unicefmm$fitted,unicefmm$residuals,xlab="Fitted values",ylab="Residuals",
main="MM-estimator, residuals vs. fitted")
# S-estimator (used to initialize the MM fit)
unicefmm$init.S
# Robustness weights
plot(1:121,unicefmm$init.S$rweights,xlab="Observation number",ylab="Weight",
main="S-estimator, robustness weights",type="n")
text(1:121,unicefmm$init.S$rweights,rownames(unicef),cex=0.7)
# Residuals vs fitted
plot(unicefmm$init.S$fitted,unicefmm$init.S$residuals,xlab="Fitted values",
ylab="Residuals",main="S-estimator, residuals vs. fitted")