-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path02_DistributionsAndMulticollinearity.R
More file actions
73 lines (55 loc) · 2.99 KB
/
02_DistributionsAndMulticollinearity.R
File metadata and controls
73 lines (55 loc) · 2.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Clear all existing objects in the workspace.
# NOTE: this removes every object in the environment the script is run in;
# use a fresh R session if other work must be preserved.
rm(list = ls())

# Load required libraries.
# library() is used instead of require(): require() only returns FALSE (with a
# warning) when a package is missing, so the script would carry on and fail
# later with a less helpful "could not find function" error.
library(car)  # for Box-Cox transformations, density and QQ plots
library(usdm) # for Variance Inflation Factor (VIF) analysis

# Set seed for reproducibility
set.seed(1828)

# Configure options to suppress significance stars in outputs
options(show.signif.stars = FALSE)

# Load raw and transformed datasets (paths are relative to the working
# directory)
load("avgRawDat.rda")         # Raw data
load("avgTransformedDat.rda") # Transformed data

# Fail early if the .rda files did not supply the objects used below.
stopifnot(exists("avgRawDat"), exists("avgTransformedDat"))
#####################
### Distributions ###
#####################
# Visualize the distribution of each variable before and after transformation.
# Every pair of calls below draws the raw variable (left panel) followed by its
# transformed counterpart (right panel). Column suffixes in avgTransformedDat:
#   .inv = scaled inverse, .rt = rank-transformed.

# Set graphical parameters for a 1x2 panel layout and adjust margins; keep the
# previous settings so they can be restored once the plots are drawn.
old_par <- par(mfrow = c(1, 2), mar = c(5, 5, 1, 1))

# CT score
densityPlot(avgRawDat$CT_score, xlab = "CT Score")
densityPlot(avgTransformedDat$CT_score.inv,
            xlab = "CT Score\n(scaled inverse)")

# Years of immersion
densityPlot(avgRawDat$immersion_years, xlab = "Years of Immersion")
densityPlot(avgTransformedDat$immersion_years.rt,
            xlab = "Years of Immersion\n(rank-transformed)")

# Accuracy [PC1]
densityPlot(avgRawDat$Accuracy.PC1, xlab = "Accuracy [PC1]")
densityPlot(avgTransformedDat$Accuracy.PC1.rt,
            xlab = "Accuracy [PC1]\n(rank-transformed)")

# CV per trial
# (label corrected: the .rt column is rank-transformed, not scaled inverse)
densityPlot(avgRawDat$CV_trial, xlab = "CV per trial")
densityPlot(avgTransformedDat$CV_trial.rt,
            xlab = "CV per trial\n(rank-transformed)")

# CV correct, per trial
# (label corrected: the .rt column is rank-transformed, not scaled inverse)
densityPlot(avgRawDat$CV_correct, xlab = "CV correct, per trial")
densityPlot(avgTransformedDat$CV_correct.rt,
            xlab = "CV correct, per trial\n(rank-transformed)")

# Proficiency [PC1]
densityPlot(avgRawDat$Proficiency.PC1, xlab = "Proficiency [PC1]")
densityPlot(avgTransformedDat$Proficiency.PC1.rt,
            xlab = "Proficiency [PC1]\n(rank-transformed)")

# Proficiency [PC2]
densityPlot(avgRawDat$Proficiency.PC2, xlab = "Proficiency [PC2]")
densityPlot(avgTransformedDat$Proficiency.PC2.rt,
            xlab = "Proficiency [PC2]\n(rank-transformed)")

# Restore the graphical parameters that were in effect before this section.
par(old_par)
#########################
### Multicollinearity ###
#########################
# Screen the transformed predictors (columns 4 to 13 of avgTransformedDat) for
# multicollinearity. vifcor() is called with a correlation threshold of 0.6
# and reports which variables it excludes plus the VIFs of those it keeps.
vifcor(avgTransformedDat[, 4:13], th = 0.6)

# Output recorded from the call above:
#
# 3 variables from the 10 input variables have collinearity problem:
#
# Mean_correct CV_correct_average Mean_trial
#
# After excluding the collinear variables, the linear correlation coefficients ranges between:
# min correlation ( Accuracy.PC1.rt ~ CV_correct.rt ): -0.006244717
# max correlation ( CV_correct.rt ~ CV_trial.rt ): 0.5863136
#
# ---------- VIFs of the remained variables --------
# Variables VIF
# 1 pSTM 1.077146
# 2 CT_score.inv 1.188896
# 3 CV_trial.rt 1.578271
# 4 CV_trial_average 1.181265
# 5 CV_correct.rt 1.655602
# 6 Accuracy.PC1.rt 1.454839
# 7 Proficiency.PC1.rt 1.490497