Search
 
SCRIPT & CODE EXAMPLE
 

R

correlation matrix using factors r

require(tidyverse)
require(rcompanion)


# Calculate a pairwise association between all variables in a data frame.
#
# The measure depends on the types of the two columns:
#   * nominal vs nominal -> Cramer's V (rcompanion::cramerV), type "cramersV"
#   * numeric vs numeric -> cor() with `cor_method`, type "correlation"
#   * numeric vs nominal -> sqrt(R^2) of a one-way linear model, type "anova"
# "Nominal" means factor (including ordered factor) or character columns;
# "numeric" means columns for which is.numeric() is TRUE.
# Pairs with any other column type (e.g. logical, Date) raise a warning and
# are left out of the result.
#
# Arguments:
#   df                   - a data frame (or tibble).
#   cor_method           - `method` passed to cor(): "spearman" (default),
#                          "pearson" or "kendall".
#   adjust_cramersv_bias - passed as `bias.correct` to cramerV(), see
#                          https://rdrr.io/cran/rcompanion/man/cramerV.html
#
# Returns a data frame with one row per supported ordered column pair and the
# columns x, y, assoc, type, complete_obs_pairs (rows where both values are
# non-missing) and complete_obs_ratio (complete_obs_pairs / number of rows).
#
# Adopted from https://stackoverflow.com/a/52557631/590437
mixed_assoc <- function(df, cor_method = "spearman", adjust_cramersv_bias = TRUE) {
    stopifnot(is.data.frame(df))

    # is.factor() also covers ordered factors, whose class vector has length 2
    # and would break a scalar `class(x) %in% ...` test.
    is_nominal <- function(x) is.factor(x) || is.character(x)
    # is.numeric() is TRUE for integer/double but FALSE for factors and dates,
    # i.e. exactly the columns cor() and lm() can treat as numbers.
    is_numeric <- function(x) is.numeric(x)

    assoc_pair <- function(x_name, y_name) {
        # `[[` looks the column up by its string name without tidy evaluation,
        # so a column literally called "x_name" cannot shadow the lookup.
        x <- df[[x_name]]
        y <- df[[y_name]]

        if (is_nominal(x) && is_nominal(y)) {
            # cramerV() returns a named number; drop the name so it does not
            # end up as a row name.
            assoc <- unname(cramerV(as.character(x), as.character(y),
                                    bias.correct = adjust_cramersv_bias))
            type <- "cramersV"
        } else if (is_numeric(x) && is_numeric(y)) {
            assoc <- cor(x, y, method = cor_method, use = "complete.obs")
            type <- "correlation"
        } else if (is_numeric(x) && is_nominal(y)) {
            # from https://stats.stackexchange.com/questions/119835/correlation-between-a-nominal-iv-and-a-continuous-dv-variable/124618#124618
            assoc <- sqrt(summary(lm(x ~ y))$r.squared)
            type <- "anova"
        } else if (is_nominal(x) && is_numeric(y)) {
            assoc <- sqrt(summary(lm(y ~ x))$r.squared)
            type <- "anova"
        } else {
            warning(
                "unmatched column type combination: ",
                x_name, " (", paste(class(x), collapse = "/"), ") vs ",
                y_name, " (", paste(class(y), collapse = "/"), ")",
                call. = FALSE
            )
            # Skip this pair instead of failing on a non-data-frame result.
            return(NULL)
        }

        # finally add complete obs number and ratio to table
        n_complete <- sum(!is.na(x) & !is.na(y))
        data.frame(
            x = x_name,
            y = y_name,
            assoc = assoc,
            type = type,
            complete_obs_pairs = n_complete,
            complete_obs_ratio = n_complete / length(x),
            stringsAsFactors = FALSE
        )
    }

    # apply function to each variable combination (first name varies fastest)
    combos <- expand.grid(X1 = names(df), X2 = names(df), stringsAsFactors = FALSE)
    rows <- unname(Map(assoc_pair, combos$X1, combos$X2))
    rows <- Filter(Negate(is.null), rows)

    if (length(rows) == 0L) {
        # Nothing to report: return an empty table with the usual columns.
        return(data.frame(
            x = character(0),
            y = character(0),
            assoc = numeric(0),
            type = character(0),
            complete_obs_pairs = integer(0),
            complete_obs_ratio = numeric(0),
            stringsAsFactors = FALSE
        ))
    }

    result <- do.call(rbind, rows)
    rownames(result) <- NULL
    result
}
Comment

PREVIOUS NEXT
Code Example
R :: r predict type 
R :: dplyr mutate with conditional values 
R :: slope by row r 
R :: extract df from lm in r 
R :: logistic inverse CDF in r 
R :: negate R 
R :: change to posixct in r 
R :: excel date format r 
R :: R new column t test p-value 
R :: generate dates between dates in R 
R :: create dataframe in r 
R :: R view storage size of variable 
R :: r while loop 
Rust :: read file in rust 
Rust :: rust copy trait 
Rust :: rust get crate version 
Rust :: reverse vec rust 
Rust :: rust test std out 
Rust :: Pushing Array values to a Vector in Rust 
Rust :: where in Rust 
Rust :: rust from floating point to money 
Rust :: armanriazi•rust•collection•hashmap•avoid_of_duplicate 
Rust :: overwriting print on same line rust 
Rust :: initialize empty vec in rust 
Rust :: rust•armanriazi•thread•sync•sharedstate•mutex 
Lua :: roblox make a rainbow part 
Lua :: lua round number 
Lua :: my second long scripting 
Lua :: roblox pairs 
Lua :: Lua numbers 
ADD CONTENT
Topic
Content
Source link
Name
5+4 =