To get the most recent version of the psych package, use the installer in R. The functions listed below were written before the psych package was released.

For the adventurous, the development version of the psych package may be found at http://personality-project.org/r/src. But it is better to just install the released version from CRAN.

# Useful snippits of R code

A collection of useful functions to do psychometrics and data analysis. To load all of these, use the source command:

```

source("http://personality-project.org/r/useful.r")    #the procedures listed below
source("http://personality-project.org/r/vss.r")       #the Very Simple Structure package

```
1. alpha.scale(scale,item.df) (find coefficient alpha from a scale vector and dataframe of the items)
2. describe(x,digits=2) (give means, sd, n, se, and skew for a data frame)
3. summ.stats (summary statistics for a data frame broken down by a categorical variable) -- also can use by(x,y,describe)
4. error.crosses (error bars in two space)
5. skew (x)
6. pairs.panels(x) (fancy correlations splom below diagonal, correlations above)
7. multi.hist(x) (plot multiple histograms for a data frame)
8. correct.cor (rmatrix,reliabilities) (given a correlation matrix and a vector of reliabilities, put the reliabilities on the diagonal and report unattenuated correlations above the diagonal.
9. paired.r(r12,r13,r23,n) test for the difference between two correlated correlations (returns t value)
10. fisherz(r) convert Pearson r to fisher z
11. VSS (and VSS.plot and VSS.simulate) found in vss.r

## Alpha

Find Cronbach's coefficient alpha for a scale formed from a data.frame of items.
```
#define a function to calculate coefficient alpha
alpha.scale=function (x,y)   #find coefficient alpha given a scale and a data.frame of the items in the scale
{
n=length(y)          #number of variables
Vi=sum(diag(var(y,na.rm=TRUE)))     #sum of item variance
Vt=var(x,na.rm=TRUE)                #total test variance
((Vt-Vi)/Vt)*(n/(n-1))}              #alpha

```

## describe

Report basic descriptive statistics (mean, median, standard deviation, skew, and standard error) for each variable in a data frame. When combined with the by function, this can also describe data by groups. Defaults to 2 decimals places.
```

#general descriptive statistics
describe <- function (x, digits = 2,na.rm=TRUE)   #basic stats after dropping non-numeric data
{                                   #first, define a local function
valid <- function(x) {
return(sum(!is.na(x)))
}
if (is.vector(x) )          #do it for vectors or
{
stats = matrix(rep(NA,6),ncol=6)    #create a temporary array

stats[1, 1] <-  mean(x, na.rm=na.rm )
stats[1, 2] <-  median(x,na.rm=na.rm  )
stats[1, 3] <-  min(x, na.rm=na.rm )
stats[1, 4] <-  max(x, na.rm=na.rm )
stats[1, 5] <-  skew(x,na.rm=na.rm  )
stats[1, 6] <-  valid(x )
len <- 1
}

else  {len = dim(x)[2]     #do it for matrices or data.frames

stats = matrix(rep(NA,len*6),ncol=6)    #create a temporary array
for (i in 1:len) {
if (is.numeric(x[,i])) {   #just do this for numeric data
stats[i, 1] <-  mean(x[,i], na.rm=na.rm )
stats[i, 2] <-  median(x[,i],na.rm=na.rm  )
stats[i, 3] <-  min(x[,i], na.rm=na.rm )
stats[i, 4] <-  max(x[,i], na.rm=na.rm )
stats[i, 5] <-  skew(x[,i],na.rm=na.rm  )
stats[i, 6] <-  valid(x[,i] )
}
}
}
temp <-  data.frame(n = stats[,6],mean = stats[,1], sd = sd(x,na.rm=TRUE), median = stats[,
2],min= stats[,3],max=stats[,4], range=stats[,4]-stats[,3],skew = stats[, 5])
answer <-  round(data.frame(V=seq(1:len),temp, se = temp\$sd/sqrt(temp\$n)),  digits)
}

```

## summ.stats

```
# find basic summary statistics by groups      #adapted from "Kickstarting R"
summ.stats <- function (x,y) {               #data are x, grouping variable is y
means <- tapply(x, y, mean, na.rm=TRUE)
sds  <- tapply(x,y, sd, na.rm = TRUE)
valid <- function (x) {return(sum(!is.na(x)) )}
ns <- tapply(x,y, valid )
se=sds/sqrt(ns)
}

```

## error.crosses

```
#error bars in a two dimensional plot
error.crosses <-function (x,y,z)  # x  and y are data frame with z points
{for (i in 1:z)
{xcen <- x\$mean[i]
ycen <- y\$mean[i]
xse  <- x\$se[i]
yse <-  y\$se[i]
arrows(xcen-xse,ycen,xcen+xse,ycen,length=.2, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
arrows(xcen,ycen-yse,xcen,ycen+yse,length=.2, angle = 90, code=3,col = par("fg"), lty = NULL, lwd = par("lwd"), xpd = NULL)
}
}

#examples
# plot(mPA,mNA,pch=symb[condit],cex=4.5,col=colors[condit],bg=colors[condit],main="Postive vs. Negative Affect",xlim=c(-1,1.5),ylim=c(-1,1.5),xlab="Positive Affect",ylab="Negative Affect")

#error.crosses(paf.stats,naf.stats,4)     #put in x and y error bars!

skew

skew= function (x, na.rm = FALSE)
{
if (na.rm)    x <- x[!is.na(x)]             #remove missing values
sum((x - mean(x))^3)/(length(x) * sd(x)^3)  #calculate skew
}

```

## fancy correlation plots

Put scatter plots below diagonal, histograms on the diagonal, correlations above the diagonal. The scatter plots can be best fitted with a loess smooth (smooth=TRUE is the default), and the correlations can be scaled (scale=TRUE) by the size of the correlation.

These functions are adapted (stolen?) from the help.cor examples.

```
#some fancy graphics   -- adapted from help.cor
panel.cor.scale <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y,use="pairwise"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex * abs(r))
}

panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y,use="pairwise"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex )
}

panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h\$breaks; nB <- length(breaks)
y <- h\$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
}

pairs.panels <- function (x,y,smooth=TRUE,scale=FALSE)
{if (smooth ){
if (scale) {
pairs(x,diag.panel=panel.hist,upper.panel=panel.cor.scale,lower.panel=panel.smooth)
}
else {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor,lower.panel=panel.smooth)
} #else  {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor,lower.panel=panel.smooth)
}

else      #smooth is not true
{ if (scale) {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor.scale)
} else  {pairs(x,diag.panel=panel.hist,upper.panel=panel.cor) }
} #end of else (smooth)

}   #end of function

```

## multi.hist

```

multi.hist <- function(x) {nvar <- dim(x)[2]  #number of variables
nsize=trunc(sqrt(nvar))+1   #size of graphic
old.par <- par(no.readonly = TRUE) # all par settings which can be changed
par(mfrow=c(nsize,nsize))       #set new graphic parameters
for (i in 1:nvar) {
name=names(x)[i]                #get the names for the variables
hist(x[,i],main=name,xlab=name) }  #draw the histograms for each variable
on.exit(par(old.par))   #set the graphic parameters back to the original
}

```

## correct.cor

```
#function to replace upper triangle of matrix with unattenuated correlations, and the diagonal with reliabilities
#input is a correlation matrix and a vector of reliabilities

correct.cor <- function(x,y) { n=dim(x)[1]
{ x[1,1] <- y[1,1]
for (i in 2:n) {
x[i,i] <- y[1,i]   #put reliabilities on the diagonal
k=i-1
for (j in 1:k) {
x[j,i] <- x[j,i]/sqrt(y[1,i]*y[1,j])  }   #fix the upper triangular part of the matrix

}
return(x)  }}

```

## paired.r

```
#difference of dependent (paired) correlations (following Steiger,J., 1980, Tests for comparing elements of a correlation matrix. Psychological Bulletin, 87, 245-251)
paired.r <- function(xy,xz,yz,n) {
diff <- xy-xz
determin=1-xy*xy - xz*xz - yz*yz + 2*xy*xz*yz
av=(xy+xz)/2
cube= (1-yz)*(1-yz)*(1-yz)
t2 = diff * sqrt((n-1)*(1+yz)/(((2*(n-1)/(n-3))*determin+av*av*cube)))
return(t2)
}

```

## fisherz

```
fisherz <- function(rho)  {0.5*log((1+rho)/(1-rho)) }   #converts r to z

```
#quick demonstration of a random walk process #to show range of variability over trials and replications #the "confidence lines" represent 2 sd of theoretical sum random.walk <- function(length=200,n=20,effect=0) { num <- seq(1:length) colors <- rainbow(n) #select colors x <- cumsum(rnorm(length,effect))/num plot(x,ylim = c(-2.5,2.5),typ="b",col=colors[1],main="Sample means as function of sample size",ylab="sample mean",xlab="sample size") for (i in 2:n) { x <- cumsum(rnorm(length,effect))/num #find the next line points(x,col=colors[i],typ="b") #draw it } curve(2/sqrt(x),1,length,101,add=TRUE) #upper confidence region curve(-2/sqrt(x),1,length,101,add=TRUE) #lower confidence region text(length/2,2,"effect size is " ) text(length/2, 1.8,eval(effect)) }
part of a short guide to R
Version of December 31, 2004
William Revelle
Department of Psychology
Northwestern University