## R Help Guide: The Basics
##
## A walk-through of reading data, drawing from common distributions,
## evaluating their CDFs, and checking the results visually with base graphics.

## setting the working directory and reading in data
## (the path and file names below are placeholders -- replace with your own)
setwd("C:/Users/YourName/Path/Folder")
my_data <- read.csv("the_data_name.csv", stringsAsFactors = FALSE)
# OR
library(foreign)
my_data <- read.dta("the_data_name.dta")

## generating pulls from distributions
x <- rnorm(1000, 0, 1)   # 1000 draws from standard normal
y <- runif(1000, -2, 2)  # 1000 draws from unif with a=-2 and b=2
z <- rbeta(1000, .5, 4)  # 1000 draws from beta with a=.5 and b=4
a <- rbinom(1000, 2, .5) # flip two fair coins, record number of heads
b <- rbinom(1000, 1, .5) # flip one fair coin, record outcome

## evaluating the CDF
cut_x <- pnorm(.7, 0, 1)  # Pr(x<=.7) in standard normal distribution
cut_y <- punif(0, -2, 2)  # Pr(x<=0) in uniform a=-2 b=2
cut_z <- pbeta(.2, .5, 4) # Pr(x<=.2) in beta a=.5 b=4
cut_a <- pbinom(1, 2, .5) # Pr(x<=1) flipping 2 fair coins where x is #heads

## plot the normal CDF
support <- seq(-2, 2, by = .01)
plot(support, pnorm(support, 0, 1), type = "l",
     main = "Standard Normal CDF",
     xlab = "values", ylab = "Pr(X <= x)", xaxs = "i", yaxs = "i")
abline(v = .7, lwd = 2)
abline(h = cut_x, lwd = 2)
text(-1.5, cut_x + .03, round(cut_x, 3))
# shade the area under the CDF up to .7 -- values out along the curve, then
# back along zero. The grid is built to end exactly at .7 rather than relying
# on a floating-point `<=` comparison against the seq() values.
shade_x <- c(support[support < .7], .7)
shade_y <- pnorm(shade_x, 0, 1)
polygon(x = c(shade_x, rev(shade_x)),
        y = c(shade_y, rep(0, length(shade_x))),
        col = "black", density = 45)

## assessing if that makes sense visually with some plots
hist(x, main = "Standard Normal", freq = FALSE, col = "gray",
     ylim = c(0, .6))
text(2.5, .3, paste(round(cut_x, 3) * 100, "% of density\nis below .7"))
lines(density(x), col = "blue", lwd = 2)
abline(v = .7, lwd = 2, col = "red", lty = 2)
legend("topleft", legend = c("histogram", "density", "line"),
       fill = c("gray", "blue", "red"), bty = "n")

hist(y, main = "Uniform Distribution", freq = FALSE, col = "dodgerblue",
     ylim = c(0, .5))
# horizontal line at .25, i.e. 1 / (2 - (-2)), the uniform density height
abline(h = .25, lwd = 3, col = "blue")
text(x = -1, y = .26, "the expected value", font = 2)

z_density <- density(z) # estimate once, reuse for the plot and the shading
plot(z_density, main = "Beta Distribution", col = "black",
     xlab = "proportion", xaxs = "i", yaxs = "i")
y_vals <- z_density$y # extract y values from density
x_vals <- z_density$x # extract x values from density
below <- x_vals <= .2
# draw a filled region -- values out, values back
polygon(x = c(x_vals[below], rev(x_vals[below])),
        y = c(y_vals[below], rep(0, sum(below))),
        col = "gray")
abline(v = .2, lwd = 2, col = "red", lty = 3)

# bottom, left, top, right -- clip the sides; keep the old settings to restore
old_par <- par(mar = c(3, 3, 2, 1))
hist(a, breaks = seq(min(a) - .5, max(a) + .5, 1), col = "gray", xaxt = "n",
     main = "Binomial Distribution")
axis(1, at = c(0, 1, 2), labels = c("zero", "one", "two"), font = 2)
# back to normal margins (dev.off() would also reset them, but it closes
# the graph as well)
par(old_par)

par(mfrow = c(1, 2))
hist(b, col = "purple", density = 45, freq = FALSE,
     xlab = "possible values", xlim = c(-1, 2))
b_density <- density(b) # estimate once, reuse for the plot and the shading
plot(b_density, col = "purple", main = "Density of b",
     xlab = "possible values", bty = "n")
polygon(x = c(b_density$x, rev(b_density$x)),
        y = c(b_density$y, rep(0, length(b_density$y))),
        col = "purple", density = 45)
par(mfrow = c(1, 1))

## notice that the densities are different -- why is that? Think about it.
## (Change the breaks on the histogram to bin the observations differently
## to get a better idea of what's up.)