################################################################################
################################################################################
# MODULE 3
# The simulated spread (standard deviation) of the sampling distribution of the
# mean decreases as sample size increases, and is very close to the analytical
# standard error sigma/sqrt(n) at large sample sizes.

# Number of repeated samples drawn for each sample size
sim.length <- 10000

# As before, GENERATE A HYPOTHETICAL POPULATION OF MEASUREMENTS
pop.size <- 100000
mu <- 2
sigma <- 1
hypo.pop <- rnorm(pop.size, mu, sigma)

# Select a set of different sample sizes with which to explore our population
steps <- c(3, 6, 100, 1000)
# steps=c(3,12,50,100)

# GET THE PARAMETERS OF THAT POPULATION
# (left disabled: the table below reports the mu and sigma that were passed to
# rnorm(), not the realised mean and sd of hypo.pop)
# mu=mean(hypo.pop)
# sigma=sd(hypo.pop)

# Create a new dataSummary table: row 1 describes the population and one
# further row is reserved for every sample size in "steps". Cells start as NA
# and are filled in by the simulations.
dataSummary <- data.frame(matrix(NA, nrow = length(steps) + 1, ncol = 4))
colnames(dataSummary) <- c("n", "mean", "Stdev", "S.E. Analytical")
dataSummary[1, 1] <- "POP"
dataSummary[1, 2] <- mu
dataSummary[1, 3] <- sigma
dataSummary[1, 4] <- NA  # no analytical standard error for the population row
print(dataSummary)

# Q: why are the mean and sd of hypo.pop not exactly what I set them to
# (mu = 2, sigma = 1)? Is this just rounding error? Is this close enough?
# NOTE(review): hypo.pop is a finite set of random rnorm() draws, so its
# realised mean and sd differ slightly from mu and sigma through random
# sampling variation rather than rounding.

# PLOT THE HISTOGRAM of the population
# Three panels per row, with enough rows for the population panel plus one
# panel per sample size (a 2 x 3 grid for four sample sizes).
par(mfrow = c(ceiling((length(steps) + 1) / 3), 3))
hist(
  hypo.pop,
  main = "Population distribution",
  xlab = "X",
  xlim = range(hypo.pop)
)

################################################################################
################################################################################
# MODULE 3 SIMULATIONS
# NOW we will see what happens to the mean and SE as we start with a SAMPLE of
# low n, and take repeat samples with successively larger values of n.
## Simulations 1..length(steps): simulation k uses the k-th sample size from
## "steps".
##
## For each sample size n this:
##   1. draws sim.length samples of size n from hypo.pop (without replacement)
##      and records the mean of each one in sample.means;
##   2. plots the histogram of those means on the same x-axis range as the
##      population, so the panels can be compared directly;
##   3. stores n, the mean and sd of the simulated means, and the analytical
##      standard error sigma/sqrt(n) in the matching row of dataSummary;
##   4. prints the table filled in so far.
##
## Reads sim.length, steps, hypo.pop, sigma and dataSummary, which are set up
## earlier in the script. After the loop, n and sample.means hold the values
## from the last sample size.
for (k in seq_along(steps)) {
  n <- steps[k]

  # One sample mean per repetition; the samples are drawn in the same order as
  # an explicit loop over 1..sim.length would draw them.
  sample.means <- vapply(
    seq_len(sim.length),
    function(i) mean(sample(hypo.pop, n, replace = FALSE)),
    numeric(1)
  )

  hist(
    sample.means,
    main = paste("n=", n),
    xlab = "MEAN_X",
    xlim = range(hypo.pop)
  )

  # Row 1 of dataSummary is the population, so simulation k fills row k + 1.
  out.row <- k + 1
  dataSummary[out.row, 1] <- n
  dataSummary[out.row, 2] <- mean(sample.means)
  dataSummary[out.row, 3] <- sd(sample.means)
  dataSummary[out.row, 4] <- sigma / sqrt(n)

  # Explicit print(): a bare object name is not auto-printed inside a loop.
  print(dataSummary)
}