#################################################################################################################
################################################################################################################# MODULE 3
#The simulated standard deviation of the sampling distribution (the standard error)
#decreases as sample size increases and is very close to the analytical value
#sigma/sqrt(n) at large sample sizes
# Number of repeated samples drawn in each simulation
sim.length <- 10000

# As before, generate a hypothetical population of measurements:
# pop.size draws from a normal distribution with mean mu and sd sigma
pop.size <- 100000
mu <- 2
sigma <- 1
hypo.pop <- rnorm(n = pop.size, mean = mu, sd = sigma)

# Set of different sample sizes with which to explore the population
steps <- c(3, 6, 100, 1000)
#steps=c(3,12,50,100)



#GET THE PARAMETERS OF THAT POPULATION
#mu=mean(hypo.pop)
#sigma=sd(hypo.pop)

# Create a new dataSummary table: 5 rows x 4 columns, initially all NA.
# Row 1 describes the population itself; the remaining rows are left NA here.
dataSummary <- data.frame(matrix(NA, nrow = 5, ncol = 4))
names(dataSummary) <- c("n", "mean", "Stdev", "S.E. Analytical")

# Population row: the "n" cell carries the label "POP" (which makes that
# column character), followed by the parameters used to generate the
# population. No analytical standard error applies to the population row.
dataSummary[1, "n"] <- "POP"
dataSummary[1, "mean"] <- mu
dataSummary[1, "Stdev"] <- sigma
dataSummary[1, "S.E. Analytical"] <- NA
dataSummary
###############Q: why are the mean and sd of hypo.pop not exactly what I set them to? mean(hypo.pop) is not exactly mu (2), sd(hypo.pop) is not exactly sigma (1)....is this just rounding error?
##is this close enough????


# Plot the histogram of the population.
# Split the graphics device into a 2 x 3 grid of panels; the population
# histogram goes in the first panel, with the x-axis spanning the full
# range of the population values.
par(mfrow = c(2, 3))
hist(
  hypo.pop,
  main = "Population distribution",
  xlab = "X",
  xlim = range(hypo.pop)
)


####################################################################################
############################################################### MODULE 3 SIMULATIONS
##NOW we will see what happens to the mean and SE as we start with a SAMPLE of low n,
#and take repeated samples with successively larger values of n.

## Simulations 1..length(steps): one pass per sample size in "steps".
## For each sample size n:
##   1. draw sim.length samples of size n (without replacement) from hypo.pop
##      and record the mean of each sample;
##   2. plot the histogram of those sample means on the same x-axis as the
##      population histogram, so the narrowing of the distribution is visible;
##   3. store n, the mean and sd of the sample means, and the analytical
##      standard error sigma/sqrt(n) in the next row of dataSummary
##      (row 1 holds the population, so step k goes in row k + 1).

# Axis limits are identical for every panel, so compute them once.
pop.range <- range(hypo.pop)

for (step.idx in seq_along(steps)) {
  n <- steps[step.idx]

  # One sample mean per simulated draw; vapply guarantees a numeric vector
  # of length sim.length.
  sample.means <- vapply(
    seq_len(sim.length),
    function(i) mean(sample(hypo.pop, n, replace = FALSE)),
    numeric(1)
  )

  hist(
    sample.means,
    main = paste("n=", n),
    xlab = "MEAN_X",
    xlim = pop.range
  )

  summary.row <- step.idx + 1
  dataSummary[summary.row, 1] <- n
  dataSummary[summary.row, 2] <- mean(sample.means)
  dataSummary[summary.row, 3] <- sd(sample.means)
  # Analytical S.E. uses the sigma the population was generated with.
  dataSummary[summary.row, 4] <- sigma / sqrt(n)

  # Explicit print: values are not auto-printed inside a for loop.
  print(dataSummary)
}