# Statistical Methods in Medical Research 
# Laaketieteellisen tutkimuksen tilastolliset menetelmat
# At University of Helsinki
# 17.8.2024
# Matti Pirinen


###
### 5. Learn R: For-loop, packages, random number generator
###


#******************************
#
#   5.1 FOR-loop
#
#******************************


# For-loop runs through a set of values and repeats a given task for each value, one by one.
# For example, if we want that a variable x gets values 1,2,3,...,10 sequentially,
# we can make that happen in R by stating 'for(x in 1:10)' and then add within symbols '{ }'
# a code block that will be run for each value of 'x' 

for(x in 1:10){ #x is a variable that runs through the sequence 1,...,10, one value at a time
    txt = paste("index =",x, "and its square is",x^2)
    print(txt)
}
# If we use paste in for-loop, we need to explicitly print() within the for-loop
# since otherwise the output of paste( ) doesn't appear on screen.

# The values in the vector to run through can be also something else than integers
vals = c(1, 10.2, 100.99, 1000.12) # make a vector of values
for(ii in vals){ # index can have any name. ii, jj, kk are common names, above it was 'x'.
    if(ii <= 100) {print(paste("Index is still small:",ii))}
    if(ii > 100) {print(paste("index =",ii, "and its square is",ii^2))}
}
# Note that after the for-loop, the index variable keeps its last value.
ii


#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 5.1. (Answers are at the end of this file.)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#


#(1) Write a for-loop that writes 1/x on screen where
#    x goes through values from 2 to 5.
#
#(2) Generate a vector 'x' of length 10 that is full of value 'NA' initially.
#    Write a for-loop that fills 'x' by setting x[i] = i+1 for all i = 1,...,10.
#    Show the value of x after the loop has finished.


#*****************************
#
# 5.2 Install Packages in R
#
#*****************************

# To install package "pwr" for power calculations in Lecture 5, type
install.packages("pwr")
# and choose any R-site from the menu to download the package.
# It should install it automatically but it may ask you the directory where to put it.
# You can accept the default location.

# Sometimes the install.package() command doesn't go through because there is some other package
# that is needed first and R doesn't install it by default.
# In that case, you can add 'dependencies = TRUE' to install all the dependencies
install.packages("pwr", dependencies = TRUE) #likely not needed for pwr, but here used as an example


#*******************************
#
# 5.3 Random number generation
#
#*******************************

# We have used functions rbinom() and rnorm() to generate samples from binomial and Normal distributions.
# But how is "randomness" generated with a computer?
# Our "random" samples are actually not truly random but completely deterministically
# generated values. They just behave so much as true random samples from these distributions
# that we can't tell a difference between those values and true randomness.

# The random number generation happens so that we fix a seed number for the random number generator (RNG)
# by 'set.seed()' command
set.seed(85) # this can be any value, here it is 85
# When we now ask for a new random value from, say Uniform distribution, R applies certain deterministic
# formula to the current state of the random number generator (here 85) and the generator gets a new state
runif(1) # this produces a "random" value from uniform and also changes the state of RNG
# next application of runif() will then produce a different value and again change the state of RNG and so on
runif(2) #Let's ask for 2 more uniform values
#If we ask for 3 more values, they look again "random" compared to previous ones
runif(3)
# But if we would now change the seed back to 85, we would get exactly the same 6 "random" 
# values we have seen above and in the same order.
set.seed(85)
runif(6)
# Thus, if you set the seed when you start working on some problem 
# you can reproduce your results exactly, even when they involve "random" numbers.
# This is a useful property when trying to isolate problems in the code,
# or when needing to produce reproducible results with methods that involve random sampling.

# In the lecture notes of this course, there is a set.seed() command in the beginning of each lecture 
# so that the results will look the same every time the document is compiled.
# Otherwise, e.g., the means and SDs of the sampled data sets varied between runs
# and it would become more complicated to write about the results in the lecture notes.

# If you open up R but don't set the seed yourself, then
# a new one is created from the current time and the process ID of your R session. 
# Hence, by default, different sessions will give different simulation results
# and the variation between the results from different R sessions 
# mimics true randomness as far as we can tell.


#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 5.3. (Answers are at the end of this file.)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#

#(1)  Set the seed of the random number generator to value '14'.
#(2)  Generate 50 random values from the standard Normal distribution N(0,1) 
#     and compute the standard deviation of this sample.
#(3)  Repeat part (2) without setting the seed to a new value.
#(4)  Set seed again to value '14' and repeat part (2). 
#     Compare the three estimates of SD from a sample of n = 50.


#
##
### ANSWERS
##
#


#*#*#*#*#*#*#*#*#*#*#
# Test Yourself D5.1. 
#*#*#*#*#*#*#*#*#*#*#


#(1) Write a for-loop that writes 1/x on screen where
#    x goes through values from 2 to 5.

for(x in 2:5){
    print(1/x) #print is enough here, no need for paste, although it wouldn't harm either
}


#(2) Generate a vector 'x' of length 10 that is full of value 'NA' initially.
#    Write a for loop that fills 'x' by setting x[i] = i+1 for all i = 1,...,10.

x = rep(NA, 10)
for(i in 1:10){
    x[i] = i+1
}
x #show x after the for loop


#*#*#*#*#*#*#*#*#*#*#
# Test Yourself D5.3. 
#*#*#*#*#*#*#*#*#*#*#

#(1)  Set the seed of the random number generator to value '14'.

set.seed(14)


#(2)  Generate 50 random values from the standard Normal distribution N(0,1) 
#     and compute the standard deviation of this sample.

sd(rnorm(50,0,1))

#(3)  Repeat part (2) without setting the seed to a new value.

sd(rnorm(50,0,1))


#(4)  Set seed again to value '14' and repeat part (2). 
#     Compare the three estimates of SD from a sample of n = 50.

set.seed(14)
sd(rnorm(50,0,1))

# We see that the SD estimate from the first data set is the same as from the 3rd data set
# but different from the data set 2. This is because the 1st and the 3rd data sets were
# generated starting from the same state of the random number generator and hence
# the same rnorm() command produced exactly the same data sets in both cases.
# The 2nd data set was generated from a different state of the random number generator
# and hence it was a different data set from the other two sets.