# Statistical Methods in Medical Research -
# Laaketieteellisen tutkimuksen tilastolliset menetelmat
# At University of Helsinki
# 4.9.2022
# Matti Pirinen
###
### 4. Learn R: Strings and Plotting I
###
#******************************
#
# 4.1 Strings of letters
#
#******************************
# Character strings are made by enclosing letters or any other characters in quotes " ".
txt = "Value of x is" #Assign a string to a variable called 'txt'
txt #Typing the name of the variable prints its content
# We can combine strings of text and values of variables using the paste() function
x = 22
str.from.paste = paste(txt, x) #this returns a character string "Value of x is 22"
str.from.paste #print it out
x = 33
paste("Now the value of x is", x)
# By default, paste() combines the substrings with one white space as a separator.
# This can be changed with the parameter 'sep =' of paste(), where the separator
# symbol or string must be in quotes.
# For example, let's use a colon:
paste("Now the value of x is", x, sep = ":")
# If you want to use paste() without any separator (that is, with the empty string "" as separator)
# you can use paste0(), which is a shorthand for paste(..., sep = "")
paste0("Now the value of x is", x) #note that there is now nothing between "is" and the value of x
# NOTE: To print the result from paste() in some contexts you may need to enclose paste() within print()
# If you only want R to print out the text, you can also use cat() to "concatenate" text.
# The text is shown on the console but cannot be stored in a variable for later use.
cat("Now the value of x is", x, "\n") #"\n" marks a newline. It is needed at the end of cat().
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 4.1. (Answers are at the end of this file.)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# (1) Make a variable 'x' that contains character string "My age is".
# Make another variable 'y' that has value 5.
# Use paste() function to combine these to string "My age is 5".
# (2) Change your paste command so that it uses " +" as separator and produces "My age is +5".
# (3) Write if sentences that print out "You are" y "years old" if the value of y is over 5
# and otherwise print out "You are still young". Try them with values 3 and 10 for y.
#******************************
#
# 4.2 Plotting Part I
#
#******************************
# Let's study some of the basic plotting options in R. (We'll do more some other time.)
# Let's plot a parabola y = -x^2 + 2*x for x = -1,...,1
x = seq(-1, 1, by = 0.1) #make a sequence -1, -0.9, -0.8, ... 0.8, 0.9, 1.0
y = -x^2 + 2*x #compute y corresponding to each x value
plot(x, y) #basic plot command shows points as circles and labels axes by the names of variables in the call
# The plot is at the bottom right corner of RStudio.
# Get a better look by clicking "Zoom" from the panel above the plot.
# plotting type 'type ='
# (You may also see this written as 't ='. That works only because R completes
#  unambiguous abbreviations of argument names; writing out 'type' is safer.)
plot(x, y, type = "l") #type = "l" connects points by a line
plot(x, y, type = "b") #type = "b" shows both the line and the points, zoom in to see lines
plot(x, y, type = "o") #type = "o" shows both the line and the points overplotted
plot(x, y, type = "p") #type = "p" shows only the points (default)
plot(x, y, type = "s") #type = "s" shows stairs
plot(x, y, type = "h") #type = "h" shows "histogram", vertical segments from 0
# point style 'pch =', color 'col =' and size 'cex ='
plot(x, y, pch = 1) #pch = 1 is the default circle
plot(x, y, pch = 2) #pch = 2 is a triangle up
plot(x, y, pch = 3) #pch = 3 is a cross
# There are 26 of these, as listed here:
# https://www.r-bloggers.com/2021/06/r-plot-pch-symbols-different-point-shapes-in-r/
# Each of them can also take a color, like
plot(x, y, pch = 3, col = "blue")
# and symbols 21,...,25 can also take a separate background color
plot(x, y, pch = 21, col = "blue", bg = "gray")
# Examples of ready-named colors are here:
# http://applied-r.com/r-color-tables/
# The points can be made larger (> 1) or smaller (< 1) by giving a multiplier with 'cex ='
plot(x, y, pch = 21, col = "blue", bg = "gray", cex = 2) #big points of twice the normal size
plot(x, y, pch = 21, col = "blue", bg = "gray", cex = 0.5) #tiny points of half the normal size
# Also other symbols can be used as plotting symbols by giving them to pch in quotes,
# for example let's use "A"
plot(x, y, pch = "A", col = "darkgreen", cex = 0.8)
# Plotting range 'xlim =' and 'ylim ='
# Let's make the x-axis go only from -0.5 to 0.5 and the y-axis from -5 to 3
plot(x, y, xlim = c(-0.5, 0.5), ylim = c(-5, 3))
# Title 'main =', and axes names 'xlab =' and 'ylab ='
# Let's make the plot have title 'Relationship between u and v'
# and label x-axis as 'variable u' and y-axis as 'variable v'
plot(x, y, main = "Relationship between u and v", xlab = "variable u", ylab = "variable v")
# Subtitle 'sub ='
# Adding the mean of "v" as a subtitle using the paste() function
plot(x, y, main = "Relationship between u and v",
     xlab = "variable u", ylab = "variable v",
     sub = paste("mean(v) =", mean(y)))
# This is not neat as there are too many decimals shown. Let's round to 2 decimals:
plot(x, y, main = "Relationship between u and v",
     xlab = "variable u", ylab = "variable v",
     sub = paste("mean(v) =", round(mean(y), 2)))
# When you want to increase/decrease the size of the labels or title, you can use
# 'cex.lab =', 'cex.main =' and 'cex.sub ='.
# For example, let's make the title smaller and the axes labels larger
plot(x, y, main = "Relationship between u and v",
     xlab = "variable u", ylab = "variable v",
     sub = paste("mean(v) =", round(mean(y), 2)),
     cex.main = 0.7, cex.lab = 2)
# We run into a problem where the y-axis label doesn't fit the area anymore.
# Let's change the margins. This happens via the 'par()' function where we will
# set parameter 'mar =' that is the four margins of the plot in the order
# bottom, left, top, right.
# Their defaults are mar = c(5, 4, 4, 2) + 0.1
# We will increase the left margin to 5 and decrease the top margin to 2.
# par() changes the settings for ALL later plots, so we store the old settings
# (par() returns them) in 'old.par' to be able to switch back afterwards.
old.par = par(mar = c(5, 5, 2, 2) + 0.1)
plot(x, y, main = "Relationship between u and v",
     xlab = "variable u", ylab = "variable v",
     sub = paste("mean(v) =", round(mean(y), 2)),
     cex.main = 0.7, cex.lab = 2)
# Now it fits.
# We may also want to increase the size of the values shown on the axes by 'cex.axis ='
# (The margins set above are still in effect; the call is repeated here only so that
#  these lines can also be run on their own.)
par(mar = c(5, 5, 2, 2) + 0.1)
plot(x, y, main = "Relationship between u and v",
     xlab = "variable u", ylab = "variable v",
     sub = paste("mean(v) =", round(mean(y), 2)),
     cex.main = 0.7, cex.lab = 2, cex.axis = 1.5)
# Finally, switch back to the margins that were in use before we changed them.
par(old.par)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 4.2. (Answers are at the end of this file.)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
#(1)
#Plot cumulative distribution function of binomial distribution with size = 30 and prob = 0.3,
#for values of x = 0,...,50, with the following properties
# Use both line and points
# Make points smaller size by a factor of 0.7
# Make plotting symbol a solid square
# Use color "magenta" for both the points and the line
# Add x-axis label "successes" and y-axis label "cumulative probability" and increase labels by a factor of 1.4
# Add title "Bin(30,0.3)" and make it larger by a factor of 1.3
# Increase the values on the axes by a factor of 1.5
#
#(2)
# Set margins so that we have 6 units at the bottom, 4 on the top, 1 on right and 5 on left
# Redraw the figure and add a subtitle which uses paste() command to combine text
# "95% of obs. are <=" with the appropriate value as outputted from qbinom() function.
# (Thus, do not enter the appropriate value, 13, manually.)
#
##
### ANSWERS
##
#
#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 4.1.
#*#*#*#*#*#*#*#*#*#*#*#
# (1) Make a variable 'x' that contains character string "My age is".
# Make another variable 'y' that has value 5.
# Use paste() function to combine these to string "My age is 5".
x = "My age is"
y = 5
paste(x, y)
# (2) Change your paste command so that it uses " +" as separator and produces "My age is +5".
paste(x, y, sep = " +")
# (3) Write if sentences that print out "You are" y "years old" if the value of y is over 5
# and otherwise print out "You are still young". Try them with values 3 and 10 for y.
y = c(3, 10)[1] #choose with index 1 or 2
# The two cases exclude each other, so one if-else sentence covers both of them.
# print() makes sure that the text is shown also in contexts where the result of
# paste() would not be printed automatically.
if(y > 5) {
  print(paste("You are", y, "years old"))
} else {
  print("You are still young")
}
#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 4.2.
#*#*#*#*#*#*#*#*#*#*#
#(1)
#Plot cumulative distribution function of binomial distribution with size = 30 and prob = 0.3,
#for values of x = 0,...,50, with the following properties
# Use both line and points
# Make points smaller size by a factor of 0.7
# Make plotting symbol a solid square
# Use color "magenta" for both the points and the line
# Add x-axis label "successes" and y-axis label "cumulative probability" and increase labels by a factor of 1.4
# Add title "Bin(30,0.3)" and make it larger by a factor of 1.3
# Increase the values on the axes by a factor of 1.5
x = 0:50
y = pbinom(x, size = 30, prob = 0.3) #cumulative distribution function evaluated at each x
# 'type' is written out in full rather than abbreviated to 't'
plot(x, y, type = "b", cex = 0.7, pch = 15, col = "magenta",
     xlab = "successes", ylab = "cumulative probability",
     cex.lab = 1.4, main = "Bin(30, 0.3)", cex.main = 1.3, cex.axis = 1.5)
#(2)
# Set margins so that we have 6 units at the bottom, 4 on the top, 1 on right and 5 on left
# Redraw the figure and add a subtitle which uses paste() command to combine text
# "95% of obs. are <=" with the correct value from qbinom() function.
# 'mar' is given in the order bottom, left, top, right.
# par() returns the old settings; we store them so that we can switch back after plotting.
old.par = par(mar = c(6, 5, 4, 1))
plot(x, y, type = "b", cex = 0.7, pch = 15, col = "magenta",
     xlab = "successes", ylab = "cumulative probability",
     cex.lab = 1.4, main = "Bin(30, 0.3)", cex.main = 1.3, cex.axis = 1.5,
     sub = paste("95% of obs. are <=", qbinom(0.95, size = 30, prob = 0.3)))
par(old.par) #switch back to the margins that were in use before