# Statistical Methods in Medical Research
# (Finnish course title: Laaketieteellisen tutkimuksen tilastolliset menetelmat)
# University of Helsinki
# 14.8.2023
# Matti Pirinen

###
### 4. Learn R: Strings and Plotting I
###

#******************************
#
# 4.1 Strings of letters
#
#******************************

# A character string is made by enclosing letters, or any characters,
# in quotes " ".
# ('<-' is R's assignment operator; at the top level '=' does the same thing.)
txt <- "Value of x is" # stores a string in a variable called 'txt'
txt

# paste() combines strings of text and values of variables.
x <- 22
str.from.paste <- paste(txt, x) # the character string "Value of x is 22"
str.from.paste # print it out

x <- 33
paste("Now the value of x is", x)

# By default paste() joins the pieces with one white space as a separator.
# Another separator can be chosen with the parameter 'sep ='; the separator
# symbol or string must be given in quotes.
# For example, let's use a colon:
paste("Now the value of x is", x, sep = ":")

# To paste without any separator (that is, with the empty string "" as the
# separator) use paste0(), which is a shorthand for paste(..., sep = "").
paste0("Now the value of x is", x)

# NOTE: In some contexts the result of paste() is printed only if paste() is
# enclosed within print(); for example, this happens within for-loops that
# we will learn later.

# If the text only needs to be printed out, and not assigned to a variable,
# cat() ("concatenate") can be used instead.
# The text is shown on the console but cannot be stored in a variable for
# later use.
# The last "\n" marks a newline. It is needed if cat() should close the line
# at the end.
cat("Now the value of x is", x, "\n")

#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 4.1. (Answers are at the end of this file.)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#

# (1) Make a variable 'x' that contains character string "My age is".
#     Make another variable 'y' that has value 5.
#     Use paste() function to combine these to string "My age is 5".
# (2) Change your paste command so that it uses " +" as separator and
#     produces "My age is +5".
# (3) Write if-sentences that print out "You are old" if value of y is over 5
#     and otherwise they print out "You are still young".
#     Try them with values 3 and 10 for y.

#******************************
#
# 4.2 Plotting Part I
#
#******************************

# Let's study some of the basic plotting options in R.
# (We'll do more some other time.)

# Let's plot a parabola y = -x^2 + 2*x for x = -1,...,1
x <- seq(-1, 1, by = 0.1) # make a sequence -1, -0.9, -0.8, ..., 0.8, 0.9, 1.0
y <- -x^2 + 2 * x # compute y corresponding to each x value

# The basic plot command shows points as circles and labels the axes by the
# names of the variables in the call.
plot(x, y)
# The plot is at the bottom right corner of Rstudio.
# Get a better look by clicking "Zoom" from the panel above the plot.

# Plotting type 'type ='
# (R also accepts the abbreviation 't =' because argument names may be
#  partially matched, but the full name is clearer and safer.)
plot(x, y, type = "l") # "l" connects points by a line
plot(x, y, type = "b") # "b" shows both the line and the points, zoom in to see lines
plot(x, y, type = "o") # "o" shows both the line and the points overplotted
plot(x, y, type = "p") # "p" shows only the points (default)
plot(x, y, type = "s") # "s" shows stairs
plot(x, y, type = "h") # "h" shows "histogram", vertical segments from 0

# Point style 'pch =', color 'col =' and size 'cex ='
# Note that the symbol codes are numbers, not quoted strings.
plot(x, y, pch = 1) # pch = 1 is the default circle
plot(x, y, pch = 2) # pch = 2 is a triangle up
plot(x, y, pch = 3) # pch = 3 is a cross
# There are 26 of these, as listed here:
# https://www.r-bloggers.com/2021/06/r-plot-pch-symbols-different-point-shapes-in-r/

# Each of them can also take a color, like
plot(x, y, pch = 3, col = "blue")
# and symbols 21,...,25 can also take a separate background color
plot(x, y, pch = 21, col = "blue", bg = "gray")
# Examples of ready-named colors are here:
# http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf

# The points can be made larger (> 1) or smaller (< 1) by giving a
# multiplier with 'cex ='
plot(x, y, pch = 21, col = "blue", bg = "gray", cex = 2) # big points, twice the normal size
plot(x, y, pch = 21, col = "blue", bg = "gray", cex = 0.5) # tiny points, half the normal size

# Also other symbols can be used as plotting symbols by giving them to pch
# as a quoted character, for example let's use "A"
plot(x, y, pch = "A", col = "darkgreen", cex = 0.8)

# Plotting range 'xlim =' and 'ylim ='
# Let's make x-axis to go only from -0.5 to 0.5 and y-axis from -5 to 3
plot(x, y, xlim = c(-0.5, 0.5), ylim = c(-5, 3))

# Title 'main =', and axes names 'xlab =' and 'ylab ='
# Let's make the plot have title 'Relationship between u and v'
# and label x-axis as 'variable u' and y-axis as 'variable v'
plot(x, y,
  main = "Relationship between u and v",
  xlab = "variable u", ylab = "variable v"
)

# Subtitle 'sub ='
# Adding the mean of "v" as a subtitle using paste function
plot(x, y,
  main = "Relationship between u and v",
  xlab = "variable u", ylab = "variable v",
  sub = paste("mean(v) =", mean(y))
)

# This is not neat as there are too many decimals shown.
# Let's round to 2 decimals:
plot(x, y,
  main = "Relationship between u and v",
  xlab = "variable u", ylab = "variable v",
  sub = paste("mean(v) =", round(mean(y), 2))
)

# When you want to increase/decrease the size of the labels or title,
# you can use 'cex.lab =', 'cex.main =' and 'cex.sub ='.
# For example, let's make title smaller and axes labels larger
plot(x, y,
  main = "Relationship between u and v",
  xlab = "variable u", ylab = "variable v",
  sub = paste("mean(v) =", round(mean(y), 2)),
  cex.main = 0.7, cex.lab = 2
)

# We run into a problem where the y-axis label doesn't fit the area anymore.
# Let's change the margins. This happens via 'par()' function where we will
# set parameter 'mar =' that is the four margins of the plot in the order
# bottom, left, top, right.
# Their defaults are mar = c(5, 4, 4, 2) + 0.1
# We will increase the left margin to 5 and decrease the top margin to 2.
# par() returns the previous values of the parameters it changes, so we
# store them in 'old_par' to be able to put the margins back afterwards.
old_par <- par(mar = c(5, 5, 2, 2) + 0.1)
plot(x, y,
  main = "Relationship between u and v",
  xlab = "variable u", ylab = "variable v",
  sub = paste("mean(v) =", round(mean(y), 2)),
  cex.main = 0.7, cex.lab = 2
)
# Now it fits.

# We may also want to increase the values on the axes by 'cex.axis ='
par(mar = c(5, 5, 2, 2) + 0.1)
plot(x, y,
  main = "Relationship between u and v",
  xlab = "variable u", ylab = "variable v",
  sub = paste("mean(v) =", round(mean(y), 2)),
  cex.main = 0.7, cex.lab = 2, cex.axis = 1.5
)

# Margins set with par() stay in effect for all later plots.
# Restore the margins that were in use before this example.
par(old_par)

#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 4.2. (Answers are at the end of this file.)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#

# (1)
# Plot cumulative distribution function of binomial distribution with
# size = 30 and prob = 0.3, for values of x = 0,...,50, with the following
# properties
#  Use both line and points
#  Make points smaller size by a factor of 0.7
#  Make plotting symbol a solid square
#  Use color "magenta" for both the points and the line
#  Add x-axis label "successes" and y-axis label "cumulative probability"
#   and increase labels by a factor of 1.4
#  Add title "Bin(30,0.3)" and make it larger by a factor of 1.3
#  Increase the values on the axes by a factor of 1.5
#
# (2)
# Set margins so that we have 6 units at the bottom, 4 on the top,
# 1 on right and 5 on left.
# Redraw the figure and add a subtitle which uses paste() command to combine
# text "95% of obs. are <=" with the appropriate value as outputted from
# qbinom() function.
# (Thus, do not enter the appropriate value, 13, manually.)

#
##
### ANSWERS
##
#

#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 4.1.
#*#*#*#*#*#*#*#*#*#*#*#

# (1) Make a variable 'x' that contains character string "My age is".
#     Make another variable 'y' that has value 5.
#     Use paste() function to combine these to string "My age is 5".
x <- "My age is"
y <- 5
paste(x, y)

# (2) Change your paste command so that it uses " +" as separator and
#     produces "My age is +5".
paste(x, y, sep = " +")

# (3) Write if-sentences that print out "You are old" if value of y is over 5
#     and otherwise they print out "You are still young".
#     Try them with values 3 and 10 for y.
y <- c(3, 10)[1] # choose with index 1 or 2
# Exactly one of the two messages applies, so a single if/else covers both
# cases. (Two separate if-sentences with the conditions y > 5 and y <= 5
# would work as well.)
# print() makes sure that the text is shown also in contexts where a bare
# character string would not be printed, such as within for-loops.
if (y > 5) {
  print("You are old")
} else {
  print("You are still young")
}

#*#*#*#*#*#*#*#*#*#*#
# Test Yourself 4.2.
#*#*#*#*#*#*#*#*#*#*#

# (1)
# Plot cumulative distribution function of binomial distribution with
# size = 30 and prob = 0.3, for values of x = 0,...,50, with the following
# properties
#  Use both line and points
#  Make points smaller size by a factor of 0.7
#  Make plotting symbol a solid square
#  Use color "magenta" for both the points and the line
#  Add x-axis label "successes" and y-axis label "cumulative probability"
#   and increase labels by a factor of 1.4
#  Add title "Bin(30,0.3)" and make it larger by a factor of 1.3
#  Increase the values on the axes by a factor of 1.5

x <- 0:50
y <- pbinom(x, size = 30, prob = 0.3) # cumulative probabilities P(X <= x)
# 'type' is written out in full; the abbreviation 't =' gives the same plot.
plot(x, y,
  type = "b", cex = 0.7, pch = 15, col = "magenta",
  xlab = "successes", ylab = "cumulative probability", cex.lab = 1.4,
  main = "Bin(30, 0.3)", cex.main = 1.3, cex.axis = 1.5
)

# (2)
# Set margins so that we have 6 units at the bottom, 4 on the top,
# 1 on right and 5 on left.
# Redraw the figure and add a subtitle which uses paste() command to combine
# text "95% of obs. are <=" with the correct value from qbinom() function.

# The order in 'mar' is bottom, left, top, right.
# par() returns the previous values, which we keep in order to restore them.
old_par <- par(mar = c(6, 5, 4, 1))
plot(x, y,
  type = "b", cex = 0.7, pch = 15, col = "magenta",
  xlab = "successes", ylab = "cumulative probability", cex.lab = 1.4,
  main = "Bin(30, 0.3)", cex.main = 1.3, cex.axis = 1.5,
  sub = paste("95% of obs. are <=", qbinom(0.95, size = 30, prob = 0.3))
)
par(old_par) # put back the margins that were in use before this answer