# Statistical Methods in Medical research -
# Laaketieteellisen tutkimuksen tilastolliset menetelmat
# At University of Helsinki
# 2.9.2022
# Matti Pirinen
###
### Topic 2 Learn R: Matrices and data frames
###
#******************************
#
# 2.1 Matrices
#
#******************************
# MATRICES are two-dimensional arrays that hold data in rows (rivit) and columns (sarakkeet).
# They can be generated from a vector of values
# by specifying how many rows we want
# Six elements arranged in 2 rows give a matrix with 3 columns.
# By default the values are filled in column by column.
M <- matrix(1:6, nrow = 2)
M
# With byrow = TRUE the matrix is filled row by row instead.
matrix(1:6, nrow = 2, byrow = TRUE)
# Dimensions of M: number of rows first, then number of columns.
dim(M)
# Sum of each row.
rowSums(M)
# Sum of each column.
colSums(M)
# Total sum over all elements.
sum(M)
# Rows and columns can be named with rownames() and colnames().
rownames(M) <- c("row1", "row2")
colnames(M) <- c("col1", "col2", "col3")
M
# Indexing a matrix: M[row, column]
# Single element at row 1, column 2.
M[1, 2]
# Whole row 1 (returned as a vector).
M[1, ]
# Whole column 2 (returned as a vector).
M[, 2]
# Rows 1 and 2 in full (returned as a matrix).
M[1:2, ]
# Columns 1 and 3 in full.
M[, c(1, 3)]
# A minus sign removes the given row / column: here M without column 1.
M[, -1]
# A matrix can also be built by binding vectors together,
# either as rows ...
rbind(c(1, 2), c(3, 4))
# ... or as columns.
cbind(c(1, 2), c(3, 4))
# The transpose function t() swaps rows and columns.
# Original matrix:
M
# Transposed matrix (rows have become columns and vice versa):
t(M)
#Read about R matrices with some visualisations from here:
# https://www.guru99.com/r-matrix-tutorial.html
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself D2.1. (Answers are at the end of this file.)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
#(a) Generate matrix 'x' whose first row is 1,2,3,4 and second row is 4,3,2,1.
#(b) Use matrix indexing to show row 2 of x.
#(c) Use matrix indexing to show column 2 of x.
#(d) Use matrix indexing to show element of row = 2 and column = 3 of x.
#(e) Use matrix indexing to show columns 2 and 4 of x.
#(f) Use matrix indexing to show other columns except column 3 from x.
#(g) Compute row sums of matrix x.
#******************************
#
# 2.2 Data frames
#
#******************************
# The standard data structure in R is the data frame, as we saw in Week 1 exercises.
# You can think of it as a table of data with rows corresponding to samples (patients)
# and columns to measured values (name, age, blood pressure, bmi ...).
# What separates a data frame from a matrix is that all elements of a matrix must be
# of the same type (e.g. all numerical), whereas the columns of a data frame can be
# of different types (numbers, text or other types of variables).
# To generate a data frame, we simply specify the columns of the data frame and give each
# a name. All columns should be of the same length.
# Let's make a data frame with patient name, sex, bmi value and sbp value.
# Here we denote sex by letters "M" and "F".
x <- data.frame(
  name = c("Esko", "Alina", "Mika"),
  sex = c("M", "F", "M"),
  bmi = c(29, 27, 21),
  sbp = c(130, 120, 123),
  stringsAsFactors = FALSE
)
# The argument "stringsAsFactors" controls how text columns (here, name and sex)
# are stored: as plain text (FALSE) or converted to factor variables (TRUE).
# Here they are kept as text. (FALSE is also the default since R 4.0.)
# Show the data frame.
x
# Picking columns of a data frame
# A column can be extracted with $columnname ...
x$sex
# ... or with the same [row, column] indexing as for a matrix.
x[, 2]
# Rows are picked by giving the row index.
x[2, ]
# A column can also be referred to by its name inside the brackets;
# this gives the bmi value on row 2.
x[2, "bmi"]
# str() shows the structure of the data frame.
str(x)
# A matrix and a data frame often look similar when printed,
# so it can be useful to check which one you are working with.
is.data.frame(x)
is.matrix(x)
# x is a data frame.
# The numerical part (columns 3 & 4) of the data frame
# can be turned into a matrix like this:
x.mat <- as.matrix(x[, 3:4])
x.mat
is.data.frame(x.mat)
is.matrix(x.mat)
# x.mat is a matrix. Note that it has kept the column names ('bmi', 'sbp').
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
# Test Yourself D2.2. (Answers are at the end of this file.)
#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#*#
#(a) Generate data frame 'z' that contains data for 2 individuals.
# The individuals have values for the following variables
# ID, numerical patient code
# age, numerical value in years at hospital visit
# sex, letter "M" or "F"
# diabetes, diabetes status (0=no, 1=yes)
# Patient 1 has ID=887, age = 54, sex = "M", diabetes = 1
# Patient 2 has ID=633, age = 45, sex = "F", diabetes = 0
#(b) Show column for age from data frame z.
#(c) Show whole row for patient 1.
#(d) Show value of diabetes for patient 2.
#(e) Check the structure of data frame z.
#
##
### ANSWERS
##
#
#*#*#*#*#*#*#*#*#*#*#
# Test Yourself D2.1.
#*#*#*#*#*#*#*#*#*#*#
#(a) Generate matrix 'x' whose first row is 1,2,3,4 and second row is 4,3,2,1.
# Bind the two rows together ...
x <- rbind(1:4, 4:1)
# OR fill a 4-column matrix row by row ...
x <- matrix(c(1:4, 4:1), ncol = 4, byrow = TRUE)
# OR give the values in column order (the default fill order).
x <- matrix(c(1, 4, 2, 3, 3, 2, 4, 1), ncol = 4)
# (b) Use matrix indexing to show row 2 of x.
x[2, ]
# (c) Use matrix indexing to show column 2 of x.
x[, 2]
# (d) Use matrix indexing to show element of row = 2 and column = 3 of x.
x[2, 3]
# (e) Use matrix indexing to show columns 2 and 4 of x.
x[, c(2, 4)]
# (f) Use matrix indexing to show other columns except column 3 from x.
x[, -3]
# (g) Compute row sums of matrix x.
rowSums(x)
#*#*#*#*#*#*#*#*#*#*#
# Test Yourself D2.2.
#*#*#*#*#*#*#*#*#*#*#
#(a) Generate data frame 'z' that contains data for 2 individuals.
# The individuals have values for the following variables
# ID, numerical patient code
# age, numerical value in years at hospital visit
# sex, letter "M" or "F"
# diabetes, diabetes status (0=no, 1=yes)
# Patient 1 has ID=887, age = 54, sex = "M", diabetes = 1
# Patient 2 has ID=633, age = 45, sex = "F", diabetes = 0
z <- data.frame(
  ID = c(887, 633),
  age = c(54, 45),
  sex = c("M", "F"),
  diabetes = c(1, 0),
  stringsAsFactors = FALSE
)
z
# (b) Show column for age from data frame z.
# Any of these three works: by column name in brackets,
z[, "age"]
# by $columnname,
z$age
# or by column position.
z[, 2]
# (c) Show whole row for patient 1.
z[1, ]
# (d) Show value of diabetes for patient 2.
z[2, "diabetes"]
# (e) Check the structure of data frame z.
str(z)