I found it refreshing and put a gist below.
Pick one language, and learn it well!
Pick up a dataset, and play with it! Happy coding!
By the way, the food here at KBS is amazing, I am gaining weight :)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#2014 MSU NGS R basics tutorial | |
#http://angus.readthedocs.org/en/2014/R_Introductory_tutorial_2014.html | |
#https://github.com/jrherr/quick_basic_R_tutorial/blob/master/R_tutorial.md | |
#pick one language, and learn it well! | |
#pick up a dataset, play with it! | |
#object-oriented programming | |
#functional programming | |
#deal with big data in R: (R holds all the data in memory) | |
#http://theodi.org/blog/fig-data-11-tips-how-handle-big-data-r-and-1-bad-pun | |
#http://r-pbd.org/ | |
#packages: plyr, dplyr, ggplot2, reshape2, data.table (fread function) | |
# commands start here!! | |
# Basic session commands and arithmetic ----
# q() ends the R session. It is left commented out so that sourcing this
# script does not terminate R before anything else has run.
# q()
getwd() # get working directory
# setwd() needs a directory argument and errors when called without one:
# setwd("path/to/your/project")
y <- 2 # assign a variable
x <- 3
x + y # treat it as a calculator
x * y
x / y
x %% y # modulo: remainder of x divided by y
x == 3 # equality test, will return a logical vector: TRUE or FALSE
# in R, TRUE and FALSE have numerical values: TRUE resolves to 1,
# FALSE resolves to 0
# exponents ** or ^
2**2 # returns 4
2^2 # returns 4
log(2.7) # natural log returns 0.99325
log(4, 2) # log of 4 to base 2, returns 2
a <- c(2, 3, 6, 8) # assign a vector; c() concatenates its arguments
b <- c(3, 5, 6, 7)
a + b # element-wise addition
a * b # element-wise multiplication
# when the lengths of a and b differ, R recycles the shorter one; it warns
# only if the longer length is not a multiple of the shorter length
length(a) # length of a, returns 4
new_variable <- c(a, b) # concatenate two vectors
crap <- rep(1:100) # the integers 1 to 100; R indexing starts at 1
rm(crap) # remove this variable
?lm # get help for linear regression model function
# Simple summary functions and matrices ----
a <- c(2, 3, 6, 8)
mean(a)
sum(a)
var(a) # sample variance
b <- c(3, 5, 6, 7)
cor(a, b) # Pearson correlation for two vectors
m <- cbind(a, b) # column-bind the vectors into a matrix
m
#> m
#      a b
# [1,] 2 3
# [2,] 3 5
# [3,] 6 6
# [4,] 8 7
cor(m) # Pearson correlation for columns of a matrix
cor(m, method = "spearman") # Spearman correlation of columns of a matrix
?cor
mode(a) # numeric
class(a) # numeric
class(m) # matrix (R >= 4.0 reports "matrix" "array")
typeof(m) # double
str(m) # structure of m, try it out in your R console
dim(m) # dimension of m: 4 2
nrow(m) # number of rows 4
ncol(m) # number of columns 2
length(m) # 8
is.matrix(m) # TRUE
# create a matrix from scratch
#> m1 <- matrix(1:12, 3, 4)
#> m1
#     [,1] [,2] [,3] [,4]
#[1,]    1    4    7   10
#[2,]    2    5    8   11
#[3,]    3    6    9   12
# Strings (character vectors) ----
cities <- c("E.lansing", "Gainesville", "Shanghai", "Yichun")
class(cities)
#[1] "character"
length(cities) # 4 cities in the vector
nchar(cities) # number of characters for each city
#[1]  9 11  8  6
sum(nchar(cities))
#[1] 34
rivers <- c("Red Cedar", "swamp", "Huang Pu", "Long He")
cities_rivers <- cbind(cities, rivers) # character matrix, one column each
cities_rivers
#     cities        rivers
#[1,] "E.lansing"   "Red Cedar"
#[2,] "Gainesville" "swamp"
#[3,] "Shanghai"    "Huang Pu"
#[4,] "Yichun"      "Long He"
class(cities_rivers) # matrix
mode(cities_rivers) # character
# Formulas, factors and data frames ----
# A formula is stored unevaluated, so x1 and x2 do not need to exist yet.
model_1 <- y ~ x1 + x2 + x1:x2
model_1
#> class(model_1)
#[1] "formula"
counts_transcript_a <- c(250, 157, 155, 300, 125, 100, 153, 175)
# gl() generates a factor: n levels, each repeated k times in a row
genotype <- gl(n = 2, k = 4, labels = c("wild_type", "mutant"))
#> genotype
#[1] wild_type wild_type wild_type wild_type mutant
#[6] mutant    mutant    mutant
#Levels: wild_type mutant
# as an alternative to the gl function, one can do:
genotype1 <- factor(rep(c("wild_type", "mutant"), each = 4))
#> genotype1
#[1] wild_type wild_type wild_type wild_type mutant
#[6] mutant    mutant    mutant
#Levels: mutant wild_type
# notice the use of the "each" argument; without it the values alternate:
genotype1 <- factor(rep(c("wild_type", "mutant"), 4))
#> genotype1
#[1] wild_type mutant    wild_type mutant    wild_type
#[6] mutant    wild_type mutant
#Levels: mutant wild_type
# also, notice that the levels differ from those generated by the gl function
# (factor() sorts them alphabetically).
# we want wild_type to be the base level. Instead, do:
genotype1 <- factor(
  rep(c("wild_type", "mutant"), each = 4),
  levels = c("wild_type", "mutant")
)
#> genotype1
#[1] wild_type wild_type wild_type wild_type mutant mutant mutant mutant
#Levels: wild_type mutant
?relevel # try it also
expression_data <- data.frame(counts_transcript_a, genotype)
#> expression_data
#  counts_transcript_a  genotype
#1                 250 wild_type
#2                 157 wild_type
#3                 155 wild_type
#4                 300 wild_type
#5                 125    mutant
#6                 100    mutant
#7                 153    mutant
#8                 175    mutant
expression_data$counts_transcript_a # access a column of the data frame
ls() # objects in the environment
# rm(list = ls()) removes all the objects in the environment. It is left
# commented out because expression_data is still needed further down.
# rm(list = ls())
### write functions | |
# Standard error of the mean: the sample standard deviation divided by the
# square root of the number of observations.
#
# Arguments:
#   vector  numeric vector of observations.
#   na.rm   if TRUE, NA values are dropped before both the standard deviation
#           and the observation count are computed. Defaults to FALSE, which
#           gives NA whenever the input contains an NA.
# Returns a single numeric value.
StdErr <- function(vector, na.rm = FALSE) {
  if (na.rm) {
    # Drop NAs up front so that length() counts only the values used by sd()
    vector <- vector[!is.na(vector)]
  }
  sd(vector) / sqrt(length(vector))
}
# Coefficient of variation: the sample standard deviation divided by the mean.
#
# Arguments:
#   vector  numeric vector of observations.
#   na.rm   if TRUE, NA values are dropped before the standard deviation and
#           the mean are computed. Defaults to FALSE, which gives NA whenever
#           the input contains an NA.
# Returns a single numeric value.
CoefVar <- function(vector, na.rm = FALSE) {
  sd(vector, na.rm = na.rm) / mean(vector, na.rm = na.rm)
}
# apply families http://nsaunders.wordpress.com/2010/08/20/a-brief-introduction-to-apply-in-r/
# with
# with() evaluates an expression using the columns of a data frame, and
# tapply() applies FUN to the values of X within each level of INDEX.
# Rebuild the example data frame if it is no longer in the workspace.
if (!exists("expression_data")) {
  expression_data <- data.frame(
    counts_transcript_a = c(250, 157, 155, 300, 125, 100, 153, 175),
    genotype = gl(n = 2, k = 4, labels = c("wild_type", "mutant"))
  )
}
with(expression_data, tapply(X = counts_transcript_a, INDEX = genotype, FUN = mean))
# wild_type    mutant
#    215.50    138.25
# some commonly used functions, try ? to understand them
# (these functions need arguments, so the lines below open their help pages
# instead of calling them empty)
?head # print the first 6 lines, different from linux (default 10 lines)
?table
?rownames
?colnames
?nrow
?ncol
?by
?with
?rowSums
?rowMeans
?summary
# construct sequences
one_to_20 <- 1:20
twenty_to_1 <- 20:1
seq1 <- seq(from = 1, to = 20, by = 0.5) # 1.0, 1.5, 2.0, ..., 20.0
# or seq1 <- seq(1, 20, 0.5)
# repeat numbers
many_2 <- rep(2, times = 20)
many_a <- rep("a", times = 20)
seq_rep <- rep(1:10, times = 2) # the whole sequence 1..10, twice
rep_3_times <- rep(c(1, 2, 3), times = 3) # 1 2 3 1 2 3 1 2 3
# different: "each" repeats every element before moving to the next one
rep_each_3_times <- rep(c(1, 2, 3), each = 3) # 1 1 1 2 2 2 3 3 3
# to do: subsetting for vectors and matrix | |
# R mark-down |
No comments:
Post a Comment