BRAC@UMD

Rank Analysis of Microarrays (RAM)

##################################
#
# RANK ANALYSIS OF MICROARRAYS (RAM)
#
# The following two examples were given in in Lee, Gray, Bjorkbacka, Freeman.
# Generalized Rank Tests for Replicated Microarray Data, 
# Statistical Applications in Genetics and Molecular Biology,(2005),
# Vol. 4: No. 1, Article 3. http://www.bepress.com/sagmb/vol4/iss1/art3
#   
# This program was written by Dr. Robert J. Gray.
# User documentation was provided by Dr. Weiliang Qiu.
# ##################################

# For Unix version of 'permax',
# 'permax' should be installed in the home directory.

library(permax, lib.loc="~/.")
options(width=90)

##########################
# Example 1: One Treatment Applied to 4 Wild-type and 4 Knock-out mice
# (Two arrays made for each mouse)
#########################
# Read input data
# The data file 'data4ranksum.dat' has 3339 rows and 17 columns.
# The 1st row contains variable name. The 1st column contains gene id.
# 'data1' is a 3338 by 16 data frame.
# 'header=T" indates that the first row contains variable names.
# The rows correspond to genes that have complete observations from all 8 mice.
# The columns correspond to mice and their replicates.

data1<-read.table("data4ranksum.dat", header=T, row.names=1)

# Wild type mouse 1:4
# 'grp' indicates the column numbers in 'data1' corresponding to group 1
# (i.e. wild type mice 1, 2, 3, 4).
# The 1st replicates of these 4 mice correspond to columns 1, 2, 3, 4 of
# 'data1'; The 2nd replicates correspond to columns 9, 10, 11, 12 of 'data1'.
# 1:4 means 1, 2, 3, 4
# 9:12 means 9, 10, 11, 12
# 'grp' is (1,2,3,4,9,10,11,12)

grp <- c(1:4,9:12) 

# cluster=cy3,cy5 from each mouse
# 'clust' is a vector of cluster membership indicators for the columns 
# of 'data1'
# That is, columns 1 and 9 are from cluster 1 
#          columns 2 and 10 are from cluster 2 
#          ................................
#          columns 8 and 16 are from cluster 8 
# 'clust' is (1,2,3,4,5,6,7,8,1,2,3,4,5,6,7,8)

clust <- c(1:8,1:8)

# call the function permax()
#
# for each gene (each with two replicates), calculate the generalized ranksum 
# statistic by using all possible combinations of permuations.
#
# mouse  W1  W2  W3  W4  K1  K2  K3  K4
#repeat1 X11 X21 X31 X41 Y11 Y21 Y31 Y41
#repeat2 X12 X22 X32 X42 Y12 Y22 Y32 Y42
#
# Wi means wild-tye mouse i, i=1,2,3,4
# Kj means knock-out mouse j, j=1,2,3,4
#
# e.g. rank(X11) is among all 16 observations: X11, X21, X31, X41, Y11, Y21,
# Y31, Y41, X12, X22, X32, X42, Y12, Y22, Y32, Y42.
#
# We can randomly assign 4 mice as wild-type 
# and the remaining 4 mice as knock-out.
# There are a total of 70 (8 choose 4) possible permutations.
#
# The 16 columns of 'data1' correspond to
# X11 X21 X31 X41 Y11 Y21 Y31 Y41 X12 X22 X32 X42 Y12 Y22 Y32 Y42
#
# nperm=0 means all possible combinations are enumerated
#
# nl: A lower tail critical value is determined using the 'nl'th
#      largest value of the test statistics.  The number of genes
#      with statistics <= this critical value is determined for each
#      combination.  
# nr: An upper tail critical value is determined using the
#     'n-nr+1'st largest value of the test statistics, where 'n' is
#     the number of genes.  The number of genes with statistics >=
#     this critical value is determined for each combination.  
#
# 'cluster' is specified, 'stratify=FALSE', 'permute.cluster=TRUE'
# it is assumed that group or treatment is defined at the cluster level
# (that is, all members of each cluster are in the same group). In this case
# statistics are computed the same as in the independence case, but only
# whole clusters are permuted to preserve the within cluster dependence.

res1 <- permax(data=data1,ig1=grp,nperm=0, ranks=TRUE,cluster=clust,
             stratify=FALSE, permute.cluster=TRUE, nl=2,nr=5,expord=TRUE)

summary(res1)

# Plots statistics vs expected order statisics computed under the
#   permutation distribution for output from permax
# del: if 'del>0', then lines with slope=1 and intercepts '+del' and
#   '-del' will be added to the plot

plot.expord(x=res1,del=5)

######
# no ranks ('rank=FALSE'), i.e. use T-statistic
#####

res11 <- permax(data=data1,ig1=grp,nperm=0, ranks=FALSE,cluster=clust,
              permute.cluster=TRUE,nl=6,nr=23,expord=TRUE)
plot.expord(x=res11)


####################################
# Example 2: Two Treatments Applied to the Same Set of Mice
####################################

# read data
# The data file 'data4signedrank.dat' has 1324 rows and 25 columns.
# The 1st column is gene id.
# 'data2' is a 1324 by 24 data frame
# The rows correspond to genes.
# The columns correspond to intensity ratios of 6 mice and their replicates
# 'header=T" means the first row is variable names.
# 'row.names=1' means the first column of the file 'data4signedrank.dat' is
# row names.

data2<-read.table("data4signedrank.dat", header=T, row.names=1)

# 'clust12' is a vector of cluster membership indicators for the columns 
# of 'data2'
# That is, columns 1 and 7 are from cluster 1
#          columns 2 and 8 are from cluster 2
#          ................................
#          columns 6 and 12 are from cluster 6
#          columns 13 and 19 are from cluster 7
#          columns 14 and 20 are from cluster 8
#          ................................
#          columns 18 and 24 are from cluster 12
# 'clust' is (1,2,3,4,5,6,1,2,3,4,5,6,7,8,9,10,11,12,7,8,9,10,11,12)

clust12 <- c(1:6,1:6,7:12,7:12)

options(width=95)

# 'grp' indicates the column numbers in 'data2' corresponding to group 1
# (i.e. mice with treatment A).
# The 1st replicates of these 6 mice correspond to columns 1,...,6 of
# 'data2'; The 2nd replicates correspond to columns 13,...,18 of 'data2'.
# 'grp' is (1,2,3,4,5,6,13,14,15,16,17,18)

grp <- c(1:6,13:18) 


###
# RS1: The Generalized Ranksum Test (All 24 Ratios Ranked Together)
###
#        m1  m2  m3  m4  m5  m6  m1  m2  m3  m4  m5  m6
#repeat1 X11 X21 X31 X41 X51 X61 Y11 Y21 Y31 Y41 Y51 Y61
#repeat2 X12 X22 X32 X42 X52 X62 Y12 Y22 Y32 Y42 Y52 Y62
#
# We rank Xij, Yij, i=1,...,6, j=1,...,6 together (24 ranks)
# e.g. rank(X11) is the rank among X11, X21, X31,X41,X51, X61, Y11, Y21,
# Y31, Y41, Y51, Y61, X12, X22, X32, X42, X52, X62, Y12, Y22, Y32, Y42,
# Y52, Y62.
#
# For each pair (X11, Y11), (X21, Y21), (X31, Y31), (X41, Y41), (X51, Y51),
# (X61, Y61), (X12, Y12), (X22, Y22), (X32, Y32), (X42, Y42), (X52, Y52),
# (X62, Y62), we have two possible permutations.
# e.g.
# Permuation1: X11 is the score from treatment A, 
#              Y11 is the score from treatment B
# Permuation2: Y11 is the score from treatment A, 
#              X11 is the score from treatment B
# There are totally 2^12=4096 possible permutations.
# cluster on mouse-cy combination, using ranks
#
# 
# The 24 columns of 'data2' correspond to
# X11 X21 X31 X41 X51 X61 Y11 Y21 Y31 Y41 Y51 Y61 X12 X22 X32 X42 X52 X62 
# Y12 Y22 Y32 Y42 Y52 Y62
# 
# 'cluster' is specified, 'stratify=FALSE', 'permute.cluster=FALSE'
# statistics are computed as in the independence case, but columns are only
# permuted within clusters, since under the null it is assumed that
# columns from the same cluster are exchangeable, while columns from
# different clusters may not be.

res2.RS1 <- permax(data=data2,ig1=grp,nperm=0,min.np=0,logs=TRUE,
               ranks=TRUE, cluster=clust12,stratify=FALSE,signed.rank=FALSE,
               nl=38,nr=18, expord=TRUE, permute.cluster=FALSE)
summary(object=res2.RS1,nl=38,nr=18)


#####
# ST: Signed Rank Test for Blocked Comparison of Two Treatments
#####
#         mouse1     mouse2     mouse3     mouse4     mouse5      mouse6
#repeat1 (X11, Y11) (X21, Y21) (X31, Y31) (X41, Y41) (X51, Y51) (X61, Y61)
#repeat2 (X12, Y12) (X22, Y22) (X32, Y32) (X42, Y42) (X52, Y52) (X62, Y62)
#
# sr1=sign(Y11-X11)*rank(|Y11-X11|), sr2=sign(Y21-X21)*rank(|Y21-X21|), ...
# sr12=sign(|Y62-X62|)*rank(|Y62-X62|)
# 
# Analysis is based on the signed ranks sr1, sr2, ..., sr12
#
# For each pair (Xij, Xij), there are two possible permutations.
# Permuation1: Xij is the score for treament A, Yij is the score for teatment B
# Permuation2: Yij is the score for treament A, Xij is the score for teatment B
# There are totally 2^12=4096 possible permutations.
# signed-rank test 
#
#
# 'cluster' is specified, 'stratify=FALSE', 'permute.cluster=FALSE'
# statistics are computed as in the independence case, but columns are only
# permuted within clusters, since under the null it is assumed that
# columns from the same cluster are exchangeable, while columns from
# different clusters may not be.
#
# 'signed.rank=TRUE' indicates to use signed rank test

res2.ST <- permax(data=data2,ig1=grp,nperm=0,min.np=0,logs=TRUE,
                 ranks=TRUE, cluster=clust12,stratify=FALSE,signed.rank=TRUE,
                 nl=34,nr=21, expord=TRUE, permute.cluster=FALSE)
summary(object=res2.ST, nl=34, nr=21)

#####
# RS2: The Generalized Ranksum Test (Separate Ranking of Two Sets of 12
#      Intensity Ratios)
#####
#        m1  m2  m3  m4  m5  m6  m1  m2  m3  m4  m5  m6
#repeat1 X11 X21 X31 X41 X51 X61 Y11 Y21 Y31 Y41 Y51 Y61
#repeat2 X12 X22 X32 X42 X52 X62 Y12 Y22 Y32 Y42 Y52 Y62
#
# We rank Xi1, Yj1, i=1,...,6, j=1,...,6 together (12 ranks)
# e.g. rank(X11) is the rank among X11, X21, X31,X41,X51, X61, Y11, Y21,
# Y31, Y41, Y51, Y61.
#
# We rank Xi2, Yi2, i=1,...,6, j=1,...,6 together (12 ranks)
# e.g. rank(X12) is the rank among X12, X22, X32,X42,X52, X62, Y12, Y22,
# Y32, Y42, Y52, Y62.
#
# For each pair (X11, Y11), (X21, Y21), (X31, Y31), (X41, Y41), (X51, Y51),
# (X61, Y61), (X12, Y12), (X22, Y22), (X32, Y32), (X42, Y42), (X52, Y52),
# (X62, Y62), we have two possible permutations.
# e.g.
# Permuation1: X11 is the score from treatment A, 
#              Y11 is the score from treatment B
# Permuation2: Y11 is the score from treatment A, 
#              X11 is the score from treatment B
# There are a total of 2^12=4096 possible permutations.
#
# cluster on mouse-cy combination, separately ranking cy3 and cy5 values
# 'data2[,1:12]' is the first 12 columns of the data frame 'data2'
# 'data2[,13:24]' is a matrix consists of the columns 13, 14,...,24 of the
# data frame 'data2'
#
# 'data3' is a 1324 by 24 matrix. rows correspond to genes.
# Elements of 'data3' are ranks.
# e.g. 'data3[1,2]' is the rank of X21 for gene 1 among the 12 scores 
# X11, X21, X31, X41, X51, X61, Y11, Y21, Y31, Y41, Y51, Y61.
# 'data[3, 14]' is the rank of X12 for gene 3 among the 12 genes
# X12, X22, X32, X42, X52, X62, Y12, Y22, Y32, Y42, Y52, Y62.
#
# The command 'apply(data2[,1:12], 1, rank)' means that we rank elements of
# each row of the matrix 'data2[,1:12]'.
# The function 't(A)' means we get the transpose of the matrix 'A'.
# The command 'apply(data2[,1:12], 1, rank)' will produce 12 by 1324 matrix
# instead of 1324 by 12 matrix.

data3 <- cbind(t(apply(data2[,1:12],1,rank)),t(apply(data2[,13:24],1,rank)))

dimnames(data3) <- dimnames(data2)

# 'cluster' is specified, 'stratify=FALSE', 'permute.cluster=FALSE'
# statistics are computed as in the independence case, but columns are only
# permuted within clusters, since under the null hypothesis it is assumed that
# columns from the same cluster are exchangeable, while columns from
# different clusters may not be.

res2.RS2 <- permax(data=data3,ig1=grp,nperm=0,min.np=0,logs=FALSE,
                   ranks=TRUE, cluster=clust12,stratify=FALSE,
                   signed.rank=FALSE,nl=33,nr=18, expord=TRUE,
                   permute.cluster=FALSE)

summary(object=res2.RS2,nl=33,nr=18)