# README and TEST PROGRAMS FOR USING THE LEAST-RECTANGLE DISTANCE FOR
# EVALUATING THE SIMILARITY BETWEEN PROFILES.
#
# When using this function, please cite:
# Rooman M., Albert J., Dehouck Y., Haye A., Detection of perturbation phases
# and developmental stages in organisms from DNA microarray time series
# data. PlosOne (2011) in press.
#
# To use the least-rectangle distance,
# first install R (http://www.r-project.org/) and
# check that the GNU C compiler is installed. On MacOSX, you therefore
# need to install XCODE (http://developer.apple.com/xcode/) and
# on Windows, RTools (http://www.murdoch-sutherland.com/Rtools/).
#
# Then compile the C-code of the least-rectangle (LR) distance
# contained in the file LR_distance.c. There are two possibilities:
# Possibility 1, outside R, type the command:
#   R CMD SHLIB -o LR_distance.so LR_distance.c
# Possibility 2, in R, type:
#   system("R CMD SHLIB -o LR_distance.so LR_distance.c");

# In R, you can now load the LR_distance function:
dyn.load("LR_distance.so")

# Least-rectangle distance between two profiles.
#
# Arguments:
#   x, y  Profiles to compare; each is coerced with as.double() before being
#         handed to the compiled routine, and its length is passed alongside.
#   n, z  Not used by this wrapper. They are kept only so that the signature
#         stays as published; since they are never evaluated, calling
#         LR_distance(x, y) with two arguments works.
#
# Value:
#   The fifth element of the list returned by .C(), i.e. the slot that is
#   initialised to 0.0 here (presumably overwritten by the C routine with the
#   distance -- confirm against LR_distance.c). Example 1 below treats a value
#   of -1.0 as "distance could not be computed".
LR_distance <- function(x, y, n, z) {
  native_out <- .C(
    "LR_distance",
    as.double(x),
    as.double(y),
    as.integer(length(x)),
    as.integer(length(y)),
    as.double(0.0)
  )
  native_out[[5]]
}

# Here are 3 small examples of R-programs that use the least-rectangle distance.

# Example 1: computing the least-rectangle distance between two
# profiles p1 and p2. The result is given in the variable LR_dis1.
p1 <- c(6.0, 9.8, 3.1, 4.0)
p2 <- c(8.8, 10.5, 9.0, 7.0)
LR_dis1 <- LR_distance(p1, p2)
if (LR_dis1 == (-1.0)) {
  cat("Error, I was unable to compute the distance\n")
} else {
  cat("\nLR_distance between profiles p1 and p2=", LR_dis1, "\n")
}
# Expected result: LR_dis1=1.282238

# Example 2: loading 20 profiles of 12 time points from the file
# "profiles.dat", and computing the least-rectangle distance between
# every pair of profiles. Results are in the array LR_dis2.
data <- read.table("profiles.dat")
# One row per profile, one column per time point.
Nprof <- nrow(data)
Ntime <- ncol(data)

# Full Nprof x Nprof matrix of pairwise distances (both [i, j] and [j, i]
# are computed explicitly, as is the diagonal).
LR_dis2 <- array(0.0, dim = c(Nprof, Nprof))
for (i in seq_len(Nprof)) {
  for (j in seq_len(Nprof)) {
    LR_dis2[i, j] <- LR_distance(data[i, ], data[j, ])
  }
}

cat("\nLR_distance between the profiles contained in the file profiles.dat\n")
cat("Subset of the results:\n")
# Show at most the first 6 entries of row 1; capped at Nprof so that a
# smaller input file does not cause an out-of-bounds subscript.
for (i in seq_len(min(6, Nprof))) {
  cat("profile 1 - profile ", i, ":", LR_dis2[1, i], "\n")
}
# Subset of expected results: LR_dis2[1,1]=0.00000000; LR_dis2[1,2]=0.45744210;
# LR_dis2[1,3]=0.034370653; LR_dis2[1,4]=0.178928584;
# LR_dis2[1,5]=0.44737025; LR_dis2[1,6]=0.46115618

# Example 3: loading 20 profiles of 12 time points from the file
# "profiles.dat", and computing the average least-rectangle distance
# between all pairs of subprofiles containing 4 (Nseg=4) consecutive
# time points. Results are in the vector LR_dis3. A plot of LR_dis3 as
# a function of the time points is also given.
Nseg <- 4
data <- read.table("profiles.dat")
Nprof <- nrow(data)
Ntime <- ncol(data)

# A segment needs at least one time point and cannot be longer than a profile.
stopifnot(Nseg >= 1, Nseg <= Ntime)

# Number of windows of Nseg consecutive time points. Sizing LR_dis3 from Nseg
# (rather than a fixed Ntime - 3) keeps the result vector and the loops below
# in agreement when Nseg is changed.
Nwin <- Ntime - Nseg + 1
LR_dis3 <- array(0.0, dim = c(Nwin))

for (first in seq_len(Nwin)) {
  last <- first + Nseg - 1
  pair_sum <- 0.0
  # Visit each unordered pair of profiles (i < j) exactly once.
  for (i in seq_len(Nprof - 1)) {
    for (j in (i + 1):Nprof) {
      pair_sum <- pair_sum +
        LR_distance(data[i, first:last], data[j, first:last])
    }
  }
  # Divide by the number of unordered pairs, Nprof * (Nprof - 1) / 2.
  LR_dis3[first] <- pair_sum * 2.0 / (Nprof * (Nprof - 1.0))
}

cat(
  "\nAverage LR_distance between segments of profiles, containing ", Nseg,
  " consecutive time points.\n",
  "The profiles are read from the file profiles.dat\n",
  sep = ""
)
for (first in seq_len(Nwin)) {
  cat("profile segment", first, "-", first + Nseg - 1, ":", LR_dis3[first], "\n")
}
# Auto-prints the vector when the example is pasted at the console; it is
# silent under source() with default settings.
LR_dis3
plot_title <- paste("Profile segment length = ", Nseg)
plot(
  LR_dis3,
  type = "b",
  xlab = "Time points",
  ylab = "Average least rectangle distance",
  main = plot_title
)
# Expected results: LR_dis3[1]=0.01758350; LR_dis3[2]=0.05184719;
# LR_dis3[3]=0.04808730; LR_dis3[4]=0.07048895; LR_dis3[5]=0.10838896;
# LR_dis3[6]=0.08694900; LR_dis3[7]=0.13132768; LR_dis3[8]=0.13806445;
# LR_dis3[9]=0.16128991

# Note that you can run these 3 example programs simply by typing the
# command source("README_and_TESTPROGRAMS") in R.
##############################################################################