First, we check that every sample listed in the metadata has a matching
count file in the Toy_STARcounts directory
# List the STAR gene-count files in data/Toy_STARcounts/.
# `pattern` is a regular expression, so the dots are escaped and the match is
# anchored at the end of the name; other "*.out.tab" files are not picked up.
ReadCountsFiles <- list.files("data/Toy_STARcounts/", pattern = "ReadsPerGene\\.out\\.tab$")
# Strip the trailing "ReadsPerGene.out.tab" so that only the sample ID remains
ReadCountsFiles <- sub("ReadsPerGene\\.out\\.tab$", "", ReadCountsFiles)
# Check that every sample listed in the metadata has a count file:
metadata$SampleID %in% ReadCountsFiles
## [1] TRUE TRUE TRUE TRUE TRUE TRUE
# Test whether any sample is missing its count file (TRUE would mean a problem):
any(!(metadata$SampleID %in% ReadCountsFiles))
## [1] FALSE
Then we write our function to read the data for a given sample ID
# Read the per-gene counts of one sample from its "ReadsPerGene.out.tab" file.
#
# Arguments:
#   SampleID    Sample identifier; the file read is
#               <CountsDir>/<SampleID>ReadsPerGene.out.tab.
#   CountsDir   Directory holding the count files
#               (default: "data/Toy_STARcounts").
#   CountColumn Index of the column used as counts (default: 4).
#
# Returns the selected column as a vector named after the first column of the
# file. Stops with an explicit error when the file does not exist.
ReadCountFunction <- function(SampleID, CountsDir = "data/Toy_STARcounts",
                              CountColumn = 4) {
  CountFile <- file.path(CountsDir, paste0(SampleID, "ReadsPerGene.out.tab"))
  if (!file.exists(CountFile)) {
    stop("Count file not found for sample '", SampleID, "': ", CountFile,
         call. = FALSE)
  }
  # skip = 4 skips the first 4 rows of the file
  STARcounts <- read.table(CountFile, skip = 4)
  # Take the requested count column (column 4 by default)
  GeneCount <- STARcounts[[CountColumn]]
  # Name the vector of counts with the identifiers found in the first column
  names(GeneCount) <- STARcounts[[1]]
  GeneCount
}
We then apply our function to all sample IDs
# Call ReadCountFunction once per sample ID from the metadata; sapply()
# simplifies the per-sample results before they are stored in `counts`.
counts <- sapply(X = metadata$SampleID, FUN = ReadCountFunction)
We do indeed obtain a matrix of counts:
## [1] "matrix" "array"
## T6N2 T6N3 T6N4 T6S1 T6S3 T6S4
## ENSMUSG00000102693 0 0 0 0 0 0
## ENSMUSG00000064842 0 0 0 0 0 0
## ENSMUSG00000051951 1436 1389 2166 1434 1865 1514
## ENSMUSG00000102851 0 0 0 0 0 0
## ENSMUSG00000103377 0 5 1 2 0 0
## ENSMUSG00000104017 0 3 0 1 0 0