Commit 42c6c32f authored by Bertjan Broeksema's avatar Bertjan Broeksema
Browse files

Add the possibility to include clustering results from external tools.

parent 04cf8430
...@@ -2,6 +2,7 @@ PrepareDataForInteractiveBinning <- function(dataset.name, ...@@ -2,6 +2,7 @@ PrepareDataForInteractiveBinning <- function(dataset.name,
file.fasta, file.fasta,
file.abundance, file.abundance,
file.escg, file.escg,
file.clusterings = NULL,
dir.result) { dir.result) {
# Process a fasta file and an abundance file, in order to generate the two rda # Process a fasta file and an abundance file, in order to generate the two rda
# files which are required for the interactive contig binning system. # files which are required for the interactive contig binning system.
...@@ -46,12 +47,20 @@ PrepareDataForInteractiveBinning <- function(dataset.name, ...@@ -46,12 +47,20 @@ PrepareDataForInteractiveBinning <- function(dataset.name,
warning("Not all contig identifiers from the fasta and the abundance file are equal.") warning("Not all contig identifiers from the fasta and the abundance file are equal.")
return(FALSE) return(FALSE)
} }
data <- merge(data, abundance, by="CONTIG")
cluster.results <- NA
if (!is.null(file.clusterings)) {
cluster.results <- read.csv(file.clusterings)
names(cluster.results) <- toupper(names(cluster.results))
stopifnot("CONTIG" %in% names(cluster.results))
data <- merge(data, cluster.results, by="CONTIG")
}
# We're currently not using the fasta in the prototype so let's not add it to # We're currently not using the fasta in the prototype so let's not add it to
# the dataset for now. # the dataset for now.
#assign(paste(dataset.name, "fasta", sep="."), fasta) #assign(paste(dataset.name, "fasta", sep="."), fasta)
assign(paste(dataset.name, "escg", sep="."), ExtractESCG(file.escg)) assign(paste(dataset.name, "escg", sep="."), ExtractESCG(file.escg))
assign(dataset.name, merge(data, abundance, by="CONTIG")) assign(dataset.name, data)
nnucleotides <- dim(data.tnf)[2] nnucleotides <- dim(data.tnf)[2]
npentanucleotides <- dim(data.pnf)[2] npentanucleotides <- dim(data.pnf)[2]
...@@ -74,8 +83,24 @@ PrepareDataForInteractiveBinning <- function(dataset.name, ...@@ -74,8 +83,24 @@ PrepareDataForInteractiveBinning <- function(dataset.name,
rep("Frequencies", nnucleotides), rep("Frequencies", nnucleotides),
rep("Frequencies", npentanucleotides), rep("Frequencies", npentanucleotides),
rep("TimeSeries", nsamples)) rep("TimeSeries", nsamples))
assign(paste(dataset.name, "schema", sep="."),
data.frame(name = names(get(dataset.name)), type = type, group = group, group_type = group_type)) schema <- data.frame(name = names(get(dataset.name)),
type = type,
group = group,
group_type = group_type)
if (!is.na(cluster.results)) {
clusterMethods <- names(cluster.results)[2:length(names(cluster.results))]
schema <- schema[-c((nrow(schema) - length(clusterMethods) + 1):nrow(schema)), ]
c.df <- data.frame(name = clusterMethods,
type = rep("factor", length(clusterMethods)),
group = rep("Analytics", length(clusterMethods)),
group_type = rep("Clusterings", length(clusterMethods)))
schema <- rbind(schema, c.df)
}
assign(paste(dataset.name, "schema", sep="."), schema)
save(list = c(as.character(dataset.name)), save(list = c(as.character(dataset.name)),
file = file.path(dir.result, paste(dataset.name, ".rda", sep=""))) file = file.path(dir.result, paste(dataset.name, ".rda", sep="")))
......
...@@ -12,6 +12,7 @@ PrepareDataForInteractiveBinning( ...@@ -12,6 +12,7 @@ PrepareDataForInteractiveBinning(
file.fasta = "data//cstr_assembled.fasta", file.fasta = "data//cstr_assembled.fasta",
file.abundance = "data//cstr_avg_coverage.csv", file.abundance = "data//cstr_avg_coverage.csv",
file.escg = "data/cstr_escg.csv", file.escg = "data/cstr_escg.csv",
file.clusterings = "data//cstr_metabat.csv",
dir.result = "R.ICoBiRe//data" dir.result = "R.ICoBiRe//data"
) )
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment