Classify samples from multiple centroids

classify_multiple(prob_matrix, centroid_list, distancetype = "pearson")

Arguments

prob_matrix

a matrix or data.frame. Must be an expression matrix with features in rows and samples in columns

centroid_list

a list with the centroid matrix for each of the signatures to evaluate, where each column represents the prototypic centroid of a subtype and each row represents one of the constituent features of the solution signature. The output of create_centroids can be used.

distancetype

a character string specifying the distance type: one of 'pearson' (default), 'spearman' or 'kendall'.

Value

Returns a data.frame with the classes assigned to each sample in each signature, where samples are in rows and signatures are in columns.

Examples

# load example dataset
library(breastCancerTRANSBIG)
data(transbig)
Train <- transbig
rm(transbig)
#> Warning: object 'transbig' not found
expression <- Biobase::exprs(Train)
clinical <- Biobase::pData(Train)
OS <- survival::Surv(time = clinical$t.rfs, event = clinical$e.rfs)
# We will use a reduced dataset for the example
expression <- expression[sample(1:nrow(expression), 100), ]
# Now we scale the expression matrix
expression <- t(scale(t(expression)))
# Run galgo
output <- GSgalgoR::galgo(generations = 5, population = 15, prob_matrix = expression, OS = OS)
#> Using CPU for computing pearson distance
#> Generation 1 Non-dominated solutions:
#> k rnkIndex CrowD
#> result.4 2 0.10362162 136.4151 1 Inf
#> result.6 2 0.07386832 174.0976 1 0.8211496
#> result.9 7 0.01764573 293.7798 1 Inf
#> result.14 4 0.05345035 232.2309 1 0.9647134
#> Generation 2 Non-dominated solutions:
#> k rnkIndex CrowD
#> result.4 2 0.10362162 136.4151 1 Inf
#> result.9 7 0.01764573 293.7798 1 Inf
#> result.14 4 0.05345035 232.2309 1 0.9490675
#> result.6 2 0.07386832 174.0976 1 0.8086237
#> Generation 3 Non-dominated solutions:
#> k rnkIndex CrowD
#> result.4 2 0.103621624 136.4151 1 Inf
#> 10 0.007790104 387.2679 1 Inf
#> result.14 4 0.053450345 232.2309 1 0.8445360
#> result.9 7 0.017645730 293.7798 1 0.8369923
#> result.6 2 0.073868317 174.0976 1 0.6607029
#> 2 0.101779453 154.3307 1 0.3799069
#> Generation 4 Non-dominated solutions:
#> k rnkIndex CrowD
#> result.4 2 0.103621624 136.4151 1 Inf
#> 10 0.007790104 387.2679 1 Inf
#> result.9 7 0.017645730 293.7798 1 0.7816337
#> result.14 4 0.053450345 232.2309 1 0.7699475
#> result.6 2 0.073868317 174.0976 1 0.5943333
#> 2 0.101779453 154.3307 1 0.3381203
#> Generation 5 Non-dominated solutions:
#> k rnkIndex CrowD
#> 10 0.007790104 387.26791 1 Inf
#> 2 0.130045088 16.56581 1 Inf
#> result.9 7 0.017645730 293.77976 1 0.7321699
#> result.14 4 0.053450345 232.23085 1 0.5334699
#> 2 0.111608101 61.45356 1 0.5020567
#> result.6 2 0.073868317 174.09756 1 0.4353792
#> 2 0.101779453 154.33072 1 0.3123762
#> result.4 2 0.103621624 136.41507 1 0.3123444
#> 4 0.061026347 208.83527 1 0.2982936
outputDF <- to_dataframe(output)
outputList <- to_list(output)
RESULTS <- non_dominated_summary(
  output = output,
  OS = OS,
  prob_matrix = expression,
  distancetype = "pearson"
)
#> Using CPU for computing pearson distance
CentroidsList <- create_centroids(output, RESULTS$solution, trainset = expression)
#> Using CPU for computing pearson distance
classes <- classify_multiple(prob_matrix = expression, centroid_list = CentroidsList)