Classify samples from multiple centroids

classify_multiple(prob_matrix, centroid_list, distancetype = "pearson")

Arguments

prob_matrix	a `matrix` or `data.frame`. Must be an expression matrix with features in rows and samples in columns
centroid_list	a `list` with the centroid matrix for each of the signatures to evaluate, where each column represents the prototypic centroid of a subtype and each row represents one of the constituent features of the solution signature. The output of `create_centroids` can be used.
distancetype	a `character` that must be one of `'pearson'` (default), `'spearman'` or `'kendall'`.

Value

Returns a data.frame with the classes assigned to each sample in each signature, where samples are in rows and signatures are in columns.

Examples

# Load the example dataset
library(breastCancerTRANSBIG)
data(transbig)
Train <- transbig
rm(transbig)
#> Warning: object 'transbig' not found

# Pull the expression matrix and the clinical annotation out of the dataset
expression <- Biobase::exprs(Train)
clinical <- Biobase::pData(Train)
# Survival object built from the t.rfs (time) and e.rfs (event) columns
OS <- survival::Surv(time = clinical$t.rfs, event = clinical$e.rfs)

# We will use a reduced dataset for the example: 100 randomly drawn rows.
# The draw is not seeded, so the selected rows differ between runs.
# seq_len() is used instead of 1:nrow() because 1:0 would yield c(1, 0).
expression <- expression[sample(seq_len(nrow(expression)), 100), ]

# Now we scale the expression matrix row-wise (scale() works on columns,
# hence the double transpose)
expression <- t(scale(t(expression)))

# Run galgo for 5 generations with a population of 15
output <- GSgalgoR::galgo(
    generations = 5,
    population = 15,
    prob_matrix = expression,
    OS = OS
)
#> Using CPU for computing pearson distance
#> Generation 1 Non-dominated solutions:
#>           k                     rnkIndex     CrowD
#> result.4  2 0.10362162 136.4151        1       Inf
#> result.6  2 0.07386832 174.0976        1 0.8211496
#> result.9  7 0.01764573 293.7798        1       Inf
#> result.14 4 0.05345035 232.2309        1 0.9647134
#> Generation 2 Non-dominated solutions:
#>           k                     rnkIndex     CrowD
#> result.4  2 0.10362162 136.4151        1       Inf
#> result.9  7 0.01764573 293.7798        1       Inf
#> result.14 4 0.05345035 232.2309        1 0.9490675
#> result.6  2 0.07386832 174.0976        1 0.8086237
#> Generation 3 Non-dominated solutions:
#>            k                      rnkIndex     CrowD
#> result.4   2 0.103621624 136.4151        1       Inf
#>           10 0.007790104 387.2679        1       Inf
#> result.14  4 0.053450345 232.2309        1 0.8445360
#> result.9   7 0.017645730 293.7798        1 0.8369923
#> result.6   2 0.073868317 174.0976        1 0.6607029
#>            2 0.101779453 154.3307        1 0.3799069
#> Generation 4 Non-dominated solutions:
#>            k                      rnkIndex     CrowD
#> result.4   2 0.103621624 136.4151        1       Inf
#>           10 0.007790104 387.2679        1       Inf
#> result.9   7 0.017645730 293.7798        1 0.7816337
#> result.14  4 0.053450345 232.2309        1 0.7699475
#> result.6   2 0.073868317 174.0976        1 0.5943333
#>            2 0.101779453 154.3307        1 0.3381203
#> Generation 5 Non-dominated solutions:
#>            k                       rnkIndex     CrowD
#>           10 0.007790104 387.26791        1       Inf
#>            2 0.130045088  16.56581        1       Inf
#> result.9   7 0.017645730 293.77976        1 0.7321699
#> result.14  4 0.053450345 232.23085        1 0.5334699
#>            2 0.111608101  61.45356        1 0.5020567
#> result.6   2 0.073868317 174.09756        1 0.4353792
#>            2 0.101779453 154.33072        1 0.3123762
#> result.4   2 0.103621624 136.41507        1 0.3123444
#>            4 0.061026347 208.83527        1 0.2982936
# Convert the galgo output with to_dataframe() and to_list()
outputDF <- to_dataframe(output)
outputList <- to_list(output)

# Summarise the non-dominated solutions using pearson distance
RESULTS <- non_dominated_summary(output = output, OS = OS,
    prob_matrix = expression, distancetype = "pearson")
#> Using CPU for computing pearson distance

# Build the centroid list for the solutions listed in RESULTS$solution
CentroidsList <- create_centroids(
    output,
    RESULTS$solution,
    trainset = expression
)
#> Using CPU for computing pearson distance

# Classify the samples against each centroid matrix in the list
classes <- classify_multiple(
    prob_matrix = expression,
    centroid_list = CentroidsList
)