Description
Simultaneous Clustering and Factorial Decomposition of Three-Way Datasets.
Description
Implements two iterative techniques called T3Clus and 3FKMeans, aimed at simultaneously clustering objects and performing a factorial dimensionality reduction of variables and occasions on three-mode datasets, developed by Vichi et al. (2007) <doi:10.1007/s00357-007-0006-x>. Also, we provide a convex combination of these two simultaneous procedures called CT3Clus, based on a hyperparameter alpha (alpha in [0,1], with 3FKMeans for alpha=0 and T3Clus for alpha=1), also developed by Vichi et al. (2007) <doi:10.1007/s00357-007-0006-x>. Furthermore, we implement the traditional tandem procedures of T3Clus (TWCFTA) and 3FKMeans (TWFCTA) for sequential clustering followed by factorial decomposition (TWCFTA), and vice-versa (TWFCTA), proposed by P. Arabie and L. Hubert (1996) <doi:10.1007/978-3-642-79999-0_1>.
README.md
simuclustfactor
The goal of simuclustfactor is to perform:
- tandem clustering and factor-decomposition procedures sequentially (TWCFTA and TWFCTA).
- simultaneous clustering and factor-decomposition procedures (T3Clus and 3FKMeans).
- a convex combination of the T3Clus and 3FKMeans procedures (CT3Clus).
Installation
You can install the development version of simuclustfactor from GitHub with:
# install.packages("devtools")
devtools::install_github("prablordeppey/simuclustfactor-r")
Implementations
Synthetic Dataset Generation (Additive noise)
library(simuclustfactor)
# Defining tensor dimensions in full and reduced spaces.
I=8; J=5; K=4 # number of objects, variables and occasions respectively.
G=3; Q=3; R=2 # number of clusters, factors for variable and factors for occasion respectively.
data = generate_dataset(I, J, K, G, Q, R, mean=0, stdev=0.5, random_state=0) # generate synthetic dataset with noise level 0.5.
# Extracting the data
Y_g_qr = data$Y_g_qr # centroids matrix in the reduced space.
Z_i_jk = data$Z_i_jk # score/centroid matrix in the full-space.
X_i_jk = data$X_i_jk # dataset with noise.
# Ground-truth associations
U_i_g = data$U_i_g # binary stochastic membership matrix
B_j_q = data$B_j_q # variables factor matrix
C_k_r = data$C_k_r # occasions factor matrix
# Folding generated data matrices into tensors
X_i_j_k = fold(X_i_jk, mode=1, shape=c(I,J,K))
Z_i_j_k = fold(Z_i_jk, mode=1, shape=c(I,J,K))
Y_g_q_r = fold(Y_g_qr, mode=1, shape=c(G,Q,R))
Tandem Models
# Initialize the tandem model
tandem_model = tandem(random_state=NULL, verbose=TRUE, init='svd', n_max_iter=10, n_loops=10, tol=1e-5, U_i_g=NULL, B_j_q=NULL, C_k_r=NULL)
TWCFTA
twcfta = fit.twcfta(tandem_model, X_i_jk, full_tensor_shape=c(I,J,K), reduced_tensor_shape=c(G,Q,R))
TWFCTA
twfcta = fit.twfcta(tandem_model, X_i_jk, full_tensor_shape=c(I,J,K), reduced_tensor_shape=c(G,Q,R))
# The following attributes are accessible for the tandem models via the '@' operator
twfcta@U_i_g0 # initial membership matrix
twfcta@B_j_q0 # initial variable-component matrix
twfcta@C_k_r0 # initial occasion-component matrix
twfcta@U_i_g # final membership matrix
twfcta@B_j_q # final variable-component matrix
twfcta@C_k_r # final occasion-component matrix
twfcta@Y_g_qr # The centroids in the reduced space (data matrix).
twfcta@X_i_jk_scaled # Standardized data matrix.
twfcta@BestTimeElapsed # Execution time for the best iterate.
twfcta@BestLoop # Loop that obtained the best iterate.
twfcta@BestKmIteration # Number of iterations until best iterate for the K-means.
twfcta@BestFaIteration # Number of iterations until best iterate for the FA.
twfcta@FaConverged # Flag to check if algorithm converged for the Factor decomposition.
twfcta@KmConverged # Flag to check if algorithm converged for the K-means.
twfcta@nKmConverges # Number of loops that converged for the K-means.
twfcta@nFaConverges # Number of loops that converged for the Factor decomposition.
twfcta@TSS_full # Total deviance in the full-space.
twfcta@BSS_full # Between deviance in the full-space.
twfcta@RSS_full # Residual deviance in the full-space.
twfcta@TSS_reduced # Total deviance in the reduced-space.
twfcta@BSS_reduced # Between deviance in the reduced-space.
twfcta@RSS_reduced # Residual deviance in the reduced-space.
twfcta@PF_full # PseudoF computed in the full-space.
twfcta@PF_reduced # PseudoF computed in the reduced-space.
twfcta@PF # Actual PseudoF score used to obtain the best loop: PF_reduced for twfcta and PF_full for twcfta.
twfcta@Labels # Object cluster assignments.
twfcta@FsKM # Objective function values for the KM best iterate.
twfcta@FsFA # Objective function values for the FA best iterate.
twfcta@Enorm # Average L2 norm of the residuals.
Simultaneous Models
# Initialize the model
simultaneous_model = simultaneous(random_state=NULL, verbose=TRUE, init='svd', n_max_iter=10, n_loops=10, tol=1e-5, U_i_g=NULL, B_j_q=NULL, C_k_r=NULL)
T3Clus & 3FKMeans
t3clus = fit.t3clus(simultaneous_model, X_i_jk, full_tensor_shape=c(I,J,K), reduced_tensor_shape=c(G,Q,R))
tfkmeans = fit.3fkmeans(simultaneous_model, X_i_jk, full_tensor_shape=c(I,J,K), reduced_tensor_shape=c(G,Q,R))
CT3Clus
t3clus = fit.ct3clus(simultaneous_model, X_i_jk, full_tensor_shape=c(I,J,K), reduced_tensor_shape=c(G,Q,R), alpha=1)
ct3clus = fit.ct3clus(simultaneous_model, X_i_jk, full_tensor_shape=c(I,J,K), reduced_tensor_shape=c(G,Q,R), alpha=0.5)
tfkmeans = fit.ct3clus(simultaneous_model, X_i_jk, full_tensor_shape=c(I,J,K), reduced_tensor_shape=c(G,Q,R), alpha=0)
# The following attributes are accessible for the simultaneous models via the '@' operator
ct3clus@U_i_g0 # initial membership matrix.
ct3clus@B_j_q0 # initial variable-component matrix.
ct3clus@C_k_r0 # initial occasion-component matrix.
ct3clus@U_i_g # final membership matrix.
ct3clus@B_j_q # final variable-component matrix.
ct3clus@C_k_r # final occasion-component matrix.
ct3clus@Y_g_qr # Centroids in the reduced space (data matrix).
ct3clus@X_i_jk_scaled # Standardized data matrix.
ct3clus@BestTimeElapsed # Execution time for the best iterate.
ct3clus@BestLoop # Loop that obtained the best iterate.
ct3clus@BestIteration # Number of iterations until best iterate found.
ct3clus@Converged # Flag to check if the algorithm converged.
ct3clus@nConverges # Number of loops that converged.
ct3clus@TSS_full # Total deviance in the full-space.
ct3clus@BSS_full # Between deviance in the full-space.
ct3clus@RSS_full # Residual deviance in the full-space.
ct3clus@TSS_reduced # Total deviance in the reduced-space.
ct3clus@BSS_reduced # Between deviance in the reduced-space.
ct3clus@RSS_reduced # Residual deviance in the reduced-space.
ct3clus@PF_full # PseudoF computed in the full-space.
ct3clus@PF_reduced # PseudoF computed in the reduced-space.
ct3clus@PF # Weighted PseudoF score used to obtain the best loop.
ct3clus@Labels # Object cluster assignments.
ct3clus@Fs # Objective function values for the best iterate.
ct3clus@Enorm # Average L2 norm of the residuals.