# *****************************************************************************
# © 2014 www.jakemdrew.com All rights reserved.
# This source code is licensed under The GNU General Public License (GPLv3):
# http://opensource.org/licenses/gpl-3.0.html
# *****************************************************************************

# *****************************************************************************
# RScriptRunner - Run R Programs From C#.
# Created By - Jake Drew
# Version - 1.0, 06/23/2014
# *****************************************************************************

# Purpose: read a pairwise distance matrix from "ImageDistanceMatrix.csv",
# cluster it hierarchically (average linkage), cut the tree at a caller
# supplied height and write one "label,cluster" row per item to a csv file.

# Get any command line arguments passed in from C#:
#   args[1] - working directory; the input csv is read relative to it
#   args[2] - path of the csv file the clustering result is written to
#   args[3] - cut height handed to cutree()
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 3L) {
  stop(
    "Expected 3 arguments: <workingDirPath> <clusteringOutput> <cutHeight>",
    call. = FALSE
  )
}

workingDirPath <- args[1]
clusteringOutput <- args[2]

# Command line arguments always arrive as character strings. cutree() compares
# the cut height against the numeric merge heights, so a string would be
# compared lexicographically and produce a wrong cut; convert it up front.
cutHeight <- suppressWarnings(as.numeric(args[3]))
if (is.na(cutHeight)) {
  stop("cutHeight must be numeric, got: ", args[3], call. = FALSE)
}

setwd(workingDirPath)

# Install `packageName` when it is not installed yet.
#
# packageName: name of the package to look for (character scalar).
# Returns TRUE when an installation was attempted, FALSE when the package was
# already present. The value is returned visibly, exactly like before, so
# top-level calls still print it.
checkPackage <- function(packageName) {
  # Match against the package names only (the row names); the full matrix also
  # holds versions, dependency lists, etc., which must not count as a hit.
  install <- !packageName %in% rownames(installed.packages())
  if (install) {
    # Rscript sessions are non-interactive, so R cannot prompt for a CRAN
    # mirror; supply one when none has been configured.
    repos <- getOption("repos")
    if (is.null(repos)) {
      repos <- c(CRAN = "https://cloud.r-project.org")
    } else {
      repos[repos %in% "@CRAN@"] <- "https://cloud.r-project.org"
    }
    install.packages(packageName, repos = repos)
  }
  install
}

checkPackage("cluster")
checkPackage("dynamicTreeCut")
checkPackage("mclust")

library(cluster)
library(dynamicTreeCut)
library(mclust)

# ************************** Code starts below ********************************

inputMatrixPath <- "ImageDistanceMatrix.csv"

# Read in the file and convert it to a dist matrix.
# MUST!!! - convert to dist first before doing any subsetting or sorts.
# The first csv column supplies the row names (the item labels).
distMatrix <- read.csv(inputMatrixPath, sep = ",", header = TRUE, row.names = 1)

# Convert the input to a distance matrix of class "dist".
distMatrix <- as.dist(as.matrix(distMatrix))

# Cluster the matrix data and create the dendrogram.
jclust <- hclust(distMatrix, method = "average")

# ******************************* Note ****************************************
# The cut height dictates how strict the algorithm is on putting images in the
# same cluster.
#
# If the cut height (measure of dissimilarity) is set to .99 all images would
# be put in the same cluster.
#
# If the cut height (measure of dissimilarity) is set to .1 almost all items
# would be placed in clusters of 1 (singletons).

# Use the dynamic cut package to find the best cutHeight below a certain
# threshold.
# cuts <- cutreeDynamic(jclust, cutHeight = cutHeight,
#                       minClusterSize = 2, method = "tree", deepSplit = TRUE)

# Set your own cut height.
cuts <- cutree(jclust, h = cutHeight)

# Create the output table. The columns are built directly with their final
# types: going through cbind() would turn the cluster ids into strings (and
# into factors on R < 4.0, where as.integer() yields level codes instead of
# the real cluster numbers).
cutsOut <- data.frame(
  WebSite = jclust$labels,
  Cluster = as.integer(cuts),
  stringsAsFactors = FALSE
)

# Sort the rows by cluster number.
cutsOut <- cutsOut[order(cutsOut$Cluster), ]

# Write clustering output to a csv file. Column names are removed since the
# data is read back into C#.
write.table(
  cutsOut,
  file = clusteringOutput,
  row.names = FALSE,
  col.names = FALSE,
  sep = ",",
  quote = FALSE
)