# *********************************************************************************************************
# © 2014 www.jakemdrew.com All rights reserved.
# This source code is licensed under The GNU General Public License (GPLv3):
# http://opensource.org/licenses/gpl-3.0.html
# *********************************************************************************************************
# *********************************************************************************************************
# RScriptRunner - Run R Programs From C#.
# Created By - Jake Drew
# Version - 1.0, 06/23/2014
# *********************************************************************************************************
# Get the command line arguments passed in from C#:
#   1. workingDirPath   - directory to switch into before reading the input
#   2. clusteringOutput - path of the csv file the cluster assignments are written to
#   3. cutHeight        - dendrogram cut height (dissimilarity threshold)
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 3) {
  stop("Expected 3 arguments: <workingDirPath> <clusteringOutput> <cutHeight>",
       call. = FALSE)
}
workingDirPath <- args[1]
clusteringOutput <- args[2]
# commandArgs() always yields character strings. Convert the cut height to a
# number so that cutree() compares it with the merge heights numerically
# instead of as text.
cutHeight <- suppressWarnings(as.numeric(args[3]))
if (is.na(cutHeight)) {
  stop("cutHeight must be a number, got: ", args[3], call. = FALSE)
}
setwd(workingDirPath)
# Make sure a package is available, installing it when it is missing.
#
# packageName: name of the package to look for (character string).
# Returns TRUE when an install was attempted, FALSE when the package was
# already installed.
checkPackage <- function(packageName) {
  # installed.packages() returns a character matrix whose row names are the
  # package names. Match against those only; matching against the whole matrix
  # would also hit version strings, priorities, dependency fields, etc.
  install <- !packageName %in% rownames(installed.packages())
  if (install) {
    # Under non-interactive Rscript no CRAN mirror can be chosen interactively,
    # so fall back to the cloud mirror when none has been configured.
    repos <- getOption("repos")
    if (is.null(repos) || identical(unname(repos["CRAN"]), "@CRAN@")) {
      repos["CRAN"] <- "https://cloud.r-project.org"
    }
    install.packages(packageName, repos = repos)
  }
  install
}
# Install any required package that is missing. checkPackage() returns TRUE
# when it had to install the package and FALSE otherwise; the calls are kept as
# separate top-level expressions.
checkPackage("cluster")
checkPackage("dynamicTreeCut")
checkPackage("mclust")
# Attach the packages.
# NOTE(review): in the visible script dynamicTreeCut is only referenced by a
# commented-out cutreeDynamic() call, and no cluster/mclust function appears to
# be called directly - confirm whether all three are still needed.
library(cluster)
library(dynamicTreeCut)
library(mclust)
#*************************************Code starts below*********************************
# Input csv, resolved relative to the current working directory.
inputMatrixPath <- "ImageDistanceMatrix.csv"
# Read the file: the first row holds the column headers and the first column
# holds the row names, which become the labels of the clustered items.
# IMPORTANT: convert to class "dist" before doing any subsetting or sorting.
distMatrix <- read.csv(inputMatrixPath, sep = ",", header = TRUE, row.names = 1)
# Convert the whole table to a distance matrix of class "dist". All columns are
# used, so no column subsetting is needed (and none can drop the row labels).
distMatrix <- as.dist(as.matrix(distMatrix))
# Cluster the distances with average linkage and build the dendrogram.
jclust <- hclust(distMatrix, method = "average")
#***************************Note******************************
# The cut height dictates how strict the algorithm is about putting images in the same cluster.
#
# If the cut height (measure of dissimilarity) is set to .99, all images would be put in the
# same cluster.
#
# If the cut height (measure of dissimilarity) is set to .1, almost all items would be placed in
# clusters of 1 (singletons).
# Alternative: use the dynamicTreeCut package to find the best cut height below a certain threshold.
#cuts<-cutreeDynamic(jclust, cutHeight= cutHeight,
# minClusterSize = 2, method = "tree", deepSplit = TRUE);
# Cut the dendrogram at the height supplied by the caller.
cuts <- cutree(jclust, h = cutHeight)
# Create the output table from typed columns. Building it through cbind() would
# coerce both columns to character, and where strings are converted to factors
# as.integer() would then return factor level codes instead of the cluster ids.
cutsOut <- data.frame(
  WebSite = jclust$labels,
  Cluster = as.integer(cuts),
  stringsAsFactors = FALSE
)
# Sort the rows by cluster number.
cutsOut <- cutsOut[order(cutsOut$Cluster), ]
# Write the clustering output to a csv file. Column names are omitted since the
# data is read back into C#.
write.table(cutsOut, file = clusteringOutput, row.names = FALSE,
            col.names = FALSE, sep = ",", quote = FALSE)