# *********************************************************************************************************
    #  © 2014 www.jakemdrew.com All rights reserved. 
    #  This source code is licensed under The GNU General Public License (GPLv3):  
    #  http://opensource.org/licenses/gpl-3.0.html
    # *********************************************************************************************************

    # *********************************************************************************************************
    # RScriptRunner - Run R Programs From C#.
    # Created By - Jake Drew 
    # Version -    1.0, 06/23/2014
    # *********************************************************************************************************

	
	# Read the positional arguments passed in from C#:
	#   args[1] - working directory (the input matrix is read relative to it)
	#   args[2] - path of the csv file the clustering result is written to
	#   args[3] - dendrogram cut height used by cutree()
	args <- commandArgs(trailingOnly = TRUE)
	if (length(args) < 3) {
		stop("Expected 3 arguments: <workingDirPath> <clusteringOutput> <cutHeight>",
			call. = FALSE)
	}
	workingDirPath <- args[1]
	clusteringOutput <- args[2]

	# Command line arguments always arrive as character strings.  cutree() compares
	# h against the merge heights, so a character h would be compared as text
	# instead of numerically; convert it once here and fail loudly if it is not a number.
	cutHeight <- suppressWarnings(as.numeric(args[3]))
	if (is.na(cutHeight)) {
		stop("cutHeight must be numeric, got: ", args[3], call. = FALSE)
	}

	setwd(workingDirPath)

	# Make sure a package is installed, installing it on demand.
	#
	# packageName: name of the package to look for (a single character string).
	# Returns TRUE when the package was missing and an install was attempted,
	# FALSE when it was already installed.
	checkPackage <- function(packageName) {
		# Match against package names only.  The full installed.packages() matrix
		# also holds versions, priorities, licenses, etc., which could match by accident.
		install <- !(packageName %in% installed.packages()[, "Package"])
		if (install) {
			# A non-interactive session may have no CRAN mirror selected ("@CRAN@"),
			# in which case install.packages() cannot proceed; fall back to the
			# CRAN cloud mirror while keeping any other configured repositories.
			repos <- getOption("repos")
			if (is.null(repos)) repos <- c(CRAN = "@CRAN@")
			repos[repos == "@CRAN@"] <- "https://cloud.r-project.org"
			install.packages(packageName, repos = repos)
		}
		install
	}

	# Install any missing dependency first ...
	checkPackage(packageName = "cluster")
	checkPackage(packageName = "dynamicTreeCut")
	checkPackage(packageName = "mclust")

	# ... then attach all three packages to the search path.
	library("cluster")
	library("dynamicTreeCut")
	library("mclust")
							
	#*************************************Code starts below*********************************

	# File name of the distance matrix csv; it is resolved against the current working directory.
	inputMatrixPath  <- "ImageDistanceMatrix.csv"
	if (!file.exists(inputMatrixPath)) {
		stop("Input distance matrix not found: ", inputMatrixPath, call. = FALSE)
	}

	# Read the csv: the header row supplies the column names and the first column the row names.
	# MUST!!! - convert to dist first before doing any subsetting or sorts
	distMatrix <- read.csv(inputMatrixPath, sep = ",", header = TRUE, row.names = 1)
	distMatrix <- as.matrix(distMatrix)

	# as.dist() only warns on a non-square matrix and would then produce a
	# malformed dist object, so reject that input explicitly.
	if (nrow(distMatrix) != ncol(distMatrix)) {
		stop("Distance matrix must be square, got ", nrow(distMatrix), " x ",
			ncol(distMatrix), call. = FALSE)
	}

	# Convert the input to a distance matrix of class "dist"
	distMatrix <- as.dist(distMatrix)

	# Cluster the matrix data (average linkage) and create the dendrogram
	jclust <- hclust(distMatrix, method = "average")

	#***************************Note******************************
	# The cut height dictates how strict the algorithm is on putting images in the same cluster.
	#
	#   If the cut height (measure of dissimilarity) is set to .99 all images would be put in the 
	#   same cluster.
	#
	#   If the cut height (measure of dissimilarity) is set to .1 almost all items would be placed in 
	#   clusters of 1 (singletons).  

	#use dynamic cut package to find the best cutHeight	 below a certain threshold.
	#cuts<-cutreeDynamic(jclust, cutHeight= cutHeight,						
	#        minClusterSize = 2, method = "tree", deepSplit = TRUE);							

	# Set your own cut height.
	# cutHeight originates from the command line, so make sure it is numeric:
	# cutree() compares h with the merge heights, and a character h would be
	# compared as text rather than as a number.
	cuts <- cutree(jclust, h = as.numeric(cutHeight))

	# Create the output table: one row per label with its cluster number.
	# The columns are supplied separately instead of through cbind(): binding labels
	# and cluster numbers into one matrix turns the numbers into text, and on R < 4.0
	# into a factor whose integer codes are not the original cluster numbers.
	cutsOut <- data.frame(
		WebSite = jclust$labels,
		Cluster = as.integer(cuts),
		stringsAsFactors = FALSE
	)

	# Sort the rows by cluster number
	cutsOut <- cutsOut[order(cutsOut$Cluster), ]

	# Write clustering output to a csv file.  Column names are removed since the data is read back into C#
	write.table(cutsOut, file = clusteringOutput, row.names = FALSE, col.names = FALSE,
		sep = ",", quote = FALSE)