
package sentimentanalysis;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.TreeSet;

public class Clustering {

    public static void main(String[] args) throws IOException {
        int k = 5;          // number of clusters
        int minFreq = 3;    // minimum term frequency kept after thresholding
        boolean zscore = false;
        boolean minmax = true;
        int n = 5;          // precision parameter passed to the normalizers

        CSV csv = new CSV();
        LinkedHashMap<Integer, double[]> features = csv.readFeaturesFromCSV("features.csv");
        LinkedHashMap<Integer, ArrayList<String>> tweets = csv.readTweetsFromCSV("tweets.csv");
        LinkedHashMap<String, Integer> freq = csv.readFreqFromCSV("freq.csv");

        System.out.println("All Terms = " + features.get(1).length);

        // Find the largest feature value across all vectors.
        double m = features.get(1)[0];
        for (int key : features.keySet()) {
            for (double d : features.get(key)) {
                if (d > m) {
                    m = d;
                }
            }
        }
        System.out.println(m);

        // Keep only the indices of terms whose frequency reaches minFreq.
        ArrayList<String> terms = new ArrayList<>(freq.keySet());
        ArrayList<Integer> index = new ArrayList<>();
        for (String key : freq.keySet()) {
            if (freq.get(key) >= minFreq) {
                index.add(terms.indexOf(key));
            }
        }

        // Rebuild every feature vector restricted to the kept term indices.
        LinkedHashMap<Integer, double[]> temp = new LinkedHashMap<>(features);
        features.clear();
        for (int key : temp.keySet()) {
            double[] d = new double[index.size()];
            int i = 0;
            for (int ind : index) {
                d[i] = temp.get(key)[ind];
                i++;
            }
            features.put(key, d);
        }

        System.out.println();
        System.out.println("Terms after tf thresholding = " + features.get(1).length);
        System.out.println();

        Normalization norm = new Normalization();
        if (zscore) {
            System.out.print("ZScore : ");
            features = norm.ZScore(features, n);
            System.out.println("done");
            for (int key : features.keySet()) {
                System.out.println("\t" + key + " | " + Arrays.toString(features.get(key)));
            }
            System.out.println();
        }
        if (minmax) {
            System.out.print("MinMax : ");
            features = norm.MinMax(features, n);
            System.out.println("done");
            for (int key : features.keySet()) {
                System.out.println("\t" + key + " | " + Arrays.toString(features.get(key)));
            }
            System.out.println();
        }

        System.out.print("Variance Initial Centroid : ");
        VarianceInit vi = new VarianceInit(features, k);
        LinkedHashMap<Integer, double[]> initCentroid = vi.getInitialCentroid();
        System.out.println("done");
        for (int key : initCentroid.keySet()) {
            System.out.println("\t" + key + " | " + Arrays.toString(initCentroid.get(key)));
        }
        System.out.println();

        System.out.print("Clustering : ");
        KmeansClustering kc = new KmeansClustering();
        LinkedHashMap<double[], TreeSet<Integer>> kmeans = kc.kmeansClustering(features, initCentroid, k);
        System.out.println("done");
        for (double[] key : kmeans.keySet()) {
            System.out.println("\t" + Arrays.toString(key));
        }
        System.out.println();

        int c = 1;
        for (double[] idCent : kmeans.keySet()) {
            System.out.println("\tCluster " + c + " | " + kmeans.get(idCent));
            System.out.println("\tMembers = " + kmeans.get(idCent).size());
            System.out.println();
            c++;
        }
        System.out.println();

        int[] pred = kc.getPredCluster();

        // Actual labels: 1 cinta (love), 2 marah (anger), 3 sedih (sadness),
        // 4 senang (joy), 5 takut (fear); 200 tweets per class.
        int[] act = new int[features.size()];
        for (int i = 1; i <= 200; i++) {
            act[i - 1] = 1;
        }
        for (int i = 201; i <= 400; i++) {
            act[i - 1] = 2;
        }
        for (int i = 401; i <= 600; i++) {
            act[i - 1] = 3;
        }
        for (int i = 601; i <= 800; i++) {
            act[i - 1] = 4;
        }
        for (int i = 801; i <= 1000; i++) {
            act[i - 1] = 5;
        }
        for (int i = 1; i <= features.size(); i++) {
            System.out.println("\t" + i + " " + tweets.get(i) + " pred : " + pred[i - 1] + " act : " + act[i - 1]);
        }
        System.out.println();

        new ConfusionMatrix().getConfusionMatrix(pred, act, k);
        System.out.println();

        // Write the final centroids to centroid.csv.
        int i = 1;
        LinkedHashMap<Integer, double[]> outputCent = new LinkedHashMap<>();
        for (double[] idCent : kmeans.keySet()) {
            outputCent.put(i, idCent);
            i++;
        }
        for (int idCent : outputCent.keySet()) {
            System.out.println(Arrays.toString(outputCent.get(idCent)));
        }
        csv.writeFeaturesToCSV(outputCent, "centroid.csv");
    }
}
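With minmax = true, the driver above calls norm.MinMax(features, n) to rescale every column of the term-document matrix into [0, 1] before clustering. The author's Normalization.java is presumably listed elsewhere in this appendix; the sketch below is only a minimal illustration of per-column min-max scaling, and its reading of n as a rounding precision (number of decimal places) is an assumption, not something the listing above confirms.

import java.util.Arrays;
import java.util.LinkedHashMap;

// Minimal sketch of per-column min-max scaling; NOT the author's
// Normalization.java. n is assumed here to be the rounding precision.
public class MinMaxSketch {

    public static LinkedHashMap<Integer, double[]> minMax(
            LinkedHashMap<Integer, double[]> features, int n) {
        int dim = features.values().iterator().next().length;
        double[] min = new double[dim];
        double[] max = new double[dim];
        Arrays.fill(min, Double.POSITIVE_INFINITY);
        Arrays.fill(max, Double.NEGATIVE_INFINITY);

        // First pass: find the minimum and maximum of every column.
        for (double[] row : features.values()) {
            for (int j = 0; j < dim; j++) {
                if (row[j] < min[j]) min[j] = row[j];
                if (row[j] > max[j]) max[j] = row[j];
            }
        }

        // Second pass: map each value to [0, 1], rounded to n decimals.
        double scale = Math.pow(10, n);
        LinkedHashMap<Integer, double[]> out = new LinkedHashMap<>();
        for (int key : features.keySet()) {
            double[] row = features.get(key);
            double[] d = new double[dim];
            for (int j = 0; j < dim; j++) {
                double range = max[j] - min[j];
                double v = range == 0 ? 0 : (row[j] - min[j]) / range;
                d[j] = Math.round(v * scale) / scale;
            }
            out.put(key, d);
        }
        return out;
    }
}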

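The VarianceInit(features, k) call seeds k-means deterministically rather than with random centroids. One common variance-based scheme from the clustering literature sorts the data points by the variance of their own feature values, splits the sorted list into k equal slices, and takes the middle point of each slice as an initial centroid. The sketch below implements that scheme as an illustrative stand-in; the author's VarianceInit.java may differ.

import java.util.ArrayList;
import java.util.LinkedHashMap;

// Illustrative variance-based k-means seeding, in the spirit of the
// VarianceInit(features, k) call above; NOT the author's implementation.
public class VarianceInitSketch {

    public static LinkedHashMap<Integer, double[]> initialCentroids(
            LinkedHashMap<Integer, double[]> features, int k) {
        // Sort the rows by the variance of their own feature values.
        ArrayList<double[]> rows = new ArrayList<>(features.values());
        rows.sort((a, b) -> Double.compare(variance(a), variance(b)));

        // Split the variance-sorted rows into k equal slices and take the
        // middle row of each slice as an initial centroid (assumes at
        // least k rows).
        LinkedHashMap<Integer, double[]> centroids = new LinkedHashMap<>();
        int sliceSize = rows.size() / k;
        for (int c = 0; c < k; c++) {
            int mid = c * sliceSize + sliceSize / 2;
            centroids.put(c + 1, rows.get(mid));
        }
        return centroids;
    }

    private static double variance(double[] v) {
        double mean = 0;
        for (double x : v) mean += x;
        mean /= v.length;
        double var = 0;
        for (double x : v) var += (x - mean) * (x - mean);
        return var / v.length;
    }
}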
15. Test.java

package sentimentanalysis;

import java.io.FileNotFoundException;