CSV.java — Analisis sentimen data Twitter menggunakan K-Means Clustering.

137 map = new LinkedHashMap; int lineNumber = 0; read comma separated file line by line while line = br.readLine = null { lineNumber++; use comma as token separator st = new StringTokenizerline, ,; sToken = new ArrayList; while st.hasMoreTokens { String token = st.nextToken; sToken.addtoken; } map.putsToken.get0, Integer.parseIntsToken.get1; } } catch Exception e { System.err.printlnCSV file cannot be read : + e; } return map; } }

11. KmeansClustering.java

package sentimentanalysis; import java.util.; public class KmeansClustering { 138 LinkedHashMapdouble[], TreeSetInteger clusters = new LinkedHashMap; LinkedHashMapdouble[], TreeSetInteger step = new LinkedHashMap; int[] predicted; public LinkedHashMap kmeansClusteringLinkedHashMapInteger, double[] feature, LinkedHashMapInteger, double[] centroidMap, int k { clusters.clear; step.clear; for int key : centroidMap.keySet { step.putcentroidMap.getkey, new TreeSetInteger; } boolean go = true; int iteration = 1; while go { clusters = new LinkedHashMapstep; predicted = new int[feature.size]; for Integer key : feature.keySet { double[] cent = null; double[] cosSim = new double[k]; double[] distances = new double[k]; double sim = -1; double dist = 9999; int i = 0; for double[] c : clusters.keySet { double csim = new Distance.cosineSimilarityfeature.getkey, c; cosSim[i] = csim; if csim sim { sim = csim; cent = c; 139 } i++; double distance = new Distance.euclideanDistancefeature.getkey, c; double distance = new Distance.manhattanDistancefeature.getkey, c; distances[i] = distance; if distance dist { dist = distance; cent = c; } i++; } clusters.getcent.addkey; int index = 0; for int j = 1; j cosSim.length; j++ { if cosSim[j] cosSim[index] { index = j; } } int index = 0; for int j = 1; j distances.length; j++ { if distances[j] distances[index] { index = j; } } predicted[key - 1] = index + 1; } step.clear; for double[] cent : clusters.keySet { double[] updatec = new double[cent.length]; for int d : clusters.getcent { 140 double[] doc = feature.getd; for int i = 0; i updatec.length; i++ { updatec[i] += doc[i]; } } for int i = 0; i updatec.length; i++ { updatec[i] = clusters.getcent.size; } step.putupdatec, new TreeSetInteger; } System.out.println; System.out.println\tIteration + iteration; for double[] key : clusters.keySet { System.out.println\t + clusters.getkey; } String oldCent = , newCent = ; for double[] x : clusters.keySet { oldCent += Arrays.toStringx; } for double[] x : 
step.keySet { newCent += Arrays.toStringx; } if oldCent.equalsnewCent { go = false; } iteration++; } return clusters; } 141 public int[] getPredCluster { return predicted; } }

12. ConfusionMatrix.java

package sentimentanalysis; public class ConfusionMatrix { public void getConfusionMatrixint[] pred, int[] act, int centroid { int[][] confMat = new int[centroid][centroid]; int[] row = new int[centroid]; int rightAnswers = 0; for int i = 0; i pred.length; i++ { confMat[act[i] - 1][pred[i] - 1]++; row[pred[i] - 1]++; } System.out.printlnConfusion Matrix : ; for int i = 0; i confMat.length; i++ { int[] x = confMat[i]; for int j = 0; j x.length; j++ { int d = x[j]; System.out.print\t + d + ; if i == j { 142 rightAnswers += d; } } System.out.println; } float accuration; int rows = 0; for int i = 0; i row.length; i++ { rows += row[i]; } System.out.println; accuration = float rightAnswers rows 100; System.out.printlnAccuration = + accuration + ; } }

13. PreprocessingWeighting.java

package sentimentanalysis; import java.io.IOException; import java.util.ArrayList; import java.util.LinkedHashMap; import sentimentanalysis.WordFreq.Word; public class PreprocessingWeighting { public static void mainString[] args throws IOException, Exception { TODO code application logic here int n = 200;