137
map = new LinkedHashMap; int lineNumber = 0;
read comma separated file line by line while line = br.readLine = null {
lineNumber++; use comma as token separator
st = new StringTokenizerline, ,; sToken = new ArrayList;
while st.hasMoreTokens { String token = st.nextToken;
sToken.addtoken; }
map.putsToken.get0, Integer.parseIntsToken.get1; }
} catch Exception e { System.err.printlnCSV file cannot be read : + e;
} return map;
} }
11. KmeansClustering.java
package sentimentanalysis;

import java.util.*;
public class KmeansClustering {
138
LinkedHashMapdouble[], TreeSetInteger clusters = new LinkedHashMap; LinkedHashMapdouble[], TreeSetInteger step = new LinkedHashMap;
int[] predicted; public LinkedHashMap kmeansClusteringLinkedHashMapInteger, double[]
feature, LinkedHashMapInteger, double[] centroidMap, int k {
clusters.clear; step.clear;
for int key : centroidMap.keySet { step.putcentroidMap.getkey, new TreeSetInteger;
} boolean go = true;
int iteration = 1; while go {
clusters = new LinkedHashMapstep; predicted = new int[feature.size];
for Integer key : feature.keySet { double[] cent = null;
double[] cosSim = new double[k]; double[] distances = new double[k];
double sim = -1; double dist = 9999;
int i = 0; for double[] c : clusters.keySet {
double csim = new Distance.cosineSimilarityfeature.getkey, c; cosSim[i] = csim;
if csim sim { sim = csim;
cent = c;
139
} i++;
double distance = new Distance.euclideanDistancefeature.getkey, c; double distance = new Distance.manhattanDistancefeature.getkey, c;
distances[i] = distance; if distance dist {
dist = distance; cent = c;
} i++;
} clusters.getcent.addkey;
int index = 0; for int j = 1; j cosSim.length; j++ {
if cosSim[j] cosSim[index] { index = j;
} }
int index = 0; for int j = 1; j distances.length; j++ {
if distances[j] distances[index] { index = j;
} }
predicted[key - 1] = index + 1; }
step.clear; for double[] cent : clusters.keySet {
double[] updatec = new double[cent.length]; for int d : clusters.getcent {
140
double[] doc = feature.getd; for int i = 0; i updatec.length; i++ {
updatec[i] += doc[i]; }
} for int i = 0; i updatec.length; i++ {
updatec[i] = clusters.getcent.size; }
step.putupdatec, new TreeSetInteger; }
System.out.println; System.out.println\tIteration + iteration;
for double[] key : clusters.keySet { System.out.println\t + clusters.getkey;
} String oldCent = , newCent = ;
for double[] x : clusters.keySet { oldCent += Arrays.toStringx;
} for double[] x : step.keySet {
newCent += Arrays.toStringx; }
if oldCent.equalsnewCent { go = false;
} iteration++;
} return clusters;
}
141
public int[] getPredCluster { return predicted;
} }
12. ConfusionMatrix.java
package sentimentanalysis;

/**
 * Prints a confusion matrix and the resulting accuracy for a set of predicted
 * versus actual cluster labels.
 *
 * <p>NOTE(review): reconstructed from a listing that had lost its parentheses,
 * quotes and operators. The text of the two separator/suffix literals marked
 * below could not be recovered from the listing - confirm against the
 * original source.
 */
public class ConfusionMatrix {

    /**
     * Prints the confusion matrix (rows = actual label, columns = predicted
     * label) followed by the accuracy in percent to standard output.
     *
     * @param pred     predicted labels, 1-based (1..centroid)
     * @param act      actual labels, 1-based (1..centroid); read at the same
     *                 indexes as {@code pred}
     * @param centroid number of distinct labels, i.e. the matrix dimension
     */
    public void getConfusionMatrix(int[] pred, int[] act, int centroid) {
        int[][] confMat = new int[centroid][centroid];
        for (int i = 0; i < pred.length; i++) {
            confMat[act[i] - 1][pred[i] - 1]++;
        }

        int rightAnswers = 0;
        System.out.println("Confusion Matrix : ");
        for (int i = 0; i < confMat.length; i++) {
            int[] x = confMat[i];
            for (int j = 0; j < x.length; j++) {
                int d = x[j];
                // NOTE(review): trailing separator assumed to be a single space.
                System.out.print("\t" + d + " ");
                if (i == j) {
                    // The diagonal holds the samples whose prediction matches the actual label.
                    rightAnswers += d;
                }
            }
            System.out.println();
        }

        // Every sample is counted exactly once, so the total equals pred.length.
        int rows = pred.length;
        System.out.println();
        // The cast makes this a floating-point division; without samples the
        // quotient would be NaN, so report 0 instead.
        float accuration = rows == 0 ? 0f : (float) rightAnswers / rows * 100;
        // NOTE(review): suffix assumed to be " %".
        System.out.println("Accuration = " + accuration + " %");
    }
}
13. PreprocessingWeighting.java
package sentimentanalysis; import java.io.IOException;
import java.util.ArrayList; import java.util.LinkedHashMap;
import sentimentanalysis.WordFreq.Word; public class PreprocessingWeighting {
public static void mainString[] args throws IOException, Exception { TODO code application logic here
int n = 200;