// Clustering.java — k-means tweet-clustering driver.
// NOTE(review): recovered from a PDF extraction (thesis appendix, p. 146); the original
// running header listed sibling files ConfusionMatrix.java and PreprocessingWeighting.java.
package sentimentanalysis; import java.io.IOException;
import java.util.ArrayList; import java.util.Arrays;
import java.util.LinkedHashMap; import java.util.TreeSet;
public class Clustering { public static void mainString[] args throws IOException {
int k = 5; int minFreq = 3;
boolean zscore = false; boolean minmax = true;
int n = 5; CSV csv = new CSV;
LinkedHashMapInteger, double[] features = csv.readFeaturesFromCSVfeatures.csv; LinkedHashMapInteger,
ArrayListString tweets
= csv.readTweetsFromCSVtweets.csv;
LinkedHashMapString, Integer freq = csv.readFreqFromCSVfreq.csv; System.out.printlnAll Terms = + features.get1.length;
double m = features.get1[0]; for int key : features.keySet {
for double d : features.getkey { if d m {
m = d; }
147
} }
System.out.printlnm; ArrayListString terms = new ArrayListfreq.keySet;
ArrayListInteger index = new ArrayList; for String key : freq.keySet {
if freq.getkey = minFreq { index.addterms.indexOfkey;
} }
LinkedHashMapInteger, double[] temp = new LinkedHashMapfeatures; features.clear;
for int key : temp.keySet { double[] d = new double[index.size];
int i = 0; for int ind : index {
d[i] = temp.getkey[ind]; i++;
} features.putkey, d;
} System.out.println;
System.out.printlnTerms after tf thresholding = + features.get1.length; System.out.println;
Normalization norm = new Normalization; if zscore {
System.out.printZScore : ;
148
features = norm.ZScorefeatures, n; System.out.printlndone;
for int key : features.keySet { System.out.println\t + key + | + Arrays.toStringfeatures.getkey;
} System.out.println;
} if minmax {
System.out.printMinMax : ; features = norm.MinMaxfeatures, n;
System.out.printlndone; for int key : features.keySet {
System.out.println\t + key + | + Arrays.toStringfeatures.getkey; }
System.out.println; }
System.out.printVariance Initial Centroid : ; VarianceInit vi = new VarianceInitfeatures, k;
LinkedHashMapInteger, double[] initCentroid = vi.getInitialCentroid; System.out.printlndone;
for int key : initCentroid.keySet { System.out.println\t + key + | + Arrays.toStringinitCentroid.getkey;
} System.out.println;
System.out.printClustering : ; KmeansClustering kc = new KmeansClustering;
LinkedHashMapdouble[], TreeSetInteger kmeans = kc.kmeansClusteringfeatures, initCentroid, k;
System.out.printlndone; for double[] key : kmeans.keySet {
149
System.out.println\t + Arrays.toStringkey; }
System.out.println; int c = 1;
for double[] idCent : kmeans.keySet { System.out.println\tCluster + c + | + kmeans.getidCent;
System.out.println\tMembers = + kmeans.getidCent.size; System.out.println;
c++; }
System.out.println; int[] pred = kc.getPredCluster;
1 cinta 2 marah
3 sedih 4 senang
5 takut int[] act = new int[features.size];
for int i = 1; i = 200; i++ { act[i - 1] = 1;
} for int i = 201; i = 400; i++ {
act[i - 1] = 2; }
for int i = 401; i = 600; i++ { act[i - 1] = 3;
} for int i = 601; i = 800; i++ {
act[i - 1] = 4;
150
} for int i = 801; i = 1000; i++ {
act[i - 1] = 5; }
for int i = 1; i = features.size; i++ { System.out.println\t + i + + tweets.geti + pred : + pred[i - 1] + act : + act[i -
1]; }
System.out.println; new ConfusionMatrix.getConfusionMatrixpred, act, k;
System.out.println; int i = 1;
LinkedHashMapInteger, double[] outputCent = new LinkedHashMap; for double[] idCent : kmeans.keySet {
outputCent.puti, idCent; i++;
} for int idCent : outputCent.keySet {
System.out.printlnArrays.toStringoutputCent.getidCent; }
csv.writeFeaturesToCSVoutputCent, centroid.csv; }
}