99
centroTempj,:=hasilKmeansclsj,:; end
centroTemp=meancentroTemp,1; centro=[centro;centroTemp];
end calonData=[calonData;centro];
[pembagianCls,cPusat,jmlObyekCls,dist]=Kmeans1N,calonData,3 ,3,1;
assignin
base ,
dist ,dist;
[ nilaiSSE ] = hitungSSE dist ; SSE=nilaiSSE;
End
6. hitungSSE.m
function [ nilaiSSE ] = hitungSSE( data )
%HITUNGSSE Total of squared deviations of every element from its row mean.
%   nilaiSSE = hitungSSE(data) takes a numeric matrix DATA, subtracts from
%   each element the mean of the row it belongs to, squares the difference
%   and returns the grand total of those squares as a double scalar.
%
%   NOTE(review): the listing this was recovered from had lost its
%   parentheses, and it pre-allocated "nilaiSSETemp" while the loop wrote
%   into "nilaiSSETemp1". Both are repaired here; the arithmetic itself is
%   unchanged.

rerata = mean(data, 2);                  % one mean per row (column vector)
[m, n] = size(data);
nilaiSSETemp1 = zeros(m, n);             % squared deviation of each element

for i = 1:m
    for j = 1:n
        nilaiSSETemp1(i, j) = power(data(i, j) - rerata(i, 1), 2);
    end
end

nilaiSSETemp2 = sum(nilaiSSETemp1, 2);   % sum of squares within each row
nilaiSSE = double(sum(nilaiSSETemp2));   % grand total, forced to double
end
7. stemcoba.pl
1. make a rule
2. open text file
3. get one word
4. stem
5. compare with the real root word
6. count the correctly stemmed words
100
# ---------------------------------------------------------------------------
# Main program: evaluate the stemmer against a list of known root words.
#
# Input  (dataAwal.txt) : one "word expected-root" pair per line, separated
#                         by whitespace.
# Output (hasilStem.txt): "<stem> <word>" for a correct result, otherwise
#                         "1 <stem> 2 <expected-root> 3 <word>".
# Trace  (dataHasil.txt): every candidate the stemming subs try, plus the
#                         final stem of each word.
# STDOUT                : the number of correctly stemmed words.
#
# NOTE(review): recovered from a text extraction that had dropped sigils,
# quotes, parentheses and comment markers. The file modes, the separators in
# the output lines and which debugging prints were live are reconstructed -
# confirm against the original script.
# ---------------------------------------------------------------------------
{
    # Strictures are limited to this block so that the subroutine
    # definitions further down the file compile exactly as they did before.
    use strict;
    use warnings;

    # Shared state filled in by initial() and read by the stemming subs.
    # These are package globals; "our" merely documents them here (the
    # original used "local", which is not a declaration, and forgot
    # %suffix_6 even though hilangSuf reads it).
    our (%suffix_1, %suffix_2, %suffix_3, %suffix_4, %suffix_5, %suffix_6);
    our (%prefix_1, %prefix_2, %prefix_3, %prefix_4, %prefix_5,
         %prefix_6, %prefix_7, %prefix_8, %prefix_9, %prefix_10);
    our (%infix_1, %infix_2);
    our %dict;

    my $fileOp   = "D:\\SKRIPSWEET\\Program\\Data\\dataAwal.txt";
    my $fileOut  = "D:\\SKRIPSWEET\\Program\\Data\\hasilStem.txt";
    my $fileTest = "D:\\SKRIPSWEET\\Program\\Data\\dataHasil.txt";

    open my $in_fh, '<', $fileOp
        or die "Cant open $fileOp: $!";
    open my $out_fh, '>', $fileOut
        or die "Cant open $fileOut: $!";

    # FILETESTH stays a bareword handle on purpose: hilangPref, hilangSuf
    # and stem write their trace output to it by that name.
    open FILETESTH, '>', $fileTest
        or die "Cant open $fileTest: $!";

    initial();

    my $right = 0;

    LINE:
    while (my $line = <$in_fh>) {
        my ($raw_word, $expected_root) = split /\s+/, $line;

        # A blank or one-column line used to stem to '' and compare equal
        # to lc(undef), inflating the score. Skip such lines instead.
        next LINE if !defined $raw_word || !defined $expected_root;

        my $word     = lc $raw_word;
        my $stemWord = stem($word);

        print FILETESTH $stemWord . "\n";

        if ($stemWord eq lc $expected_root) {
            print {$out_fh} $stemWord . " " . $word . "\n";
            $right++;
        }
        else {
            print {$out_fh} "1 " . $stemWord . " 2 " . $expected_root
                          . " 3 " . $word . "\n";
        }
    }

    print $right;

    # Buffered write errors only surface at close, so check the writers.
    close $in_fh;
    close $out_fh   or die "Cant close $fileOut: $!";
    close FILETESTH or die "Cant close $fileTest: $!";
}
# ---------------------------------------------------------------------------
# Stemming subroutines: initial, hilangPref, hilangSuf, stem.
#
# NOTE(review): recovered from a text extraction that had dropped sigils,
# quotes, parentheses, slashes and comment markers. Pattern anchors, the
# use of the pre-/post-match text and which of the two rule sets was live
# are reconstructed from the surviving text - confirm against the original.
# ---------------------------------------------------------------------------
use strict;
use warnings;

# Write one line to the FILETESTH trace handle, which the main program
# opens. When nothing has opened it, the line is silently dropped.
sub _trace {
    my ($text) = @_;
    no warnings 'io';
    print FILETESTH $text . "\n";
    return;
}

# True when $candidate is defined and is a key of the dictionary.
sub _in_dict {
    my ($candidate) = @_;
    our %dict;
    return defined $candidate && exists $dict{$candidate};
}

# initial([$dict_path])
#
# Loads the root-word dictionary into %dict (one word per line, each word
# stored as both key and value) and fills the prefix, suffix and infix
# substitution tables. Each table maps an affix to the text that replaces
# it when the affix is removed.
#
#   $dict_path  optional; defaults to the path the original hard-coded.
#
# Dies if the dictionary file cannot be opened.
sub initial {
    my ($dict_path) = @_;

    our %dict;
    our (%suffix_1, %suffix_2, %suffix_3, %suffix_4, %suffix_5, %suffix_6);
    our (%prefix_1, %prefix_2, %prefix_3, %prefix_4, %prefix_5,
         %prefix_6, %prefix_7, %prefix_8, %prefix_9, %prefix_10);
    our (%infix_1, %infix_2);

    if (!defined $dict_path) {
        $dict_path = "D:\\SKRIPSWEET\\Program\\kamus\\kamus.txt";
    }

    open my $dict_fh, '<', $dict_path
        or die "Cant open $dict_path: $!";
    while (my $entry = <$dict_fh>) {
        $entry =~ s/\r?\n\z//;    # also copes with CRLF files read elsewhere
        $dict{$entry} = $entry;
    }
    close $dict_fh;

    # --- suffix levels and their substitutions ---------------------------
    # This is the set whose keys agree with the patterns in hilangSuf.
    %suffix_1 = (
        'ekaken' => 'i', 'okaken' => 'u', 'ekake' => 'i', 'okake' => 'u',
        'oni'    => 'u', 'eni'    => 'i', 'wa'    => '',  'ya'    => '',
        'ning'   => '',  'nipun'  => '',  'okna'  => 'u', 'ekna'  => 'i',
        'onana'  => 'u', 'enana'  => 'i', 'onen'  => 'u', 'enen'  => 'i',
        'enan'   => 'i', 'on'     => 'u', 'onan'  => 'u', 'ku'    => '',
        'mu'     => '',
    );
    %suffix_2 = (
        'kake' => '', 'kaken' => '', 'ni'  => '', 'ing'  => '', 'nana' => '',
        'nane' => '', 'nan'   => '', 'nen' => '', 'ipun' => '', 'kna'  => '',
    );
    %suffix_3 = (
        'kaken' => 'n', 'kake' => 'n', 'kna' => 'n',
        'ana'   => '',  'an'   => '',  'en'  => '',
    );
    %suffix_4 = ('ake' => '', 'aken' => '', 'en' => 'i', 'na' => '', 'ne' => '');
    %suffix_5 = ('ke' => '', 'ken' => '', 'n' => '', 'a' => '', 'i' => '');
    %suffix_6 = ('e' => '');

    # Second suffix set found in the listing. Its keys do not agree with the
    # hilangSuf patterns, so it is presumed to have been commented out:
    #   suffix_1: ekake=i okake=u oni=u eni=i wa= ya= ning= okna=u onana=u
    #             onane=u enan=i ean=i on=u onan=u onen=u ku= mu= nipun=
    #   suffix_2: kake= ni= ing= ana= nan= nen= ipun= nane= nana=
    #   suffix_3: kake=n i= en=i an= ane=
    #   suffix_4: ake= en= na= ne=
    #   suffix_5: e= n= a=

    # First prefix set found in the listing. Its keys do not agree with the
    # hilangPref patterns, so it is presumed to have been commented out:
    #   prefix_1: m= nge=a ny=s di= dak= tak= kok= tok= ka= ke= ku= ang=
    #             sa= se= pa= peng= pang= ing= u=
    #   prefix_2: m=p ng= ny=c ke=i pe= an= pen=t pan=t
    #   prefix_3: m=w ng=k k= pe= pa=
    #   prefix_4: n= a= p=
    #   prefix_5: n=t

    # --- prefix levels and their substitutions ---------------------------
    %prefix_1 = (
        'dipun' => '', 'peng' => '', 'peny' => '', 'pem'  => '', 'pam'  => '',
        'pany'  => '', 'pra'  => '', 'kuma' => '', 'kapi' => '', 'bok'  => '',
        'mbok'  => '', 'dak'  => '', 'tak'  => '', 'kok'  => '', 'tok'  => '',
        'ing'   => '', 'ang'  => '', 'any'  => '', 'am'   => '', 'sak'  => '',
        'se'    => '', 'mang' => '', 'meng' => '', 'nge'  => '', 'nya'  => '',
        'pi'    => '', 'ge'   => '', 'u'    => '', 'po'   => 'u',
        # 'ke' was listed twice ('' and then 'u'); in a hash the later
        # entry wins, so 'u' is what the original actually used.
        'ke'    => 'u',
    );
    %prefix_2 = (
        'mer'  => '',  'mi'   => '',  'sa'   => '',  'ku'  => '',  'an' => '',
        'ka'   => '',  'ny'   => 's', 'ng'   => 'k', 'di'  => '',
        'peng' => 'k', 'pang' => 'k', 'pam'  => 'p', 'ke'  => 'i',
        'mang' => 'k', 'meng' => 'k',
    );
    %prefix_3 = (
        'a'    => '',  'k'    => '',  'pam' => 'w', 'pan' => 't', 'pen' => 't',
        'mang' => 'w', 'meng' => 'w', 'ny'  => 'c', 'ng'  => '',
    );
    %prefix_4  = ('n' => 't', 'pan' => 's', 'pen' => 's', 'man' => 's', 'men' => 's');
    %prefix_5  = ('pan' => '', 'pen' => '', 'man' => 't', 'men' => 't', 'n' => '');
    %prefix_6  = ('pa' => '', 'pe' => '', 'man' => '', 'men' => '');
    %prefix_7  = ('p' => '', 'ma' => '', 'me' => '');
    %prefix_8  = ('m' => 'w');
    %prefix_9  = ('m' => 'p');
    %prefix_10 = ('m' => '');

    # --- infixes ----------------------------------------------------------
    %infix_1 = ('gum' => 'b', 'gem' => 'b', 'kum' => 'p');
    %infix_2 = ('kum' => 'w');    # defined but not read by the subs below

    return;
}

# hilangPref($word)  ("remove prefix")
#
# Tries the prefix levels in order. At each level whose pattern matches the
# start of $word, the matched prefix is replaced by that level's
# substitution and the candidate is written to the trace. The first
# candidate found in the dictionary is returned. If no candidate is in the
# dictionary, $word is returned unchanged.
sub hilangPref {
    my ($word) = @_;

    our (%prefix_1, %prefix_2, %prefix_3, %prefix_4, %prefix_5,
         %prefix_6, %prefix_7, %prefix_8, %prefix_9, %prefix_10);

    # Each entry: pattern, then the substitution table(s) tried for it.
    # Alternation order matters (first alternative that matches wins), so
    # the alternatives are kept in their original order.
    my @levels = (
        [ qr/^(dipun|peng|peny|pem|pam|pany|pra|kuma|kapi|bok|mbok|dak|tak
               |kok|tok|ing|ang|any|am|sak|se|mang|meng|nge|nya|pi|ge|ke|u
               |po)/x,
          \%prefix_1 ],
        [ qr/^(mer|mi|sa|ku|an|ka|ny|ng|di|peng|pang|pam|ke|mang|meng)/,
          \%prefix_2 ],
        [ qr/^(a|k|pam|pan|pen|mang|meng|ny|ng)/, \%prefix_3 ],
        [ qr/^(n|pan|pen|man|men)/,               \%prefix_4 ],
        [ qr/^(pan|pen|man|men|n)/,               \%prefix_5 ],
        [ qr/^(pa|pe|man|men)/,                   \%prefix_6 ],
        [ qr/^(p|ma|me)/,                         \%prefix_7 ],
        # A leading "m" is tried as w, then p, then nothing.
        [ qr/^(m)/, \%prefix_8, \%prefix_9, \%prefix_10 ],
    );

    for my $level (@levels) {
        my ($pattern, @tables) = @{$level};
        next if $word !~ $pattern;

        my $prefix = $1;
        my $rest   = substr($word, length($prefix));

        for my $table (@tables) {
            my $candidate = $table->{$prefix} . $rest;
            _trace($candidate);
            return $candidate if _in_dict($candidate);
        }
    }

    return $word;
}

# hilangSuf($word)  ("remove suffix")
#
# Applies only the FIRST suffix level whose pattern matches the end of
# $word (levels are alternatives, not cumulative), replacing the matched
# suffix by that level's substitution. Returns the result if it is in the
# dictionary; otherwise runs hilangPref on it and returns that if it is in
# the dictionary; otherwise returns nothing (undef in scalar context).
#
# The original kept the candidate in an undeclared global, so when no
# suffix matched it silently reused whatever an earlier call had left
# there. The candidate now starts out as $word itself.
sub hilangSuf {
    my ($word) = @_;

    our (%suffix_1, %suffix_2, %suffix_3, %suffix_4, %suffix_5, %suffix_6);

    my @levels = (
        [ qr/(ekaken|okaken|ekake|okake|oni|eni|wa|ya|ning|nipun|okna|ekna
              |onana|enana|onen|enen|enan|on|onan|ku|mu)$/x,
          \%suffix_1 ],
        [ qr/(kake|kaken|ni|ing|nana|nane|nan|nen|ipun|kna)$/, \%suffix_2 ],
        [ qr/(kaken|kake|kna|ana|an|en)$/,                     \%suffix_3 ],
        [ qr/(ake|aken|en|na|ne)$/,                            \%suffix_4 ],
        [ qr/(ke|ken|n|a|i)$/,                                 \%suffix_5 ],
        [ qr/(e)$/,                                            \%suffix_6 ],
    );

    my $stem = $word;

    LEVEL:
    for my $level (@levels) {
        my ($pattern, $table) = @{$level};
        if ($word =~ $pattern) {
            # $-[0] is where the match starts, i.e. the length of the
            # text in front of the suffix.
            $stem = substr($word, 0, $-[0]) . $table->{$1};
            _trace($stem);
            last LEVEL;
        }
    }

    return $stem if _in_dict($stem);

    my $without_prefix = hilangPref($stem);
    return $without_prefix if _in_dict($without_prefix);

    return;
}

# stem($word)
#
# Returns the dictionary root found for $word, or the last reduced form if
# nothing matched. Steps, in order, each returning as soon as a candidate
# is in the dictionary:
#   1. words shorter than 3 characters and dictionary words are returned
#      unchanged;
#   2. infix removal, when "in", "um", "em", "el" or "er" sits at offset 1;
#   3. hyphenated (reduplicated) words: continue with the part after the
#      first hyphen;
#   4. prefix removal alone, then suffix removal;
#   5. reduplication without a hyphen: drop the first two characters when
#      the second is "e" or the third repeats the first.
sub stem {
    my ($word) = @_;

    our %infix_1;

    return $word if length($word) < 3;
    return $word if _in_dict($word);

    my $w = $word;

    # --- 2. infix ---------------------------------------------------------
    if (grep { index($w, $_) == 1 } qw(in um em el er)) {
        # A named copy replaces the original's use of the global $_.
        (my $no_infix = $w) =~ s/in|um|em|el|er//;
        _trace($no_infix);
        return $no_infix if _in_dict($no_infix);

        if ($w =~ /^(gum|kum|gem)/) {
            my $candidate = $infix_1{$1} . substr($w, length($1));
            _trace($candidate);
            return $candidate if _in_dict($candidate);
        }
        else {
            my $without_prefix = hilangPref($no_infix);
            return $without_prefix if _in_dict($without_prefix);

            my $without_suffix = hilangSuf($no_infix);
            return $without_suffix if _in_dict($without_suffix);
        }

        if ($no_infix =~ /(an|ne)$/) {
            my $candidate = substr($no_infix, 0, $-[0]);
            return $candidate if _in_dict($candidate);
        }
    }

    # --- 3. reduplicated word written with a hyphen ----------------------
    my $hyphen_at = index($w, '-');
    if ($hyphen_at >= 0) {
        my $tail = substr($w, $hyphen_at + 1);
        return $tail if _in_dict($tail);

        # NOTE(review): the recovered listing tests hilangSuf(...) against
        # the dictionary here but the returned expression was lost in
        # extraction. Returning the form that was actually found in the
        # dictionary is the reading implemented here - confirm.
        my $tail_stem = hilangSuf($tail);
        return $tail_stem if _in_dict($tail_stem);

        $w = $tail;
    }

    # --- 4. prefix only, then suffix --------------------------------------
    my $without_prefix = hilangPref($w);
    return $without_prefix if _in_dict($without_prefix);

    my $without_suffix = hilangSuf($w);
    return $without_suffix if _in_dict($without_suffix);

    # --- 5. reduplication without a hyphen --------------------------------
    if (index($w, 'e') == 1 || index($w, substr($w, 0, 1), 2) == 2) {
        # The original deleted the first two characters with s/$dua//,
        # which treated them as a regex and died on words such as "(a(b".
        # Both of its branches then returned the shortened word.
        $w = substr($w, 2);
    }

    return $w;
}
106
8. Kata Unik Seluruh Data 2358 kata