LAMPIRAN A: Kode Program

  1. getrss.php

  <?php if (@$_GET["load"] == "stemming") include("step2a_stemming.php"); else if (@$_GET["load"] == "bobot") include("step3_bobot_dan_normalisasi.php"); else if (@$_GET["load"] == "relevansi") include("step5_tabel_relevansi.php"); ?> <?php set_time_limit(300); error_reporting(0); include 'koneksi.php'; $q = $_GET["q"]; $xml = $q; $xmlDoc = new DOMDocument(); $xmlDoc->load($xml); $channel = $xmlDoc->getElementsByTagName('channel')->item(0); $channel_title = $channel->getElementsByTagName('title')

  • >item(0)->childNodes->item(0)->nodeValue; $channel_link = $channel->getElementsByTagName('link')
  • >item(0)->childNodes->item(0)->nodeValue; $channel_desc = $channel->getElementsByTagName('descripti
  • >item(0)->childNodes->item(0)->nodeValue; echo("<p><a href='" . $channel_link. "'>" . $channel_title . "</a>"); echo("<br />"); echo($channel_desc . "</p>"); $x = $xmlDoc->getElementsByTagName('item'); $counter_content = 0; for ($i = 0; $i <= 200; $i++) {

  $item_title = $x->item($i)->getElementsByTagName('title')

  • >item(0)->childNodes->item(0)->nodeValue; $item_link = $x->item($i)->getElementsByTagName('link')
  • >item(0)->childNodes->item(0)->nodeValue; $item_desc = $x->item($i)->getElementsByTagName('descripti
  • >item(0)->childNodes->item(0)->nodeValue; echo ("<p><a href='" . $item_link . "'>" . $item_title . "</a>"); echo ("<br />"); echo ($item_desc . "</p>"); $item_title = stripslashes($item_title); $item_title = mysql_real_escape_string($item_title); $item_title = str_replace("'", " ", $item_title); $item_desc = stripslashes($item_desc); $item_desc = mysql_real_escape_string($item_desc);

  $item_desc = mysql_escape_string($item_desc); $item_desc = str_replace("'", " ", $item_desc); $cekdata = "select judul from tabel_konten_temp where judul='$item_title'"; $ada = mysql_query($cekdata) or die(mysql_error()); if (mysql_num_rows($ada) > 0) { } else { $teks = stripslashes(strip_tags($item_desc)); $teks = mysql_real_escape_string(strip_tags($item_desc)); $teks = mysql_escape_string(strip_tags($item_desc)); $teks = str_replace("'", " ", $teks);

  $kategori = $_GET['kategori']; if (strlen($teks) > 200) { mysql_query("INSERT INTO tabel_konten_temp(url, judul, konten_html, konten_teks, kategori) VALUES('$item_link', '$item_title', '$item_desc', '$teks', '$kategori')") or die(mysql_error() }

  } } ?>

  2. porterstemmer.php

  public static function Stem($word) { if (strlen($word) <= 2) { return $word; } $word = self::step1ab($word); $word = self::step1c($word); $word = self::step2($word); $word = self::step3($word); $word = self::step4($word); $word = self::step5($word); return $word; } private static function step1ab($word) { if (substr($word, -1) == 's') { self::replace($word, 'sses', 'ss') OR self::replace($word, 'ies', 'i') OR self::replace($word, 'ss', 'ss')

  OR self::replace($word, 's', ''); if (substr($word, -2, 1) != 'e' OR !self::replace($word, 'eed', 'ee', 0)) { $v = self::$regex_vowel; if (preg_match("#$v+#", substr($word, 0, -3)) && self::replace($word, 'ing', '')

  OR preg_match("#$v+#", substr($word, 0, -2)) && self::replace($word, 'ed', '')) { if (!self::replace($word, 'at', 'ate')

  AND !self::replace($word, 'bl', 'ble') AND !self::replace($word, 'iz', 'ize')) { if (self::doubleConsonant($word)

  AND substr($word, -2) != 'll' AND substr($word, -2) != 'ss' AND substr($word, -2) != 'zz') {

  $word = substr($word, 0, -1); } else if (self::m($word) == 1 AND self::cvc($word)) { $word .= 'e'; } }

  } return $word; } private static function step1c($word) {

  $v = self::$regex_vowel; if (substr($word, -1) == 'y' && preg_match("#$v+#", substr($word, 0, -1))) { self::replace($word, 'y', 'i'); } return $word;

  } private static function step2($word) { switch (substr($word, -2, 1)) { case 'a': self::replace($word, 'ational', 'ate', 0) OR self::replace($word, 'tional', 'tion', 0); break; case 'c': self::replace($word, 'enci', 'ence', 0) OR self::replace($word, 'anci', 'ance', 0); break; case 'e': self::replace($word, 'izer', 'ize', 0); break; case 'g': self::replace($word, 'logi', 'log', 0); break; case 'l': self::replace($word, 'entli', 'ent', 0) OR self::replace($word, 'ousli', 'ous', 0) OR self::replace($word, 'alli', 'al', 0) OR self::replace($word, 'bli', 'ble', 0) OR self::replace($word, 'eli', 'e', 0); break; case 'o': self::replace($word, 'ization', 'ize', 0) OR self::replace($word, 'ation', 'ate', 0) OR self::replace($word, 'ator', 'ate', 0); break; case 's': self::replace($word, 'iveness', 'ive', 0) OR self::replace($word, 'fulness', 'ful', 0) OR self::replace($word, 'ousness', 'ous', 0) OR self::replace($word, 'alism', 'al', 0); break; case 't': self::replace($word, 'biliti', 'ble', 0) OR self::replace($word, 'aliti', 'al', 0) OR self::replace($word, 'iviti', 'ive', 0); break; } return $word;

  } private static function step3($word) { switch (substr($word, -2, 1)) { case 'a': self::replace($word, 'ical', 'ic', 0); break; case 's': self::replace($word, 'ness', '', 0); break; case 't': self::replace($word, 'icate', 'ic', 0) OR self::replace($word, 'iciti', 'ic', 0); break; case 'u': self::replace($word, 'ful', '', 0); break; case 'v': self::replace($word, 'ative', '', 0); break; case 'z': self::replace($word, 'alize', 'al', 0); break; } return $word;

  } private static function step4($word) { switch (substr($word, -2, 1)) { case 'a': self::replace($word, 'al', '', 1); break; case 'c': self::replace($word, 'ance', '', 1) OR self::replace($word, 'ence', '', 1); break; case 'e': self::replace($word, 'er', '', 1); break; case 'i': self::replace($word, 'ic', '', 1); break; case 'l': self::replace($word, 'able', '', 1) OR self::replace($word, 'ible', '', 1); break; case 'n': self::replace($word, 'ant', '', 1) OR self::replace($word, 'ement', '', 1) OR self::replace($word, 'ment', '', 1) OR self::replace($word, 'ent', '', 1); break; case 'o': if (substr($word, -4) == 'tion' OR substr($word, -

  4) == 'sion') { self::replace($word, 'ion', '', 1); } else { self::replace($word, 'ou', '', 1); } break; case 's': self::replace($word, 'ism', '', 1); break; case 't': self::replace($word, 'ate', '', 1) OR self::replace($word, 'iti', '', 1); break; case 'u': self::replace($word, 'ous', '', 1); break; case 'v': self::replace($word, 'ive', '', 1); break; case 'z': self::replace($word, 'ize', '', 1); break; } return $word;

  } private static function step5($word) { if (substr($word, -1) == 'e') { if (self::m(substr($word, 0, -1)) > 1) { self::replace($word, 'e', ''); } else if (self::m(substr($word, 0, -1)) == 1) { if (!self::cvc(substr($word, 0, -1))) { self::replace($word, 'e', ''); } }

  } if (self::m($word) > 1 AND self::doubleConsonant($word) AND substr($word, -1) == 'l') {

  $word = substr($word, 0, -1); } return $word;

  } private static function replace(&$str, $check, $repl, $m = null) { $len = 0 - strlen($check); if (substr($str, $len) == $check) {

  $substr = substr($str, 0, $len); if (is_null($m) OR self::m($substr) > $m) { $str = $substr . $repl;

  } return true; } return false;

  } private static function m($str) { $c = self::$regex_consonant; $v = self::$regex_vowel; $str = preg_replace("#^$c+#", '', $str); $str = preg_replace("#$v+$#", '', $str); preg_match_all("#($v+$c+)#", $str, $matches); return count($matches[1]);

  } private static function doubleConsonant($str) { $c = self::$regex_consonant; return preg_match("#$c{2}$#", $str, $matches) AND $matches[0]{0} == $matches[0]{1};

  } private static function cvc($str) { $c = self::$regex_consonant; $v = self::$regex_vowel; return preg_match("#($c$v$c)$#", $str, $matches) AND strlen($matches[1]) == 3 AND $matches[1]{2} != 'w' AND $matches[1]{2} != 'x' AND $matches[1]{2} != 'y'; }

  } ?>

  3. stemming.php

  function stemming($page_word, $maks_keyword_return) { $stop_words = array("a", "able", "about", "above", "abroad", "abst", "accordance", "according", "accordingly", "across", "act", "actually", "added", "adj", "adopted", "affected", "affecting", "affects", "after", "afterwards", "again", "against", "ago", "ah", "ahead", "ain't", "all", "allow", "allows", "almost", "alone", "along", "alongside", "already", "also", "although", "always", "am", "amid", "amidst", "among", "amongst", "amoungst", "amount", "an", "and", "announce", "another", "any", "anybody", "anyhow", "anymore", "anyone", "anything", "anyway", "anyways", "anywhere", "apart", "apparently", "appear", "appreciate", "appropriate", "approximately", "are", "aren", "arent", "aren't", "arise", "around", "as", "a's", "aside", "ask", "asking", "associated", "at", "auth", "available", "away", "awfully", "b", "back", "backward", "backwards", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "begin", "beginning", "beginnings", "begins", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "bill", "biol", "both", "bottom", "brief", "briefly", "but", "by", "c", "ca", "call", "came", "can", "cannot", "cant", "can't", "caption", "cause", "causes", "certain", "certainly", "changes", "clearly", "c'mon", "co", "co.", "com", "come", "comes", "computer", "con", "concerning", "consequently", "consider", "considering", "contain", "containing", "contains", "corresponding", "could", "couldnt", "couldn't", "course", "cry", "c's", "currently", "d", "dare", "daren't", "date", "de", "definitely", "describe", "described", "despite", "detail", "did", "didn't", "different", "directly", "do", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "due", "during", "e", "each", "ed", "edu", "effect", "eg", "eight", "eighty", "either", "eleven", "else", "elsewhere", "empty", "end", "ending", "enough", "entirely", "especially", "et", "et-al", 
"etc", "even", "ever", "evermore", "every", "everybody", "everyone", "everything", "everywhere", "ex", "exactly", "example", "except", "f", "fairly", "far", "farther", "few", "fewer", "ff", "fifteen", "fifth", "fify", "fill", "find", "fire", "first", "five", "fix", "followed", "following", "follows", "for", "forever", "former", "formerly", "forth", "forty", "forward", "found", "four", "from", "front", "full", "further", "furthermore", "g", "gave", "get", "gets", "getting", "give", "given", "gives", "giving", "go", "goes", "going", "gone", "got", "gotten", "greetings", "h", "had", "hadn't", "half", "happens", "hardly", "has", "hasnt", "hasn't", "have", "haven't", "having", "he", "hed", "he'd", "he'll", "hello", "help", "hence", "her", "here", "hereafter", "hereby", "herein", "heres", "here's", "hereupon", "hers", "herse”", "herself", "hes", "he's", "hi", "hid", "him", "himse”", "himself", "his", "hither", "home", "hopefully", "how", "howbeit", "however", "how's", "hundred", "i", "id", "i'd", "ie", "if", "ignored", "i'll", "im", "i'm", "immediate", "immediately", "importance", "important", "in", "inasmuch", "inc", "inc.", "indeed", "index", "indicate", "indicated", "indicates", "information", "inner", "inside", "insofar", "instead", "interest", "into", "invention", "inward", "is", "isn't", "it", "itd", "it'd", "it'll", "its", "it's", "itse”", "itself", "i've", "j", "just", "k", "keep", "keeps", "kept",

  "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "lets", "let's", "like", "liked", "likely", "likewise", "line", "little", "'ll", "look", "looking", "looks", "low", "lower", "ltd", "m", "made", "mainly", "make", "makes", "many", "may", "maybe", "mayn't", "me", "mean", "means", "meantime", "meanwhile", "merely", "mg", "might", "mightn't", "mill", "million", "mine", "minus", "miss", "ml", "more", "moreover", "most", "mostly", "move", "mr", "mrs", "much", "mug", "must", "mustn't", "my", "myse”", "myself", "n", "na", "name", "namely", "nay", "nd", "near", "nearly", "necessarily", "necessary", "need", "needn't", "needs", "neither", "never", "neverf", "neverless", "nevertheless", "new", "next", "nine", "ninety", "no", "nobody", "non", "none", "nonetheless", "noone", "no-one", "nor", "normally", "nos", "not", "noted", "nothing", "notwithstanding", "novel", "now", "nowhere", "o", "obtain", "obtained", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "omitted", "on", "once", "one", "ones", "one's", "only", "onto", "opposite", "or", "ord", "other", "others", "otherwise", "ought", "oughtn't", "our", "ours", "ours ", "ourselves", "out", "outside", "over", "overall", "owing", "own", "p", "page", "pages", "part", "particular", "particularly", "past", "per", "perhaps", "placed", "please", "plus", "poorly", "possible", "possibly", "potentially", "pp", "predominantly", "present", "presumably", "previously", "primarily", "probably", "promptly", "proud", "provided", "provides", "put", "q", "que", "quickly", "quite", "qv", "r", "ran", "rather", "rd", "re", "readily", "really", "reasonably", "recent", "recently", "ref", "refs", "regarding", "regardless", "regards", "related", "relatively", "research", "respectively", "resulted", "resulting", "results", "right", "round", "run", "s", "said", "same", "saw", "say", "saying", "says", "sec", "second", "secondly", "section", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self", 
"selves", "sensible", "sent", "serious", "seriously", "seven", "several", "shall", "shan't", "she", "shed", "she'd", "she'll", "shes", "she's", "should", "shouldn't", "show", "showed", "shown", "showns", "shows", "side", "significant", "significantly", "similar", "similarly", "since", "sincere", "six", "sixty", "slightly", "so", "some", "somebody", "someday", "somehow", "someone", "somethan", "something", "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry", "specifically", "specified", "specify", "specifying", "state", "states", "still", "stop", "strongly", "sub", "substantially", "successfully", "such", "sufficiently", "suggest", "sup", "sure", "system", "t", "take", "taken", "taking", "tell", "ten", "tends", "th", "than", "thank", "thanks", "thanx", "that", "that'll", "thats", "that's", "that've", "the", "their", "theirs", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", "thered", "there'd", "therefore", "therein", "there'll", "thereof", "therere", "there're", "theres", "there's", "thereto", "thereupon", "there've", "these", "they", "theyd", "they'd", "they'll", "theyre", "they're", "they've", "thick", "thin", "thing", "things", "think", "third", "thirty", "this", "thorough", "thoroughly", "those", "thou", "though", "thoughh", "thousand", "three", "throug", "through", "throughout", "thru", "thus", "til", "till", "tip", "to", "together", "too", "took", "top", "toward", "towards", "tried", "tries", "truly", "try", "trying", "ts", "t's", "twelve", "twenty", "twice", "two", "u",

  "un", "under", "underneath", "undoing", "unfortunately", "unless", "unlike", "unlikely", "until", "unto", "up", "upon", "ups", "upwards", "us", "use", "used", "useful", "usefully", "usefulness", "uses", "using", "usually", "v", "value", "various", "'ve", "versus", "very", "via", "viz", "vol", "vols", "vs", "w", "want", "wants", "was", "wasn't", "way", "we", "wed", "we'd", "welcome", "well", "we'll", "went", "were", "we're", "weren't", "we've", "what", "whatever", "what'll", "whats", "what's", "what've", "when", "whence", "whenever", "when's", "where", "whereafter", "whereas", "whereby", "wherein", "wheres", "where's", "whereupon", "wherever", "whether", "which", "whichever", "while", "whilst", "whim", "whither", "who", "whod", "who'd", "whoever", "whole", "who'll", "whom", "whomever", "whos", "who's", "whose", "why", "why's", "widely", "will", "willing", "wish", "with", "within", "without", "wonder", "won't", "words", "world", "would", "wouldn't", "www", "x", "y", "yes", "yet", "you", "youd", "you'd", "you'll", "your", "youre", "you're", "yours", "yourself", "yourselves", "you've", "z", "zero");

  $words = explode(" ", strtolower($page_word)); $stem_words = array(); foreach ($words as $word) {

  $word = strtolower($word); $word = ambil_abjad($word); $word = hapus_2_karakter($word); if (!in_array($word, $stop_words)) {

  $stem = PorterStemmer::Stem($word); /* Remove stop words */ $stem = hapus_2_karakter($stem); if (!in_array($stem, $stop_words)) { if ($stem != "") {

  $stem_words[] = $stem; } } }

  } $hitung_keyword = array_count_values($stem_words); arsort($hitung_keyword); $keyword_tertinggi = array_slice($hitung_keyword, 0, $maks_keyword_return); return $keyword_tertinggi; } function ambil_abjad($kata) { return preg_replace('/[^A-Za-z]/', '', $kata);

  } function hapus_2_karakter($kata) { if (strlen($kata) > 2) { return $kata; } else { return ""; }

  }

  4. hitungrelevansi.php

  <?php include 'koneksi.php'; set_time_limit(300);

  $query_topik = mysql_query("SELECT * FROM tabel_keyword") or die(mysql_error()); while ($row_topik = mysql_fetch_array($query_topik)) {

  $id_topik = $row_topik['id_konten']; $keyword_topiks = unserialize($row_topik['keyword']); $query_page = mysql_query("SELECT * FROM tabel_keyword WHERE id_konten != $id_topik ORDER BY id_konten ASC") or die(mysql_error()); while ($row_page = mysql_fetch_array($query_page)) { $id_page = $row_page['id_konten']; $keyword_pages = unserialize($row_page['keyword']); $wmax_topik = wmaks($keyword_topiks); $wmax_page = wmaks($keyword_pages); $wkt_sama = 0; $wkp_sama = 0; $keyword_topik_sama = ""; $keyword_page_sama = ""; $keyword_topik = "";

  $keyword_page = ""; foreach ($keyword_topiks as $key_topik => $jumlah_topik) { foreach ($keyword_pages as $key_page => $jumlah_page) { if (trim($key_topik) == trim($key_page) && trim($key_page) != "" && trim($key_topik) != "") {

  $wkt_sama = ($jumlah_topik / $wmax_topik) + $wkt_sama; $wkp_sama = ($jumlah_page / $wmax_page) + $wkp_sama; $keyword_topik_sama = $keyword_topik_sama . " " .

  $key_topik . "=" . $jumlah_topik; $keyword_page_sama =$keyword_page_sama . " " . $key_page . "=" . $jumlah_page; }

  } } //mencari total wkt $wkt = 0; foreach ($keyword_topiks as $key_topik => $jumlah_topik) {

  $wkt = $wkt + ($jumlah_topik / $wmax_topik); //untuk menampilkan keyword topik $keyword_topik = $key_topik . "=" . $jumlah_topik . " " .

  $keyword_topik; } $wkt_pangkat = pow($wkt, 2); //mencari wkp $wkp = 0; foreach ($keyword_pages as $key_page => $jumlah_page) {

  $wkp = $wkp + ($jumlah_page / $wmax_page); //untuk menampilkan keyword page $keyword_page = $key_page . "=" . $jumlah_page . " " . $keyword_page; } $wkp_pangkat = pow($wkp, 2); $akar_wkt_pangkat_dan_wkp_pangkat = sqrt($wkt_pangkat * $wkp_pangkat); //menghindari division by zero if ($akar_wkt_pangkat_dan_wkp_pangkat != 0) {

  $relevansi_topik_page = ($wkt_sama * $wkp_sama) / $akar_wkt_pangkat_dan_wkp_pangkat;

  } //jika nilai relevansinya ada if (isset($relevansi_topik_page) && $relevansi_topik_page != 0) { if ($wkp_sama == 0 || $wkt_sama == 0) { $wkp = 0;

  $wkt = 0; $relevansi_topik_page = 0; } insert_relevansi($id_topik, $id_page, $keyword_topik_sama, $keyword_page_sama, $wkt_sama, $wkp_sama, $wkt, $wkp, $relevansi_topik_page); } ?>

  5. pencarian.php

  <?php include 'crawler1/PorterStemmer.php'; @$cari = $_GET['cari']; $array_cari = explode(" ", strtolower($cari)); $q_cari = ""; if (count($array_cari) < 1) {

  $q_cari = " judul LIKE '%$array_cari[0]%' OR "." keyword LIKE '%$array_cari[0]%'"; } else { for ($i = 0; $i < count($array_cari); $i++) {

  $q_cari = $q_cari." judul LIKE '%$array_cari[0]%' OR "." konten_teks LIKE '%$array_cari[0]%'"; if($i<count($array_cari)-1){ $q_cari = $q_cari. " OR "; } }

  } @$kategori = $_GET['kategori']; $per_page = 10; $q = "SELECT count(*) FROM tabel_konten WHERE $q_cari AND kategori='$kategori'"; $page_query = mysql_query($q) or die(mysql_error()); //echo $q; $pages = ceil(mysql_result($page_query, 0) / $per_page); $page = (isset($_GET['page'])) ? (int) $_GET['page'] : 1; $start = ($page - 1) * $per_page; //$sql = mysql_query("SELECT id_konten,(SELECT judul FROM tabel_konten WHERE tabel_konten.id_konten=tabel_keyword.id_konten AND tabel_konten.kategori='$kategori') as judul FROM tabel_keyword WHERE $q_cari LIMIT $start, $per_page") or die(mysql_error()); $sql = mysql_query("SELECT * FROM tabel_konten WHERE $q_cari LIMIT $start, $per_page") or die(mysql_error()); while ($row = mysql_fetch_array($sql)) {

  $id_konten = $row['id_konten']; $qs = mysql_query("SELECT * FROM tabel_konten WHERE id_konten=" . $id_konten . " AND kategori='$kategori'") or die(mysql_error()); if (mysql_num_rows($qs) > 0) {

  $judul = ucfirst(strtolower($row['judul'])); $judul = str_replace('<h3>', '<h3><a href="baca.php?id=' . $id_konten . '">', $judul);

  $judul = str_replace('</h3>', '</a></h3>', $judul); echo "<a href='baca.php?id=$id_konten'>" . $judul .

  "</a/><br/>"; } }

  ?> <?php for ($x = 1; $x <= $pages; $x++) { echo ($x == $page) ? '<b><a href="?cari=' . $cari .

  '&kategori=' . $kategori . '&page=' . $x . '">' . $x . '</a></b>' : '<a href="?cari=' . $cari . '&kategori=' . $kategori . '&page=' . $x . '">' . $x . '</a> ';

  } }

  ?>