summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStefan Suhren <suhren.stefan@fh-swf.de>2015-12-02 19:37:37 +0100
committerStefan Suhren <suhren.stefan@fh-swf.de>2015-12-02 19:37:37 +0100
commit6918059cd5f97720943170f18f6c10361e20b835 (patch)
tree8ff723beb25d1ccc13abb252ce79120683c26739
parent75ac48742bdf2826a135375fe1acf5f399bbbaf3 (diff)
downloadSkriptsprachen-6918059cd5f97720943170f18f6c10361e20b835.tar.gz
Skriptsprachen-6918059cd5f97720943170f18f6c10361e20b835.zip
Build reverse search index
-rw-r--r--Aufgabe5/skript4.pl73
1 files changed, 73 insertions, 0 deletions
diff --git a/Aufgabe5/skript4.pl b/Aufgabe5/skript4.pl
new file mode 100644
index 0000000..0615790
--- /dev/null
+++ b/Aufgabe5/skript4.pl
@@ -0,0 +1,73 @@
+#!/usr/bin/env perl
+#===============================================================================
+#
+# FILE: skript4.pl
+#
+# USAGE: ./skript4.pl
+#
+# DESCRIPTION: Builds a reverse search index (word => documents) and dumps it
+#
+# OPTIONS: ---
+# REQUIREMENTS: ---
+# BUGS: ---
+# NOTES: ---
+# AUTHOR: Stefan Suhren (su), suhren.stefan@fh-swf.de
+# ORGANIZATION: FH Südwestfalen, Iserlohn
+# VERSION: 1.0
+# CREATED: 02.12.2015 14:14:02
+# REVISION: ---
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+
+# Add own module
+use searchengine;
+
+# For dumping data
+use Data::Dumper;
+
+# enforce utf-8 mode
+binmode (STDIN, ":encoding(UTF-8)");
+binmode (STDOUT, ":encoding(UTF-8)");
+binmode (STDERR, ":encoding(UTF-8)");
+use open ":encoding(UTF-8)";
+
+# Stop list: buildStoplist() is not defined in this file (presumably it is
+# exported by the searchengine module and fills %stoplist -- confirm there).
+my $stoplistFile = "stoplist.txt";
+my %stoplist;
+
+buildStoplist($stoplistFile, \%stoplist);
+
+# Map document number => path of the document to index.
+my %documentName = map { $_ => 'doc.einfach/doc'.$_.'.txt' } 1 .. 4;
+
+# Reverse index: word => { document number => '' }.
+my %reverseIndex;
+
+foreach my $key (keys %documentName) {
+    my $wordFileName = $documentName{$key};
+
+    open my $wordFile, '<', $wordFileName
+        or die "$0 : failed to open input file '$wordFileName' : $!\n";
+
+    # Read the whole file at once (slurp mode).
+    my $fileContent = do { local $/ = undef; <$wordFile> };
+
+    close $wordFile
+        or warn "$0 : failed to close input file '$wordFileName' : $!\n";
+
+    # getWords() is defined outside this file; presumably it returns the
+    # words of the text that are not on the stop list -- confirm there.
+    foreach my $word (getWords($fileContent, \%stoplist)) {
+        # Plain nested hash. The former ${...}->{...} form autovivified an
+        # extra scalar reference around every inner hash.
+        $reverseIndex{$word}{$key} = '';
+    }
+}
+
+# Dump by reference so the index prints as ONE structure instead of a
+# flattened key/value list; sorted keys make the output reproducible.
+$Data::Dumper::Sortkeys = 1; print Dumper(\%reverseIndex);