summaryrefslogtreecommitdiffstats
path: root/Aufgabe5/skript5.pl
diff options
context:
space:
mode:
Diffstat (limited to 'Aufgabe5/skript5.pl')
-rw-r--r--Aufgabe5/skript5.pl106
1 files changed, 106 insertions, 0 deletions
diff --git a/Aufgabe5/skript5.pl b/Aufgabe5/skript5.pl
new file mode 100644
index 0000000..b052afc
--- /dev/null
+++ b/Aufgabe5/skript5.pl
@@ -0,0 +1,106 @@
+#!/usr/bin/env perl
+#===============================================================================
+#
+# FILE: skript5.pl
+#
+# USAGE: ./skript5.pl
+#
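+# DESCRIPTION: Builds an inverted word index over doc.lang/*.txt (or loads it from the cached '.index' file) and lists the documents containing one search word read from input.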
+#
+# OPTIONS: ---
+# REQUIREMENTS: ---
+# BUGS: ---
+# NOTES: ---
+# AUTHOR: Stefan Suhren (su), suhren.stefan@fh-swf.de
+# ORGANIZATION: FH Südwestfalen, Iserlohn
+# VERSION: 1.0
+# CREATED: 02.12.2015 19:49:23
+# REVISION: ---
+#===============================================================================
+
+use strict;
+use warnings;
+use utf8;
+
+# Add own module
+use searchengine;
+
+# For dumping data
+use Data::Dumper;
+
+use Storable; # persistence for Perl data structures
+no warnings "once"; # einmalige Benutzung von interwork_56_64bit:
+$Storable::interwork_56_64bit = 1; # notwendig bei 64-Bit-Systemen
+
+# Inverted index used by both the build/load step and the lookup step.
+#   '#<id>' => file name of the document with that numeric id
+#   <word>  => reference to a hash reference { <id> => occurrence count }
+# (extra reference level kept so index files from earlier runs stay readable)
+my %reverseIndex;
+my $reverseIndexFile = '.index';
+
+if (-e $reverseIndexFile)
+{
+    print "Load index from File.\n";
+    # retrieve() can return undef on I/O errors; fail with a clear message.
+    my $reverseIndexPtr = retrieve($reverseIndexFile);
+    ref($reverseIndexPtr) eq 'HASH'
+        or die "$0 : failed to load index from '$reverseIndexFile'\n";
+    %reverseIndex = %$reverseIndexPtr;
+}
+else
+{
+    print "Build and save index to File.\n";
+
+    my $stoplistFile = "stoplist.engl.txt";
+    my %stoplist;
+    buildStoplist($stoplistFile, \%stoplist);
+
+    my $documentBase = 'doc.lang/*.txt';
+    my @documentList = glob $documentBase;
+    my $wordFileId   = 0;
+
+    foreach my $wordFileName (@documentList)
+    {
+        $reverseIndex{'#'.$wordFileId} = $wordFileName;
+        open my $wordFile, '<', $wordFileName
+            or die "$0 : failed to open input file '$wordFileName' : $!\n";
+        my $fileContent = do { local $/ = undef; <$wordFile> };    # slurp
+        close $wordFile
+            or warn "$0 : failed to close input file '$wordFileName' : $!\n";
+
+        foreach my $word (getWords($fileContent, \%stoplist))
+        {
+            ${$reverseIndex{$word}}->{$wordFileId}++;
+        }
+        $wordFileId++;
+    }
+
+    # The in-memory index stays usable if saving fails, so only warn.
+    store(\%reverseIndex, $reverseIndexFile)
+        or warn "$0 : failed to save index to '$reverseIndexFile'\n";
+}
+#print Dumper(\%reverseIndex);
+
+print "Suchwort: ";
+my $eingabe = <>;
+$eingabe = '' unless defined $eingabe;    # EOF: act like an empty search word
+chomp $eingabe;
+$eingabe = lc $eingabe;
+
+# Only word entries are references; the plain-string '#<id>' file-name
+# entries share the hash and must not be dereferenced as a hit.
+if (ref $reverseIndex{$eingabe})
+{
+    my $countOf = ${$reverseIndex{$eingabe}};
+    print "Suchwort '" . $eingabe . "' --- gefundene Dokumente: \n";
+    # Explicit %$ deref (keys on a reference is gone since Perl 5.24); ids sort numerically.
+    foreach my $fileId (sort { $a <=> $b } keys %$countOf)
+    {
+        printf "\t%5d : %30s = %3d\n", $fileId, $reverseIndex{'#'.$fileId}, $countOf->{$fileId};
+    }
+}
+else
+{
+    print "Suchwort '" . $eingabe . "' nicht gefunden\n";
+}