diff options
| author | Aentfs <aentfs@gmx.de> | 2015-12-02 22:21:40 +0100 |
|---|---|---|
| committer | Aentfs <aentfs@gmx.de> | 2015-12-02 22:21:40 +0100 |
| commit | cdb45a36bdc608a59d14e5e5bed887ad9864307a (patch) | |
| tree | b9381b4049073572e8c905938291d5bb9cfdbaca | |
| parent | d5fcd656514a16ebc86c89922488785bbbc5db34 (diff) | |
| download | Skriptsprachen-cdb45a36bdc608a59d14e5e5bed887ad9864307a.tar.gz Skriptsprachen-cdb45a36bdc608a59d14e5e5bed887ad9864307a.zip | |
Store index and use it
| -rw-r--r-- | Aufgabe5/skript5.pl | 106 |
1 files changed, 106 insertions, 0 deletions
# diff --git a/Aufgabe5/skript5.pl b/Aufgabe5/skript5.pl new file mode 100644 index 0000000..b052afc --- /dev/null +++ b/Aufgabe5/skript5.pl @@ -0,0 +1,106 @@
#!/usr/bin/env perl
#===============================================================================
#
#         FILE: skript5.pl
#
#        USAGE: ./skript5.pl
#
#  DESCRIPTION: Builds a reverse (inverted) index over the text files matching
#               'doc.lang/*.txt', caches it in the file '.index' via Storable,
#               and then looks up a single search word read from input,
#               printing every document that contains it with its hit count.
#
#      OPTIONS: ---
# REQUIREMENTS: searchengine (buildStoplist/getWords are presumably exported
#               by it - verify), Storable
#         BUGS: ---
#        NOTES: Index layout: $reverseIndex{'#'.$id} holds the file name of
#               document $id (plain string); $reverseIndex{$word} holds a
#               reference to a scalar that holds a hash ref { $id => count }.
#               This layout is kept so existing '.index' files stay readable.
#       AUTHOR: Stefan Suhren (su), suhren.stefan@fh-swf.de
# ORGANIZATION: FH Südwestfalen, Iserlohn
#      VERSION: 1.0
#      CREATED: 02.12.2015 19:49:23
#     REVISION: ---
#===============================================================================

use strict;
use warnings;
use utf8;

# Add own module
use searchengine;

# For dumping data
use Data::Dumper;

use Storable;                          # persistence for Perl data structures
no warnings "once";                    # interwork_56_64bit is used only once below
$Storable::interwork_56_64bit = 1;     # per the original author: needed on 64-bit systems

my %reverseIndex;

my $reverseIndexFile = '.index';

if (-e $reverseIndexFile)
{
    print "Load index from File.\n";

    # retrieve() returns undef on I/O errors; fail with a clear message
    # instead of dying later on an undefined hash dereference.
    my $reverseIndexPtr = retrieve($reverseIndexFile)
        or die "$0 : failed to load index file '$reverseIndexFile'\n";
    %reverseIndex = %{$reverseIndexPtr};
}
else
{
    print "Build and save index to File.\n";

    my $stoplistFile = "stoplist.engl.txt";
    my %stoplist;

    buildStoplist($stoplistFile, \%stoplist);

    my $documentBase = 'doc.lang/*.txt';
    my @documentList = glob $documentBase;

    my $wordFileId = 0;

    foreach my $wordFileName (@documentList)
    {
        # Map the numeric document id back to its file name.
        $reverseIndex{'#'.$wordFileId} = $wordFileName;

        open my $wordFile, '<', $wordFileName
            or die "$0 : failed to open input file '$wordFileName' : $!\n";

        # Read in slurp mode
        my $fileContent = do {
            local $/ = undef;
            <$wordFile>;
        };

        close $wordFile
            or warn "$0 : failed to close input file '$wordFileName' : $!\n";

        # Count the occurrences of every word per document.
        foreach my $word (getWords($fileContent, \%stoplist))
        {
            ${ $reverseIndex{$word} }->{$wordFileId}++;
        }

        $wordFileId++;
    }

    # The index is still usable in memory if saving fails, so only warn.
    store(\%reverseIndex, $reverseIndexFile)
        or warn "$0 : failed to save index file '$reverseIndexFile'\n";
}

#print Dumper(%reverseIndex);

print "Suchwort: ";

my $eingabe = <>;
$eingabe = '' unless defined $eingabe;    # EOF before any input was typed

chomp $eingabe;
$eingabe = lc $eingabe;

# Word entries are references, file-name entries ('#<id>') are plain strings.
# Requiring a reference keeps an input such as '#0' from being dereferenced.
my $entry = $reverseIndex{$eingabe};

if (ref($entry))
{
    my %hitsPerFile = %{ ${$entry} };

    print "Suchwort '" . $eingabe . "' --- gefundene Dokumente: \n";

    # File ids are integers, so sort them numerically.
    foreach my $fileId (sort { $a <=> $b } keys %hitsPerFile)
    {
        printf "\t%5d : %30s = %3d\n", $fileId, $reverseIndex{'#'.$fileId}, $hitsPerFile{$fileId};
    }
}
else
{
    print "Suchwort '" . $eingabe . "' nicht gefunden\n";
}
