diff options
| author | Stefan Suhren <suhren.stefan@fh-swf.de> | 2015-12-02 19:37:37 +0100 |
|---|---|---|
| committer | Stefan Suhren <suhren.stefan@fh-swf.de> | 2015-12-02 19:37:37 +0100 |
| commit | 6918059cd5f97720943170f18f6c10361e20b835 (patch) | |
| tree | 8ff723beb25d1ccc13abb252ce79120683c26739 | |
| parent | 75ac48742bdf2826a135375fe1acf5f399bbbaf3 (diff) | |
| download | Skriptsprachen-6918059cd5f97720943170f18f6c10361e20b835.tar.gz Skriptsprachen-6918059cd5f97720943170f18f6c10361e20b835.zip | |
Build reverse search index
| -rw-r--r-- | Aufgabe5/skript4.pl | 73 |
1 files changed, 73 insertions, 0 deletions
#!/usr/bin/env perl
#===============================================================================
#
#         FILE: skript4.pl
#
#        USAGE: ./skript4.pl
#
#  DESCRIPTION: Builds a reverse search index (word -> ids of the documents
#               containing it) over doc.einfach/doc1.txt .. doc4.txt and
#               dumps the resulting structure with Data::Dumper.
#
#      OPTIONS: ---
# REQUIREMENTS: searchengine module, stoplist.txt, doc.einfach/doc{1..4}.txt
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Stefan Suhren (su), suhren.stefan@fh-swf.de
# ORGANIZATION: FH Südwestfalen, Iserlohn
#      VERSION: 1.0
#      CREATED: 02.12.2015 14:14:02
#     REVISION: ---
#===============================================================================

use strict;
use warnings;
use utf8;

# Add own module.
# NOTE(review): buildStoplist() and getWords() are not defined in this file;
# presumably they are exported by this module -- confirm.
use searchengine;

# For dumping data
use Data::Dumper;

# enforce utf-8 mode
binmode (STDIN, ":encoding(UTF-8)");
binmode (STDOUT, ":encoding(UTF-8)");
binmode (STDERR, ":encoding(UTF-8)");
use open ":encoding(UTF-8)";

my $stoplistFile = "stoplist.txt";
my %stoplist;

# Fill %stoplist from the stop list file (done through the passed reference).
buildStoplist($stoplistFile, \%stoplist);

# Document id => path of the document file.
my %documentName;

for my $i (1 .. 4)
{
    $documentName{$i} = 'doc.einfach/doc'.$i.'.txt';
}

# Reverse index: word => { document id => '' }.
# Only the keys of the inner hash matter; the empty string is a placeholder.
my %reverseIndex;

# Visit the documents in numeric id order so that the run (and the first
# reported error, if any) is deterministic instead of hash-order dependent.
foreach my $key (sort { $a <=> $b } keys %documentName)
{
    my $wordFileName = $documentName{$key};

    open my $wordFile, '<', $wordFileName
        or die "$0 : failed to open input file '$wordFileName' : $!\n";

    # Read in slurp mode
    my $fileContent = do {
        local $/ = undef;
        <$wordFile>;
    };

    close $wordFile
        or warn "$0 : failed to close input file '$wordFileName' : $!\n";

    foreach my $word (getWords($fileContent, \%stoplist))
    {
        # Plain nested hash access. The former `${$reverseIndex{$word}}->{...}`
        # autovivified a scalar reference around the inner hash, so every
        # entry ended up as a reference to a hash reference.
        $reverseIndex{$word}{$key} = '';
    }
}

# Sorted keys give a stable dump across runs.
$Data::Dumper::Sortkeys = 1;

# Pass a reference: Dumper(%reverseIndex) would flatten the hash into a list
# and print every key and value as a separate, unrelated $VARn.
print Dumper(\%reverseIndex);
