#!/usr/bin/env perl
#===============================================================================
#
#         FILE: skript5.pl
#
#        USAGE: ./skript5.pl
#
#  DESCRIPTION: Interactive single-word search over a reverse (inverted) index.
#               If the file '.index' exists, the index is loaded from it.
#               Otherwise the index is built from the files matching
#               'doc.lang/*.txt' (filtered through the stoplist file
#               'stoplist.engl.txt') and then saved to '.index'.
#
#      OPTIONS: ---
# REQUIREMENTS: searchengine module (presumably the provider of buildStoplist
#               and getWords -- neither is defined in this script)
#         BUGS: ---
#        NOTES: Index layout (kept unchanged so existing '.index' files load):
#                 '#<id>' => file name of document <id>
#                 <word>  => reference to a scalar that holds a hash reference
#                            { <id> => number of occurrences of <word> }
#       AUTHOR: Stefan Suhren (su), suhren.stefan@fh-swf.de
# ORGANIZATION: FH Südwestfalen, Iserlohn
#      VERSION: 1.0
#      CREATED: 02.12.2015 19:49:23
#     REVISION: ---
#===============================================================================

use strict;
use warnings;
use utf8;

# Add own module
use searchengine;

# For dumping data
use Data::Dumper;

use Storable;          # persistence for Perl data structures
no warnings "once";    # interwork_56_64bit is mentioned only once in this file:
$Storable::interwork_56_64bit = 1;    # required on 64-bit systems

my $reverseIndex     = {};
my $reverseIndexFile = '.index';

if ( -e $reverseIndexFile ) {
    print "Load index from File.\n";

    # retrieve() hands back undef on I/O errors; without this check the query
    # loop would silently run against an empty index.
    $reverseIndex = retrieve($reverseIndexFile);
    if ( ref $reverseIndex ne 'HASH' ) {
        die "$0 : failed to load index file '$reverseIndexFile' : $!\n";
    }
}
else {
    print "Build and save index to File.\n";
    $reverseIndex = _buildReverseIndex( "stoplist.engl.txt", 'doc.lang/*.txt' );

    # A failed save is not fatal: the in-memory index is still usable for this
    # session, it just has to be rebuilt on the next start.
    store( $reverseIndex, $reverseIndexFile )
        or warn "$0 : failed to save index file '$reverseIndexFile' : $!\n";
}

# print Dumper($reverseIndex);    # debugging aid

QUERY:
while (1) {
    print "Query (q=quit): ";
    my $eingabe = <>;

    # End of input (e.g. Ctrl-D or an exhausted pipe) counts as "quit";
    # otherwise the loop would spin forever on undef.
    if ( !defined $eingabe ) {
        print "\n";
        last QUERY;
    }

    chomp $eingabe;
    $eingabe = lc $eingabe;
    last QUERY if $eingabe eq 'q';

    # Keys starting with '#' hold file names, not words, so they are never
    # accepted as a query.
    next QUERY if $eingabe =~ /^#/;

    print "Search started...\n";
    if ( exists $reverseIndex->{$eingabe} ) {
        _showHits( $reverseIndex, $eingabe );
    }
    else {
        print "Suchwort '" . $eingabe . "' nicht gefunden\n";
    }
}

# Build the reverse index from all files matching a glob pattern.
#
# Parameters:
#   $stoplistFile - stoplist file name, passed on to buildStoplist()
#   $documentBase - glob pattern selecting the documents to index
# Returns:
#   hash reference in the index layout described in the file header
# Dies:
#   if a document cannot be opened for reading
sub _buildReverseIndex {
    my ( $stoplistFile, $documentBase ) = @_;

    my %stoplist;
    buildStoplist( $stoplistFile, \%stoplist );

    my @documentList = glob $documentBase;
    if ( !@documentList ) {
        warn "$0 : no documents match '$documentBase', index will be empty\n";
    }

    my %index;
    my $wordFileId = 0;
    foreach my $wordFileName (@documentList) {
        $index{ '#' . $wordFileId } = $wordFileName;

        open my $wordFile, '<', $wordFileName
            or die "$0 : failed to open input file '$wordFileName' : $!\n";

        # Read in slurp mode
        my $fileContent = do { local $/ = undef; <$wordFile>; };

        close $wordFile
            or warn "$0 : failed to close input file '$wordFileName' : $!\n";

        # An empty file yields no content; index it as an empty string.
        $fileContent = '' if !defined $fileContent;

        foreach my $word ( getWords( $fileContent, \%stoplist ) ) {

            # A word beginning with '#' could collide with a '#<id>' file-name
            # key; the query loop refuses such input anyway, so skip it.
            next if $word =~ /^#/;

            # Autovivifies scalar ref -> hash ref -> counter.
            ${ $index{$word} }->{$wordFileId}++;
        }
        $wordFileId++;
    }

    return \%index;
}

# Print the documents containing a word, five hits per page.
#
# Parameters:
#   $index - reverse index hash reference (layout: see file header)
#   $word  - query word; the caller has verified that it exists in $index
# Returns:
#   nothing
sub _showHits {
    my ( $index, $word ) = @_;

    my $pageSize   = 5;
    my $countOfDoc = ${ $index->{$word} };

    # Document ids are integers: sort numerically so that 10 follows 9, not 1.
    my @fileIds = sort { $a <=> $b } keys %{$countOfDoc};

    print "Suchwort '" . $word . "' --- gefundene Dokumente: \n";

    my $shown = 0;
    foreach my $fileId (@fileIds) {
        $shown++;
        printf "\t%5d : %30s = %3d\n", $shown, $index->{ '#' . $fileId },
            $countOfDoc->{$fileId};

        # Pause after every full page, but only if more hits follow.
        next if $shown % $pageSize != 0 || $shown >= @fileIds;

        print "\t... weiter [j/n]\n";
        my $next = <>;
        last if !defined $next;    # end of input: stop paging
        chomp $next;
        last if $next ne 'j';
    }

    return;
}