blob: b052afc9457b6cf4102d375ec30ecb49fad67d92 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
#!/usr/bin/env perl
#===============================================================================
#
# FILE: skript5.pl
#
# USAGE: ./skript5.pl
#
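# DESCRIPTION: Builds a reverse word index over the files matching
#              doc.lang/*.txt (or loads it from the cached '.index' file)
#              and prints the documents containing one search word that is
#              read from input.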
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Stefan Suhren (su), suhren.stefan@fh-swf.de
# ORGANIZATION: FH Südwestfalen, Iserlohn
# VERSION: 1.0
# CREATED: 02.12.2015 19:49:23
# REVISION: ---
#===============================================================================
use strict;
use warnings;
use utf8;
# Add own module
use searchengine;
# For dumping data
use Data::Dumper;
use Storable; # persistence for Perl data structures
no warnings "once";                   # interwork_56_64bit is used only once
$Storable::interwork_56_64bit = 1;    # required on 64-bit systems

#-------------------------------------------------------------------------------
# Main script: obtain the reverse index (from the cache file if present,
# otherwise by scanning the document base), then look up one search word.
#
# Layout of %reverseIndex (also the on-disk format of the cache file, so it
# is kept unchanged to stay compatible with existing '.index' files):
#   '#<fileId>' => name of the document with that id (plain string)
#   <word>      => reference to a scalar that holds a hash ref
#                  { <fileId> => number of occurrences of <word> }
#-------------------------------------------------------------------------------
my %reverseIndex;
my $reverseIndexFile = '.index';

if (-e $reverseIndexFile)
{
    print "Load index from File.\n";

    # retrieve() returns undef on I/O errors; fail loudly instead of
    # dereferencing undef below.
    my $reverseIndexPtr = retrieve($reverseIndexFile);
    die "$0 : failed to load index file '$reverseIndexFile' : $!\n"
        unless defined $reverseIndexPtr;
    die "$0 : index file '$reverseIndexFile' does not contain a hash\n"
        unless ref $reverseIndexPtr eq 'HASH';

    %reverseIndex = %{$reverseIndexPtr};
}
else
{
    print "Build and save index to File.\n";

    # buildStoplist() and getWords() come from the project's searchengine
    # module; presumably they fill %stoplist from the file and return the
    # non-stop words of a text -- confirm against that module.
    my $stoplistFile = "stoplist.engl.txt";
    my %stoplist;
    buildStoplist($stoplistFile, \%stoplist);

    my $documentBase = 'doc.lang/*.txt';
    my @documentList = glob $documentBase;

    my $wordFileId = 0;
    foreach my $wordFileName (@documentList)
    {
        # Remember which file belongs to this id.
        $reverseIndex{'#'.$wordFileId} = $wordFileName;

        open my $wordFile, '<', $wordFileName
            or die "$0 : failed to open input file '$wordFileName' : $!\n";

        # Read in slurp mode
        my $fileContent = do {
            local $/ = undef;
            <$wordFile>;
        };

        close $wordFile
            or warn "$0 : failed to close input file '$wordFileName' : $!\n";

        # Count every returned word for the current document.
        foreach my $word (getWords($fileContent, \%stoplist))
        {
            ${ $reverseIndex{$word} }->{$wordFileId}++;
        }

        $wordFileId++;
    }

    # store() returns undef on I/O errors. The index is still usable in
    # memory, so only warn and carry on with the search.
    store(\%reverseIndex, $reverseIndexFile)
        or warn "$0 : failed to save index file '$reverseIndexFile' : $!\n";
}

#print Dumper(%reverseIndex);

print "Suchwort: ";
my $eingabe = <>;
$eingabe = '' unless defined $eingabe;    # EOF: avoid "uninitialized" warnings
chomp $eingabe;
$eingabe = lc $eingabe;

# Only word entries are references; the '#<fileId>' entries are plain file
# names and must not be treated as search hits (dereferencing them would die
# under "strict refs").
my $entry = exists $reverseIndex{$eingabe} ? $reverseIndex{$eingabe} : undef;

if (ref $entry)
{
    my $countFor = ${$entry};    # hash ref: fileId => occurrences

    print "Suchwort '" . $eingabe . "' --- gefundene Dokumente: \n";

    # Explicit %{...} dereference: calling keys() on a scalar holding a hash
    # ref is no longer valid Perl. File ids are numbers, so sort numerically.
    foreach my $fileId (sort { $a <=> $b } keys %{$countFor})
    {
        printf "\t%5d : %30s = %3d\n",
            $fileId, $reverseIndex{'#'.$fileId}, $countFor->{$fileId};
    }
}
else
{
    print "Suchwort '" . $eingabe . "' nicht gefunden\n";
}
|