blob: 4ec4beed96570e853eac7f53d480284f5333f05a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
#!/usr/bin/env perl
#===============================================================================
#
# FILE: skript5.pl
#
# USAGE: ./skript5.pl
#
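# DESCRIPTION: Builds a reverse word index over doc.lang/*.txt (cached in
#              the file '.index') and answers interactive word queries.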
#
# OPTIONS: ---
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: Stefan Suhren (su), suhren.stefan@fh-swf.de
# ORGANIZATION: FH Südwestfalen, Iserlohn
# VERSION: 1.0
# CREATED: 02.12.2015 19:49:23
# REVISION: ---
#===============================================================================
use strict;
use warnings;
use utf8;
# Add own module
use searchengine;
# For dumping data
use Data::Dumper;
use Storable; # persistence for Perl data structures
no warnings "once"; # interwork_56_64bit is used only once in this file:
$Storable::interwork_56_64bit = 1; # required on 64-bit systems
# Reverse index. Two kinds of keys live in this hash:
#   '#<id>'  => file name of the document with numeric id <id>
#   '<word>' => reference to a scalar that holds a hash reference
#               { <id> => number of occurrences of <word> in document <id> }
# The double indirection for words is kept as-is so that index files written
# by earlier runs of this script stay readable.
my $reverseIndex = {};
# File used to persist the index between runs.
my $reverseIndexFile = '.index';
if (-e $reverseIndexFile)
{
    print "Load index from File.\n";
    # retrieve() returns undef on I/O errors; without this check the script
    # would silently continue with an empty index.
    $reverseIndex = retrieve($reverseIndexFile)
        or die "$0 : failed to load index from '$reverseIndexFile' : $!\n";
}
else
{
    print "Build and save index to File.\n";
    my $stoplistFile = "stoplist.engl.txt";
    my %stoplist;
    # buildStoplist comes from the searchengine module; presumably it fills
    # %stoplist from the given file -- verify against the module.
    buildStoplist($stoplistFile, \%stoplist);
    my $documentBase = 'doc.lang/*.txt';
    my @documentList = glob $documentBase;
    my $wordFileId = 0;
    foreach my $wordFileName (@documentList)
    {
        # Remember which file name belongs to this document id.
        $reverseIndex->{'#'.$wordFileId} = $wordFileName;
        open my $wordFile, '<', $wordFileName
            or die "$0 : failed to open input file '$wordFileName' : $!\n";
        # Read the whole file at once (slurp mode).
        my $fileContent = do {
            local $/ = undef;
            <$wordFile>;
        };
        close $wordFile
            or warn "$0 : failed to close input file '$wordFileName' : $!\n";
        # getWords comes from the searchengine module; presumably it returns
        # the words of the text that are not on the stop list -- verify.
        foreach my $word (getWords($fileContent, \%stoplist))
        {
            # Autovivifies the scalar ref and the per-word counter hash.
            ${ $reverseIndex->{$word} }->{$wordFileId}++;
        }
        $wordFileId++;
    }
    # store() returns undef on I/O errors. The in-memory index is still
    # usable, so only warn and keep going.
    store($reverseIndex, $reverseIndexFile)
        or warn "$0 : failed to save index to '$reverseIndexFile' : $!\n";
}
#print Dumper($reverseIndex);
# Interactive query loop.
# Reads one query per line, lower-cases it and looks it up in $reverseIndex.
# Entering 'q' (or reaching end of input) ends the loop. Queries starting
# with '#' are ignored because those keys hold the document-id => file-name
# entries of the index, not words.
my $eingabe;
do {
    print "Query (q=quit): ";
    $eingabe = <>;
    # At end of input <> returns undef; treat that like 'q'. Otherwise the
    # loop would never terminate and would emit "uninitialized" warnings.
    $eingabe = 'q' if !defined $eingabe;
    chomp $eingabe;
    $eingabe = lc $eingabe;
    if ($eingabe ne 'q' && $eingabe !~ /^#/)
    {
        print "Search started...\n";
        if (exists $reverseIndex->{$eingabe})
        {
            print "Suchwort '" . $eingabe . "' --- gefundene Dokumente: \n";
            # Each word entry is a reference to a scalar holding the hash
            # reference { document id => occurrence count }.
            my $countOf = ${ $reverseIndex->{$eingabe} };
            # Explicit %{...} dereference: calling keys on a hash reference
            # is a syntax error since Perl 5.24. Document ids are integers,
            # so sort them numerically.
            my @fileIds = sort { $a <=> $b } keys %{$countOf};
            my $shown = 0;
            foreach my $fileId (@fileIds)
            {
                $shown++;
                printf "\t%5d : %30s = %3d\n", $shown, $reverseIndex->{'#'.$fileId}, $countOf->{$fileId};
                # Pause after every full page of 5 hits, but only if there
                # is something left to show.
                if ($shown % 5 == 0 && $shown < scalar @fileIds)
                {
                    print "\t... weiter [j/n]\n";
                    my $next = <>;
                    # End of input counts as "do not continue".
                    last if !defined $next;
                    chomp $next;
                    last if $next ne 'j';
                }
            }
        }
        else
        {
            print "Suchwort '" . $eingabe . "' nicht gefunden\n";
        }
    }
} while ($eingabe ne 'q');
|