summaryrefslogtreecommitdiffstats
path: root/Aufgabe5/skript5.pl
blob: 4ec4beed96570e853eac7f53d480284f5333f05a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env perl 
#===============================================================================
#
#         FILE: skript5.pl
#
#        USAGE: ./skript5.pl  
#
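#  DESCRIPTION: Builds a reverse word index over doc.lang/*.txt (or loads it
#               from the cache file '.index' if that file exists) and then
#               looks up each line entered by the user as a search word.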
#
#      OPTIONS: ---
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: Stefan Suhren (su), suhren.stefan@fh-swf.de
# ORGANIZATION: FH Südwestfalen, Iserlohn
#      VERSION: 1.0
#      CREATED: 02.12.2015 19:49:23
#     REVISION: ---
#===============================================================================

use strict;
use warnings;
use utf8;

# Add own module
use searchengine;

# For dumping data
use Data::Dumper;

use Storable; # persistence for Perl data structures
no warnings "once"; # $Storable::interwork_56_64bit is mentioned only once in this file
$Storable::interwork_56_64bit = 1; # per the original author: required on 64-bit systems -- NOTE(review): confirm against the Storable documentation

# Reverse index, held as a single hash reference with two kinds of keys:
#   '#<id>' => file name of the document with that numeric id
#   <word>  => reference to a hash reference { <id> => occurrence count }
# The extra level of reference on word entries is part of the format of
# already stored index files and of the query code, so it is kept unchanged.
my $reverseIndex = {};

# Cache file for the index, relative to the current working directory
my $reverseIndexFile = '.index';

if (-e $reverseIndexFile)
{
	print "Load index from File.\n";

	# retrieve returns undef on I/O errors; stop instead of silently
	# answering every query with "not found" on an unusable index.
	$reverseIndex = retrieve($reverseIndexFile)
		or die "$0 : failed to load index file '$reverseIndexFile' : $!\n";
}
else
{
	print "Build and save index to File.\n";

	my $stoplistFile = "stoplist.engl.txt";
	my %stoplist;

	# Provided by the searchengine module; presumably fills %stoplist with
	# the words listed in $stoplistFile -- verify against the module.
	buildStoplist($stoplistFile, \%stoplist);

	my $documentBase = 'doc.lang/*.txt';
	my @documentList = glob $documentBase;

	# An empty document set would otherwise be cached as an empty index
	# without any hint to the user.
	warn "$0 : no documents match '$documentBase', index will be empty\n"
		if !@documentList;

	my $wordFileId = 0;

	foreach my $wordFileName (@documentList)
	{
		# Remember which file belongs to this id ('#' marks metadata keys)
		$reverseIndex->{'#'.$wordFileId} = $wordFileName;

		open  my $wordFile, '<', $wordFileName
			or die "$0 : failed to open  input file '$wordFileName' : $!\n";

		# Read in slurp mode
		my $fileContent = do{
			local $/ = undef;
			<$wordFile>;
		};

		close  $wordFile
			or warn "$0 : failed to close input file '$wordFileName' : $!\n";

		# getWords comes from the searchengine module; every word it returns
		# is counted once per occurrence for the current document.
		foreach my $word (getWords($fileContent, \%stoplist))
		{
			${$reverseIndex->{$word}}->{$wordFileId}++;
		}

		$wordFileId++;
	}

	# Saving is best effort: the in-memory index is still usable for this
	# run, so a failed write is reported but not fatal.
	store($reverseIndex, $reverseIndexFile)
		or warn "$0 : failed to save index file '$reverseIndexFile' : $!\n";
}
#print Dumper($reverseIndex);


# Interactive query loop.
# Reads one line per query, lower-cases it and looks it up as a key of
# $reverseIndex. Input starting with '#' is skipped because such keys hold
# the id => file name metadata, not words. Hits are listed in ascending
# document-id order, five per page. The loop ends on 'q' or end of input.
my $eingabe;
do{
	print "Query (q=quit): ";
	$eingabe = <>;

	# End of input (Ctrl-D, closed pipe): behave like an explicit quit.
	# Without this the loop would spin forever on an undefined value.
	$eingabe = 'q' if !defined $eingabe;

	chomp $eingabe;
	$eingabe = lc $eingabe;

	if ($eingabe ne 'q' && $eingabe !~ /^#/)
	{
		print "Search started...\n";
		if (exists $reverseIndex->{$eingabe})
		{
			print "Suchwort '" . $eingabe . "' --- gefundene Dokumente: \n";

			# A word entry is a reference to a hash reference
			# { document id => occurrence count }.
			my $countOf = ${$reverseIndex->{$eingabe}};

			# Explicit %{...}: calling keys on a scalar is not valid Perl
			# in current versions. Ids are numbers, so sort numerically.
			my @fileIds = sort { $a <=> $b } keys %{$countOf};

			my $pageSize = 5;
			my $shown    = 0;
			foreach my $fileId (@fileIds)
			{
				$shown++;
				printf "\t%5d : %30s = %3d\n", $shown, $reverseIndex->{'#'.$fileId}, $countOf->{$fileId};

				# Ask after each full page, but only if more hits follow
				if ($shown % $pageSize == 0 && $shown < @fileIds)
				{
					print "\t... weiter [j/n]\n";
					my $next = <>;

					# End of input while paging: stop listing
					last if !defined $next;

					chomp $next;
					if ($next ne 'j')
					{
						last;
					}
				}
			}
		}
		else
		{
			print "Suchwort '" . $eingabe . "' nicht gefunden\n";
		}
	}
}while ($eingabe ne 'q');