summary | refs | log | tree | commit | diff | stats
path: root/Aufgabe5/searchengine.pm
diff options
context:
space:
mode:
author: Stefan Suhren <suhren.stefan@fh-swf.de> 2015-12-02 16:52:58 +0100
committer: Stefan Suhren <suhren.stefan@fh-swf.de> 2015-12-02 16:53:25 +0100
commit: 75ac48742bdf2826a135375fe1acf5f399bbbaf3 (patch)
tree: 6ecc9a10b9165f3a23eee282b533731e7912840c /Aufgabe5/searchengine.pm
parent: 448a7d3303561035df18267c6ca1e5d9df55c049 (diff)
download: Skriptsprachen-75ac48742bdf2826a135375fe1acf5f399bbbaf3.tar.gz
download: Skriptsprachen-75ac48742bdf2826a135375fe1acf5f399bbbaf3.zip
Create word array and test it
Diffstat (limited to 'Aufgabe5/searchengine.pm')
-rw-r--r-- Aufgabe5/searchengine.pm 26
1 files changed, 24 insertions, 2 deletions
diff --git a/Aufgabe5/searchengine.pm b/Aufgabe5/searchengine.pm
index 9a1690b..9a70367 100644
--- a/Aufgabe5/searchengine.pm
+++ b/Aufgabe5/searchengine.pm
@@ -33,10 +33,10 @@ use open ":encoding(UTF-8)";
our @ISA= qw( Exporter );
# these CAN be exported.
-our @EXPORT_OK = qw( buildStoplist );
+our @EXPORT_OK = qw( buildStoplist getWords );
# these are exported by default.
-our @EXPORT = qw( buildStoplist );
+our @EXPORT = qw( buildStoplist getWords );
sub buildStoplist {
my ( $stoplistFileName, $stoplist ) = @_;
@@ -57,3 +57,25 @@ sub buildStoplist {
or warn "$0 : failed to close input file '$stoplistFileName' : $!\n";
} ## --- end sub buildStoplist
+
+
+# Return the indexable words of $text, in order of appearance (duplicates kept).
+# A word is a run of three or more letters; words are lower-cased and those
+# present as keys of the hash ref $stoplist are dropped.
+# Dies when either argument is undefined.
+sub getWords {
+    my ( $text, $stoplist ) = @_;
+
+    defined $text     or die "Text must be supplied";
+    defined $stoplist or die "Stoplist hash must be supplied";
+
+    # Tokenize on whitespace, then take EVERY letter run of each token (/g),
+    # so "foo-bar" or "end.Next" yield both words rather than only the first.
+    my @words = map { /([[:lower:]]{3,})/gi } split /[[:space:]]+/, $text;
+
+    # Normalize case before the stoplist lookup.
+    @words = map { lc $_ } @words;
+
+    # Filter with grep: only words missing from the stoplist survive.
+    return grep { !exists $stoplist->{$_} } @words;
+} ## --- end sub getWords