diff options
| author | Stefan Suhren <suhren.stefan@fh-swf.de> | 2015-12-02 16:52:58 +0100 |
|---|---|---|
| committer | Stefan Suhren <suhren.stefan@fh-swf.de> | 2015-12-02 16:53:25 +0100 |
| commit | 75ac48742bdf2826a135375fe1acf5f399bbbaf3 (patch) | |
| tree | 6ecc9a10b9165f3a23eee282b533731e7912840c /Aufgabe5/searchengine.pm | |
| parent | 448a7d3303561035df18267c6ca1e5d9df55c049 (diff) | |
| download | Skriptsprachen-75ac48742bdf2826a135375fe1acf5f399bbbaf3.tar.gz Skriptsprachen-75ac48742bdf2826a135375fe1acf5f399bbbaf3.zip | |
Create word array and test it
Diffstat (limited to 'Aufgabe5/searchengine.pm')
| -rw-r--r-- | Aufgabe5/searchengine.pm | 26 |
1 files changed, 24 insertions, 2 deletions
diff --git a/Aufgabe5/searchengine.pm b/Aufgabe5/searchengine.pm
index 9a1690b..9a70367 100644
--- a/Aufgabe5/searchengine.pm
+++ b/Aufgabe5/searchengine.pm
@@ -33,10 +33,10 @@ use open ":encoding(UTF-8)";
 our @ISA= qw( Exporter );
 
 # these CAN be exported.
-our @EXPORT_OK = qw( buildStoplist );
+our @EXPORT_OK = qw( buildStoplist getWords );
 
 # these are exported by default.
-our @EXPORT = qw( buildStoplist );
+our @EXPORT = qw( buildStoplist getWords );
 
 sub buildStoplist {
     my ( $stoplistFileName, $stoplist ) = @_;
@@ -57,3 +57,25 @@ sub buildStoplist {
         or warn "$0 : failed to close input file '$stoplistFileName' : $!\n";
 
 } ## --- end sub buildStoplist
+
+
+sub getWords {
+    my ( $text, $stoplist ) = @_;
+
+    defined $text or die "Text must be supplied";
+    defined $stoplist or die "Stoplist hash must be supplied";
+
+    # Split at whitespaces
+    my @words = split /[[:space:]]+/, $text;
+
+    # Apply regex
+    @words = map /([[:lower:]]{3,})/i , @words;
+
+    # Convert to lower case
+    @words = map {lc $_} @words;
+
+    # Remove all words that are in the stoplist
+    @words = map {!exists $stoplist->{$_} ? ($_) : ()} @words;
+
+    return @words;
+} ## --- end sub getWords
