# ODP::Search - An ODP public page search results handling class (Version 0.01)
# Copyright (C)2002 Richard P. Fuller
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package ODP::Search;

use strict;
use warnings;

use ODP::Site;
use ODP::Category;

# Page size used by all paging arithmetic in this class (result number <->
# page number, and the "start" offset sent to the search service).
my $RESULTS_PER_PAGE = 20;

# new - Initialises a new ODP::Search object
# Parameters: search term, category (optional), restrict to that category (optional, boolean)
# Returns: ODP::Search object
# Dies: with 'No search term supplied.' when the term is undefined or empty
sub new {
    my ($class, $term, $category, $restrict) = @_;

    # Test for defined/non-empty rather than truth so the term "0" is accepted.
    if (!defined $term || $term eq '') {
        die 'No search term supplied.';
    }

    my $object = {};
    $object->{'term'}     = $term;
    # Scalar assignment keeps normalise() in scalar context.
    $object->{'category'} = ODP::Category::normalise($category);
    $object->{'restrict'} = $restrict;

    # Two-argument bless so subclasses get objects of their own class; fall
    # back to this package if new() was called as a plain function.
    return bless $object, (ref($class) || $class || __PACKAGE__);
}

# fetch - Fetches one search results page and stores its source in the object
#         (in $self->{'content'}[page])
# Parameters: Page number (1-based; a missing or smaller value is treated as 1)
# Returns: The page source as returned by LWP::Simple::get (undef on failure)
sub fetch {
    my ($self, $page) = @_;
    $page = 1 if !defined $page || $page < 1;

    # Pages after the first are requested by the 1-based number of their
    # first result.
    my $start = '';
    if ($page > 1) {
        $start = '&start=' . ((($page - 1) * $RESULTS_PER_PAGE) + 1);
    }

    my $end = '';
    if ($self->{'category'}) {
        $end = '&cat=' . $self->{'category'};
        if ($self->{'restrict'}) {
            $end .= '&all=no';
        }
    }

    # NOTE(review): term and category are interpolated unescaped; callers
    # presumably pass URL-safe values - confirm before adding escaping here,
    # since escaping would double-encode already-escaped input.
    my $term = $self->{'term'};
    my $url  = "http://search.dmoz.org/cgi-bin/search?search=$term$start$end";

    # Loaded on demand: this file calls LWP::Simple::get by its full name but
    # never loads the module itself.
    require LWP::Simple;

    $self->{'content'}[$page] = LWP::Simple::get($url);
    return $self->{'content'}[$page];
}

# resultcount - Returns the number of search results for the term
# Parameters:
# Returns: Number of search results as captured from the first results page,
#          or undef if the page could not be fetched or the count was not found
sub resultcount {
    my $self = shift;

    if (!$self->{'content'}[1]) {
        $self->fetch(1);
    }
    my $page = $self->{'content'}[1];

    # NOTE(review): the two line breaks before the closing delimiter are part
    # of the pattern as found in this copy of the file; it looks like markup
    # may have been lost there - verify against a pristine copy.
    my $return;
    if (defined $page && $page =~ /Open Directory Sites<\/b><\/font> \([^\-]+\-[^ ]+ of ([^\)]+)\)

/) {
        $return = $1;
    }
    return $return;
}

# result - Returns a search result (Note: first result is result(1)... also,
#          last result can be referenced as result(-1))
# Parameters: Number of result
# Returns: ODP::Site object for the search result, or undef when the number is
#          0/undefined, out of range, or the page could not be fetched
#          (always a single scalar, so "return undef" is deliberate here)
sub result {
    my ($self, $whichresult) = @_;

    return undef unless defined $whichresult;
    $whichresult = int($whichresult);

    # Negative numbers count back from the last result.
    if ($whichresult < 0) {
        my $count = $self->resultcount();
        return undef unless defined $count;
        # Keep digits only, in case the captured count contains separators.
        $count =~ tr/0-9//cd;
        return undef if $count eq '';
        $whichresult = $count + $whichresult + 1;
    }

    # Results are numbered from 1; anything lower has no meaning and would
    # otherwise index the parsed page from its end.
    return undef if $whichresult < 1;

    if ($self->{'results'}[$whichresult]) {
        return $self->{'results'}[$whichresult];
    }

    my $page         = int(($whichresult - 1) / $RESULTS_PER_PAGE) + 1;
    my $resultnumber = ($whichresult - 1) % $RESULTS_PER_PAGE;

    if (!$self->{'content'}[$page]) {
        $self->fetch($page);
    }
    my $content = $self->{'content'}[$page];
    return undef unless defined $content;

    # Work on the local copy only: drop bold tags, then peel off one result
    # entry per iteration.
    $content =~ s/<\/?b>//g;

    # NOTE(review): this pattern has three capture groups, yet the code below
    # reads $1..$5 (url, title, editor's-choice flag, description, category).
    # The markup inside the pattern appears to have been stripped from this
    # copy of the file; restore it from a pristine copy. It is reproduced
    # here exactly as found.
    my @localresults;
    while ($content =~ s/

  • ([^<]+)<\/a>(Editor's Choice   )? \- (.*?)
    \-\- [^ ]+  
    [^<]+<\/a>//) {
        my ($url, $title, $choice, $desc, $category) = ($1, $2, $3, $4, $5);
        my $cool = $choice ? 1 : 0;
        push @localresults,
            ODP::Site->new($url, $title, $desc, '', $cool, $category);
    }

    # Cache every result parsed from this page so later calls for other
    # results on the same page do not re-parse it.
    my $first = (($page - 1) * $RESULTS_PER_PAGE) + 1;
    for my $i (0 .. $#localresults) {
        $self->{'results'}[$first + $i] ||= $localresults[$i];
    }

    return $localresults[$resultnumber];
}

1;