# ODP::Site - An ODP public page site handling class (Version 0.01)
# Copyright (C)2002 Richard P. Fuller

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

package ODP::Site;

use strict;
use warnings;

use HTTP::Request;
use LWP::UserAgent;
use URI::Escape;

# new - Initialises a new ODP::Site object
# Parameters: url (required), title, desc, mediadate, cool, category, agetag
# Returns: ODP::Site object (or an object of the invoking subclass)
# Dies: 'No URL supplied.' when url is missing or false
sub new {
    my ($invocant, $url, $title, $desc, $mediadate, $cool, $category, $agetag) = @_;

    if (!$url) {
        die 'No URL supplied.';
    }

    # Two-argument bless so that subclasses get objects of their own class;
    # fall back to this package if no usable invocant was passed.
    my $class = ref($invocant) || $invocant || __PACKAGE__;

    my $object = {
        'url'       => $url,
        'title'     => $title,
        'desc'      => $desc,
        'mediadate' => $mediadate,
        'cool'      => $cool,
        'category'  => $category,
        'agetag'    => $agetag,
    };

    # The ISO form is only stored when a media date was supplied.
    if ($object->{'mediadate'}) {
        $object->{'mediadate_iso'} = _iso8601ify($object->{'mediadate'});
    }

    return bless $object, $class;
}

# convert - Converts the character set of the title and description
# Parameters: from charset, to charset
# Returns: the converted description (kept for backward compatibility;
#          the title and description are updated in place on the object)
# Dies: when Text::Iconv cannot build a converter for the requested charsets
sub convert {
    my ($self, $from, $to) = @_;

    # Text::Iconv is loaded lazily so it is only needed by callers of convert.
    require Text::Iconv;
    Text::Iconv->import();

    my $converter = Text::Iconv->new($from, $to)
        or die "Unable to create a Text::Iconv converter from '$from' to '$to'";

    $self->{'title'} = $converter->convert($self->{'title'});
    $self->{'desc'}  = $converter->convert($self->{'desc'});

    return $self->{'desc'};
}

# httpstatus - Returns the HTTP status code of the URL
# Parameters: get? (if true, a GET request is made rather than a HEAD request;
#             the page content itself is not inspected by this method)
# Returns: status (Integer) - see _effective_status for the special values
sub httpstatus {
    my ($self, $get) = @_;

    my ($res, $status) = $self->_request($get ? 'GET' : 'HEAD');

    return $status;
}

# httpget - Returns the content of a URL
# Parameters: None
# Returns: list of (content of an HTTP GET, status code); the status code
#          follows the same rules as httpstatus
sub httpget {
    my $self = shift;

    my ($res, $status) = $self->_request('GET');

    return $res->content, $status;
}

# editurl - Returns the edit URL for the URL
# Parameters: none
# Returns: URL (String)
# NOTE(review): the category is appended without URI escaping - confirm that
# the editor CGI expects it in that form before changing this.
sub editurl {
    my $self = shift;

    # An undefined category is rendered as an empty string.
    my $category = defined $self->{'category'} ? $self->{'category'} : '';

    return "http://dmoz.org/editors/editurl.cgi?url="
        . uri_escape($self->{'url'})
        . "&cat=$category";
}

# _request - Performs one HTTP request against the object's URL
# Parameters: HTTP method name ('GET' or 'HEAD')
# Returns: list of (HTTP::Response object, status code from _effective_status)
sub _request {
    my ($self, $method) = @_;

    my $ua = LWP::UserAgent->new;
    $ua->agent("ODP::/0.01");

    my $res = $ua->request(HTTP::Request->new($method => $self->{'url'}));

    return ($res, _effective_status($res));
}

# _effective_status - Works out the status code to report for a response
# Parameters: HTTP::Response object
# Returns: status (Integer):
#            -1  the 500 message reports a bad hostname
#            -4  the 500 message reports a refused connection
#            -5  the 500 message reports a connection timeout
#          the code of the preceding response when a 2xx result was reached
#          through a redirect, otherwise the response's own code
sub _effective_status {
    my ($res) = @_;

    my $status = $res->code;

    # 500 really is quite unhelpful here
    if ($status == 500) {
        my $message = $res->message();
        $message = '' if !defined $message;

        if ($message =~ /^Can't connect to .*? \(Bad hostname/) {
            $status = -1;
        }
        if ($message =~ /^Can't connect to .*? \(connect: Connection refused/) {
            $status = -4;
        }
        if ($message =~ /^Can't connect to .*? \(connect: timeout/) {
            $status = -5;
        }
    }
    elsif ($status =~ /^2/) {
        # Check for redirects
        my $previous = $res->previous;
        if ($previous) {
            # There was a redirect: report its status rather than the final 2xx
            $status = $previous->code;
        }
    }

    return $status;
}

# _iso8601ify - Convert an ODP public page date into an ISO8601 compliant date
# Parameters: date in ODP public page format ('YYYY' or 'Month D, YYYY')
# Returns: date in ISO8601 format; nothing when the date is not recognised
#          (including an unknown month name)
# TODO: Handle World dates
sub _iso8601ify {
    my $date = shift;

    my %months = (
        'January'   => 1,
        'February'  => 2,
        'March'     => 3,
        'April'     => 4,
        'May'       => 5,
        'June'      => 6,
        'July'      => 7,
        'August'    => 8,
        'September' => 9,
        'October'   => 10,
        'November'  => 11,
        'December'  => 12,
    );

    # A bare four-digit year is passed through unchanged.
    if ($date =~ m!^\d\d\d\d$!) {
        return $date;
    }

    if ($date =~ m!^(\w+) (\d+), (\d+)$!) {
        my ($month, $day, $year) = ($1, $2, $3);

        # Only the twelve English month names above are understood.
        return if !exists $months{$month};

        my $a_month = sprintf('%02d', $months{$month});
        my $a_day   = length($day) < 2 ? "0$day" : $day;

        return "$year-$a_month-$a_day";
    }

    return;
}

1;