#!/usr/bin/perl -w
#
############################################################################
#
#   Name:         simplerobot.pl
#   Author:       pete@rasterweb.net
#
#   $Id: simplerobot.pl,v 1.3 2003/10/07 18:27:32 pete Exp pete $
#
############################################################################
#
# Crawls a site starting from the URL given on the command line, printing
# progress for each page visited and each broken link reported by
# WWW::SimpleRobot, then prints a sorted list of every URL visited.
#
############################################################################

############################################################################
# use modules

use strict;
use warnings;

use WWW::SimpleRobot;

############################################################################
# get input

my $checkurl = shift @ARGV;

# A missing or empty start URL is a usage error: report it on STDERR and
# exit non-zero so calling scripts can detect the failure.
if ( !defined $checkurl or $checkurl eq "" ) {
    print STDERR "\n\tusage: simplerobot.pl http://www.example.com/\n\n";
    exit 1;
}

# Every URL passed to VISIT_CALLBACK, collected for the final report.
my @allurls;

############################################################################
# create robot object

my $robot = WWW::SimpleRobot->new(
    URLS         => [ $checkurl ],

    # Only follow links that begin with the start URL.  \Q...\E escapes
    # regex metacharacters in the URL ('.', '?', '+', ...) so it is
    # matched literally rather than interpreted as a pattern.
    FOLLOW_REGEX => "^\Q$checkurl\E",

    DEPTH        => 20,
    TRAVERSAL    => 'depth',

    # Called with the page URL, its depth, its HTML and a reference to
    # the list of links found on it.
    VISIT_CALLBACK => sub {
        my ( $url, $depth, $html, $links ) = @_;

        print "Visiting: $url\n";
        print " Depth: $depth\n";
        # print "HTML = $html\n";

        # @$links is in scalar context here, so this prints the link count.
        print " Links: " . @$links . "\n\n";

        push @allurls, $url;
    },

    # Called with the failing URL, the page that linked to it, and the
    # depth at which it was found.
    BROKEN_LINK_CALLBACK => sub {
        my ( $url, $linked_from, $depth ) = @_;

        print "$url looks like a broken link on $linked_from\n";
        print "Depth = $depth\n";
    },
);

############################################################################
# do the traversal

$robot->traverse;

############################################################################
# print report of all urls, sorted

print "\nThe following urls were found:\n\n";

for my $found_url ( sort @allurls ) {
    print " $found_url\n";
}

print "\n\n";

__END__

=head1 NAME

simplerobot.pl - crawl a site and list every URL visited

=head1 SYNOPSIS

    simplerobot.pl http://www.example.com/

=head1 DESCRIPTION

This script acts like a search engine spider: what it can see and index
is what a search engine can see and index.

=head1 AUTHOR

pete E<lt>pete@rasterweb.netE<gt>

=head1 LICENSE

This is free software; you may use it and distribute it under the same
terms as Perl itself.

=cut