WWW::Muxtape::Scraper
http://github.com/dann/muxtape-scraper/tree/master
先日作ったMuxtapeのscraperをMoose化してみました。
package WWW::Muxtape::Scraper; use Moose; use Module::Pluggable::Fast name => 'scrapers', search => [qw( WWW::Muxtape::Scraper::Pages )]; use String::CamelCase qw( decamelize ); our $VERSION = '0.01'; around 'new' => sub { my ( $next, $class, @args ) = @_; $class->_load_scrapers; return $next->( $class, @args ); }; sub _load_scrapers { foreach my $scraper ( __PACKAGE__->scrapers ) { my ($name) = decamelize( ref $scraper ) =~ /(\w+)$/; __PACKAGE__->meta->add_attribute( $name => ( is => "rw", default => sub { return $scraper; } ) ); } } 1;
実際のscraper定義は、WWW::Muxtape::Scraper::Pages::XXXのモジュールが担当します。
package WWW::Muxtape::Scraper::Pages::TapePage; use Moose; use MooseX::Method; use Web::Scraper; has 'rule' => ( is => 'rw', default => sub { scraper { process 'div.flag h1', 'title', 'TEXT'; process 'div.flag h2', 'description', 'TEXT'; process 'a.drawer_control', 'fans', sub { if ( $_->as_text =~ m/([0-9]+) fans/ ) { return $1; } else { return '0'; } }; process 'li.stripe', 'songs[]' => scraper { process 'span.artist', 'artist', 'TEXT'; process 'span.title', 'title', 'TEXT'; }; }; }, ); method scrape => named( tapename => { isa => 'Str', required => 1 }, ) => sub { my ( $self, $args ) = @_; my $result = $self->rule->scrape( $self->_get_url( tapename => $args->{tapename} ) ); return $result; }; method _get_url => named( tapename => { isa => 'Str', required => 1 }, ) => sub { my ( $self, $args ) = @_; my $url = 'http://' . $args->{tapename} . '.muxtape.com'; return URI->new($url); }; 1;
使うときは、以下のような感じで使えます。
use WWW::Muxtape::Scraper; my $muxtape = WWW::Muxtape::Scraper->new; my $tape_lists = $muxtape->top_page->scrape(); my $tapes = []; foreach my $tape ( @{$tape_lists}) { push @{$tapes}, $muxtape->tape_page->scrape(tapename => $tape->{tapename}); } print Dumper $tapes;
TODO
WWW::Muxtape::Scraper::Pages::XXXのベースクラスを用意したいですね。