WWW::Muxtape::Scraper

http://github.com/dann/muxtape-scraper/tree/master
先日作ったMuxtapeのscraperをMoose化してみました。

package WWW::Muxtape::Scraper;
use Moose;

# Plugin discovery: installs a `scrapers` class method that yields one
# object per module found under WWW::Muxtape::Scraper::Pages.
use Module::Pluggable::Fast
    name   => 'scrapers',
    search => [qw( WWW::Muxtape::Scraper::Pages )];
use String::CamelCase qw( decamelize );

our $VERSION = '0.01';

# Wrap the constructor so that the per-page scraper attributes are
# registered on the class *before* the object is built; that way the
# wrapped constructor applies their defaults to the new instance.
around 'new' => sub {
    my ( $next, $class, @args ) = @_;
    $class->_load_scrapers;
    return $next->( $class, @args );
};

# Registers one read-write attribute per discovered page scraper.
#
# The attribute name is the last component of the plugin's class name in
# snake_case (e.g. ...::Pages::TapePage becomes `tape_page`), and its
# default value is the plugin object itself.
#
# This runs on every call to `new`, so attributes that are already
# registered are skipped instead of being redefined each time.
sub _load_scrapers {
    my $meta = __PACKAGE__->meta;

    foreach my $scraper ( __PACKAGE__->scrapers ) {
        my ($name) = decamelize( ref $scraper ) =~ /(\w+)$/;

        # No usable name could be derived from the class name.
        next if !defined $name;

        # Already registered by an earlier constructor call.
        next if $meta->has_attribute($name);

        $meta->add_attribute(
            $name => (
                is      => 'rw',
                default => sub { return $scraper },
            )
        );
    }

    return;
}

1;

実際のscraper定義は、WWW::Muxtape::Scraper::Pages::XXXのモジュールが担当します。

package WWW::Muxtape::Scraper::Pages::TapePage;
use Moose;
use MooseX::Method;

# Loaded explicitly because _get_url calls URI->new; do not rely on
# another module happening to load it first.
use URI;
use Web::Scraper;

# Scraping rule for a single tape page. The result contains:
#   title       - text of 'div.flag h1'
#   description - text of 'div.flag h2'
#   fans        - number captured from "<N> fans" in 'a.drawer_control',
#                 or '0' when that text does not match
#   songs       - one { artist, title } entry per 'li.stripe' element
has 'rule' => (
    is      => 'rw',
    default => sub {
        scraper {
            process 'div.flag h1', 'title',       'TEXT';
            process 'div.flag h2', 'description', 'TEXT';
            process 'a.drawer_control', 'fans', sub {
                return $_->as_text =~ m/([0-9]+) fans/ ? $1 : '0';
            };
            process 'li.stripe', 'songs[]' => scraper {
                process 'span.artist', 'artist', 'TEXT';
                process 'span.title',  'title',  'TEXT';
            };
        };
    },
);

# scrape( tapename => $name )
#
# Applies `rule` to the page of the given tape and returns the scraped
# result. `tapename` is a required string.
method scrape => named(
    tapename => { isa => 'Str', required => 1 },
) => sub {
    my ( $self, $args ) = @_;
    my $url = $self->_get_url( tapename => $args->{tapename} );
    return $self->rule->scrape($url);
};

# _get_url( tapename => $name )
#
# Builds the URI object for a tape: http://<tapename>.muxtape.com
# `tapename` is a required string.
method _get_url => named(
    tapename => { isa => 'Str', required => 1 },
) => sub {
    my ( $self, $args ) = @_;
    my $url = 'http://' . $args->{tapename} . '.muxtape.com';
    return URI->new($url);
};

1;

使い方は以下のとおりです。

    use strict;
    use warnings;
    use Data::Dumper;    # provides Dumper, used for the final output
    use WWW::Muxtape::Scraper;

    my $muxtape = WWW::Muxtape::Scraper->new;

    # top_page / tape_page are accessors named after the Pages::* plugins.
    # NOTE(review): assumes a Pages::TopPage plugin exists whose scrape()
    # returns an array reference of hashes with a `tapename` key -- confirm.
    my $tape_lists = $muxtape->top_page->scrape();

    # Scrape every listed tape and collect the results.
    my $tapes = [];
    foreach my $tape ( @{$tape_lists} ) {
        push @{$tapes},
            $muxtape->tape_page->scrape( tapename => $tape->{tapename} );
    }
    print Dumper $tapes;

TODO

WWW::Muxtape::Scraper::Pages::XXXのベースクラスを用意したいですね。