Web::Scraper - search.cpan.org
use Web::Scraper; - 今日のCPANモジュール(跡地)
www.slideshare.net
sudo apt-get install libweb-scraper-perl
sudo apt-get install libxml-treepp-perl
sudo apt-get install libxml-feedpp-perl
sudo apt-get install libunicode-japanese-perl
$ sudo apt-get install build-essential cpanminus
$ export PERL5LIB="$HOME/perl5/lib/perl5:$PERL5LIB"
$ export PATH="$HOME/perl5/bin:$PATH"
$ cpanm --installdeps .
$ cpanm Text::Data::Dumper
http://search.cpan.org/~miyagawa/Web-Scraper-0.38/
http://search.cpan.org/~cjm/HTML-Tree-5.909-TRIAL/
http://search.cpan.org/~mirod/HTML-TreeBuilder-XPath-0.14/
http://search.cpan.org/~corion/HTML-Selector-XPath-0.18/
http://search.cpan.org/~mirod/XML-XPathEngine-0.14/
http://search.cpan.org/~neilb/UNIVERSAL-require-0.18/
#!/usr/bin/perl
# Scrape the local-news listing page and print each entry's title followed by
# its link, one per line.
use strict;
use warnings;
use Web::Scraper;
use URI;

# The scraped titles may contain non-ASCII characters; give STDOUT an
# encoding layer so they are emitted as UTF-8 rather than triggering
# "Wide character in print" warnings.
binmode STDOUT, ':encoding(UTF-8)';

# Direct method call instead of the indirect-object form `new URI(...)`,
# which is ambiguous to the parser and discouraged.
my $uri = URI->new('http://www.ehime-np.co.jp/news/local/');

# For every anchor matched by the CSS selector, collect its href attribute
# and text content into one hash per entry under the 'list' key.
my $scraper = scraper {
    process '#Contents_L > div > div.news_lists > ul > li > a',
        'list[]' => { link => '@href', title => 'TEXT' };
};

# Passing a URI object makes the scraper fetch the page itself.
my $res = $scraper->scrape($uri);

# Fall back to an empty list when the selector matched nothing, so the
# result hash is not modified by autovivification.
foreach my $item (@{ $res->{list} || [] }) {
    print $item->{title}, "\n";
    print $item->{link},  "\n";
}