#!perl
# Builds the "Netzpolitik Daily" MOBI e-book: sets up the book and its title
# page, then scrapes the netzpolitik.org front page for article entries and
# collects them in @results for the article loop further down.
#
# NOTE(review): `use strict` is deliberately not enabled here. Later parts of
# this script assign to undeclared variables ($strtime, $tc, $hoursfromnow,
# $tmp) and would stop compiling under strict.

use WWW::Mechanize;
use DateTime::Format::Strptime;
use Encode;
use EBook::MOBI;
use Crypt::SSLeay;
require IO::Socket::SSL;

# Read lastone.txt completely and echo its lines to STDOUT.
# A lexical handle in its own scope replaces the bareword FILE handle.
my @lines;
{
    open my $fh, '<', 'lastone.txt' or die $!;
    @lines = <$fh>;
    close $fh;
}
print @lines;

my $book = EBook::MOBI->new();

# give some meta information about this book
$book->set_filename('./netzpolitik_daily.mobi');
$book->set_title('Netzpolitik Daily');
$book->set_author('netzpolitik.org');
$book->set_encoding(':encoding(UTF-8)');

# lets create our own title page!
# NOTE(review): the title page text carries no markup at all - presumably the
# original tags were lost; confirm against the upstream script.
$book->add_mhtml_content( '

Netzpolitik Daily

' );
$book->add_pagebreak();
$book->add_toc_once('Inhalt');
$book->add_pagebreak();

my $bot = WWW::Mechanize->new();
$bot->agent_alias('Windows IE 6');
$bot->add_header( Encoding => 'text/UTF-8' );

# Unbuffered STDOUT so progress messages show up immediately.
$|++;

my @results;

# Kept at file scope (not inside the loop) because the article loop further
# down assigns to $response again.
my ( $response, $html );

# Overview pages to scrape. Pagination ("page/N/index.php") was disabled in
# the original, which then fetched this same front page three times, logged a
# different URL than it requested, and stored every article three times.
# Add further overview URLs here if pagination is needed again.
my @page_urls = ('http://www.netzpolitik.org/');

# First capture of every entry already stored; guards against duplicates when
# several overview pages list the same article.
my %seen_entry;

for my $page_url (@page_urls) {
    print "\nGETting $page_url";
    $response = $bot->get($page_url);
    $html     = $response->decoded_content();

    # Sample of the date markup being matched: entry-date">29.06.2012 um 12:13h
    #
    # NOTE(review): the pattern below has only four capture groups while five
    # ($1..$5) are consumed, so $5 (time) is always undef and the remaining
    # fields are shifted by one. Presumably an href capture in front of the
    # title was lost; the pattern is left untouched until that is confirmed.
    while ( $html =~ /entry-title">(.+?)<\/a>.+?zeigen von (\D+?)">.+?class="entry-date">(.+?) um (.+?)h/migs ) {
        my ( $key1, $key2, $key3, $key4, $key5 ) = ( $1, $2, $3, $4, $5 );

        # Skip entries that were already collected.
        next if $seen_entry{$key1}++;

        push @results, {
            title => $key2,
            url   => $key1,
            autor => $key3,
            date  => $key4,
            time  => $key5,
        };
    }
}

print "\n\nI got all the data, lets print it:\n\n";

# The string literal opened on the next line continues on the following lines
# of the file and is closed there; do not terminate it here.
my $outfile = 'Netzpolitik Daily

'; my $artcount=0; foreach my $result (@results) { $strtime = "$result->{date} $result->{time}"; #print $strtime; my $parser = DateTime::Format::Strptime->new( pattern => '%d.%m.%Y %H:%M' ); my $dt = $parser->parse_datetime($strtime); $tc = $dt->epoch; $hoursfromnow = ((time() - $tc) / 3600); if ($hoursfromnow < 24) { $artcount++; $response = $bot->get($result->{url}); $tmp = $response->decoded_content(); $tmp =~ /articleBody">(.+?)<\/div>