LinHES Forums
http://forum.linhes.org/

Missing Mythmovietime and Apple Trailers Broke
http://forum.linhes.org/viewtopic.php?f=25&t=20968
Page 2 of 2

Author:  huntermcdole [ Mon Jul 05, 2010 9:41 pm ]
Post subject: 

Found a replacement grabber that seems to work for me;

http://github.com/Jonty/Googlemovies/bl ... emovies.pl

Code:
#!/usr/bin/perl
use warnings;
use strict;

use LWP::Simple;
use HTML::Entities;
use HTML::TreeBuilder;
use XML::Writer;

# Google url. You shouldn't need to change this unless fetching totally fails.
# Just change the domain to your local google, i.e. google.com to google.de
my $googleurl = "http://www.google.com/movies?near=";

# Maximum number of result pages to fetch.
# Set to 1 to fetch only the first page of results.
my $fetch_pages = 10;

# Otherwise we can get complaints when unicode is output
binmode STDOUT, ':utf8';

# Fetch the postcode/location to use from the args
# You can also use city name, "New York", "London"
my $location = join '+', @ARGV; # join args with '+' to be able to pass i.e. "New+York" in the url

# Report the usage error on STDERR with a non-zero exit status, so a caller
# reading STDOUT never mistakes the message for (broken) XML output.
if (!$location) {
    print STDERR "No postcode/location passed in arguments!\n";
    exit 1;
}

# The whole document is accumulated in $out and printed once at the end.
# XML::Writer only captures into a string when OUTPUT is a *reference* to a
# scalar; passing the (empty) string itself leaves $out untouched.
my $out = '';
my $xml = XML::Writer->new(
    OUTPUT      => \$out,
    DATA_MODE   => 1,
    DATA_INDENT => 2,
);

$xml->xmlDecl();
$xml->startTag('MovieTimes');

# Result offset of the page being fetched; fetch_html appends it to the URL
# and parse_navbar advances it when it finds a link to the next page.
my $start = 0;
parse_html(fetch_html($googleurl.$location));

$xml->endTag(); # MovieTimes
$xml->end();

# Tada!
print $out;

# Download one page of results.
#
# Takes the page URL as its only argument, appends "&start=<offset>" using
# the file-level $start, and returns whatever get() returned for that URL.
#
# If get() returns undef, an error message is written to STDERR and the
# script exits with status 1, so the message never ends up in the XML that
# callers read from STDOUT.
sub fetch_html {
    my ($url) = @_;

    my $response = get($url . '&start=' . $start);

    if (!defined $response) {
        print STDERR "Failed to fetch movie times, did you pass a valid postcode?\n";
        exit 1;
    }

    return $response;
}

# Parse one page of results and write its theaters to the file-level $xml
# writer, then follow the "next page" link while the page budget lasts.
#
# Takes the HTML of the page as its only argument. For every
# <div class="theater"> it emits a <Theater> element (filled in by
# parse_cinema) containing a <Movies> element (filled in by parse_movies
# from the theater's <div class="movie"> children).
#
# Decrements the file-level $fetch_pages; while it stays above zero and
# parse_navbar finds a next-page URL, that page is fetched and parsed too.
sub parse_html {
    my ($html) = @_;

    my $tree = HTML::TreeBuilder->new();
    $tree->parse($html);
    $tree->eof;

    foreach my $theater ($tree->look_down('_tag', 'div', class => 'theater')) {
        $xml->startTag('Theater');
        parse_cinema($xml, $theater);

        my @movierows = $theater->look_down('_tag', 'div', class => 'movie');
        $xml->startTag('Movies');
        parse_movies($xml, @movierows);
        $xml->endTag(); # Movies

        $xml->endTag(); # Theater
    }

    # Work out the next page while the tree is still available ...
    my $next_url;
    if (--$fetch_pages > 0) {
        $next_url = parse_navbar($tree);
    }

    # ... then release the tree explicitly before recursing, so the trees of
    # earlier pages are not all kept alive until the last page is done.
    $tree->delete;

    parse_html(fetch_html($next_url)) if $next_url;

    return;
}

# Find the URL of the next page of results.
#
# Takes the parsed HTML tree of the current page. Looks inside
# <div id="navbar"> for a link whose href starts with "/movies?" and ends
# with "start=<$start + 10>". When such a link is found, the file-level
# $start is advanced by 10 and the absolute URL of the link is returned.
# Returns undef (in scalar context) when there is no navbar or no such link.
sub parse_navbar {
    my $tree = shift;
    my $next_start = $start + 10;

    # Cut the path off $googleurl so the site-relative href can be appended.
    my $rooturl = $googleurl;
    $rooturl =~ s/^(http:...*?)(\/.*)$/$1/i;

    my $navbar = $tree->look_down('_tag', 'div', id => 'navbar');
    return if !$navbar;

    # look for a link with 'start=$next_start'
    foreach my $link ($navbar->look_down('_tag', 'a')) {
        my $href = $link->attr('href');

        # Anchors without an href cannot be the next-page link.
        next if !defined $href;
        next if $href !~ /^\/movies\?.*start=$next_start$/;

        $start = $next_start;

        # The pattern above only accepts hrefs beginning with "/movies?", so
        # the link is always site-relative and needs the root prepended.
        return $rooturl . $href;
    }

    return;
}

# Write the name and address of one theater to the XML writer.
#
# Parameters:
#   $xml    - writer object; dataElement() is called on it
#   $cinema - element for the theater; its first <h2 class="name"> supplies
#             the <Name> and its first <div class="info"> the <Address>
#
# A missing element yields an empty value instead of a fatal
# "can't call method on undefined value" error.
#
# Returns the writer that was passed in.
sub parse_cinema {
    my ($xml, $cinema) = @_;

    my ($name_el) = $cinema->look_down('_tag', 'h2', class => 'name');
    my $name = defined $name_el ? $name_el->as_text : '';
    # Myth can't handle UTF8 nbsp. Remove the nbsp itself (plus the \xC2 lead
    # byte when the text is undecoded UTF-8), but leave any other \xC2 alone:
    # it is either a real character or part of a different UTF-8 sequence.
    $name =~ s/\xC2?\xA0//g;
    $xml->dataElement('Name', $name);

    my ($address_el) = $cinema->look_down('_tag', 'div', class => 'info');
    my $address = defined $address_el ? $address_el->as_text : '';
    $address =~ s/\xC2?\xA0//g; # Myth can't handle UTF8 nbsp
    $xml->dataElement('Address', $address);

    return $xml;
}

# Write one <Movie> element per movie row to the XML writer.
#
# Parameters:
#   $xml - writer object; startTag(), dataElement() and endTag() are called
#   @_   - the remaining arguments are the movie row elements
#
# For each row the following children are written:
#   Name        - text of the first <div class="name"> (empty if missing)
#   Rating      - for every <img> whose alt text contains a digit: the alt
#                 text from that digit on (only when the row has info text)
#   RunningTime - first "<digits><word> <digits><word>" run in the text of
#                 the first <span class="info">
#   ShowTimes   - text of the first <div class="times">, with the times
#                 separated by ", "
#
# Returns the writer that was passed in.
sub parse_movies {
    my $xml = shift;

    foreach my $movierow (@_) {
        $xml->startTag('Movie');

        my ($name_el) = $movierow->look_down('_tag', 'div', class => 'name');
        $xml->dataElement('Name', defined $name_el ? $name_el->as_text : '');

        my ($info_el) = $movierow->look_down('_tag', 'span', class => 'info');
        my $info = defined $info_el ? $info_el->as_text : '';
        if ($info) {
            foreach my $img ($movierow->look_down('_tag', 'img')) {
                # Images without an alt attribute carry no rating text.
                my $alt = $img->attr('alt');
                if (defined $alt && $alt =~ /(\d.*$)/i) {
                    $xml->dataElement('Rating', $1);
                }
            }

            if ($info =~ /(\d+\w+\s*\d+\w+)/i) {
                $xml->dataElement('RunningTime', $1);
            }
        }

        my ($times_el) = $movierow->look_down('_tag', 'div', class => 'times');
        my $showtimes = defined $times_el ? $times_el->as_text : '';
        if ($showtimes) {
            # Myth can't handle UTF8 nbsp. Only the nbsp (and its \xC2 lead
            # byte in undecoded UTF-8) is removed, not every \xC2.
            $showtimes =~ s/\xC2?\xA0//g;

            # Occasionally this line also contains information about subtitles
            # before the first time. Text with no digit at all has no times,
            # so it is passed through unchanged rather than reusing captures
            # left over from an earlier match.
            my ($note, $times) = ($showtimes, '');
            if ($showtimes =~ /^(.*?)(\d.*$)/) {
                ($note, $times) = ($1, $2);
                $times =~ s/\s+\z//;     # avoid a dangling ", " after the last time
                $times =~ s/\s+/, /g;
            }

            $xml->dataElement('ShowTimes', $note.$times);
        }

        $xml->endTag(); #Movie
    }

    return $xml;
}


Author:  tscholl [ Tue Jul 06, 2010 10:26 am ]
Post subject: 

Great find! It works great and is simple to implement.

In Settings just replace
Code:
Grabber:  /usr/bin/ignyte --zip %z --radius %r

with
Code:
Grabber:  /path_2_file/googlemovies.pl --zip %z --radius %r

and you're all set.

Thanks huntermcdole!

Author:  huntermcdole [ Tue Jul 06, 2010 11:09 am ]
Post subject: 

I just used
Code:
/path_2_file/googlemovies.pl %z
not sure it supports the others

Author:  christ [ Tue Jul 06, 2010 7:53 pm ]
Post subject: 

huntermcdole wrote:
I just used
Code:
/path_2_file/googlemovies.pl %z
not sure it supports the others

This is correct, the script only expects the %z option.

Very nice find BTW, now I can actually use the movie listings! Now if only I could find a way to expand the range. "near" for me is not "near" enough. Another 10km range would be perfect.

Page 2 of 2 All times are UTC - 6 hours
Powered by phpBB® Forum Software © phpBB Group
http://www.phpbb.com/