Here is the help output:
Code:
TV.com Batch Grabber (v1.12) by Jesse Anderson based on code by Tim Harvey, Andrei Rjeousski
Looks through the TV shows held in the videos database and performs queries using the www.tv.com website.
usage: ./tvbatchgrabber.pl -hviANHUP [parameters]
-h help
-v display version
-i display info
-A Get TV.com data for ALL movies in database
-N Get TV.com data for NEW movies in database (DEFAULT)
-Host Specify a new host (DEFAULT:127.0.0.1)
-User Specify a new user (DEFAULT:mythtv)
-Password Specify a new password (DEFAULT:mythtv)
-Images Whether to download images or not (Default:No)
-ImageDir Specify a new directory for images (DEFAULT:/myth/movie_posters/)
-SpecifyImage The fully qualified path of the image (NOTE: You must specify a show name)
-Type Specify a format to figure out season/episode (DEFAULT:0)
Currently Supported:
0: Show Name-S1-E1-Episode Name.avi or Show Name-S1-E2.avi
1: Show Name - S01E01 - Episode Name.avi
-ShowName Only process a certain show (DEFAULT:ALL)
Program Description:
You have probably used the Utilities/Setup->Video Manager->Search IMDB function to get the data for your movies from IMDB. However, there was no way to get TV show data. Batch TV.com Grabber does this and also gets the images for those shows from IMDB.
Process Description:
First, it gets all of the database entries based on the ALL or NEW selection. Then it queries TV.com to get the number. If there are multiple search hits, it gives you a list and a prompt to choose the right one. Then it gets the data from TV.com and puts it in the database. Finally, all of the images are downloaded.
I don't have a link where you can download it yet. For now you will have to create your own file and make it executable. Here is the code for tvbatchgrabber.pl:
Code:
#!/usr/bin/perl
#
# This perl script is intended to perform TV show data lookups based on
# the www.tv.com website (cover images are looked up via www.imdb.com)
#
# For more information on MythVideo's external movie lookup mechanism, see
# the README file in this directory.
#
# Author: Tim Harvey (tharvey AT alumni.calpoly DOT edu)
# Modified: Andrei Rjeousski
# Modified Again: Jesse Anderson
# v1.1
# - Added amazon.com covers and improved handling for imdb posters
# v1.2
# - when searching amazon, try searching for main movie name and if nothing is found, search for informal name
# - better handling for amazon posters, see if movie title is a substring in the search results returned by amazon
# - fixed redirects for some movies on impawards
use DBI;
use LWP::Simple; # libwww-perl providing simple HTML get actions
use HTML::Entities;
use URI::Escape;
use XML::Simple;
use Getopt::Long;
# Program identity; printed by version().
$title = "TV.com Batch Grabber";
$version = "v1.12";
$author = "Jesse Anderson based on code by Tim Harvey, Andrei Rjeousski";

# Database connection settings (overridable with -Host/-Username/-Password).
my $host = '127.0.0.1';
my $db = 'mythconverg';
my $db_user = 'mythtv';
my $db_password = 'mythtv';
# Query that startBatchProcess() prepares and executes.
my $sql;
# File name layout selector (-Type), image directory, optional show filter
# and the optional explicit cover image path.
my $fileNameType = 0;
my $imageDir = '/myth/movie_posters/';
my $showName = '';
my $specifyImage = '';
# Command line switches.
my $hflag = '';
my $iflag = '';
my $vflag = '';
my $Nflag = '';
my $Aflag = '';
my $Imageflag = '';
GetOptions("h"=>\$hflag,
    "i"=>\$iflag,
    "v"=>\$vflag,
    "N"=>\$Nflag,
    "A"=>\$Aflag,
    "Images"=>\$Imageflag,
    "Host=s"=>\$host,
    "Username=s"=>\$db_user,
    "Password=s"=>\$db_password,
    "Type=i"=>\$fileNameType,
    "ImageDir=s"=>\$imageDir,
    "ShowName=s"=>\$showName,
    "SpecifyImage=s"=>\$specifyImage
);
# print out info
if ( $vflag ) { version(); exit 1; }
if ( $iflag ) { info(); exit 1; }
# print out usage if needed
if ( $hflag ) { help(); }

# Build the SQL LIKE pattern that selects the files of the requested layout.
if ( $fileNameType == 0 ) {
    #Show Name-S1-E1-Episode Name.avi or Show Name-S1-E2.avi
    $fileNameSearch = "%$showName%-S%-E%.%";
} elsif ( $fileNameType == 1 ) {
    #Show Name - S01E01 - Episode Name.avi
    $fileNameSearch = "%$showName% - S%E% - %";
} else {
    # Without this the pattern stayed undefined and the queries below
    # silently matched nothing.
    die "Unsupported -Type value '$fileNameType'. Supported values are 0 and 1.\n";
}

if ( $Aflag ) {
    print "Processing all database entries\n\n";
    #Do all database hits that are in the database regardless of data
    $sql = "select inetref, filename, intid from videometadata where filename like \"$fileNameSearch\" order by filename";
    startBatchProcess();
}
elsif ( $Nflag ) {
    print "Processing new database entries\n\n";
    #Do all database hits that haven't been looked up already
    $sql = "select inetref, filename, intid from videometadata where inetref = 00000000 and filename like \"$fileNameSearch\" order by filename";
    startBatchProcess();
}
elsif ( length( $specifyImage ) != 0 ) {
    if ( $showName eq '' ) {
        die "You must specify a show name. Please use the -ShowName arguement and specify a new one."
    }
    print "Changing the image file for $showName\n\n";
    $dbh = DBI->connect("dbi:mysql:$db:$host","$db_user","$db_password")
        or die "Could not connect to database '$db' on '$host': $DBI::errstr\n";
    # Bind the values instead of interpolating them so that quotes in the
    # image path or show name cannot break the statement.
    $sth = $dbh->prepare("update videometadata set coverfile = ? where filename like ?");
    $sth->execute( $specifyImage, $fileNameSearch ) ||
        die "Could not execute SQL statement ... maybe invalid?";
    $dbh->disconnect();
}
else {
    help();
}
# display usage
# Prints the option summary (with the current defaults filled in) and then
# terminates the program with exit status -1.
sub usage {
    my @helpLines = (
        "usage: $0 -hviANHUP [parameters]\n",
        " -h help\n",
        " -v display version\n",
        " -i display info\n",
        "\n",
        " -A Get TV.com data for ALL movies in database\n",
        " -N Get TV.com data for NEW movies in database (DEFAULT)\n",
        " -Host Specify a new host (DEFAULT:$host)\n",
        " -User Specify a new user (DEFAULT:$db_user)\n",
        " -Password Specify a new password (DEFAULT:$db_password)\n",
        " -Images Whether to download images or not (Default:No)\n",
        " -ImageDir Specify a new directory for images (DEFAULT:$imageDir)\n",
        " -SpecifyImage The fully qualified path of the image (NOTE: You must specify a show name)\n",
        " -Type Specify a format to figure out season/episode (DEFAULT:$fileNameType)\n",
        " Currently Supported:\n",
        " 0: Show Name-S1-E1-Episode Name.avi or Show Name-S1-E2.avi\n",
        " 1: Show Name - S01E01 - Episode Name.avi\n",
        " -ShowName Only process a certain show (DEFAULT:ALL)\n",
    );
    print @helpLines;
    exit(-1);
}
# display 1-line of info that describes the version of the program
# Prints the single identification line: "<title> (<version>) by <author>".
sub version {
    printf "%s (%s) by %s\n", $title, $version, $author;
}
# display 1-line of info that can describe the type of query used
# Prints the one-line description of what kind of query this grabber performs.
sub info {
    my $description = "Looks through the TV shows held in the videos database and performs queries using the www.tv.com website.\n";
    print $description;
}
# display detailed help
# Shows the full help text: version line, description line, then the option
# summary. usage() ends the program, so this never returns.
sub help {
    for my $section ( \&version, \&info, \&usage ) {
        $section->();
    }
}
# Database handle and current statement handle, shared by the main program,
# startBatchProcess() and getMovieData().
our ($dbh, $sth);
# Lists the candidate hits for one show and asks the user to pick one.
# $hits is an array ref of [name, id, ...] entries. Returns the zero-based
# index of the chosen hit; 0 when there is at most one hit or input ends.
sub _promptForHit {
    my ($name, $siteName, $hits) = @_;
    my $hitCount = scalar @{$hits};
    return 0 if $hitCount <= 1;
    print "\n\n\nFor \"$name\" $siteName has the following names:\n";
    for my $h ( 0 .. $#{$hits} ) {
        my $number = $h + 1;
        print "$number-$hits->[$h][0]-$hits->[$h][1]\n";
    }
    while (1) {
        print "\nPlease input corresponding movie number:";
        my $answer = <STDIN>;
        return 0 unless defined $answer;    # end of input: take the first hit
        if ( $answer =~ /^\s*(\d+)\s*$/ && $1 >= 1 && $1 <= $hitCount ) {
            return $1 - 1;
        }
        print "Please enter a number between 1 and $hitCount.\n";
    }
}

# Runs the whole batch for the query held in $sql:
#   1. collects the distinct show names of the selected rows and asks TV.com
#      for matching shows,
#   2. lets the user pick a hit wherever TV.com returned more than one,
#   3. re-runs the query and stores the episode data of every row through
#      getMovieData(),
#   4. when -Images was given, repeats the lookup on IMDB and downloads one
#      cover image per show with wget.
sub startBatchProcess {
    $dbh = DBI->connect("dbi:mysql:$db:$host","$db_user","$db_password")
        or die "Could not connect to database '$db' on '$host': $DBI::errstr\n";
    $sth = $dbh->prepare($sql);
    print $sql;
    $sth->execute ||
        die "Could not execute SQL statement ... maybe invalid?";

    my @allmovies;            # per show: array ref of TV.com hits
    my @originalMovieName;    # per show: name as parsed from the file name

    #process all database results and get TV.com's list of hits
    print "\n\n\n\nTime to get all database entries and query TV.com...\nWorking";
    my $previousMovieName = "";
    #Create a list of the shows so that we can get their data
    while ( my @row = $sth->fetchrow_array ) {
        my @fileData = getMovieDataFromName( getMovieNameFromPath( $row[1] ) );
        # Rows are ordered by file name, so episodes of one show are adjacent.
        next if $previousMovieName eq $fileData[0];
        push @allmovies, [ getMovieList( $fileData[0] ) ];
        push @originalMovieName, $fileData[0];
        $previousMovieName = $fileData[0];
    }

    #Get user input for whichever shows dont have a direct match.
    print "\n\n\n\nDone getting TV.com show list.\nTime for you to sort out any shows that had more than one hit...";
    my %chosenHitFor;         # show name => [name, id, workingname]
    for my $i ( 0 .. $#allmovies ) {
        my $hits = $allmovies[$i];
        next unless @{$hits};
        # Keep the hit the user actually selected; previously the whole list
        # was stored and its first element was always used.
        my $choice = _promptForHit( $originalMovieName[$i], "TV.com", $hits );
        $chosenHitFor{ $originalMovieName[$i] } = $hits->[$choice];
    }

    $sth = $dbh->prepare($sql);
    $sth->execute ||
        die "Could not execute SQL statement ... maybe invalid?";
    #Now get movie data and put in database
    print "\n\n\n\nDone getting TV.com refs. Time to get data and put it in database...\nWorking";
    while ( my @row = $sth->fetchrow_array ) {
        my @fileData = getMovieDataFromName( getMovieNameFromPath( $row[1] ) );
        my $showKey = defined $fileData[0] ? $fileData[0] : "";
        my $hit = $chosenHitFor{$showKey};
        if ( !defined $hit ) {
            print "\nNo TV.com match for \"$showKey\", skipping $row[1]\n";
            next;
        }
        #             dbindex  real name  number     working name season        episode
        getMovieData( $row[2], $hit->[0], $hit->[1], $hit->[2],   $fileData[1], $fileData[2] );
        print ".";
    }

    ####
    #Start of IMDB image code!
    ####
    if ( $Imageflag ) {
        print "\n\n\n\nStarting to find IMDB Id's to download their images.\n\nTime to get all database entries and query IMDB...\nWorking";
        my @imdbHits;
        for my $name (@originalMovieName) {
            push @imdbHits, [ getIMDBMovieList( $name ) ];
            print ".";
        }

        #Get user input for whichever movies dont have a direct match.
        print "\n\n\n\nDone getting IMDB movie list.\nTime for you to sort out any movies that had more than one hit...";
        my @selectedInetref;
        for my $i ( 0 .. $#imdbHits ) {
            my $hits = $imdbHits[$i];
            next unless @{$hits};
            my $choice = _promptForHit( $originalMovieName[$i], "IMDB", $hits );
            $selectedInetref[$i] = $hits->[$choice][1];
        }

        #Now get Images
        print "\n\n\n\nDone getting IMDB data.\nTime to get images...\nWorking";
        my $coverUpdate = $dbh->prepare("update videometadata set coverfile=? where title like ?");
        for my $i ( 0 .. $#selectedInetref ) {
            my $imdbId = $selectedInetref[$i];
            next unless defined $imdbId && $imdbId ne "";
            my $imageURL = getMoviePoster( $imdbId );
            if ( !defined $imageURL || $imageURL eq "" ) {
                print "\nNo image found for \"$originalMovieName[$i]\"\n";
                next;
            }
            my $imageFile = "$imageDir$imdbId.jpg";
            # List form: the URL and file name never pass through a shell.
            if ( system( "wget", $imageURL, "-O", $imageFile, "-q" ) != 0 ) {
                print "\nCould not download $imageURL\n";
                next;
            }
            $coverUpdate->execute( $imageFile, "$originalMovieName[$i]%" ) ||
                die "Could not execute SQL statement ... maybe invalid?";
            print ".";
        }
    }
    ####
    #End of IMDB image code!
    ####
    print "\n\n\n\nAll Done.\n";
    $dbh->disconnect();
}
# Splits a file name (without directory and extension) into
# (show name, season, episode) according to the layout selected by
# $fileNameType. The name is lower-cased first, so the show name comes back
# in lower case. Returns an empty list for an unsupported layout.
sub getMovieDataFromName {
    my ($moviename)=@_;
    $moviename = lc( $moviename );
    my @returnData;
    if ( $fileNameType == 0 ) {
        #Show Name-S1-E1-Episode Name.avi or Show Name-S1-E1.avi
        my $strPos1 = index( $moviename, "-" );
        my $strPos2 = index( $moviename, "-", $strPos1 + 1 );
        my $strPos3 = index( $moviename, "-", $strPos2 + 1 );
        #Show Name
        $returnData[0] = substr( $moviename, 0, $strPos1 );
        #Season (skip the "-s")
        $returnData[1] = substr( $moviename, $strPos1 + 2, $strPos2 - $strPos1 - 2 );
        #Episode (skip the "-e"); without an episode name it runs to the end
        if ( $strPos3 == -1 ) {
            $returnData[2] = substr( $moviename, $strPos2 + 2 );
        } else {
            $returnData[2] = substr( $moviename, $strPos2 + 2, $strPos3 - $strPos2 - 2 );
        }
    } elsif ( $fileNameType == 1 ) {
        #Show Name - S01E01 - Episode Name.avi
        my $strPos1 = index( $moviename, "-" );
        my $strPos2 = index( $moviename, "e", $strPos1 + 1 );
        my $strPos3 = index( $moviename, "-", $strPos2 + 1 );
        #Show Name (drop the blank in front of the dash)
        $returnData[0] = substr( $moviename, 0, $strPos1 - 1 );
        #Season (skip "- s")
        $returnData[1] = substr( $moviename, $strPos1 + 3, $strPos2 - $strPos1 - 3 );
        #Episode; a name without " - Episode Name" used to yield a negative
        #length here and therefore an empty episode
        if ( $strPos3 == -1 ) {
            $returnData[2] = substr( $moviename, $strPos2 + 1 );
            $returnData[2] =~ s/\s+$//;
        } else {
            $returnData[2] = substr( $moviename, $strPos2 + 1, $strPos3 - $strPos2 - 2 );
        }
    }
    return @returnData;
}
###### Start of Code for movie list
###
###
###
###
##
##
##
##
# Downloads the TV.com episode guide page of one season and stores it in
# $showFileName so later runs can use the cached copy.
#   $workingname  - show name as it appears in TV.com URLs
#   $movieid      - TV.com show number
#   $season       - season number
#   $showFileName - path of the cache file to write
# Returns the page text, or "" when the download failed (nothing is cached
# in that case). Dies when the cache file cannot be written.
sub downloadSeason {
    my ($workingname, $movieid, $season, $showFileName)=@_;
    print "Downloading season\n";
    #hasn't been cached, need to get it and save it
    my $request = "http://www.tv.com/$workingname/show/$movieid/episode_guide.html&season=$season";
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my $response = get($request);
    if ( !defined $response ) {
        print "Warning: could not download $request\n";
        return "";
    }
    if (defined $opt_r) { printf("%s", $response); }
    open( my $output, '>', $showFileName ) or die "can't open $showFileName $!";
    print {$output} $response;
    close($output) or die "can't write $showFileName $!";
    # Callers assign the result to their page buffer; previously the value of
    # close() was returned by accident.
    return $response;
}
# get Movie Data
# Reads a cached season page. Every line is prefixed with a blank, exactly as
# the cached copy has always been assembled. Returns "" when the file does
# not exist.
sub _readSeasonCache {
    my ($cacheFile) = @_;
    return "" unless -e $cacheFile;
    open( my $cache, '<', $cacheFile ) or die("Could not open cached file.");
    my $page = "";
    while ( my $line = <$cache> ) {
        $page = "$page $line";
    }
    close($cache);
    return $page;
}

# Looks up one episode in the TV.com episode guide of its season and writes
# title, year, director, plot, user rating and inetref into the
# videometadata row $dbindex.
#   $dbindex     - intid of the videometadata row
#   $realname    - show name as listed by TV.com
#   $movieid     - TV.com show number
#   $workingname - show name as used in TV.com URLs
#   $season      - season number taken from the file name
#   $episode     - episode number within the season taken from the file name
# Season pages are cached as /home/mythtv/<realname>-<season>.htm. Returns
# without touching the database when the episode cannot be found.
sub getMovieData {
    my ($dbindex, $realname, $movieid, $workingname, $season, $episode)=@_;
    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}
    my $pad_len = 2;
    my $showFileName = "/home/mythtv/$realname-$season.htm";
    my $response = "";
    if ( -e $showFileName ) {
        #is cached locally
        print "Cache Hit!\n";
        $response = _readSeasonCache( $showFileName );
        if ( length( $response ) == 0 ) {
            print "Something is wrong with the cached file. Trying to download again.\n";
            downloadSeason( $workingname, $movieid, $season, $showFileName );
            $response = _readSeasonCache( $showFileName );
        }
    } else {
        # Read the page back from the cache file so that a fresh download and
        # a cache hit are parsed from identical text.
        downloadSeason( $workingname, $movieid, $season, $showFileName );
        $response = _readSeasonCache( $showFileName );
    }
    if ( $workingname eq "battlestar-galactica-2003" && $season == 1 ) {
        #workaround since new battlestar galactica started with the miniseries
        $episode = $episode + 2;
    }
    if ( $workingname eq "the-simpsons" && $season == 7 ) {
        #the 7th season started with a Most Wanted Spoof, ignore it
        $episode = $episode + 1;
    }
    # force numeric values ("01" becomes 1)
    $episode = $episode + 0;
    $season = $season + 0;
    $realname = stripParensFromName( $realname );
    my $episodeLabel = "$realname-" . sprintf("%0${pad_len}d", $season ) . "-" . sprintf("%0${pad_len}d", $episode );

    my $startOfData = index( $response, "<li class=\"box on f-white\">Episode Guide</li>" );
    my $endOfData = index( $response, "<div id=\"showspace_foot\" class=\"pod\">" );
    if ( $startOfData == -1 || $endOfData < $startOfData ) {
        # Not an episode guide page; cutting it up would only produce garbage.
        print "Could not find $episodeLabel moving on...\n";
        return;
    }
    $response = substr( $response, $startOfData, $endOfData - $startOfData );

    my $fakeepisode = $episode;
    #seasons after one dont start with one, they start at the number of the last season
    if ( $season > 1 ) {
        my $firstListed = trim( parseBetween( $response, "class=\"f-big\">", "</a>" ) );
        my $firstEpisode = substr( $firstListed, 0, index( $firstListed, "." ) );
        #convert to number usable with TV.com
        if ( $firstEpisode =~ /^\d+$/ ) {
            $fakeepisode = ( $firstEpisode - 1 ) + $episode;
        }
    }
    my $startOfEpisodeString = index( $response, $fakeepisode . ".\n" );
    if ( $startOfEpisodeString == -1 ) {
        print "Warning: startOfEpisode not found";
    }
    my $startOfEpisodeData = rindex( $response, "<div class=\"pl-5 pr-5\">", $startOfEpisodeString );
    if ( $startOfEpisodeData == -1 ) {
        print "Warning: startOfEpisodeData not found";
    }
    my $endOfEpisodeData = index( $response, "<div class=\"divider fl\"></div>", $startOfEpisodeData );
    if ( $endOfEpisodeData == -1 ) {
        print "Warning: endOfEpisode not found";
    }
    #Narrow the results down to the part we want
    $response = substr( $response, $startOfEpisodeData, $endOfEpisodeData - $startOfEpisodeData );

    # parse title and year
    my $title = trim( parseBetween( $response, "$fakeepisode.", "</a> <span class=\"f-medium\">" ) );
    my $year = trim( parseBetween( $response, "First aired:</span> <span class=\"f-red\">", "</span><br/>" ) );
    $year = substr( $year, rindex( $year, "/" ) + 1 );
    # parse director
    my $director = parseBetween($response, "<span class=\"f-bold\">Director:</span>", "/a>");
    $director = stripParensFromName( parseBetween($director, ">", "<") );
    # parse writer (only used to decide whether the episode was found)
    # (Note: this takes the 'first' writer, may want to include others)
    my $writer = parseBetween($response, "<span class=\"f-bold\">Writer:</span>", "/a>");
    $writer = stripParensFromName( parseBetween($writer, ">", "<") );
    # parse plot
    my $plot = parseBetween($response, "<p>", "</p>");
    # parse user rating
    my $userrating = parseBetween($response, "<div class=\"com_score\" style=\"border:0px;width:100px;\">", "/a>");
    $userrating = parseBetween($userrating, ">", "<");

    #If it can't find the data, do the next database row
    if ( $title eq "" && $director eq "" && $writer eq "" ) {
        print "Could not find $episodeLabel moving on...\n";
        # 'return' instead of 'next': a sub must not jump into its caller's loop.
        return;
    }

    # An unparsable year or rating is stored as 0 instead of breaking the statement.
    $year = 0 unless $year =~ /^\d+$/;
    $userrating = 0 unless $userrating =~ /^\d+(?:\.\d+)?$/;
    my $movierating = "TV";
    my $runtime = "0";
    my $inetref = "TV-$movieid-$season-$episode";
    $title = "$episodeLabel-$title";

    #output fields (these field names must match what MythVideo is looking for)
    # Values are bound, so quotes in titles, names or plots are harmless.
    my $sql2 = "update videometadata set title=?, year=?, director=?, plot=?, userrating=?, rating=?, length=?, inetref=? where intid=?";
    my $sth2 = $dbh->prepare($sql2);
    $sth2->execute( $title, $year, $director, $plot, $userrating, $movierating, $runtime, $inetref, $dbindex ) ||
        die "Query Was:$sql2\n\nCould not execute SQL statement ... maybe invalid?";
    return;
}
# dump Movie Poster
##
##
##
##
# Finds a poster/cover image URL for an IMDB title.
#   $movieid - IMDB title number (the digits after "tt")
# Sources are tried in this order:
#   1. an impawards.com poster linked from the IMDB posters page,
#   2. a posters.imdb.com poster,
#   3. the low resolution cover on the IMDB title page,
#   4. a DVD cover from Amazon, tried when nothing or only the low
#      resolution cover was found; it replaces the low resolution cover.
# Returns the image URL, or "" when nothing was found.
sub getMoviePoster {
    my ($movieid)=@_;
    if (defined $opt_d) { printf("# looking for movie id: '%s'\n", $movieid);}

    # get the posters page
    my $request = "http://www.imdb.com/title/tt" . $movieid . "/posters";
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my $response = get($request);
    $response = "" unless defined $response;
    if (defined $opt_r) { printf("%s", $response); }
    my $uri = "";

    # look for references to impawards.com posters - they are high quality
    my $site = "http://www.impawards.com";
    my $impsite = parseBetween($response, "<a href=\"".$site, "\">".$site);
    # jersey girl fix
    $impsite = parseBetween($response, "<a href=\"http://impawards.com","\">http://impawards.com") if ($impsite eq "");
    if ($impsite) {
        $impsite = $site . $impsite;
        if (defined $opt_d) { print "# Searching for poster at: ".$impsite."\n"; }
        my $impres = get($impsite);
        $impres = "" unless defined $impres;
        if (defined $opt_d) { printf("# got %i bytes\n", length($impres)); }
        if (defined $opt_r) { printf("%s", $impres); }
        # making sure it isnt redirect
        my $redirect = parseBetween($impres, "0;URL=..", "\">");
        if ($redirect ne "") {
            if (defined $opt_d) { printf("# processing redirect to %s\n",$redirect); }
            $impsite = $site . $redirect;
            $impres = get($impsite);
            $impres = "" unless defined $impres;
        }
        $uri = parseBetween($impres, "<img SRC=\"posters/", "\" ALT");
        # uri here is relative... patch it up to make a valid uri. Only do so
        # when a poster was found: previously a failed parse still produced
        # a bogus ".../posters/" URL and suppressed the fallbacks below.
        if ($uri ne "" && $uri !~ /http:(.*)/) {
            my $path = substr($impsite, 0, rindex($impsite, '/') + 1);
            $uri = $path."posters/".$uri;
        }
        if (defined $opt_d) { print "# found ipmawards poster: $uri\n"; }
    }

    # if the impawards site attempt didn't give a filename grab it from imdb
    if ($uri eq "") {
        if (defined $opt_d) { print "# looking for imdb posters\n"; }
        my $posterHost = "http://posters.imdb.com/posters/";
        $uri = parseBetween($response, $posterHost, "\"><td><td><a href=\"");
        if ($uri ne "") {
            $uri = $posterHost.$uri;
        } else {
            if (defined $opt_d) { print "# no poster found\n"; }
        }
    }

    my @movie_titles;
    my $found_low_res = 0;
    # no poster found, take lowres image from imdb
    if ($uri eq "") {
        if (defined $opt_d) { print "# looking for lowres imdb posters\n"; }
        my $titlePage = "http://www.imdb.com/title/tt" . $movieid . "/";
        $response = get($titlePage);
        $response = "" unless defined $response;
        $uri = parseBetween($response, "alt=\"cover\" src=\"http://ia.imdb.com/media/imdb/", "\"");
        if (defined $opt_d) { print "# starting to look for movie title\n"; }
        # get main title
        if (defined $opt_d) { print "# Getting possible movie titles:\n"; }
        push @movie_titles, parseBetween($response, "<title>", "<\/title>");
        if (defined $opt_d) { print "# Title: ".$movie_titles[-1]."\n"; }
        # now we get all other possible movie titles and store them in the titles array
        while ($response =~ m/>([^>^\(]*)([ ]{0,1}\([^\)]*\)[^\(^\)]*[ ]{0,1}){0,1}\(informal title\)/g) {
            my $informal = $1;
            chomp($informal);
            $informal =~ s/^\s+//;
            $informal =~ s/\s+$//;
            push @movie_titles, $informal;
            if (defined $opt_d) { print "# Title: ".$informal."\n"; }
        }
        if ($uri ne "" ) {
            $uri = "http://ia.imdb.com/media/imdb/".$uri;
            $found_low_res = 1;
        } else {
            if (defined $opt_d) { print "# no poster found\n"; }
        }
    }

    # now we couldnt find more than a lowres poster from IMDB, lets try
    # looking for dvd cover on amazon.com
    if ($uri eq "" or $found_low_res) {
        if (defined $opt_d) { print "# starting to look for poster on Amazon.com\n"; }
        my $found = 0;
        my $ama_uri = "";
        my $xml_parser = XML::Simple->new();
        for my $rawTitle (@movie_titles) {
            my $wanted = defined $rawTitle ? $rawTitle : "";
            # get rid of the year
            $wanted =~ s/ ?\([^\)]+\) ?//g;
            if ($wanted =~ /(.*), The$/i && $1) { $wanted = $1; }
            $wanted =~ s/[\"\']//g; # if we give amazon quotes they give them back
            # an empty title would match every product
            next if $wanted eq "";
            if (defined $opt_d) { print "# Searching for: $wanted\n"; }
            # Encode the movie title to be safe
            my $safe_movie_title = $wanted;
            $safe_movie_title =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;
            # request XML info from amazon
            my $xml_uri = "http://xml.amazon.com/onca/xml3?t=000&dev-t=000&KeywordSearch=".$safe_movie_title."&mode=dvd&type=lite&page=1&f=xml";
            if (defined $opt_d) { print "# Amazon request string is: $xml_uri\n";}
            my $xml = get($xml_uri);
            next unless defined $xml && $xml ne "";
            if (defined $opt_r) { printf("%s", $xml); }
            # XMLin dies on malformed input; treat that as "no result"
            my $xml_doc = eval { $xml_parser->XMLin($xml) };
            next unless ref($xml_doc) eq 'HASH';
            # if we only got one result, fake it as array
            my $details = $xml_doc->{Details};
            my @details = ref($details) eq 'ARRAY' ? @{$details} : ($details);
            #only search through first 5 matches
            my $lastIndex = $#details < 4 ? $#details : 4;
            for my $k (0 .. $lastIndex) {
                my $detail = $details[$k];
                next unless ref($detail) eq 'HASH';
                my $tmp_movie_title = $detail->{ProductName};
                next if !defined $tmp_movie_title || ref($tmp_movie_title);
                $tmp_movie_title =~ s/[\"\']//g;
                if (defined $opt_d) { print "# Amazon: comparing (" . $tmp_movie_title . ") to (" . $wanted . ")\n"; }
                # literal substring test; the title is not a regex
                if (index($tmp_movie_title, $wanted) != -1) {
                    $ama_uri = $detail->{ImageUrlLarge};
                    $ama_uri = "" unless defined $ama_uri;
                    if (defined $opt_d) { print "# Amazon: found poster " . $ama_uri . "\n"; }
                    $found = 1;
                    last;
                }
            }
            last if $found;
        }
        if ($ama_uri ne "") {
            my $image = get($ama_uri);
            # an 807 byte image is treated as a blank placeholder
            if (defined $image && length($image) == 807) {
                if (defined $opt_d) { printf("# this image is blank\n"); }
                $ama_uri = "";
            }
        }
        if ($ama_uri ne "") {
            $uri = $ama_uri;
        }
    }
    return $uri;
}
## Movie List
##
##
##
# dump Movie list: 1 entry per line, each line as 'movieid:Movie Title'
# Searches TV.com for a show and returns the hits as a list of array refs
# [ "Title (year)", show number, working name ], where the working name is
# the show's path component in TV.com URLs. Returns an empty list when the
# result table is missing.
#   $filename - show or file name to search for
#   $options  - only echoed in the debug output
# NOTE(review): everything from the last '.' on is stripped as a file
# extension, which also truncates show names that contain a dot.
sub getMovieList {
    my ($filename, $options)=@_;
    # Convert filename into a query string
    my $query = uri_unescape($filename); # in case it was escaped
    # Strip off the file extension, then anything following '(' or '[' -
    # people use these for general comments
    for my $marker ('.', '(', '[') {
        my $markerAt = rindex($query, $marker);
        $query = substr($query, 0, $markerAt) if $markerAt != -1;
    }
    # searches do better if any trailing ,The is left off
    if ($query =~ /(.*), The$/i && $1) { $query = $1; }
    # prepare the url
    $query = uri_escape($query);
    if (!$options) { $options = "" ;}
    if (defined $opt_d) {
        printf("# query: '%s', options: '%s'\n", $query, $options);
    }
    # get the search results page
    my $request = "http://www.tv.com/search.php?type=11&stype=program&qs=$query&x=0&y=0";
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my $response = get($request);
    $response = "" unless defined $response;
    if (defined $opt_r) {
        print $response;
        exit(0);
    }
    # extract possible matches
    my $data = parseBetween($response, "<table id=\"search-results\"", "</table>");
    if ($data eq "") {
        if (defined $opt_d) { printf("# no results\n"); }
        return;
    }
    # parse show list from matches: one table row per hit
    my $beg = "<tr>";
    my $end = "</tr>";
    my $link_end = "</a>";
    my @movies;
    my $start = index($data, $beg);
    while ($start != -1) {
        $start += length($beg);
        my $finish = index($data, $end, $start);
        # an unterminated row used to restart the scan and loop forever
        last if $finish == -1;
        my $entry = substr($data, $start, $finish - $start);
        $start = index($data, $beg, $finish + 1);

        # split the row after its last link
        my $fl_end = rindex($entry, $link_end) + length($link_end);
        my $lhs = substr($entry, 0, $fl_end);
        my $rhs = substr($entry, $fl_end);
        # The pattern used to start with \h, which perl 5.10+ reads as
        # "horizontal whitespace", so no real link could match.
        if ($lhs !~ m{<a href="http://www\.tv\.com/(.+)/show/(\d+)/.*">(.+)</span></a>}i) {
            if (defined $opt_d) {
                print("Unrecognized entry format\n");
            }
            next;
        }
        my ($workingname, $movienum, $title) = ($1, $2, $3);
        my $year = "";
        if ($rhs =~ m/\((\d+)\) \((.+)\)/) {
            $year = $1;
        } elsif ($rhs =~ m/\((\d+)\)/) {
            $year = $1;
        }
        # add to array
        my $moviename = $title;
        if ($year ne "") {
            $moviename .= " ($year)";
        }
        push @movies, [ $moviename, $movienum, $workingname ];
    }
    return @movies;
}
# Searches IMDB for a title and returns the hits as a list of array refs
# [ "Title (year)", IMDB title number ]. When IMDB answers with a title page
# instead of a result list, that single title is returned (without year).
# Returns an empty list when there are no results.
#   $filename - show or file name to search for
#   $options  - appended to the search URL; tv=no, tv=only, from_year=N,
#               to_year=N and video=no are additionally applied here as
#               filters on the result list
sub getIMDBMovieList {
    my ($filename, $options)=@_;
    # Declared up front: the redirect branch below used to fill an
    # undeclared package array instead.
    my @movies;
    # Convert filename into a query string
    my $query = uri_unescape($filename); # in case it was escaped
    # Strip off the file extension, then anything following '(' or '[' -
    # people use these for general comments
    for my $marker ('.', '(', '[') {
        my $markerAt = rindex($query, $marker);
        $query = substr($query, 0, $markerAt) if $markerAt != -1;
    }
    # IMDB searches do better if any trailing ,The is left off
    if ($query =~ /(.*), The$/i && $1) { $query = $1; }
    # prepare the url
    $query = uri_escape($query);
    if (!$options) { $options = "" ;}
    if (defined $opt_d) {
        printf("# query: '%s', options: '%s'\n", $query, $options);
    }
    # get the search results page
    my $request = "http://www.imdb.com/find?q=$query;$options";
    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
    my $response = get($request);
    $response = "" unless defined $response;
    if (defined $opt_r) {
        print $response;
        exit(0);
    }
    # check to see if we got a results page or a movie page
    # looking for 'add=<movieid>" target=' which only exists
    # in a movie description page
    my $redirectNum = parseBetween($response, "add=", "\" target=");
    if ($redirectNum) {
        if (defined $opt_d) { printf("# redirected to movie page\n"); }
        my $movietitle = parseBetween($response, "<title>", "</title>");
        # drop the trailing "(year)" when there is one
        if ($movietitle =~ m#(.+) \((\d+)\)#) { $movietitle = $1; }
        push @movies, [ $movietitle, $redirectNum ];
        return @movies;
    }
    # extract possible matches
    # possible matches are grouped in several catagories:
    # popular, exact and partial
    my $popular_results = parseBetween($response, "<b>Popular Titles</b>", "</ol>");
    my $exact_matches = parseBetween($response, "<b>Titles (Exact Matches)</b>", "</ol>");
    my $partial_matches = parseBetween($response, "<b>Titles (Partial Matches)</b>", "</ol>");
    my $data = $popular_results.$exact_matches;
    # resort to partial matches if no exact
    if ($data eq "") { $data = $partial_matches; }
    if ($data eq "") {
        if (defined $opt_d) { printf("# no results\n"); }
        return;
    }
    # parse movie list from matches: one list item per hit
    my $beg = "<li>";
    my $end = "</li>";
    my $link_end = "</a>";
    my $start = index($data, $beg);
    while ($start != -1) {
        $start += length($beg);
        my $finish = index($data, $end, $start);
        # an unterminated item used to restart the scan and loop forever
        last if $finish == -1;
        my $entry = substr($data, $start, $finish - $start);
        $start = index($data, $beg, $finish + 1);

        # split the item after its first link
        my $fl_end = index($entry, $link_end) + length($link_end);
        my $lhs = substr($entry, 0, $fl_end);
        my $rhs = substr($entry, $fl_end);
        if ($lhs !~ m/<a href="\/title\/tt(\d+)\/.*\">(.+)<\/a>/i) {
            if (defined $opt_d) {
                print("Unrecognized entry format\n");
            }
            next;
        }
        my ($movienum, $title) = ($1, $2);
        my $year = "";
        my $type = "";
        if ($rhs =~ m/\((\d+)\) \((.+)\)/) {
            $year = $1;
            $type = $2;
        } elsif ($rhs =~ m/\((\d+)\)/) {
            $year = $1;
        }
        my $skip = 0;
        # fix broken 'tv=no' option
        if ($options =~ /tv=no/ && $type eq "TV") {
            if (defined $opt_d) {printf("# skipping TV program: %s\n", $title);}
            $skip = 1;
        }
        if ($options =~ /tv=only/ && $type eq "") {
            if (defined $opt_d) {printf("# skipping Movie: %s\n", $title);}
            $skip = 1;
        }
        # fix broken 'from_year=' option
        if ($options =~ /from_year=(\d+)/) {
            if ($year < $1) {
                if (defined $opt_d) {printf("# skipping b/c of yr: %s\n", $title);}
                $skip = 1;
            }
        }
        # fix broken 'to_year=' option
        if ($options =~ /to_year=(\d+)/) {
            if ($year > $1) {
                if (defined $opt_d) {printf("# skipping b/c of yr: %s\n", $title);}
                $skip = 1;
            }
        }
        # option to strip out videos (I think that's what '(V)' means anyway?)
        if ($options =~ /video=no/ && $type eq "V") {
            if (defined $opt_d) {
                printf("# skipping Video program: %s\n", $title);
            }
            $skip = 1;
        }
        # (always) strip out video game's (why does IMDB give these anyway?)
        if ($type eq "VG") {
            if (defined $opt_d) {printf("# skipping videogame: %s\n", $title);}
            $skip = 1;
        }
        next if $skip;
        # add to array
        my $moviename = $title;
        if ($year ne "") {
            $moviename .= " ($year)";
        }
        push @movies, [ $moviename, $movienum ];
    }
    return @movies;
}
# returns text within 'data' between 'beg' and 'end' matching strings
# Returns the text of $data that lies between the first occurrence of $beg
# and the next occurrence of $end after it. Both markers are matched
# case-insensitively; HTML entities in the result are decoded. Returns ""
# when either marker is missing.
sub parseBetween {
    my ($data, $beg, $end)=@_;
    my $haystack = lc($data);
    my $begAt = index($haystack, lc($beg));
    return "" if $begAt == -1;
    my $from = $begAt + length($beg);
    my $to = index($haystack, lc($end), $from);
    return "" if $to == -1;
    my $between = substr($data, $from, $to - $from);
    # decode character references in place
    # (see http://www.w3.org/TR/html4/charset.html#h-5.3.1)
    decode_entities($between);
    return $between;
}
# Returns a copy of the argument without leading and trailing whitespace.
sub trim {
    my ($string) = @_;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}
# Returns the file name part of $moviePath without directory and without
# extension. A name without an extension is returned unchanged; a dot inside
# a directory name is not mistaken for the extension separator.
sub getMovieNameFromPath {
    my ($moviePath)=@_;
    my $nameStart = rindex( $moviePath, '/' ) + 1;
    my $lastPeriod = rindex( $moviePath, '.' );
    # No dot inside the file name itself: there is no extension to cut off.
    # (This case used to produce a negative length and a wrong or empty name.)
    if ( $lastPeriod < $nameStart ) {
        return substr( $moviePath, $nameStart );
    }
    return substr( $moviePath, $nameStart, $lastPeriod - $nameStart );
}
# Cuts the name off at its last "(" (when there is one) and returns the
# remainder with surrounding whitespace trimmed.
sub stripParensFromName {
    my ($parensName)=@_;
    my $parenAt = rindex( $parensName, "(" );
    my $kept = $parenAt == -1
        ? $parensName
        : substr( $parensName, 0, $parenAt );
    return trim( $kept );
}