#!/usr/bin/perl
#
# Walks the Amazon mobile music listing page by page, and for every album
# link found appends one line of metadata to meta.txt and one line per
# customer review to ratings.txt.  Fields are separated by "::".
#
# NOTE(review): every pattern below that matches an HTML tag or entity
# (<title>, <br />, &nbsp;, <a href=...>) should be confirmed against the
# live markup before relying on the output.

use strict;
use warnings;

use threads;
use Fcntl qw(:flock);
use LWP::Simple;

use constant {
    ERR_SUCCESS => 0,
    ERR_USAGE   => -1,
    ERR_HTTPGET => -2,
    ERR_EOD     => -3,
};

use constant SITE_ROOT => "http://www.amazon.com";

# Turn a link taken from a page into an absolute URL.  Links that already
# start with "http://" are returned untouched; anything else is joined to
# SITE_ROOT with exactly one slash.
sub _absolute_url {
    my ($link) = @_;

    return $link if substr($link, 0, 7) eq "http://";

    $link =~ s{\A/+}{};
    return SITE_ROOT . "/$link";
}

# Append "$line\n" to $path.  Album workers run in parallel and share the
# output files, so the write happens under an exclusive lock.  Dies if the
# file cannot be opened, locked or flushed.
sub _append_line {
    my ($path, $line) = @_;

    open(my $fh, '>>', $path) or die "Cannot open $path for append: $!\n";
    flock($fh, LOCK_EX)       or die "Cannot lock $path: $!\n";
    print {$fh} "$line\n";
    # Buffered write errors only surface here; closing also drops the lock.
    close($fh)                or die "Cannot close $path: $!\n";

    return;
}

# Crawl listing pages, starting at $start_page (optional, defaults to
# 215251), until a page yields no album links.  One worker thread is
# started per album link and all workers of a page are joined before the
# next page is fetched, so no worker is still running when the script
# exits.
#
# Returns ERR_HTTPGET if a listing page cannot be fetched, otherwise
# ERR_SUCCESS once a page without album links is reached.
sub GetAlbums {
    my ($start_page) = @_;

    my $page = defined($start_page) ? $start_page : 215251;

    # The page number is concatenated rather than passed through sprintf:
    # the URL contains literal %3A / %2C escapes, which sprintf would parse
    # as conversions (%A is a hex-float conversion from Perl 5.22 on).
    my $listing_url = "http://www.amazon.com/gp/aw/s/?rh=n%3A5174%2Cp_n_format_browse-bin%3A492503011|492510011|1258509011|1294043011|1294041011|1294042011|1294546011&sort=-releasedate&page=";

    while (1) {
        my $html = get($listing_url . $page);
        if (!defined($html)) {
            printf("Died on page %u\n", $page);
            return ERR_HTTPGET;
        }

        my $found = 0;
        my @workers;

        # NOTE(review): the ' />\n<digits>\n' prefix is original; the
        # '<a href="...">' part is a reconstruction -- confirm.
        while ($html =~ m{ />\n[0-9]*\n<a href="([^"]*)"}g) {
            my $follow_url = _absolute_url($1);
            $found++;

            my $worker = threads->create(\&GetAlbum, $follow_url);
            if (defined($worker)) {
                push(@workers, $worker);
            }
            else {
                # Thread creation failed: process the album inline.
                GetAlbum($follow_url);
            }
        }

        $_->join() for @workers;

        last if !$found;
        $page++;
    }

    printf("Died on page %u\n", $page);
    return ERR_SUCCESS;
}

# Process one album: derive its id from $base_url, then record its
# metadata and its ratings.
#
# Returns ERR_USAGE when no id can be extracted from the URL; otherwise the
# first non-success status of GetAlbumMeta / GetAlbumRatings, or
# ERR_SUCCESS.
sub GetAlbum {
    my ($base_url) = @_;

    my $id = GetAlbumId($base_url);
    if (!defined($id)) {
        printf("Died album id %s\n", $base_url);
        return ERR_USAGE;
    }

    my $meta_status    = GetAlbumMeta($id);
    my $ratings_status = GetAlbumRatings($id);

    return $meta_status != ERR_SUCCESS ? $meta_status : $ratings_status;
}

# Extract the album id from a URL of the form ".../gp/aw/d/<id>?...".
# Returns the text between "/gp/aw/d/" and the first "?", or undef when the
# URL does not have that shape.
sub GetAlbumId {
    my ($url) = @_;

    #$url =~ m/qid\=[0-9]*\&a\=([\s\S]*?)\&/;
    if ($url =~ m{/gp/aw/d/([^?]*)\?}) {
        return $1;
    }
    return undef;
}

# Fetch the album page and its product-description page for $id and append
# "id::album::artist::year[::track...]" to meta.txt.  Fields whose pattern
# does not match are written as empty strings.
#
# Returns ERR_HTTPGET if either page cannot be fetched, else ERR_SUCCESS.
sub GetAlbumMeta {
    my ($id) = @_;

    my $base_url = "http://www.amazon.com/gp/aw/d/$id/";
    my ($album, $artist, $year) = ("", "", "");
    my @tracks;

    ## Main Page: Album and Artist
    my $html = get($base_url);
    if (!defined($html)) {
        printf("Died meta %s\n", $id);
        return ERR_HTTPGET;
    }

    # Album
    if ($html =~ m{<title>(.*?):Amazon:Music</title>\n}s) {
        $album = $1;
    }

    # Artist
    if ($html =~ m{\nArtist (.*?)<br />\n}s) {
        $artist = $1;
    }

    ## Product Description: Year and Tracks
    $html = get("$base_url?pd=1");
    if (!defined($html)) {
        printf("Died meta %s\n", $id);
        return ERR_HTTPGET;
    }

    # Release date: the first four-digit run directly before a <br />.
    if ($html =~ m{Release date:&\#160;.*?([0-9]{4})<br />}s) {
        $year = $1;
    }

    # Track listing: the captured line holds consecutive entries of the
    # form "&nbsp;<number> <title><br />".
    if ($html =~ m{Track Listings.*?\n(.*?)\n}s) {
        my $track_html = $1;
        # \G keeps the entries contiguous from the start of the line; the
        # loop stops at the first text that is not a track entry.
        while ($track_html =~ m{\G&nbsp;[0-9]* (.*?)<br />}gcs) {
            push(@tracks, $1);
        }
    }

    # An empty @tracks contributes no fields and no trailing separator.
    _append_line("meta.txt", join("::", $id, $album, $artist, $year, @tracks));

    return ERR_SUCCESS;
}

# Walk the review listing pages for $id (p=1, 2, ...) and record every
# linked review through GetRating.  Stops at the first page that contains
# no review link not already seen.
#
# Returns ERR_HTTPGET if a listing page cannot be fetched, else
# ERR_SUCCESS.  Failures of individual reviews are reported by GetRating
# and do not stop the walk.
sub GetAlbumRatings {
    my ($id) = @_;

    # Concatenated rather than used as a sprintf format, so a '%' inside
    # $id can never be taken for a conversion.
    my $listing_url = "http://www.amazon.com/gp/aw/cr/$id/?p=";
    my %seen;
    my $page = 1;

    while (1) {
        my $html = get($listing_url . $page);
        if (!defined($html)) {
            printf("Died rating %s page %u\n", $id, $page);
            return ERR_HTTPGET;
        }

        my $found = 0;

        # NOTE(review): the original pattern required one more tag right
        # after '">' whose name could not be recovered; without it this may
        # match more links than intended -- confirm.
        while ($html =~ m{"(/gp/aw/cr/[a-zA-Z0-9]*?)">}g) {
            my $follow_url = _absolute_url($1);
            next if $seen{$follow_url}++;

            $found++;
            GetRating($id, $follow_url);
        }

        last if !$found;
        $page++;
    }

    return ERR_SUCCESS;
}

# Fetch a single review page and append "reviewer::id::stars" to
# ratings.txt.  $id is the album id, $url the review page.  Fields whose
# pattern does not match are written as empty strings.
#
# Returns ERR_HTTPGET if the page cannot be fetched, else ERR_SUCCESS.
sub GetRating {
    my ($id, $url) = @_;

    my ($user, $stars) = ("", "");

    my $html = get($url);
    if (!defined($html)) {
        printf("Died rating %s\n", $id);
        return ERR_HTTPGET;
    }

    # Star rating, written on the page as "<d.d>/5.0"
    if ($html =~ m{([0-9]\.[0-9])/5\.0}) {
        $stars = $1;
    }

    # Reviewer name: text between "&nbsp;" and a double line break
    if ($html =~ m{&nbsp;([^&]*?)<br /><br />}) {
        $user = $1;
    }

    _append_line("ratings.txt", join("::", $user, $id, $stars));

    return ERR_SUCCESS;
}

exit(GetAlbums());