#!/usr/bin/ruby
#
# Fetches the film list page at fantasyfilmfest.com, extracts each film title,
# prints it, and then prints every freshwap.com search-result link for that
# title whose URL contains "movies".
#
# If the http_proxy environment variable holds a proxy URL
# (e.g. http://proxy.example.com:3128), requests are routed through it.

require 'rubygems'
require 'cgi'
require 'uri'
require 'mechanize'

film_list_url = 'http://fantasyfilmfest.com/pages/filme.html'
# Search endpoint; the URL-escaped film title is appended as the "story" value.
search_url = 'http://www.freshwap.com/index.php?do=search&subaction=search&full_search=1&catlist[]=5&titleonly=3&story='

# Set up the user agent.
agent = Mechanize.new
agent.history.max_size = 0 # no page history is kept between requests
agent.user_agent_alias = 'Linux Firefox'
agent.read_timeout = 3

# Set proxy if environment variable is set. URI.parse is used instead of a
# hand-written regex so that credentials (user:pass@host) and a missing port
# are handled correctly.
proxy_setting = ENV['http_proxy']
if proxy_setting && !proxy_setting.empty?
  begin
    proxy = URI.parse(proxy_setting)
    # A value without a scheme (e.g. "host:3128") parses with no host; ignore it.
    agent.set_proxy(proxy.host, proxy.port, proxy.user, proxy.password) if proxy.host
  rescue URI::InvalidURIError => e
    warn "Ignoring invalid http_proxy value: #{e.message}"
  end
end

page = agent.get(film_list_url)

page.search('div.ROW').each do |row|
  title = row.search('th.filmtitel ul.LIST li.LIST a.LIST').text.strip
  # Rows without a title link would only trigger a meaningless empty search.
  next if title.empty?

  puts title

  begin
    # Escape the title so spaces, '&', '#', etc. cannot corrupt the query string.
    results = agent.get("#{search_url}#{CGI.escape(title)}")
  rescue StandardError => e
    # One failed or timed-out search should not abort the remaining titles.
    warn "Search failed for #{title.inspect}: #{e.class}: #{e.message}"
    next
  end

  results.search('div.title a').each do |link|
    href = link['href'].to_s
    puts href if href.include?('movies')
  end
end
# Image may be NSFW.
# Click here to view.

# Click here to view.
