#!/usr/local/bin/ruby =begin = Overview This script is for caching banner-images of others' sites. Check abuse with referrer and file size. == See also = Usage cache.rb?http://example.com/banner = License Creative Commons `by' (c) 2005 cho45 =end @config = { # Cache directory. # Cache directory must be writable. "Cache-Directory" => ".", # Image when Error. "No-Banner" => "../nobanner.png", # Limit Cached File Size. # Raise OverFilesizeError # if caching file over this limit. "Filesize-Limit" => 40000, # (byte) # Recache after this days. # HEAD access -> GET if filesize is changed "Crawl-Interval" => 7, # (day) # Acceptable Content-types. # Raise NotConfiguredContentTypeError # if you try caching file with not configured Content-Type. "Content-Type" => { "image/gif" => "gif", "image/jpeg" => "jpg", "image/png" => "png", }, } require "net/http" require "uri" class CacheImg class CacheImgError < StandardError; end class RequestFailedError < CacheImgError; end class OverFilesizeError < CacheImgError; end class NotConfiguredContentTypeError < CacheImgError; end VERSION = "0.2" UA = "Mozilla/4.0 (MSIE 6) ImageCache/#{VERSION}" DEFAULT_CONTENT_TYPE_CONF = { "image/gif" => "gif", "image/jpeg" => "jpg", "image/png" => "png" } attr_accessor :dir, :content_type, :crawl_interval, :max_filesize, :content_type def initialize(dir, crawl_interval=7, max_filesize=40000, content_type=DEFAULT_CONTENT_TYPE_CONF) @dir = dir @crawl_interval = crawl_interval @max_filesize = max_filesize @content_type = content_type end def get_image(uri) filename = nil content_type = nil basename = "#{@dir}/#{uri2filename(uri)}" @content_type.each do |k, v| if File.exist?("#{basename}.#{v}") filename = "#{basename}.#{v}" content_type = k end end if File.exist?("#{basename}.err") filename = "#{basename}.err" end if filename if File.mtime(filename) + (@crawl_interval * 24 * 60 * 60) < Time.now uri = URI.parse(uri) res = nil Net::HTTP.start(uri.host, uri.port) do |http| header = { "User-Agent" => UA, "Accept" => @content_type.keys.join(",") } res = http.head(uri.request_uri, header) end if res["content-length"].to_i != File.size(filename) File.delete(filename) content_type, content = restore_img(uri) else t = Time.now File.utime(t, t, filename) content_type = @content_type.index(File.extname(filename)[1..-1]) content = File.open(filename, "rb") {|f| f.read} end else content_type = @content_type.index(File.extname(filename)[1..-1]) content = File.open(filename, "rb") {|f| f.read} end else content_type, content = restore_img(uri) end unless content_type raise CacheImg.const_get(content).new end [content_type, content] end private def restore_img(uri) uri = URI.parse(uri) content_type = nil ext = "err" content = "" Net::HTTP.start(uri.host, uri.port) do |http| header = { "User-Agent" => UA, "Accept" => @content_type.keys.join(",") } res = http.get(uri.request_uri, header) case res.code when "200" ext = @content_type[res["content-type"]] if ext if res.body.size < @max_filesize content_type = res["content-type"] content = res.body else ext = "err" content = "OverFilesizeError" end else ext = "err" content = "NotConfiguredContentTypeError" end else content = "RequestFailedError" end end File.open("#{@dir}/#{uri2filename(uri.to_s)}.#{ext}", "wb") do |f| f.print content end [content_type, content] end def uri2filename(uri) uri.gsub(/[\\\/\*\?\|"<>:,;% ']/) do |m| "%#{Regexp.last_match[0].unpack("H2")}" end end def filename2uri(filename) filename.gsub(/%([A-Za-z0-9]{2})/) do |m| Regexp.last_match[1].hex.chr end end end require "cgi" @cgi = CGI.new puts @cgi.header("Content-Type" => "text/plain") if $DEBUG begin class InvalidReferrer < StandardError; end raise InvalidReferrer unless @cgi.referer =~ /^http:\/\/#{@cgi.host}.*$/ uri = @cgi.query_string uri ||= "" if uri.empty? @cgi.out("Content-Type" => "image/png") do File.open(@config["No-Banner"], "rb") {|f| f.read} end unless $DEBUG else @ci = CacheImg.new(@config["Cache-Directory"], @config["Crawl-Interval"], @config["Filesize-Limit"], @config["Content-Type"]) begin content_type, content = @ci.get_image(uri.gsub(/\s/, '')) rescue CacheImg::CacheImgError => e content_type = "image/png" content = File.open(@config["No-Banner"], "rb") {|f| f.read} end @cgi.out("Content-Type" => content_type) do content end unless $DEBUG end rescue InvalidReferrer => e @cgi.out("Content-Type" => "text/plain") do "Invalid Referrer" end rescue Exception => e if $DEBUG puts @cgi.header("Content-Type" => "text/plain") p e.message p e.backtrace else @cgi.out("Content-Type" => "image/png") do File.open(@config["No-Banner"], "rb") {|f| f.read} end end end