# A model for accessing the mint_visit table that contains all the hit stats.
class Mint < ActiveRecord::Base

  # If you are using the RSS/Atom importer, put this line in your environment:
  #
  #   Mint.log_file_path = '/home/topfunky/logs/your_site.com/http/access.log.0'
  #
  # NOTE: Rails seems to load the individual environment files before the rest of
  # environment.rb. I have to put this at the end of environment.rb instead
  # of in production.rb.
  cattr_accessor :log_file_path

  # Useful fields:
  # referer, referer_is_local, resource, resource_title, search_terms, dt
  set_table_name 'mint_visit'

  # The time the resource was hit, as a Ruby Time object.
  #
  # Built from the +dt+ column, which is treated as seconds since the epoch.
  def visited_at
    Time.at(dt)
  end

  # Returns an array of Mint objects for what pages are currently popular,
  # grouped by resource and ordered by descending visit count.
  #
  # If given an argument, it will restrict the search to the last few weeks
  # (only rows whose +dt+ is newer than +weeks_ago+ weeks before now).
  #
  # The following fields are available: resource, resource_title, visit_count
  def self.popular_pages(weeks_ago=nil)
    options = {
      :select => "resource, resource_title, COUNT(resource) AS visit_count",
      :group  => 'resource',
      :order  => 'visit_count DESC'
    }
    options[:conditions] = ["dt > ?", weeks_ago_as_epoch_seconds(weeks_ago)] unless weeks_ago.nil?

    # TODO WHERE resource NOT IN (#{resource_ignore_list})

    # Pass the hash itself. Splatting it (*options) would expand it into
    # [key, value] pair arrays, so find would not see an options hash at all.
    find :all, options
  end

  # Returns a single integer representing the number of visits to a resource over a number of weeks.
  # Shows all visits by default.
  #
  # Uses a wildcard at the front so you can use something like
  #
  #   /dashboard/2006/2
  #
  # or
  #
  #   dashboard_url(:year => 2006, :month => 2, :only_path => true)
  #
  # and it will match all of these:
  #
  #   http://nubyonrails.com/dashboard/2006/2
  #   http://nubyonrails.topfunky.com/dashboard/2006/2
  #
  # Or, you can add a wildcard at the end like this:
  #
  #   /dashboard/2006%
  #
  # For root urls you will need a trailing slash:
  #
  #   http://nubyonrails.com/
  #
  def self.visits_to_resource(url, weeks_ago=nil)
    query = " resource LIKE ? "
    parameters = ["%#{url}"]

    if weeks_ago
      query += " AND dt > ? "
      parameters << weeks_ago_as_epoch_seconds(weeks_ago)
    end

    count([query, *parameters])
  end

  # Returns a Ruby Time object for the oldest recorded hit whose resource
  # ends with +url+, or nil when no such hit exists.
  def self.earliest_visit_to_resource(url)
    visit = find(:first,
                 :conditions => ["resource LIKE ?", "%#{url}"],
                 :order => "dt ASC")
    visit.visited_at if visit
  end

  # Call with
  #
  #   rake mint:parse_logs
  #
  # You must set the path to your current log file for today.
  #
  #   Mint.log_file_path = 'log/http_access.log'
  #
  # This will process all hits in the logfile. It assumes that you run it
  # once a day and that your host rotates the logs (but symlinks the latest
  # day's log to a consistent place).
  #
  # On Dreamhost, it's at
  #
  #   ~/logs/your_site.com/http/access.log.0
  #
  # Raises a RuntimeError when neither the argument nor Mint.log_file_path
  # provides a path. All inserts happen inside a single transaction.
  def self.parse_log_file(log_file=nil)
    log_file ||= @@log_file_path
    raise "No log file specified" unless log_file

    lp = LogParser.new
    transaction do
      File.open(log_file) do |file|
        lp.parse_io_stream(file) do |stat|
          # Only process Typo feed resources (atom and rss)
          if /feed\.xml$/.match(stat['PATH_INFO'])
            Mint.append(stat)
            sleep(0.1) # To keep load down on shared hosts
          end
        end
      end
    end
  end

  # Adds a hit to the Mint visits table.
  #
  # Used by the log parser to add hits for resources not tracked by Mint (like RSS).
  #
  # +env+ is a hash-like object read with the keys 'HTTP_USER_AGENT',
  # 'DATETIME', 'REMOTE_ADDR' and 'PATH_INFO'.
  #
  # One row is created per subscriber when the user agent advertises a
  # count (for example "12 subscribers"); otherwise a single row is created.
  def self.append(env)
    user_agent = env['HTTP_USER_AGENT']

    subscriber_count = 1
    if (subscribers = /(\d+) subscriber/.match(user_agent))
      subscriber_count = subscribers[1].to_i
    end

    browser_family = browser_version = ''
    # TODO Check for browsers that start with a url
    # TODO Check for browsers without a slash-version (Google-FeedReader)
    # ^([^\(]+)           Whole user agent
    # ^([^/]+)/([^/]\S+)  Better ua/version splitter
    if (agent = %r{^([^/]+)/(\S+)}.match(user_agent))
      browser_family  = agent[1]
      browser_version = agent[2]
    end

    # Falls back to the current time when DATETIME cannot be parsed.
    time_of_hit = Time.now
    # [10/Oct/1999:21:15:05 +0500]
    # NOTE(review): the UTC offset is not captured, so the fields are read as
    # local time of this process -- confirm that matches the log's zone.
    if (stamp = %r{([^/]+)/([^/]+)/([^:]+):([^:]+):([^:]+):(\S+)}.match(env['DATETIME']))
      day, month, year, hour, minute, second = stamp.captures
      time_of_hit = Time.local(year, month, day, hour, minute, second)
    end

    # These values are identical for every row, so compute them once.
    resource          = env['PATH_INFO']
    hit_epoch         = time_of_hit.utc.to_i
    ip_long           = ip2long(env['REMOTE_ADDR'])
    resource_checksum = Zlib.crc32(resource)

    # Register multiple hits if necessary
    subscriber_count.times do
      create(:dt                => hit_epoch,
             :ip_long           => ip_long,
             :referer           => '',
             :referer_checksum  => 0,
             :domain_checksum   => 0,
             :referer_is_local  => -1,
             :resource          => resource,
             :resource_checksum => resource_checksum,
             :resource_title    => 'Feed',
             :search_terms      => '',
             :browser_family    => browser_family,
             :browser_version   => browser_version,
             :platform          => 'Feed',
             :resolution        => '0x0',
             :flash_version     => 0) # 0 interpreted as "None" by UA007 v121
    end
  end

  # NOTE: `private` has no effect on methods defined with `def self.`, so the
  # two helpers below are still callable from outside the class. Left that way
  # deliberately so any existing callers keep working.
  private

  # Epoch seconds (UTC) for the moment +weeks_ago+ weeks before now.
  def self.weeks_ago_as_epoch_seconds(weeks_ago=2)
    (Time.now - weeks_ago.weeks).utc.to_i
  end

  # Converts a dotted-quad IP string such as "72.14.199.2" into a single
  # integer, with the first octet as the most significant byte.
  def self.ip2long(ip)
    ip.split(/\./).inject(0) { |long, octet| (long << 8) + octet.to_i }
  end

end

# RSS Hit: 72.14.199.2 - - [17/Feb/2006:00:39:50 -0800] "GET /xml/rss20/article/242/feed.xml HTTP/1.1" 200 8301 "-" "FeedFetcher-Google; (+http://www.google.com/feedfetcher.html)"

# INSERT INTO `mint_visit` (`id`, `dt`, `ip_long`, `referer`, `referer_checksum`, `domain_checksum`, `referer_is_local`, `resource`, `resource_checksum`, `resource_title`, `search_terms`, `browser_family`, `browser_version`, `platform`, `resolution`, `flash_version`, `window_width`) VALUES (110, 1137386366, 410415437, 'http://rubyonrailsworkshops.com/dashboard/2006/3', 668827901, -1803874782, 1, 'http://rubyonrailsworkshops.com/dashboard/2006/2', 1356509291, 'Ruby on Rails Workshops and Conferences around the World', '', 'Firefox', '1.5', 'Macintosh', '1280x854', 8, 1152);