module LogParser

Utility methods to turn server logs into hashes of interesting data

Constants

ERROR_LOG_DIR
HITTOTAL
IGNORED_URIS
IGNORE_TRACEBACKS
REFERER
REMAINDER
RUSER
TIME_OFFSET
TRUNCATE

Related to timestamps in error log output

URIHIT
WHIMSY_APPS

Constants and ignored regex for whimsy_access logs

Public Instance Methods

collate_whimsy_access(logs, apphash = WHIMSY_APPS) click to toggle source

Collate/partition whimsy_access entries by app areas @param logs full set of items to scan @return apps categorized by apphash, with REMAINDER entry all others not captured

# File lib/whimsy/logparser.rb, line 97
def collate_whimsy_access(logs, apphash = WHIMSY_APPS)
  remainder = logs
  apps = {}
  apphash.each_key do |a|
    apps[a] = Hash.new {|h, k| h[k] = [] }
    apps[a][RUSER] = Hash.new {|h, k| h[k] = 0 }
    apps[a][REFERER] = Hash.new {|h, k| h[k] = 0 }
    apps[a][URIHIT] = Hash.new {|h, k| h[k] = 0 }
  end
  apps.each do |app, data|
    items, remainder = remainder.partition{ |l| l['uri'] =~ /\A\/#{app}/ }
    items.each do |l|
      data[RUSER][l[RUSER]] += 1
      data[REFERER][l[REFERER]] += 1
      data[URIHIT][l[URIHIT]] += 1
    end
  end
  apps[REMAINDER] = Hash.new {|h, k| h[k] = [] }
  apps[REMAINDER][RUSER] = Hash.new {|h, k| h[k] = 0 }
  apps[REMAINDER][REFERER] = Hash.new {|h, k| h[k] = 0 }
  apps[REMAINDER][URIHIT] = Hash.new {|h, k| h[k] = 0 }
  apps[REMAINDER]['useragent'] = Hash.new {|h, k| h[k] = 0 }
  remainder.each do |l|
    apps[REMAINDER][RUSER][l[RUSER]] += 1
    apps[REMAINDER][REFERER][l[REFERER]] += 1
    apps[REMAINDER][URIHIT][l[URIHIT]] += 1
    apps[REMAINDER]['useragent'][l['useragent']] += 1
  end
  return apps
end
get_access_reports(f = File.join(ERROR_LOG_DIR, 'whimsy_access.log')) click to toggle source

Get a simplistic hash report of access entries @param f filepath to whimsy_access.log @return app_report, misses_data

# File lib/whimsy/logparser.rb, line 131
def get_access_reports(f = File.join(ERROR_LOG_DIR, 'whimsy_access.log'))
  access = parse_whimsy_access(f)
  hits, miss = access.partition { |l| l['status'] == 200 }
  apps = collate_whimsy_access(hits)
  return apps, miss
end
get_errors(current, dir: ERROR_LOG_DIR) click to toggle source

Get a list of all current|available error logs interesting entries @param current - only scan current day? or scan all week’s logs @param d directory to scan for error.log @return hash of arrays of interesting entries

# File lib/whimsy/logparser.rb, line 225
def get_errors(current, dir: ERROR_LOG_DIR)
  if current
    whimsy_log = latest(File.join(dir, 'whimsy_error.log*'))
    logs = LogParser.parse_whimsy_error(whimsy_log)
    error_log = latest(File.join(dir, 'error?log*'))
    LogParser.parse_error_log(error_log, logs) if error_log
  else
    logs = LogParser.parse_whimsy_errors(dir)
    LogParser.parse_error_logs(dir, logs)
  end
  return logs.sort.to_h # Sort by time order
end
latest(path) click to toggle source

get the most recently modified matching file Note that Dir may return files in any order

# File lib/whimsy/logparser.rb, line 217
def latest(path)
  Dir.glob(path).max_by {|f| File.mtime(f) }
end
parse_error_log(f, logs = {}) click to toggle source

Parse error.log and return interesting entries @param f filename of error.log or .gz @param logs hash to append to (created if nil) @return hash of string|array of interesting entries

"timestamp" => "Passenger restarts and messages",
"timestamp" => ['_ERROR msg', '_WARN msg'... ]
# File lib/whimsy/logparser.rb, line 144
def parse_error_log(f, logs = {})
  last_time = 'uninitialized_time' # Cheap marker
  ignored = Regexp.union(IGNORE_TRACEBACKS)
  read_logz(f).lines.each do |l|
    begin
      # Emit each interesting item in order we read it
      #   Include good-enough timestamping, even for un-timestamped items
      # (Date.today.to_time + 4/100000.0).iso8601(TRUNCATE)
      if l =~ /\[ . (.{24}) .+\]: (.+)/
        last_time = $1
        capture = $2
        if capture =~ /Passenger/
          logs[Time.parse(last_time).iso8601(TRUNCATE)] = capture
        end
      elsif (l =~ /(_ERROR|_WARN  (.+)whimsy)/) && l !~ ignored
        # Offset our time so it doesn't overwrite any Passenger entries
        (logs[(Time.parse(last_time) + 1 / TIME_OFFSET).iso8601(TRUNCATE)] ||= []) << l
      end
    rescue StandardError => e
      puts e
    end
  end
  return logs
end
parse_error_logs(d = ERROR_LOG_DIR, logs = {}) click to toggle source

Parse error.log* files in dir and return interesting entries @param d directory to scan for error.log* @return hash of arrays of interesting entries

# File lib/whimsy/logparser.rb, line 172
def parse_error_logs(d = ERROR_LOG_DIR, logs = {})
  Dir[File.join(d, 'error?lo*')].each do |f|
    parse_error_log(f, logs)
  end
  return logs
end
parse_whimsy_access(f) click to toggle source

Parse whimsy_access and return interesting entries @param f filename of whimsy_access.log or .gz @return array of reduced, scrubbed entries as hashes

# File lib/whimsy/logparser.rb, line 80
def parse_whimsy_access(f)
  access = read_logz(f).scan(/<%JSON:apache_access%> (\{.*\})/).flatten
  logs = JSON.parse('[' + access.join(',') + ']').reject do |i|
    (i['useragent'] =~ /Ping My Box/) || (i['uri'] =~ Regexp.union(IGNORED_URIS)) || (i['status'] == 304)
  end
  logs.each do |i|
    %w(geo_country geo_long geo_lat geo_coords geo_city geo_combo duration request
       bytes vhost document request_method clientip query_string).each do |g|
      i.delete(g)
    end
  end
  return logs
end
parse_whimsy_error(f, logs = {}) click to toggle source

Parse whimsy_error.log and return interesting entries @param f filename of error.log or .gz @return hash of string of interesting entries

"timestamp" => "AH01215: undefined method `map' for #<String:0x0000000240e1e0> (NoMethodError): /x1/srv/whimsy/www/status/errors.cgi"
..date..
cgi:error
pid …
client …

End of script output before headers: site.cgi

..date..
cgi:error
pid …
client …

AH01215: /var/lib/g…

..date..
proxy:error
pid …
client …

AH00898: Error during SSL Handshake with remote server returned by /board/agenda/websocket/

..date..
proxy:error
pid …

(20014)Internal error (specific information not available): [client …] AH01084: pass request body failed to 127.0.0.1:34234 (localhost)

# File lib/whimsy/logparser.rb, line 187
def parse_whimsy_error(f, logs = {})
  r = Regexp.new('\[(?<errdate>[^\]]*)\] \[[\w_]+:error\] \[.+?\] (.+: )?\[.+?\] (?<errline>.+)')
  ignored = Regexp.union(IGNORE_TRACEBACKS)
  read_logz(f).lines.each do |l|
    r.match(l) do |m|
      unless ignored =~ m[2]
        begin
          logs[Time.parse(m[1]).iso8601(6)] = m[2]
        rescue StandardError
          # Fallback to merely using the string representation
          logs[m[1]] = m[2]
        end
      end
    end
  end
  return logs
end
parse_whimsy_errors(d = ERROR_LOG_DIR, logs = {}) click to toggle source

Parse whimsy_error.log* files in dir and return interesting entries @param d directory to scan for whimsy_error.log* @return hash of arrays of interesting entries

# File lib/whimsy/logparser.rb, line 208
def parse_whimsy_errors(d = ERROR_LOG_DIR, logs = {})
  Dir[File.join(d, 'whimsy_error.lo*')].each do |f|
    parse_whimsy_error(f, logs)
  end
  return logs
end
read_logz(f) click to toggle source

Read a text or .gz file @param f filename: .log or .log.gz @return File.read(f)

# File lib/whimsy/logparser.rb, line 66
def read_logz(f)
  if f.end_with? '.gz'
    reader = Zlib::GzipReader.open(f)
    logfile = reader.read
    reader.close
  else
    logfile = File.read(f)
  end
  return logfile
end