"""Persistent storage and parsing of the site's access log.

The module defines three cooperating classes:

* ``LogLine``     -- one parsed line of the log file (not persistent).
* ``Log``         -- one log file (current or rotated) and its parsed lines.
* ``LogDatabase`` -- all known logs plus an index of log lines by time.
"""
from datetime import datetime
from datetime import timedelta
from durus.btree import BTree, BNode512
from durus.persistent import PersistentObject
from durus.persistent_list import PersistentList
from glob import glob
from os.path import basename
from qp.lib.spec import Specified, spec, specify, add_getters, \
    datetime_with_tz, sequence, mapping, nspec, init
from qp.lib.stored_file import StoredFile, new_file
from qp.lib.tz import UTC
from qp.lib.util import integer
from qp.pub.common import get_publisher
from qp.pub.user import User
from os import stat
from socket import gethostname

try:
    import GeoIP
    country_code_by_addr = GeoIP.new(
        GeoIP.GEOIP_MEMORY_CACHE).country_code_by_addr
except ImportError:
    # GeoIP is optional: without it every address maps to no country.
    def country_code_by_addr(x):
        return None


# Format of the suffix appended to the log file name when it is rotated.
_ROTATION_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"


def _rotation_time(logfile, path):
    """Return the UTC rotation time encoded in the suffix of `path`.

    `path` is expected to be `logfile` followed by one separator character
    and a timestamp in `_ROTATION_FORMAT`.  Raises ValueError when the
    suffix does not match that format.
    """
    suffix = path[len(logfile) + 1:]
    return datetime.strptime(suffix, _ROTATION_FORMAT).replace(tzinfo=UTC)


def get_log_db():
    """Return the object stored under 'log_db' in the connection root.

    Returns None when the root has no 'log_db' entry.
    """
    return get_publisher().get_connection().get_root().get('log_db')


class LogLine(Specified):
    """One parsed line of the access log.

    A line is only parsed when it splits into exactly 16 whitespace
    separated fields and its first two fields look like ``Y-M-D`` and
    ``H:M:S``.  Otherwise only `time` is set (to None) and the instance
    is false in a boolean context.
    """
    # Not persistent, but slotted
    time_is = datetime_with_tz
    status_is = int
    bytes_is = int
    generation_time_is = float
    remote_address_is = str
    country_code_is = (None, str)
    user_is = User
    method_is = str
    referrer_is = str
    agent_is = str
    path_is = str

    def __init__(self, line):
        """Parse `line` (a str) into the specified attributes.

        Any exception raised while converting the fields is re-raised
        after the split fields have been printed for diagnosis.
        """
        parts = line.split()
        self.time = None  # stays None unless the line parses
        if len(parts) == 16:
            date_parts = parts[0].split('-')
            time_parts = parts[1].split(':')
            # The user stored under the empty key is the fallback for
            # user ids that are not found.
            null_user = get_publisher().get_users()['']
            if len(date_parts) == 3 and len(time_parts) == 3:
                year, month, day = map(integer, date_parts)
                hour, minute, second = map(integer, time_parts)
                time = datetime(year=year, month=month, day=day,
                                hour=hour, minute=minute, second=second,
                                tzinfo=UTC)
                try:
                    if ':' in parts[6]:
                        # Generation time written as "minutes:seconds".
                        try:
                            minutes, seconds = parts[6].split(':')
                            generation_time = (
                                60 * integer(minutes) + float(seconds))
                        except ValueError:
                            generation_time = 0.0
                    else:
                        generation_time = float(parts[6])
                    specify(
                        self,
                        time=time,
                        status=integer(parts[2]),
                        bytes=integer(parts[3]),
                        generation_time=generation_time,
                        remote_address=parts[7],
                        country_code=country_code_by_addr(parts[7]),
                        user=get_publisher().get_users().get(
                            parts[8], null_user),
                        method=parts[12],
                        path=parts[13],
                        referrer=parts[14],
                        agent=parts[-1])
                except Exception:
                    # Show the offending fields, then let the error
                    # propagate unchanged.
                    print(parts)
                    raise

    def __eq__(self, other):
        """Two log lines are equal when every slot holds an equal value."""
        if not isinstance(other, LogLine):
            return NotImplemented
        for n in self.__slots__:
            if getattr(self, n) != getattr(other, n):
                return False
        return True

    def __ne__(self, other):
        """Inverse of __eq__.

        Defined explicitly because Python 2 does not derive `!=` from
        `__eq__`.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __str__(self):
        return ' '.join(
            "%s=%s" % (n, getattr(self, n)) for n in self.__slots__)

    def __nonzero__(self):
        """A log line is true only when it was parsed successfully."""
        return self.time is not None

    # Python 3 consults __bool__ rather than __nonzero__; without this
    # alias every instance (including unparsed ones) would be truthy.
    __bool__ = __nonzero__

add_getters(LogLine)


class Log(PersistentObject, Specified):
    """A single log file, either the live log or a rotated one."""
    host_is = spec(str, "Host on which log was made")
    rotation_is = nspec(datetime_with_tz, "Time of rotation")
    file_is = nspec(StoredFile, "The file")
    data_is = spec(
        sequence(element_spec=LogLine, container_spec=PersistentList),
        "Parsed data from file")
    offset_is = nspec(
        int, "Position at end of last read log line in the file")

    def __init__(self, path):
        """Create the log for the file at `path` and parse it once.

        When `path` is not the site's live log file it is treated as a
        rotated log: the rotation time is read from the file name suffix
        and a copy of the file is stored.
        """
        logfile = get_publisher().get_site().get_logfile()
        # NOTE(review): the original indentation of this file was lost;
        # the rotation and stored-file statements are grouped under this
        # condition because both attributes are nullable -- confirm.
        if path != logfile:
            self.rotation = _rotation_time(logfile, path)
            # Close our handle once new_file() is done with it.
            # NOTE(review): assumes new_file() consumes the source before
            # returning -- confirm against qp.lib.stored_file.
            with open(path) as source:
                self.file = new_file(source)
            filename = basename(path)
            self.file.set_filename(filename)
        self.host = gethostname()
        init(self)
        self.data = PersistentList(self.parse(path))

    def parse(self, path=None):
        """Return the list of LogLine instances read since the last call.

        Reading resumes at the stored offset.  When there is no stored
        offset, or the file is now smaller than the stored offset,
        reading starts from the beginning.  Lines that do not parse are
        skipped.  The offset is advanced to the end of what was read.

        `path` defaults to the site's live log file.
        """
        if path is None:
            path = get_publisher().get_site().get_logfile()
        with open(path, 'r') as log:
            log_size = stat(path).st_size
            offset = self.get_offset()
            if offset is not None and log_size >= offset:
                log.seek(offset)
            else:
                self.offset = 0
            data = []
            # readline() rather than iteration, so that tell() below
            # remains usable.
            while True:
                line = log.readline()
                if not line:
                    break
                log_line = LogLine(line)
                if log_line:
                    data.append(log_line)
            self.offset = log.tell()
        return data

add_getters(Log)


class LogDatabase(PersistentObject, Specified):
    """All known logs, plus an index of every log line keyed by time."""
    logs_is = spec(
        mapping({datetime_with_tz: Log}, BTree),
        "Logs by rotation date")
    log_lines_is = spec(
        mapping({datetime_with_tz: LogLine}, BTree),
        "Log lines rotation time")
    current_log_is = nspec(
        Log, "The current log.")

    def __init__(self):
        self.logs = BTree(node_constructor=BNode512)
        self.log_lines = BTree(node_constructor=BNode512)
        self.current_log = None

    def update_log_lines(self, log_lines_list):
        """Add each line of `log_lines_list` to the index by time.

        A line equal to the one already stored at its time is skipped.
        A different line whose time is already taken is stored at the
        next free time, searching forward one microsecond at a time.
        """
        one_microsecond = timedelta(microseconds=1)
        for log_line in log_lines_list:
            time = log_line.get_time()
            while time in self.log_lines:
                if log_line == self.log_lines.get(time):
                    # Already indexed.  The original loop never left
                    # this state and spun forever.
                    break
                time += one_microsecond
            else:
                self.log_lines[time] = log_line

    def update(self):
        """Pick up new rotated logs and new lines of the current log."""
        logfile = get_publisher().get_site().get_logfile()
        for path in glob(logfile + '.*'):
            try:
                rotation_time = _rotation_time(logfile, path)
            except ValueError:
                # Not a rotated log of ours; ignore it.
                continue
            if rotation_time not in self.logs:
                log = Log(path)
                self.logs[rotation_time] = log
                # The constructor already parsed the file and advanced
                # the offset to its end, so a second parse() would
                # return nothing; index what the constructor read.
                self.update_log_lines(log.get_data())
        if self.get_current_log() is None:
            self.current_log = Log(logfile)
            # Same reasoning as above: index the initial contents.
            self.update_log_lines(self.current_log.get_data())
        new_lines = self.current_log.parse()
        self.current_log.get_data().extend(new_lines)
        self.update_log_lines(new_lines)

    def get_last_log_line(self):
        """Return the value stored under the largest key of the index."""
        return self.log_lines.get_max_item()[1]

    def gen_log_lines_in_range(self, start, end):
        """Generate whatever `items_range(start, end)` of the index yields.

        NOTE(review): presumably these are (time, log_line) pairs rather
        than bare log lines -- confirm against durus BTree.items_range.
        """
        for log_line in self.log_lines.items_range(start, end):
            yield log_line

    def get_log_lines(self):
        """Return the BTree index of log lines."""
        return self.log_lines

add_getters(LogDatabase)