# -*- coding: utf-8 -*- """ This script takes multiple .log data files from PCScreenWatcher and a Firefox history database (places.sqlite) inside a folder to extract each activity title, their url if applicable and usage timestamps into a pseudo-csv file. """ import os import sqlite3 import re import datetime from io import StringIO import pandas as pd import csv import codecs #takes the log data string and returns a list of activity titles and their time windows def extract_activities(log_data): #regex which matches squared brackets titles = re.compile("\[.*?\]") #regex for total/active time time = re.compile("(? col_count: col_count = commas #print(col_count) column_names = [i for i in range(0, col_count-1)] #table = StringIO(agg_data) table = StringIO(data) df = pd.read_csv(table, header=None, sep=',', quotechar='"', names=column_names,quoting=csv.QUOTE_ALL) df.insert(1, "url", "") df.rename(columns = {0:'title'}, inplace = True) df.rename(columns = {1:'total-active'}, inplace = True) df.to_csv('%s/%s.csv' % (dir, dir), sep=';', quoting=csv.QUOTE_ALL) match_urls(get_history_db(dir), df) #somehow parse total/active time for aggregation - no idea right now # df.insert(2, "active_time", 0) # for index, row in df.iterrows(): # total_string = row[1][8:13] # print(total_string) # #df.at[i, 2] = active_time # #df.at[i, 1] = total_time df.to_csv('%s/%s.csv' % (dir, dir), sep=';', quoting=csv.QUOTE_ALL) input("*Press enter to close*")
#
# NOTE(review): state of this file
# --------------------------------
# * The entire script above sits on ONE physical line that starts with '#'.
#   A Python comment runs to the end of its physical line, so as stored the
#   docstring, the imports and every statement are comment text and nothing
#   executes.
# * Part of the original text appears to be missing: the line jumps from the
#   opening of the "total/active time" regex (`time = re.compile("(?`) straight
#   to ` col_count: col_count = commas`. It looks as if everything between a
#   less-than sign and a later greater-than sign was stripped -- TODO confirm
#   against the original file.
#
# What is still visible
# ---------------------
# * Docstring: the script reads PCScreenWatcher .log files plus a Firefox
#   history database (places.sqlite) from a folder and writes each activity
#   title, its url (if applicable) and usage timestamps to a pseudo-csv file.
# * Imports: os, sqlite3, re, datetime, io.StringIO, pandas (as pd), csv,
#   codecs.
# * extract_activities(log_data): per its comment it takes the log data string
#   and returns a list of activity titles and their time windows. Only the
#   first statement survives (a non-greedy regex for text in square brackets);
#   the rest of the body is not visible here.
# * Tail of the main routine:
#     - col_count is set to `commas` (the guarding condition is cut off;
#       presumably it tracks the largest per-line comma count -- verify);
#     - column_names is the integer range 0 .. col_count-2;
#     - `data` is wrapped in StringIO and parsed with pd.read_csv (no header
#       row, ',' separator, '"' quote character, csv.QUOTE_ALL);
#     - an empty "url" column is inserted at position 1, and the columns
#       labelled 0 and 1 are renamed to 'title' and 'total-active';
#     - the frame is written to '{dir}/{dir}.csv' with ';' as separator, then
#       match_urls(get_history_db(dir), df) is called and the same file is
#       written a second time;
#     - input() keeps the console open until Enter is pressed.
# * get_history_db, match_urls and the names `data`, `dir` and `commas` are
#   used above but their definitions are not visible in this file as stored;
#   they presumably lived in the missing region -- TODO restore from the
#   original source before attempting to reformat or run this script.
# * `dir` shadows the Python builtin of the same name; consider renaming it
#   once the file is restored.