From 556dd4a49bb336eb309287d291ad36f0df90b01b Mon Sep 17 00:00:00 2001 From: Niclas Dobbertin Date: Thu, 15 Jun 2023 16:06:08 +0200 Subject: new log parser returning start/end time, title,url --- bjoern/videoanalyse/LogParser.py | 275 ++++++++++++++++----------------------- 1 file changed, 114 insertions(+), 161 deletions(-) (limited to 'bjoern') diff --git a/bjoern/videoanalyse/LogParser.py b/bjoern/videoanalyse/LogParser.py index c444867..c580cfe 100755 --- a/bjoern/videoanalyse/LogParser.py +++ b/bjoern/videoanalyse/LogParser.py @@ -5,185 +5,138 @@ inside a folder to extract each activity title, their url if applicable and usag """ import os -import sqlite3 import re -import datetime -from io import StringIO -import pandas as pd +from datetime import datetime import csv import codecs +from read_sqlite import get_url_from_sqlite +from pathlib import Path - -#takes the log data string and returns a list of activity titles and their time windows +# takes the log data string and returns a list of activity titles and their time windows def extract_activities(log_data): - #regex which matches squared brackets - titles = re.compile("\[.*?\]") - #regex for total/active time - time = re.compile("(? col_count: - col_count = commas - #print(col_count) - column_names = [i for i in range(0, col_count-1)] - #table = StringIO(agg_data) - table = StringIO(data) - df = pd.read_csv(table, header=None, sep=',', quotechar='"', names=column_names,quoting=csv.QUOTE_ALL) - df.insert(1, "url", "") - df.rename(columns = {0:'title'}, inplace = True) - df.rename(columns = {1:'total-active'}, inplace = True) - - df.to_csv('%s/%s.csv' % (dir, dir), sep=';', quoting=csv.QUOTE_ALL) - - match_urls(get_history_db(dir), df) - - #somehow parse total/active time for aggregation - no idea right now - # df.insert(2, "active_time", 0) - # for index, row in df.iterrows(): - # total_string = row[1][8:13] - # print(total_string) - # #df.at[i, 2] = active_time - # #df.at[i, 1] = total_time - - df.to_csv('%s/%s.csv' % (dir, dir), sep=';', quoting=csv.QUOTE_ALL) - - - + for vp_dir in [f.name for f in os.scandir() if f.is_dir()]: + print(vp_dir) + log = extract_activities(get_log_data(vp_dir)) + log = generate_log(log) + + history = get_history_db(vp_dir) + log = match_urls(history, log) + + path = Path(f"{vp_dir}/{vp_dir}.csv") + with open(path, "w") as csvfile: + writer = csv.writer(csvfile, delimiter=",", quoting=csv.QUOTE_NONNUMERIC) + writer.writerow(["Starttime", "Endtime", "Title", "URL"]) + for row in log: + writer.writerow(row) + + input("*Press enter to close*") -- cgit v1.2.3