From d52ede0288a5db23563ec29f998a1ad26e8aec28 Mon Sep 17 00:00:00 2001
From: Niclas Dobbertin
Date: Wed, 4 Oct 2023 12:33:39 +0200
Subject: use LogParser in post_processing

---
 bjoern/videoanalyse/LogParser.py       | 39 +++++++++++++++++++++-------------
 bjoern/videoanalyse/post_processing.py | 11 ++++++++--
 2 files changed, 33 insertions(+), 17 deletions(-)

(limited to 'bjoern/videoanalyse')

diff --git a/bjoern/videoanalyse/LogParser.py b/bjoern/videoanalyse/LogParser.py
index 448b283..1104e7f 100755
--- a/bjoern/videoanalyse/LogParser.py
+++ b/bjoern/videoanalyse/LogParser.py
@@ -12,11 +12,10 @@ import codecs
 from read_sqlite import get_url_from_sqlite
 from pathlib import Path
 
-BROWSER_TITLE_SUFFIX = " - Mozilla Firefox"
-
 
 # takes the log data string and returns a list of activity titles and their time windows
 def extract_activities(log_data):
+    BROWSER_TITLE_SUFFIX = " - Mozilla Firefox"
     # regex which matches between squared brackets
     reg_titles = re.compile("(?<=\[).*?(?=\])")
     # regex for total/active time
@@ -86,6 +85,7 @@ def match_urls(history_db, log):
             entry.append(url)
     return log
 
+
 def generate_log(activities: dict):
     # For each start time in ascending order, make an entry with title and timestamp
     log = []
@@ -101,26 +101,23 @@ def generate_log(activities: dict):
                     smallest_start_time = (title, idx)
         log.append(
             [
-                activities[smallest_start_time[0]][smallest_start_time[1]][0].isoformat(),
-                activities[smallest_start_time[0]][smallest_start_time[1]][1].isoformat(),
+                activities[smallest_start_time[0]][smallest_start_time[1]][
+                    0
+                ].isoformat(),
+                activities[smallest_start_time[0]][smallest_start_time[1]][
+                    1
+                ].isoformat(),
                 smallest_start_time[0],
             ]
         )
         del activities[smallest_start_time[0]][smallest_start_time[1]]
         if not activities[smallest_start_time[0]]:
             del activities[smallest_start_time[0]]
-    return(log)
-
-
-for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
-    print(vp_dir)
-    log = extract_activities(get_log_data(vp_dir))
-    log = generate_log(log)
+    return log
 
-    history = get_history_db(vp_dir)
-    log = match_urls(history, log)
 
-    path = Path(f"{vp_dir}/{vp_dir}.csv")
+def write_logfile(vp_dir, log):
+    path = Path(f"{vp_dir}/logs.csv")
     with open(path, "w") as csvfile:
         writer = csv.writer(csvfile, delimiter=",", quoting=csv.QUOTE_NONNUMERIC)
         writer.writerow(["Starttime", "Endtime", "Title", "URL"])
@@ -128,4 +125,16 @@ for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
         writer.writerow(row)
 
 
-input("*Press enter to close*")
+def main():
+    for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
+        print(vp_dir)
+        log = extract_activities(get_log_data(vp_dir))
+        log = generate_log(log)
+
+        history = get_history_db(vp_dir)
+        log = match_urls(history, log)
+        write_logfile(vp_dir, log)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py
index e24edcf..bdf76c3 100644
--- a/bjoern/videoanalyse/post_processing.py
+++ b/bjoern/videoanalyse/post_processing.py
@@ -6,6 +6,7 @@ from pprint import pprint
 import pandas as pd
 
 import utils
+import LogParser
 
 argparser = argparse.ArgumentParser(description="OCR-Logfile evaluation")
 argparser.add_argument("vp_dir", help="Directory with all VPs")
@@ -17,12 +18,18 @@ all_vp = [x for x in data_path.iterdir() if x.is_dir()]
 
 vp_results = []
 for vp_path in all_vp:
+    log = LogParser.extract_activities(LogParser.get_log_data(vp_path))
+    log = LogParser.generate_log(log)
+    history = LogParser.get_history_db(vp_path)
+    log = LogParser.match_urls(history, log)
+    LogParser.write_logfile(vp_path, log)
+
     video_path = next(vp_path.glob("*.mkv"))
     ocr_path = vp_path / "analysis_results.csv"
-    log_path = vp_path / f"{vp_path.stem}.csv"
+    log_path = vp_path / "logs.csv"
 
     df = utils.combine_ocr_logs(video_path, ocr_path, log_path)
-    df = df.fillna('')
+    df = df.fillna("")
     df["vp_code"] = vp_path.stem
 
     df = utils.calc_levenshtein_distance(df)
-- 
cgit v1.2.3
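
For reference, the sketch below shows roughly how the refactored LogParser module can now be driven from another script, mirroring the calls post_processing.py makes in this commit. This is a minimal sketch, not part of the commit: it assumes LogParser.py is on the import path, that get_log_data and get_history_db accept a per-VP directory path as the hunks above suggest, and the process_vp helper name is hypothetical.

# Minimal usage sketch (not part of the commit). Assumes LogParser.py is
# importable and its helpers accept a per-VP directory, as in the diff above.
from pathlib import Path

import LogParser


def process_vp(vp_path):  # hypothetical helper name
    # Parse the raw log text into activity titles with their time windows.
    activities = LogParser.extract_activities(LogParser.get_log_data(vp_path))
    # Flatten the windows into [start, end, title] rows ordered by start time.
    log = LogParser.generate_log(activities)
    # Look up each entry's URL in the browser history database.
    history = LogParser.get_history_db(vp_path)
    log = LogParser.match_urls(history, log)
    # Write <vp_path>/logs.csv with a Starttime/Endtime/Title/URL header.
    LogParser.write_logfile(vp_path, log)
    return log


if __name__ == "__main__":
    for vp in (p for p in Path(".").iterdir() if p.is_dir()):
        process_vp(vp)

The design point of the commit is visible here: moving the old module-level loop into main() behind an "if __name__" guard means importing LogParser no longer runs the whole pipeline as a side effect, which is what lets post_processing.py call the individual functions per VP directory.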