#!/usr/bin/env python3
"""Summarise runs of consecutive rows that share a ``group_index``.

Recovered from the patch "add tab switching results" (commit 152ec0e), which
adds this module as ``bjoern/videoanalyse/tab_switch.py``.  The same patch
imports it in ``post_processing.py``, calls ``tab_switches_per_type`` on the
``metrics_grps.csv`` of every VP directory, writes each result to
``tabswitches.csv`` and passes the collected results to
``utils.evaluate_results``.
"""

import pandas as pd


def tab_switches_per_type(df):
    """Collapse each run of consecutive equal ``group_index`` rows into one row.

    A new run starts whenever ``group_index`` differs from the previous row,
    so the same group value can appear in several non-adjacent runs.  Every
    run contributes the values of its *first* row plus the run length.

    Args:
        df: DataFrame with the columns ``vp_code``, ``group_index``,
            ``log_url``, ``longest``, ``longest-distance``,
            ``most_frequent`` and ``most_frequent-distance``.

    Returns:
        A new DataFrame with a default integer index and the columns
        ``vp_code``, ``log_url``, ``count``, ``group``, ``longest_url``,
        ``longest-distance``, ``most_frequent_url`` and
        ``most_frequent-distance``; ``count`` is the number of rows in the
        run.  An empty input yields an empty frame with these columns.

    Raises:
        KeyError: If a required column is missing from a non-empty ``df``.
    """
    # (output column, source column) in output order; ``None`` marks the
    # derived run-length column.
    column_sources = (
        ("vp_code", "vp_code"),
        ("log_url", "log_url"),
        ("count", None),
        ("group", "group_index"),
        ("longest_url", "longest"),
        ("longest-distance", "longest-distance"),
        ("most_frequent_url", "most_frequent"),
        ("most_frequent-distance", "most_frequent-distance"),
    )

    # Without this guard there is no first row to anchor the first run on.
    if len(df.index) == 0:
        return pd.DataFrame(columns=[name for name, _ in column_sources])

    group = df["group_index"]
    # True wherever the value differs from the row before it.
    starts_run = group.ne(group.shift())
    # The first row always opens a run, whatever its value is (no magic
    # sentinel such as -1 that a real group index could collide with).
    starts_run.iloc[0] = True

    # Consecutive run numbers 1..k; grouping on them gives the run lengths
    # in order of appearance.
    run_id = starts_run.cumsum().to_numpy()
    run_lengths = starts_run.groupby(run_id).size().to_numpy()
    first_rows = df.loc[starts_run.to_numpy()]

    return pd.DataFrame(
        {
            name: run_lengths if source is None else first_rows[source].to_numpy()
            for name, source in column_sources
        }
    )