Diffstat (limited to 'bjoern/videoanalyse')
-rw-r--r--  bjoern/videoanalyse/post_processing.py |  9
-rw-r--r--  bjoern/videoanalyse/tab_switch.py      | 48
2 files changed, 55 insertions(+), 2 deletions(-)
diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py
index b5e3c65..445b72f 100644
--- a/bjoern/videoanalyse/post_processing.py
+++ b/bjoern/videoanalyse/post_processing.py
@@ -8,6 +8,7 @@ import pandas as pd
import LogParser
import utils
+import tab_switch
argparser = argparse.ArgumentParser(description="OCR-Logfile evaluation")
argparser.add_argument("vp_dir", help="Directory with all VPs")
@@ -18,6 +19,7 @@ data_path = Path(args.vp_dir)
all_vp = [x for x in data_path.iterdir() if x.is_dir()]
vp_results = []
+tab_results = []
for vp_path in all_vp:
    log = LogParser.extract_activities(LogParser.get_log_data(vp_path))
    log = LogParser.generate_log(log)
@@ -43,8 +45,11 @@ for vp_path in all_vp:
    df = pd.read_csv(f"{vp_path}/metrics_grps.csv")
+    tab_df = tab_switch.tab_switches_per_type(df)
+    tab_results.append(tab_df)
+    tab_df.to_csv(f"{vp_path}/tabswitches.csv")
+
    vp_results.append(df)
-evals = utils.evaluate_results(vp_results)
-pprint(evals)
+evals = utils.evaluate_results(tab_results)
evals.to_csv(f"{data_path}/evaluation.csv")
diff --git a/bjoern/videoanalyse/tab_switch.py b/bjoern/videoanalyse/tab_switch.py
new file mode 100644
index 0000000..2efa647
--- /dev/null
+++ b/bjoern/videoanalyse/tab_switch.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+import pandas as pd
+
+
+def tab_switches_per_type(df):
+    # url_types = ["url", "log_url", "longest", "most_frequent"]
+    # dist_types = [
+    #     "levenshtein-distance",
+    #     "levenshtein-distance",
+    #     "longest-distance",
+    #     "most_frequent-distance",
+    # ]
+
+    result = {
+        "vp_code": [],
+        "log_url": [],
+        "count": [],
+        "group": [],
+        "longest_url": [],
+        "longest-distance": [],
+        "most_frequent_url": [],
+        "most_frequent-distance": [],
+    }
+    last_group = -1
+    count = -1
+    for row in df.iterrows():
+        row = row[1]
+        if row["group_index"] != last_group:
+            result["vp_code"].append(row["vp_code"])
+            result["group"].append(row["group_index"])
+            result["log_url"].append(row["log_url"])
+            result["longest_url"].append(row["longest"])
+            result["longest-distance"].append(row["longest-distance"])
+            result["most_frequent_url"].append(row["most_frequent"])
+            result["most_frequent-distance"].append(row["most_frequent-distance"])
+
+            last_group = row["group_index"]
+            if count == -1:
+                count = 1
+                continue
+            result["count"].append(count)
+            count = 1
+        else:
+            count += 1
+    result["count"].append(count)
+    result_df = pd.DataFrame(result)
+    return result_df
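For reference, a minimal sketch of how tab_switches_per_type consumes a metrics_grps.csv frame, mirroring the call added to post_processing.py above. The column names are the ones the function reads; the vp_code, URLs, and distance values below are made up for illustration.

    import pandas as pd

    import tab_switch

    # Two groups of OCR rows: group 0 spans two rows, group 1 spans three.
    df = pd.DataFrame(
        {
            "vp_code": ["vp01"] * 5,
            "group_index": [0, 0, 1, 1, 1],
            "log_url": ["a.example"] * 2 + ["b.example"] * 3,
            "longest": ["a.example/x"] * 2 + ["b.example/y"] * 3,
            "longest-distance": [1, 1, 2, 2, 2],
            "most_frequent": ["a.example"] * 2 + ["b.example"] * 3,
            "most_frequent-distance": [0, 0, 1, 1, 1],
        }
    )

    tab_df = tab_switch.tab_switches_per_type(df)
    # One row per group; "count" holds the number of consecutive rows spent in
    # that group before the next switch: 2 for group 0, 3 for group 1.
    print(tab_df[["vp_code", "group", "log_url", "count"]])
    tab_df.to_csv("tabswitches.csv")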