#!/usr/bin/env python3
"""Batch OCR-logfile evaluation.

For every participant (VP) subdirectory of the given root directory this
script:

1. rebuilds the browsing log from the raw log data and the history DB
   (``LogParser``) and persists it as ``logs.csv``,
2. combines the log with the OCR results of the screen recording
   (``analysis_results.csv``) and computes Levenshtein-based metrics,
3. derives tab-switch statistics per URL group and per logged URL,
4. writes an aggregated ``evaluation.csv`` next to the VP directories.

Usage: ``python <script> <vp_dir>``
"""
import argparse
from pathlib import Path
from pprint import pprint

import pandas as pd

import LogParser
import tab_switch
import utils


def _build_log(vp_path):
    """Parse the raw log data, enrich it with history URLs, and persist it.

    Side effect: writes the generated log file into *vp_path*
    (via ``LogParser.write_logfile``).
    """
    log = LogParser.extract_activities(LogParser.get_log_data(vp_path))
    log = LogParser.generate_log(log)
    history = LogParser.get_history_db(vp_path)
    log = LogParser.match_urls(history, log)
    LogParser.write_logfile(vp_path, log)
    return log


def _process_vp(vp_path):
    """Compute metrics and tab-switch tables for one VP directory.

    Writes ``metrics.csv``, the grouped metrics, ``tabswitches.csv`` and
    ``log_tabswitches.csv`` into *vp_path*.

    Returns:
        (metrics_df, tab_df, log_tab_df) — the grouped metrics frame and
        the two tab-switch frames.

    Raises:
        FileNotFoundError: if the VP directory contains no ``*.mkv``
            screen recording.
    """
    _build_log(vp_path)

    try:
        video_path = next(vp_path.glob("*.mkv"))
    except StopIteration:
        # Fail with a clear message instead of an opaque StopIteration.
        raise FileNotFoundError(f"No .mkv screen recording found in {vp_path}")

    ocr_path = vp_path / "analysis_results.csv"
    log_path = vp_path / "logs.csv"

    df = utils.combine_ocr_logs(video_path, ocr_path, log_path)
    df = df.fillna("")
    df["vp_code"] = vp_path.stem
    df = utils.calc_levenshtein_distance(df)

    url_groups = utils.group_urls(list(df["url"].values))
    pprint(len(url_groups))  # progress feedback: number of distinct URL groups

    df.to_csv(vp_path / "metrics.csv")
    utils.write_grouped_metrics(df, url_groups, vp_path)

    # Re-read the grouped metrics written by write_grouped_metrics so the
    # tab-switch step works on the persisted (group-annotated) data.
    df = pd.read_csv(vp_path / "metrics_grps.csv")

    tab_df = tab_switch.tab_switches_per_type(df, "group_index")
    tab_df.to_csv(vp_path / "tabswitches.csv")

    log_tab_df = tab_switch.tab_switches_per_type(df, "log_url")
    log_tab_df.to_csv(vp_path / "log_tabswitches.csv")

    return df, tab_df, log_tab_df


def main():
    """CLI entry point: evaluate every VP directory under ``vp_dir``."""
    argparser = argparse.ArgumentParser(description="OCR-Logfile evaluation")
    argparser.add_argument("vp_dir", help="Directory with all VPs")
    args = argparser.parse_args()

    data_path = Path(args.vp_dir)
    all_vp = [x for x in data_path.iterdir() if x.is_dir()]

    tab_results = []
    log_tab_results = []
    for vp_path in all_vp:
        _metrics_df, tab_df, log_tab_df = _process_vp(vp_path)
        tab_results.append(tab_df)
        log_tab_results.append(log_tab_df)

    evals = utils.evaluate_results(tab_results, log_tab_results)
    evals.to_csv(data_path / "evaluation.csv")


if __name__ == "__main__":
    main()