summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bjoern/videoanalyse/post_processing.py34
-rw-r--r--bjoern/videoanalyse/utils.py28
2 files changed, 49 insertions, 13 deletions
diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py
index a8d37c4..6ffff1e 100644
--- a/bjoern/videoanalyse/post_processing.py
+++ b/bjoern/videoanalyse/post_processing.py
@@ -3,27 +3,39 @@
import argparse
from pathlib import Path
from pprint import pprint
+import pandas as pd
import utils
argparser = argparse.ArgumentParser(description="OCR-Logfile evaluation")
-argparser.add_argument("vp_dir", help="VP Directory")
+argparser.add_argument("vp_dir", help="Directory with all VPs")
args = argparser.parse_args()
data_path = Path(args.vp_dir)
-video_path = next(data_path.glob("*.mkv"))
-ocr_path = data_path / "analysis_results.csv"
-log_path = data_path / f"{data_path.stem}.csv"
+all_vp = [x for x in data_path.iterdir() if x.is_dir()]
-df = utils.combine_ocr_logs(video_path, ocr_path, log_path)
-df = df.fillna('')
+vp_results = []
+for vp_path in all_vp:
+ video_path = next(vp_path.glob("*.mkv"))
+ ocr_path = vp_path / "analysis_results.csv"
+ log_path = vp_path / f"{vp_path.stem}.csv"
-df = utils.calc_levenshtein_distance(df)
+ df = utils.combine_ocr_logs(video_path, ocr_path, log_path)
+ df = df.fillna('')
+ df["vp_code"] = vp_path.stem
+ df = utils.calc_levenshtein_distance(df)
-url_groups = utils.group_urls(list(df["url"].values))
-pprint(len(url_groups))
+ url_groups = utils.group_urls(list(df["url"].values))
+ pprint(len(url_groups))
-df.to_csv(f"{data_path}/metrics.csv")
-utils.write_grouped_metrics(df, url_groups, data_path)
+ df.to_csv(f"{vp_path}/metrics.csv")
+ utils.write_grouped_metrics(df, url_groups, vp_path)
+
+ df = pd.read_csv(f"{vp_path}/metrics_grps.csv")
+
+ vp_results.append(df)
+
+evals = utils.evaluate_results(vp_results)
+pprint(evals)
diff --git a/bjoern/videoanalyse/utils.py b/bjoern/videoanalyse/utils.py
index 69215ff..b1eaa4f 100644
--- a/bjoern/videoanalyse/utils.py
+++ b/bjoern/videoanalyse/utils.py
@@ -20,13 +20,11 @@ def combine_ocr_logs(video_path, ocr_path, log_path):
start = timedelta(seconds=int(round(x)))
return (start + video_date).time().isoformat()
- # analysis = pd.read_csv(vp_path / "analysis_results.csv")
analysis = pd.read_csv(ocr_path)
analysis["Starttime"] = analysis["start_time"].apply(
add_video_time_to_start, args=(video_date,)
)
- # logs = pd.read_csv(vp_path / f"{vp_path.name}.csv")
logs = pd.read_csv(log_path)
def get_log_url(start_time):
@@ -128,3 +126,29 @@ def write_grouped_metrics(df, url_groups, data_path):
row.append(str(most_frequent_in_grp))
row.append(levendist_normalized(log_url, most_frequent_in_grp, log_url))
csv_writer.writerow(row)
+
+
def evaluate_results(vp_results):
    """Aggregate per-VP metric DataFrames into a single summary DataFrame.

    Parameters
    ----------
    vp_results : list of pd.DataFrame
        One DataFrame per VP. Each must contain the columns
        'vp_code', 'levenshtein-distance', 'longest-distance' and
        'most_frequent-distance'.

    Returns
    -------
    pd.DataFrame
        One row per VP: its code plus the mean of each distance metric,
        in columns 'vp_code', 'mean_lev', 'mean_long', 'mean_freq'.
    """
    # Output column -> source metric column. Driving the aggregation from
    # this mapping removes the three copy-pasted sum/len comprehensions.
    metric_columns = {
        "mean_lev": "levenshtein-distance",
        "mean_long": "longest-distance",
        "mean_freq": "most_frequent-distance",
    }

    metrics = {
        # 'vp_code' is constant within each df, so the first row identifies it.
        "vp_code": [df["vp_code"].iloc[0] for df in vp_results],
    }
    for out_col, src_col in metric_columns.items():
        # Series.mean() replaces the manual sum(values)/len(...) computation.
        metrics[out_col] = [df[src_col].mean() for df in vp_results]

    return pd.DataFrame(metrics)