diff options
Diffstat (limited to 'bjoern/videoanalyse/post_processing.py')
-rw-r--r-- | bjoern/videoanalyse/post_processing.py | 34 |
1 files changed, 23 insertions, 11 deletions
diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py index a8d37c4..6ffff1e 100644 --- a/bjoern/videoanalyse/post_processing.py +++ b/bjoern/videoanalyse/post_processing.py @@ -3,27 +3,39 @@ import argparse from pathlib import Path from pprint import pprint +import pandas as pd import utils argparser = argparse.ArgumentParser(description="OCR-Logfile evaluation") -argparser.add_argument("vp_dir", help="VP Directory") +argparser.add_argument("vp_dir", help="Directory with all VPs") args = argparser.parse_args() data_path = Path(args.vp_dir) -video_path = next(data_path.glob("*.mkv")) -ocr_path = data_path / "analysis_results.csv" -log_path = data_path / f"{data_path.stem}.csv" +all_vp = [x for x in data_path.iterdir() if x.is_dir()] -df = utils.combine_ocr_logs(video_path, ocr_path, log_path) -df = df.fillna('') +vp_results = [] +for vp_path in all_vp: + video_path = next(vp_path.glob("*.mkv")) + ocr_path = vp_path / "analysis_results.csv" + log_path = vp_path / f"{vp_path.stem}.csv" -df = utils.calc_levenshtein_distance(df) + df = utils.combine_ocr_logs(video_path, ocr_path, log_path) + df = df.fillna('') + df["vp_code"] = vp_path.stem + df = utils.calc_levenshtein_distance(df) -url_groups = utils.group_urls(list(df["url"].values)) -pprint(len(url_groups)) + url_groups = utils.group_urls(list(df["url"].values)) + pprint(len(url_groups)) -df.to_csv(f"{data_path}/metrics.csv") -utils.write_grouped_metrics(df, url_groups, data_path) + df.to_csv(f"{vp_path}/metrics.csv") + utils.write_grouped_metrics(df, url_groups, vp_path) + + df = pd.read_csv(f"{vp_path}/metrics_grps.csv") + + vp_results.append(df) + +evals = utils.evaluate_results(vp_results) +pprint(evals) |