#!/usr/bin/env python3
"""OCR-Logfile evaluation for a single VP directory.

Usage: pass the VP directory as the only positional argument.

Inputs read from the directory:
  * the first ``*.mkv`` file found (glob order is not guaranteed, so keep
    exactly one video per directory),
  * ``analysis_results.csv`` (the OCR results),
  * ``<directory stem>.csv`` (the log file).

Outputs written to the same directory:
  * ``metrics.csv`` -- the combined frame with the distance column(s) added,
  * whatever ``utils.write_grouped_metrics`` produces for the URL groups.

The number of URL groups is printed to stdout.
"""
import argparse
from pathlib import Path
from pprint import pprint

import utils


def main() -> None:
    """Parse the command line and run the OCR/log evaluation pipeline."""
    argparser = argparse.ArgumentParser(description="OCR-Logfile evaluation")
    argparser.add_argument("vp_dir", help="VP Directory")
    args = argparser.parse_args()

    data_path = Path(args.vp_dir)
    if not data_path.is_dir():
        argparser.error(f"not a directory: {data_path}")

    # next() with a default avoids a bare StopIteration traceback when the
    # directory holds no video; report it as a usage error instead.
    video_path = next(data_path.glob("*.mkv"), None)
    if video_path is None:
        argparser.error(f"no *.mkv file found in {data_path}")

    ocr_path = data_path / "analysis_results.csv"
    # The log file is expected to be named after the directory itself.
    log_path = data_path / f"{data_path.stem}.csv"

    df = utils.combine_ocr_logs(video_path, ocr_path, log_path)
    # Replace missing values with empty strings before the distance step.
    # NOTE(review): presumably the Levenshtein helper needs str inputs -- confirm.
    df = df.fillna('')
    df = utils.calc_levenshtein_distance(df)

    url_groups = utils.group_urls(list(df["url"].values))
    pprint(len(url_groups))

    # Assumes df is a pandas DataFrame, whose to_csv accepts path objects.
    df.to_csv(data_path / "metrics.csv")
    utils.write_grouped_metrics(df, url_groups, data_path)


if __name__ == "__main__":
    main()