#!/usr/bin/env python3
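"""Evaluate OCR results against logfiles for a set of VPs.

For every VP subdirectory, the OCR output is combined with the corresponding
logfile, Levenshtein distances are computed, URLs are grouped, and per-VP
metrics are written to CSV. The per-VP results are then aggregated into an
overall evaluation (the individual steps live in the `utils` module).

The only command-line argument is the directory that contains one
subdirectory per VP.
"""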
import argparse
from pathlib import Path
from pprint import pprint
import pandas as pd
import utils
argparser = argparse.ArgumentParser(description="OCR logfile evaluation")
argparser.add_argument("vp_dir", help="Directory containing one subdirectory per VP")
args = argparser.parse_args()

data_path = Path(args.vp_dir)
all_vp = [x for x in data_path.iterdir() if x.is_dir()]
vp_results = []
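
# Each VP directory is expected to contain:
#   - a screen recording           (*.mkv)
#   - the OCR results              (analysis_results.csv)
#   - the logfile                  (<vp_code>.csv, named after the directory)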
for vp_path in all_vp:
    # Locate the screen recording and the two CSV inputs for this VP.
    video_path = next(vp_path.glob("*.mkv"))
    ocr_path = vp_path / "analysis_results.csv"
    log_path = vp_path / f"{vp_path.stem}.csv"

    # Merge the OCR output with the logfile and compute per-row metrics.
    df = utils.combine_ocr_logs(video_path, ocr_path, log_path)
    df = df.fillna('')
    df["vp_code"] = vp_path.stem
    df = utils.calc_levenshtein_distance(df)

    # Group similar URLs and write the per-VP metrics to disk.
    url_groups = utils.group_urls(list(df["url"].values))
    pprint(len(url_groups))  # number of URL groups found for this VP
    df.to_csv(vp_path / "metrics.csv")
    utils.write_grouped_metrics(df, url_groups, vp_path)

    # Re-read the grouped metrics written by write_grouped_metrics and
    # collect them for the overall evaluation.
    df = pd.read_csv(vp_path / "metrics_grps.csv")
    vp_results.append(df)

# Aggregate the per-VP results into the final evaluation.
evals = utils.evaluate_results(vp_results)
pprint(evals)