summaryrefslogtreecommitdiff
path: root/bjoern/videoanalyse/combine_ocr-logs.py
blob: 1d996299211bc334ff11fdc362a7054fa361ed20 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python3

import argparse
from pathlib import Path
from datetime import datetime, timedelta
import pandas as pd
import csv

# Command-line interface: a single positional argument naming the directory
# to process.
argparser = argparse.ArgumentParser(
    description="Combines results of OCR analysis with log files"
)
argparser.add_argument(
    "vp_dir", help="Directory containing analysis_results.csv and VPCODE.csv"
)

args = argparser.parse_args()

vp_path = Path(args.vp_dir)

# The first *.mkv file found in the directory supplies the reference
# timestamp: its file name (without extension) is parsed with date_format.
video_path = next(vp_path.glob("*.mkv"), None)
if video_path is None:
    # Exit with a usage error instead of an uncaught StopIteration traceback.
    argparser.error(f"no .mkv file found in {vp_path}")
date_format = "%Y-%m-%d %H-%M-%S"
video_date = datetime.strptime(video_path.stem, date_format)
print(video_date)

def add_video_time_to_start(x, video_date):
    """Return the time of day that lies ``x`` seconds after ``video_date``.

    ``x`` is rounded to the nearest whole second before it is added to
    ``video_date``. Only the time-of-day part of the resulting datetime is
    returned, formatted as an ISO 8601 string.
    """
    whole_seconds = int(round(x))
    moment = video_date + timedelta(seconds=whole_seconds)
    return moment.time().isoformat()

# Load the OCR analysis results and derive a "Starttime" column by adding
# each row's "start_time" (treated as seconds) to the video's timestamp.
analysis = pd.read_csv(vp_path.joinpath("analysis_results.csv"))
analysis["Starttime"] = analysis["start_time"].apply(
    lambda seconds: add_video_time_to_start(seconds, video_date)
)

# The log file carries the directory's own name: <dir>/<dirname>.csv
logs = pd.read_csv(vp_path.joinpath(f"{vp_path.name}.csv"))

def get_log_url(start_time):
    """Return the log entry value whose time window contains ``start_time``.

    Rows of the module-level ``logs`` frame are scanned in order. Each row is
    read by position: column 0 is the window start, column 1 the window end
    (both ``"%H:%M:%S"`` strings), and column 3 the value to return.

    Args:
        start_time: Time of day as an ``"%H:%M:%S"`` string.

    Returns:
        Column 3 of the first row whose window (inclusive on both ends)
        contains ``start_time``, or ``0`` when no row matches.

    Raises:
        ValueError: If ``start_time`` or a log time is not ``"%H:%M:%S"``.
    """
    moment = datetime.strptime(start_time, "%H:%M:%S")

    # Plain tuples give true positional access; indexing an iterrows() Series
    # with integers relies on a deprecated positional fallback.
    for entry in logs.itertuples(index=False, name=None):
        log_start = datetime.strptime(entry[0], "%H:%M:%S")
        log_end = datetime.strptime(entry[1], "%H:%M:%S")
        if log_start <= moment <= log_end:
            return entry[3]
    return 0



# Look up the log URL for every analysis row. Only the "Starttime" column is
# needed, so apply over that column directly instead of row-wise over the
# whole frame (which builds a Series for every row).
analysis["log_url"] = analysis["Starttime"].apply(get_log_url)

# Write the combined table into the same directory; QUOTE_NONNUMERIC quotes
# every non-numeric field.
analysis.to_csv(vp_path / "merged.csv", quoting=csv.QUOTE_NONNUMERIC)