diff options
Diffstat (limited to 'bjoern/videoanalyse/combine_ocr-logs.py')
-rw-r--r-- | bjoern/videoanalyse/combine_ocr-logs.py | 52 |
1 files changed, 52 insertions, 0 deletions
#!/usr/bin/env python3
"""Combine the results of the OCR video analysis with the browser log file.

The script expects a participant directory that contains

* one ``*.mkv`` recording whose file name (without suffix) is the recording
  start in the form ``YYYY-MM-DD HH-MM-SS``,
* ``analysis_results.csv`` with a ``start_time`` column holding offsets in
  seconds from the start of the recording,
* ``<directory name>.csv``, the log file; its first, second and fourth
  columns are read positionally as start time, end time (both ``HH:MM:SS``)
  and the value written to ``log_url``.

The merged table is written to ``merged.csv`` in the same directory.
"""

import argparse
import csv
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd

# Format of the recording's file name (stem), e.g. "2023-05-17 14-30-00".
DATE_FORMAT = "%Y-%m-%d %H-%M-%S"
# Wall-clock format shared by the log file and the derived "Starttime" column.
TIME_FORMAT = "%H:%M:%S"


def add_video_time_to_start(x, video_date):
    """Return the wall-clock time (``HH:MM:SS``) of an offset into the video.

    Args:
        x: Offset from the start of the recording in seconds; truncated to
            whole seconds with ``int``.
        video_date: ``datetime`` at which the recording started.

    Returns:
        ISO formatted time of day of ``video_date + x`` (the date part is
        dropped).
    """
    start = timedelta(seconds=int(x))
    return (start + video_date).time().isoformat()


def build_log_intervals(logs):
    """Parse the log table once into ``(start, end, value)`` tuples.

    Columns are selected by position (0, 1 and 3) through ``iloc`` rather
    than by indexing a row Series with integers: the latter relies on a
    positional fallback that pandas has deprecated for label-indexed Series.

    Args:
        logs: DataFrame read from the log CSV.

    Returns:
        List of ``(datetime, datetime, value)`` tuples in file order.

    Raises:
        IndexError: If the table has fewer than four columns.
        ValueError: If a start or end time is not in ``HH:MM:SS`` format.
    """
    columns = logs.iloc[:, [0, 1, 3]]
    return [
        (
            datetime.strptime(log_start, TIME_FORMAT),
            datetime.strptime(log_end, TIME_FORMAT),
            value,
        )
        for log_start, log_end, value in columns.itertuples(index=False, name=None)
    ]


def get_log_url(start_time, log_intervals):
    """Return the log value whose interval contains ``start_time``.

    Args:
        start_time: Time of day as an ``HH:MM:SS`` string.
        log_intervals: Output of :func:`build_log_intervals`.

    Returns:
        The value of the first interval (in file order) with
        ``start <= start_time <= end`` (both bounds inclusive), or ``0`` when
        no interval matches.
    """
    moment = datetime.strptime(start_time, TIME_FORMAT)
    for log_start, log_end, value in log_intervals:
        if log_start <= moment <= log_end:
            return value
    # 0 is the established "no matching log entry" marker in merged.csv.
    return 0


def main():
    """Parse the command line, merge both tables and write ``merged.csv``."""
    argparser = argparse.ArgumentParser(
        description="Combines results of OCR analysis with log files"
    )
    argparser.add_argument(
        "vp_dir", help="Directory containing analysis_results.csv and VPCODE.csv"
    )
    args = argparser.parse_args()

    vp_path = Path(args.vp_dir)

    # Pick the recording deterministically and fail with a readable message
    # instead of a bare StopIteration when the directory holds no video.
    video_path = min(vp_path.glob("*.mkv"), default=None)
    if video_path is None:
        argparser.error(f"no .mkv recording found in {vp_path}")
    video_date = datetime.strptime(video_path.stem, DATE_FORMAT)
    print(video_date)

    analysis = pd.read_csv(vp_path / "analysis_results.csv")
    analysis["Starttime"] = analysis["start_time"].apply(
        add_video_time_to_start, args=(video_date,)
    )

    # absolute() makes the directory name available even when vp_dir is ".".
    logs = pd.read_csv(vp_path / f"{vp_path.absolute().name}.csv")
    log_intervals = build_log_intervals(logs)

    analysis["log_url"] = analysis["Starttime"].apply(
        get_log_url, args=(log_intervals,)
    )

    analysis.to_csv(vp_path / "merged.csv", quoting=csv.QUOTE_NONNUMERIC)


if __name__ == "__main__":
    main()