#!/usr/bin/env python3
"""Combine the results of the OCR analysis with the participant's log file."""
import argparse
import csv
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd

argparser = argparse.ArgumentParser(
    description="Combines results of OCR analysis with log files"
)
argparser.add_argument(
    "vp_dir", help="Directory containing analysis_results.csv and VPCODE.csv"
)
args = argparser.parse_args()

vp_path = Path(args.vp_dir)

# The recording's filename encodes its start timestamp, e.g. "2021-03-15 09-30-00.mkv".
video_path = next(vp_path.glob("*.mkv"))
date_format = "%Y-%m-%d %H-%M-%S"
video_date = datetime.strptime(video_path.stem, date_format)
print(video_date)

# video_delta = timedelta(hours=video_date.hour, minutes=video_date.minute, seconds=video_date.second)


def add_video_time_to_start(x, video_date):
    """Convert an offset in seconds into an absolute wall-clock time string."""
    start = timedelta(seconds=int(round(x)))
    return (start + video_date).time().isoformat()


analysis = pd.read_csv(vp_path / "analysis_results.csv")
analysis["Starttime"] = analysis["start_time"].apply(
    add_video_time_to_start, args=(video_date,)
)

# The log file is named after the participant directory, e.g. VPCODE/VPCODE.csv.
logs = pd.read_csv(vp_path / f"{vp_path.name}.csv")


def get_log_url(start_time):
    """Return the URL of the log entry whose interval contains start_time, else 0."""
    start_time = datetime.strptime(start_time, "%H:%M:%S")
    for _, row in logs.iterrows():
        # Log columns by position: 0 = entry start, 1 = entry end, 3 = URL.
        log_start = datetime.strptime(row.iloc[0], "%H:%M:%S")
        log_end = datetime.strptime(row.iloc[1], "%H:%M:%S")
        if log_start <= start_time <= log_end:
            return row.iloc[3]
    return 0


analysis["log_url"] = analysis.apply(
    lambda row: get_log_url(row.Starttime), axis=1
)

analysis.to_csv(vp_path / "merged.csv", quoting=csv.QUOTE_NONNUMERIC)