diff options
Diffstat (limited to 'bjoern/videoanalyse/utils.py')
-rw-r--r-- | bjoern/videoanalyse/utils.py | 28 |
1 files changed, 26 insertions, 2 deletions
def evaluate_results(vp_results):
    """Summarise per-participant distance metrics as one row of means each.

    Args:
        vp_results: Iterable of DataFrames, one per participant. Each frame
            must provide the columns ``vp_code``, ``levenshtein-distance``,
            ``longest-distance`` and ``most_frequent-distance``.

    Returns:
        A DataFrame with the columns ``vp_code``, ``mean_lev``, ``mean_long``
        and ``mean_freq``. It holds one row per input frame that has at
        least one row, in input order. ``vp_code`` is taken from the first
        row of the respective frame.

    Raises:
        KeyError: If a non-empty frame lacks one of the required columns.
    """
    # Output column -> input column whose arithmetic mean it holds.
    mean_sources = {
        "mean_lev": "levenshtein-distance",
        "mean_long": "longest-distance",
        "mean_freq": "most_frequent-distance",
    }

    rows = []
    for df in vp_results:
        # A frame without rows has no code to report and no defined mean
        # (indexing the first value / dividing by len would raise), so it
        # is left out instead of aborting the whole evaluation.
        if len(df) == 0:
            continue

        row = {"vp_code": df["vp_code"].iloc[0]}
        for target, source in mean_sources.items():
            # skipna=False keeps plain sum/len semantics: a NaN in the
            # column yields a NaN mean instead of being dropped silently.
            row[target] = df[source].mean(skipna=False)
        rows.append(row)

    # Explicit columns keep the layout stable even when no rows were built.
    return pd.DataFrame(rows, columns=["vp_code", *mean_sources])