summaryrefslogtreecommitdiff
path: root/bjoern/videoanalyse/utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'bjoern/videoanalyse/utils.py')
-rw-r--r--bjoern/videoanalyse/utils.py28
1 files changed, 26 insertions, 2 deletions
diff --git a/bjoern/videoanalyse/utils.py b/bjoern/videoanalyse/utils.py
index e060a89..69ffa96 100644
--- a/bjoern/videoanalyse/utils.py
+++ b/bjoern/videoanalyse/utils.py
@@ -20,13 +20,11 @@ def combine_ocr_logs(video_path, ocr_path, log_path):
start = timedelta(seconds=int(round(x)))
return (start + video_date).time().isoformat()
- # analysis = pd.read_csv(vp_path / "analysis_results.csv")
analysis = pd.read_csv(ocr_path)
analysis["Starttime"] = analysis["start_time"].apply(
add_video_time_to_start, args=(video_date,)
)
- # logs = pd.read_csv(vp_path / f"{vp_path.name}.csv")
logs = pd.read_csv(log_path)
def get_log_url(start_time):
@@ -123,3 +121,29 @@ def write_grouped_metrics(df, url_groups, data_path):
row.append(str(most_frequent_in_grp))
row.append(levendist(row[5], most_frequent_in_grp))
csv_writer.writerow(row)
+
+
def evaluate_results(vp_results):
    """Build a table with the mean distance metrics of every result frame.

    Args:
        vp_results: Iterable of pandas DataFrames. Each frame must provide
            the columns "vp_code", "levenshtein-distance",
            "longest-distance" and "most_frequent-distance".
            (Presumably one frame per participant -- confirm with callers.)

    Returns:
        A DataFrame with one row per non-empty input frame and the columns
        "vp_code" (first "vp_code" value of the frame), "mean_lev",
        "mean_long" and "mean_freq" (arithmetic means of the respective
        distance columns).

    Raises:
        KeyError: If a non-empty frame lacks one of the required columns.
    """
    # Output column -> input column it is averaged from.  Insertion order
    # defines the column order of the returned frame.
    distance_columns = {
        "mean_lev": "levenshtein-distance",
        "mean_long": "longest-distance",
        "mean_freq": "most_frequent-distance",
    }
    metrics = {"vp_code": []}
    for name in distance_columns:
        metrics[name] = []

    for df in vp_results:
        if df.empty:
            # A frame without rows offers neither a vp_code nor a mean;
            # skip it instead of failing the whole evaluation with an
            # IndexError / ZeroDivisionError.
            continue
        metrics["vp_code"].append(df["vp_code"].iloc[0])
        for name, column in distance_columns.items():
            # skipna=False keeps the sum/len semantics: a NaN in the
            # column yields a NaN mean rather than being ignored.
            metrics[name].append(df[column].mean(skipna=False))

    return pd.DataFrame(metrics)