From a6594776753504e7c6c0c389e6dcfd9dbc713cec Mon Sep 17 00:00:00 2001 From: Niclas Dobbertin Date: Wed, 4 Oct 2023 15:43:24 +0200 Subject: improve avg dist metrics --- bjoern/videoanalyse/utils.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'bjoern/videoanalyse') diff --git a/bjoern/videoanalyse/utils.py b/bjoern/videoanalyse/utils.py index b1eaa4f..df00482 100644 --- a/bjoern/videoanalyse/utils.py +++ b/bjoern/videoanalyse/utils.py @@ -130,22 +130,35 @@ def write_grouped_metrics(df, url_groups, data_path): def evaluate_results(vp_results): vp_code = [df["vp_code"].values[0] for df in vp_results] - mean_lev = [ - sum(df["levenshtein-distance"].values) / len(df["levenshtein-distance"]) - for df in vp_results - ] - mean_long = [ - sum(df["longest-distance"].values) / len(df["longest-distance"]) - for df in vp_results - ] - mean_freq = [ - sum(df["most_frequent-distance"].values) / len(df["most_frequent-distance"]) - for df in vp_results - ] + # mean_lev = [ + # sum(df["levenshtein-distance"].values) / len(df["levenshtein-distance"]) + # for df in vp_results + # ] + mean_long = [] + mean_freq = [] + for df in vp_results: + groups = set(df["group"].values) + group_long = 0 + group_freq = 0 + for group in groups: + group_df = df.loc[df['group'] == group] + group_long += group_df["longest-distance"].values[0] + group_freq += group_df["most_frequent-distance"].values[0] + mean_long.append(group_long / len(groups)) + mean_freq.append(group_freq / len(groups)) + + # mean_long = [ + # sum(df["longest-distance"].values) / len(df["longest-distance"]) + # for df in vp_results + # ] + # mean_freq = [ + # sum(df["most_frequent-distance"].values) / len(df["most_frequent-distance"]) + # for df in vp_results + # ] metrics = { "vp_code": vp_code, - "mean_lev": mean_lev, + # "mean_lev": mean_lev, "mean_long": mean_long, "mean_freq": mean_freq, } -- cgit v1.2.3