From ef5ff235f5379d66e68550db583cdef9540343b3 Mon Sep 17 00:00:00 2001 From: areyoumee Date: Thu, 10 Aug 2023 19:14:36 +0200 Subject: add levenshtein distance for metrics --- bjoern/videoanalyse/post_processing.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'bjoern') diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py index 4d82c4a..6ab2b0f 100644 --- a/bjoern/videoanalyse/post_processing.py +++ b/bjoern/videoanalyse/post_processing.py @@ -90,12 +90,16 @@ with open (data_path / "metrics.csv", "r") as input_file, \ csv_reader = csv.reader(input_file, quotechar='"', quoting=csv.QUOTE_NONNUMERIC) csv_writer = csv.writer(output_file, quotechar='"', quoting=csv.QUOTE_NONNUMERIC) header = next(csv_reader) - header.extend(["group_index","longest","most_frequent"]) + header.extend(["group_index","longest","longest-distance","most_frequent","most_frequent-distance"]) csv_writer.writerow(header) for row in csv_reader: for idx, grp in enumerate(url_groups): if row[3] in grp: row.append(idx) - row.append(max(grp, key=len)) - row.append(max(set(grp), key=grp.count)) + longest_in_grp = max(grp, key=len) + row.append(longest_in_grp) + row.append(Levenshtein.distance(row[6], longest_in_grp)) + most_frequent_in_grp = max(set(grp), key=grp.count) + row.append(most_frequent_in_grp) + row.append(Levenshtein.distance(row[6], most_frequent_in_grp)) csv_writer.writerow(row) -- cgit v1.2.3