summary | refs | log | tree | commit | diff
path: root/bjoern/videoanalyse/post_processing.py
diff options
context:
space:
mode:
Diffstat (limited to 'bjoern/videoanalyse/post_processing.py')
-rw-r--r-- bjoern/videoanalyse/post_processing.py 10
1 files changed, 7 insertions, 3 deletions
diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py
index 4d82c4a..6ab2b0f 100644
--- a/bjoern/videoanalyse/post_processing.py
+++ b/bjoern/videoanalyse/post_processing.py
@@ -90,12 +90,16 @@ with open (data_path / "metrics.csv", "r") as input_file, \
csv_reader = csv.reader(input_file, quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
csv_writer = csv.writer(output_file, quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
header = next(csv_reader)
- header.extend(["group_index","longest","most_frequent"])
+ header.extend(["group_index","longest","longest-distance","most_frequent","most_frequent-distance"])
csv_writer.writerow(header)
for row in csv_reader:
for idx, grp in enumerate(url_groups):
if row[3] in grp:
row.append(idx)
- row.append(max(grp, key=len))
- row.append(max(set(grp), key=grp.count))
+ longest_in_grp = max(grp, key=len)
+ row.append(longest_in_grp)
+ row.append(Levenshtein.distance(row[6], longest_in_grp))
+ most_frequent_in_grp = max(set(grp), key=grp.count)
+ row.append(most_frequent_in_grp)
+ row.append(Levenshtein.distance(row[6], most_frequent_in_grp))
csv_writer.writerow(row)