diff options
Diffstat (limited to 'bjoern/videoanalyse')
-rw-r--r-- | bjoern/videoanalyse/post_processing.py | 10 |
1 files changed, 7 insertions, 3 deletions
diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py
index 4d82c4a..6ab2b0f 100644
--- a/bjoern/videoanalyse/post_processing.py
+++ b/bjoern/videoanalyse/post_processing.py
@@ -90,12 +90,16 @@ with open (data_path / "metrics.csv", "r") as input_file, \
     csv_reader = csv.reader(input_file, quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
     csv_writer = csv.writer(output_file, quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
     header = next(csv_reader)
-    header.extend(["group_index","longest","most_frequent"])
+    header.extend(["group_index","longest","longest-distance","most_frequent","most_frequent-distance"])
     csv_writer.writerow(header)
     for row in csv_reader:
         for idx, grp in enumerate(url_groups):
             if row[3] in grp:
                 row.append(idx)
-                row.append(max(grp, key=len))
-                row.append(max(set(grp), key=grp.count))
+                longest_in_grp = max(grp, key=len)
+                row.append(longest_in_grp)
+                row.append(Levenshtein.distance(row[6], longest_in_grp))
+                most_frequent_in_grp = max(set(grp), key=grp.count)
+                row.append(most_frequent_in_grp)
+                row.append(Levenshtein.distance(row[6], most_frequent_in_grp))
         csv_writer.writerow(row)