blob: a917b7a326f358c232eeebc3bdd80a51f9f96d0b (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
#!/usr/bin/env python3
import argparse
from pathlib import Path
import pandas as pd
import csv
from Levenshtein import distance as levendist
# Command-line interface: one positional argument naming the directory that
# holds the input file (merged.csv) and receives the output (metrics.csv).
argparser = argparse.ArgumentParser(description="Distance evaluation")
argparser.add_argument("vp_dir", help="Directory containing merged.csv")
args = argparser.parse_args()

vp_path = Path(args.vp_dir)
df = pd.read_csv(vp_path / "merged.csv")

# Edit distance between each row's `url` and `log_url` values. Both values are
# passed through str() first, so a missing cell (NaN) is compared as the
# literal text "nan" rather than raising.
# The two columns are walked in lockstep instead of using
# DataFrame.apply(axis=1), which would construct a full Series for every row
# just to read two cells.
df["levenshtein-distance"] = [
    levendist(str(url), str(log_url))
    for url, log_url in zip(df["url"], df["log_url"])
]

# The DataFrame index is written as the first column (pandas default);
# QUOTE_NONNUMERIC makes the csv writer quote every non-numeric field.
df.to_csv(vp_path / "metrics.csv", quoting=csv.QUOTE_NONNUMERIC)
|