1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
#!/usr/bin/env python3
import pandas as pd
def tab_switches_per_type(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse consecutive rows with the same ``group_index`` into one row.

    Rows are scanned in their existing order. Each maximal run of adjacent
    rows sharing a ``group_index`` value yields one output row that carries
    the values of the run's *first* row plus the number of rows in the run.
    The input is not sorted or otherwise reordered, so a ``group_index``
    that reappears after a different one starts a new run.

    Args:
        df: Frame providing the columns ``vp_code``, ``group_index``,
            ``log_url``, ``longest``, ``longest-distance``,
            ``most_frequent`` and ``most_frequent-distance``.

    Returns:
        A new DataFrame with the columns ``vp_code``, ``log_url``, ``count``,
        ``group``, ``longest_url``, ``longest-distance``,
        ``most_frequent_url`` and ``most_frequent-distance`` (in that order),
        one row per run. An empty input yields an empty frame with these
        columns.

    Raises:
        KeyError: If a required column is missing and ``df`` has rows.
    """
    # Output column -> input column for values copied from a run's first row.
    # "count" is filled separately but declared here to keep column order.
    first_row_sources = {
        "vp_code": "vp_code",
        "log_url": "log_url",
        "count": None,
        "group": "group_index",
        "longest_url": "longest",
        "longest-distance": "longest-distance",
        "most_frequent_url": "most_frequent",
        "most_frequent-distance": "most_frequent-distance",
    }
    result = {column: [] for column in first_row_sources}
    counts = result["count"]

    last_group = None
    for _, row in df.iterrows():
        group = row["group_index"]
        # `not counts` marks the very first row; checking it first avoids a
        # magic sentinel value that could collide with a real group index.
        if not counts or group != last_group:
            for column, source in first_row_sources.items():
                if source is not None:
                    result[column].append(row[source])
            counts.append(1)
            last_group = group
        else:
            # Same run as the previous row: bump that run's counter in place.
            counts[-1] += 1

    return pd.DataFrame(result)
|