#!/usr/bin/env python3
import pandas as pd


def tab_switches_per_type(df):
    """Collapse consecutive rows that share a ``group_index`` into one row.

    The frame is scanned top to bottom. Each maximal run of adjacent rows
    with the same ``group_index`` produces one output row that carries the
    values of the run's *first* row plus the number of rows in the run.
    Runs are positional: a ``group_index`` that reappears after a different
    one starts a new run.

    Args:
        df: DataFrame providing the columns ``vp_code``, ``group_index``,
            ``log_url``, ``longest``, ``longest-distance``,
            ``most_frequent`` and ``most_frequent-distance``.

    Returns:
        A new DataFrame with the columns ``vp_code``, ``log_url``,
        ``count``, ``group``, ``longest_url``, ``longest-distance``,
        ``most_frequent_url`` and ``most_frequent-distance`` (in that
        order), one row per run. An empty input yields an empty frame
        with these columns.

    Raises:
        KeyError: If a non-empty ``df`` lacks one of the required columns.
    """
    # Output column -> input column it is copied from. "count" is derived,
    # not copied, so it maps to None. Insertion order defines the column
    # order of the returned frame.
    source_of = {
        "vp_code": "vp_code",
        "log_url": "log_url",
        "count": None,
        "group": "group_index",
        "longest_url": "longest",
        "longest-distance": "longest-distance",
        "most_frequent_url": "most_frequent",
        "most_frequent-distance": "most_frequent-distance",
    }
    result = {column: [] for column in source_of}

    # A unique sentinel rather than -1, so that a real group_index of -1
    # on the first row is still recognised as the start of a run.
    no_group_yet = object()
    last_group = no_group_yet

    for _, row in df.iterrows():
        group = row["group_index"]
        if last_group is no_group_yet or group != last_group:
            # First row of a new run: record its values and start counting.
            for column, source in source_of.items():
                result[column].append(1 if source is None else row[source])
            last_group = group
        else:
            # Same run as the previous row: bump the count in place.
            result["count"][-1] += 1

    return pd.DataFrame(result)