summaryrefslogtreecommitdiff
path: root/bjoern/videoanalyse/tab_switch.py
blob: 2efa647d155503d8beec7b3f71c38bd95c4be95c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3

import pandas as pd


def tab_switches_per_type(df):
    """Collapse consecutive rows sharing a ``group_index`` into one row each.

    The frame is scanned top to bottom. Every time ``group_index`` differs
    from the value in the immediately preceding row, a new run starts; a
    ``group_index`` that reappears later therefore starts a *new* run rather
    than being merged with the earlier one.

    For each run, the values of the run's first row are kept and the number
    of rows in the run is stored in ``count``.

    Args:
        df: DataFrame providing the columns ``vp_code``, ``group_index``,
            ``log_url``, ``longest``, ``longest-distance``,
            ``most_frequent`` and ``most_frequent-distance``.

    Returns:
        A new DataFrame with one row per run and the columns ``vp_code``,
        ``log_url``, ``count``, ``group``, ``longest_url``,
        ``longest-distance``, ``most_frequent_url`` and
        ``most_frequent-distance``. An empty input yields an empty frame
        with these columns.
    """
    result = {
        "vp_code": [],
        "log_url": [],
        "count": [],
        "group": [],
        "longest_url": [],
        "longest-distance": [],
        "most_frequent_url": [],
        "most_frequent-distance": [],
    }
    groups = result["group"]
    counts = result["count"]

    for _, row in df.iterrows():
        group = row["group_index"]
        # "No run seen yet" is derived from the collected output instead of a
        # magic sentinel, so any group_index value (including -1) is handled.
        if not groups or group != groups[-1]:
            result["vp_code"].append(row["vp_code"])
            groups.append(group)
            result["log_url"].append(row["log_url"])
            result["longest_url"].append(row["longest"])
            result["longest-distance"].append(row["longest-distance"])
            result["most_frequent_url"].append(row["most_frequent"])
            result["most_frequent-distance"].append(
                row["most_frequent-distance"]
            )
            # The count is opened together with the run, which keeps every
            # column list the same length at all times (also for empty input).
            counts.append(1)
        else:
            counts[-1] += 1

    return pd.DataFrame(result)