| author | Niclas Dobbertin <niclas.dobbertin@stud.tu-darmstadt.de> | 2023-10-04 12:34:02 +0200 |
|---|---|---|
| committer | Niclas Dobbertin <niclas.dobbertin@stud.tu-darmstadt.de> | 2023-10-04 12:34:02 +0200 |
| commit | 791eb3ee3f0e3e9d5ba1225eb6feed41915e035b (patch) | |
| tree | 0749e9f83ded67273e9587361095ee2af1c76f2d | |
| parent | d52ede0288a5db23563ec29f998a1ad26e8aec28 (diff) | |
change LogParser to unix line endings
-rwxr-xr-x | bjoern/videoanalyse/LogParser.py | 280 |
1 files changed, 140 insertions, 140 deletions
diff --git a/bjoern/videoanalyse/LogParser.py b/bjoern/videoanalyse/LogParser.py
index 1104e7f..c314de7 100755
--- a/bjoern/videoanalyse/LogParser.py
+++ b/bjoern/videoanalyse/LogParser.py
@@ -1,140 +1,140 @@
-# -*- coding: utf-8 -*-
-"""
-This script takes multiple .log data files from PCScreenWatcher and a Firefox history database (places.sqlite)
-inside a folder and extracts each activity title, its URL (if applicable), and usage timestamps into a pseudo-csv file.
-"""
-
-import os
-import re
-from datetime import datetime
-import csv
-import codecs
-from read_sqlite import get_url_from_sqlite
-from pathlib import Path
-
-
-# takes a list of log data strings and returns a dict mapping each activity title to its (enter, exit) time windows
-def extract_activities(log_data):
-    BROWSER_TITLE_SUFFIX = " - Mozilla Firefox"
-    # regex that matches text between square brackets
-    reg_titles = re.compile(r"(?<=\[).*?(?=\])")
-    # regex for total/active time (currently unused)
-    reg_time = re.compile(
-        r"(?<!\()Total: \d*\.*\d*m*\d*\.*\d*s*, Active: \d*\.*\d*m*\d*\.*\d*s*"
-    )
-    # regex that matches a number followed by ". ", used to split the log into window entries
-    reg_split = re.compile(r"\d+\. ")
-
-    extracted_data = {}
-    for log_date in log_data:
-        windows = reg_split.split(log_date)
-        brackets = []
-
-        del_flag = True
-        # extract the square-bracketed fields of each window entry
-        for s in windows:
-            if not del_flag:
-                found_brackets = reg_titles.findall(s)
-                if found_brackets:
-                    # Only use Browser titles
-                    if found_brackets[0].endswith(BROWSER_TITLE_SUFFIX):
-                        title = found_brackets[0].replace(BROWSER_TITLE_SUFFIX, "")
-                        brackets.append(title)
-                        enter_exit = s.split("Enter-Exit: ")[-1]
-                        timestamps = reg_titles.findall(enter_exit)
-                        for timestamp in timestamps:
-                            t_enter, t_exit = timestamp.split("-")
-                            if title not in extracted_data:
-                                extracted_data[title] = []
-                            enter_date = datetime.strptime(t_enter, "%H:%M:%S").time()
-                            exit_date = datetime.strptime(t_exit, "%H:%M:%S").time()
-                            extracted_data[title].append((enter_date, exit_date))
-            # "Activies" is the literal marker spelling in the PCScreenWatcher logs
-            if "Activies in each window" in s:
-                del_flag = False
-            if "Restricted Windows Summary:" in s:
-                del_flag = True
-
-    return extracted_data
-
-
-def get_log_data(data_path):
-    files = os.listdir(data_path)
-    log_files = []
-    log_data = []
-    for s in files:
-        if s.endswith(".log"):
-            log_files.append(os.path.join(data_path, s))
-    for l in log_files:
-        with codecs.open(l, "r", "utf-8") as reader:
-            log_data.append(reader.read())
-    return log_data
-
-
-def get_history_db(data_path):
-    files = os.listdir(data_path)
-    for s in files:
-        if s.endswith(".sqlite"):
-            history_db = os.path.join(data_path, s)
-            return history_db
-    return None
-
-
-def match_urls(history_db, log):
-    for entry in log:
-        url = get_url_from_sqlite(history_db, entry[2])
-        entry.append(url)
-    return log
-
-
-def generate_log(activities: dict):
-    # For each start time in ascending order, make an entry with title and timestamp
-    log = []
-    while activities:
-        first_title = list(activities.keys())[0]
-        smallest_start_time = (first_title, 0)
-        for title in activities.keys():
-            for idx, timestamp in enumerate(activities[title]):
-                if (
-                    timestamp[0]
-                    < activities[smallest_start_time[0]][smallest_start_time[1]][0]
-                ):
-                    smallest_start_time = (title, idx)
-        log.append(
-            [
-                activities[smallest_start_time[0]][smallest_start_time[1]][
-                    0
-                ].isoformat(),
-                activities[smallest_start_time[0]][smallest_start_time[1]][
-                    1
-                ].isoformat(),
-                smallest_start_time[0],
-            ]
-        )
-        del activities[smallest_start_time[0]][smallest_start_time[1]]
-        if not activities[smallest_start_time[0]]:
-            del activities[smallest_start_time[0]]
-    return log
-
-
-def write_logfile(vp_dir, log):
-    path = Path(f"{vp_dir}/logs.csv")
-    # newline="" prevents the csv module from writing blank lines on Windows
-    with open(path, "w", newline="") as csvfile:
-        writer = csv.writer(csvfile, delimiter=",", quoting=csv.QUOTE_NONNUMERIC)
-        writer.writerow(["Starttime", "Endtime", "Title", "URL"])
-        for row in log:
-            writer.writerow(row)
-
-
-def main():
-    for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
-        print(vp_dir)
-        log = extract_activities(get_log_data(vp_dir))
-        log = generate_log(log)
-
-        history = get_history_db(vp_dir)
-        log = match_urls(history, log)
-        write_logfile(vp_dir, log)
-
-
-if __name__ == "__main__":
-    main()
+# -*- coding: utf-8 -*-
+"""
+This script takes multiple .log data files from PCScreenWatcher and a Firefox history database (places.sqlite)
+inside a folder and extracts each activity title, its URL (if applicable), and usage timestamps into a pseudo-csv file.
+"""
+
+import os
+import re
+from datetime import datetime
+import csv
+import codecs
+from read_sqlite import get_url_from_sqlite
+from pathlib import Path
+
+
+# takes a list of log data strings and returns a dict mapping each activity title to its (enter, exit) time windows
+def extract_activities(log_data):
+    BROWSER_TITLE_SUFFIX = " - Mozilla Firefox"
+    # regex that matches text between square brackets
+    reg_titles = re.compile(r"(?<=\[).*?(?=\])")
+    # regex for total/active time (currently unused)
+    reg_time = re.compile(
+        r"(?<!\()Total: \d*\.*\d*m*\d*\.*\d*s*, Active: \d*\.*\d*m*\d*\.*\d*s*"
+    )
+    # regex that matches a number followed by ". ", used to split the log into window entries
+    reg_split = re.compile(r"\d+\. ")
+
+    extracted_data = {}
+    for log_date in log_data:
+        windows = reg_split.split(log_date)
+        brackets = []
+
+        del_flag = True
+        # extract the square-bracketed fields of each window entry
+        for s in windows:
+            if not del_flag:
+                found_brackets = reg_titles.findall(s)
+                if found_brackets:
+                    # Only use Browser titles
+                    if found_brackets[0].endswith(BROWSER_TITLE_SUFFIX):
+                        title = found_brackets[0].replace(BROWSER_TITLE_SUFFIX, "")
+                        brackets.append(title)
+                        enter_exit = s.split("Enter-Exit: ")[-1]
+                        timestamps = reg_titles.findall(enter_exit)
+                        for timestamp in timestamps:
+                            t_enter, t_exit = timestamp.split("-")
+                            if title not in extracted_data:
+                                extracted_data[title] = []
+                            enter_date = datetime.strptime(t_enter, "%H:%M:%S").time()
+                            exit_date = datetime.strptime(t_exit, "%H:%M:%S").time()
+                            extracted_data[title].append((enter_date, exit_date))
+            # "Activies" is the literal marker spelling in the PCScreenWatcher logs
+            if "Activies in each window" in s:
+                del_flag = False
+            if "Restricted Windows Summary:" in s:
+                del_flag = True
+
+    return extracted_data
+
+
+def get_log_data(data_path):
+    files = os.listdir(data_path)
+    log_files = []
+    log_data = []
+    for s in files:
+        if s.endswith(".log"):
+            log_files.append(os.path.join(data_path, s))
+    for l in log_files:
+        with codecs.open(l, "r", "utf-8") as reader:
+            log_data.append(reader.read())
+    return log_data
+
+
+def get_history_db(data_path):
+    files = os.listdir(data_path)
+    for s in files:
+        if s.endswith(".sqlite"):
+            history_db = os.path.join(data_path, s)
+            return history_db
+    return None
+
+
+def match_urls(history_db, log):
+    for entry in log:
+        url = get_url_from_sqlite(history_db, entry[2])
+        entry.append(url)
+    return log
+
+
+def generate_log(activities: dict):
+    # For each start time in ascending order, make an entry with title and timestamp
+    log = []
+    while activities:
+        first_title = list(activities.keys())[0]
+        smallest_start_time = (first_title, 0)
+        for title in activities.keys():
+            for idx, timestamp in enumerate(activities[title]):
+                if (
+                    timestamp[0]
+                    < activities[smallest_start_time[0]][smallest_start_time[1]][0]
+                ):
+                    smallest_start_time = (title, idx)
+        log.append(
+            [
+                activities[smallest_start_time[0]][smallest_start_time[1]][
+                    0
+                ].isoformat(),
+                activities[smallest_start_time[0]][smallest_start_time[1]][
+                    1
+                ].isoformat(),
+                smallest_start_time[0],
+            ]
+        )
+        del activities[smallest_start_time[0]][smallest_start_time[1]]
+        if not activities[smallest_start_time[0]]:
+            del activities[smallest_start_time[0]]
+    return log
+
+
+def write_logfile(vp_dir, log):
+    path = Path(f"{vp_dir}/logs.csv")
+    # newline="" prevents the csv module from writing blank lines on Windows
+    with open(path, "w", newline="") as csvfile:
+        writer = csv.writer(csvfile, delimiter=",", quoting=csv.QUOTE_NONNUMERIC)
+        writer.writerow(["Starttime", "Endtime", "Title", "URL"])
+        for row in log:
+            writer.writerow(row)
+
+
+def main():
+    for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
+        print(vp_dir)
+        log = extract_activities(get_log_data(vp_dir))
+        log = generate_log(log)
+
+        history = get_history_db(vp_dir)
+        log = match_urls(history, log)
+        write_logfile(vp_dir, log)
+
+
+if __name__ == "__main__":
+    main()
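
As a usage sketch of the parsing pipeline in this file: the log excerpt below is hypothetical, inferred from the regexes and the "Activies in each window" / "Restricted Windows Summary:" markers that extract_activities looks for, not taken from a real PCScreenWatcher capture, and the import assumes the snippet runs next to LogParser.py (inside bjoern/videoanalyse) so that the read_sqlite import resolves.

    from LogParser import extract_activities, generate_log

    # Hypothetical excerpt shaped like what extract_activities expects:
    # numbered window entries, bracketed titles ending in " - Mozilla Firefox",
    # and bracketed HH:MM:SS-HH:MM:SS pairs after "Enter-Exit: ".
    sample = (
        "Activies in each window "
        "1. [ArXiv - Mozilla Firefox] Total: 5.0m, Active: 2.5m "
        "Enter-Exit: [09:00:00-09:05:00][09:20:00-09:25:00] "
        "2. [Wikipedia - Mozilla Firefox] Total: 1.0m, Active: 1.0m "
        "Enter-Exit: [09:05:00-09:06:00] "
        "Restricted Windows Summary:"
    )

    activities = extract_activities([sample])
    # {'ArXiv': [(time(9, 0), time(9, 5)), (time(9, 20), time(9, 25))],
    #  'Wikipedia': [(time(9, 5), time(9, 6))]}

    rows = generate_log(activities)
    # Entries come out sorted by start time:
    # [['09:00:00', '09:05:00', 'ArXiv'],
    #  ['09:05:00', '09:06:00', 'Wikipedia'],
    #  ['09:20:00', '09:25:00', 'ArXiv']]

match_urls and write_logfile are left out of the sketch because they additionally need a places.sqlite history database and a participant directory to write logs.csv into.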