summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Niclas Dobbertin <niclas.dobbertin@stud.tu-darmstadt.de> 2023-10-04 12:33:39 +0200
committer: Niclas Dobbertin <niclas.dobbertin@stud.tu-darmstadt.de> 2023-10-04 12:33:39 +0200
commit: d52ede0288a5db23563ec29f998a1ad26e8aec28 (patch)
tree: e5e73df1e5372ad450db93d48921ecc29d615c3a
parent: cb7f7b44140728990f99115863922dd1f96c5cfe (diff)
use LogParser in post_processing
-rwxr-xr-x bjoern/videoanalyse/LogParser.py 39
-rw-r--r-- bjoern/videoanalyse/post_processing.py 11
2 files changed, 33 insertions, 17 deletions
diff --git a/bjoern/videoanalyse/LogParser.py b/bjoern/videoanalyse/LogParser.py
index 448b283..1104e7f 100755
--- a/bjoern/videoanalyse/LogParser.py
+++ b/bjoern/videoanalyse/LogParser.py
@@ -12,11 +12,10 @@ import codecs
from read_sqlite import get_url_from_sqlite
from pathlib import Path
-BROWSER_TITLE_SUFFIX = " - Mozilla Firefox"
-
# takes the log data string and returns a list of activity titles and their time windows
def extract_activities(log_data):
+ BROWSER_TITLE_SUFFIX = " - Mozilla Firefox"
# regex which matches between squared brackets
reg_titles = re.compile("(?<=\[).*?(?=\])")
# regex for total/active time
@@ -86,6 +85,7 @@ def match_urls(history_db, log):
entry.append(url)
return log
+
def generate_log(activities: dict):
# For each start time in ascending order, make an entry with title and timestamp
log = []
@@ -101,26 +101,23 @@ def generate_log(activities: dict):
smallest_start_time = (title, idx)
log.append(
[
- activities[smallest_start_time[0]][smallest_start_time[1]][0].isoformat(),
- activities[smallest_start_time[0]][smallest_start_time[1]][1].isoformat(),
+ activities[smallest_start_time[0]][smallest_start_time[1]][
+ 0
+ ].isoformat(),
+ activities[smallest_start_time[0]][smallest_start_time[1]][
+ 1
+ ].isoformat(),
smallest_start_time[0],
]
)
del activities[smallest_start_time[0]][smallest_start_time[1]]
if not activities[smallest_start_time[0]]:
del activities[smallest_start_time[0]]
- return(log)
-
-
-for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
- print(vp_dir)
- log = extract_activities(get_log_data(vp_dir))
- log = generate_log(log)
+ return log
- history = get_history_db(vp_dir)
- log = match_urls(history, log)
- path = Path(f"{vp_dir}/{vp_dir}.csv")
+def write_logfile(vp_dir, log):
+ path = Path(f"{vp_dir}/logs.csv")
with open(path, "w") as csvfile:
writer = csv.writer(csvfile, delimiter=",", quoting=csv.QUOTE_NONNUMERIC)
writer.writerow(["Starttime", "Endtime", "Title", "URL"])
@@ -128,4 +125,16 @@ for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
writer.writerow(row)
-input("*Press enter to close*")
+def main():
+ for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
+ print(vp_dir)
+ log = extract_activities(get_log_data(vp_dir))
+ log = generate_log(log)
+
+ history = get_history_db(vp_dir)
+ log = match_urls(history, log)
+ write_logfile(vp_dir, log)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py
index e24edcf..bdf76c3 100644
--- a/bjoern/videoanalyse/post_processing.py
+++ b/bjoern/videoanalyse/post_processing.py
@@ -6,6 +6,7 @@ from pprint import pprint
import pandas as pd
import utils
+import LogParser
argparser = argparse.ArgumentParser(description="OCR-Logfile evaluation")
argparser.add_argument("vp_dir", help="Directory with all VPs")
@@ -17,12 +18,18 @@ all_vp = [x for x in data_path.iterdir() if x.is_dir()]
vp_results = []
for vp_path in all_vp:
+ log = LogParser.extract_activities(LogParser.get_log_data(vp_path))
+ log = LogParser.generate_log(log)
+ history = LogParser.get_history_db(vp_path)
+ log = LogParser.match_urls(history, log)
+ LogParser.write_logfile(vp_path, log)
+
video_path = next(vp_path.glob("*.mkv"))
ocr_path = vp_path / "analysis_results.csv"
- log_path = vp_path / f"{vp_path.stem}.csv"
+ log_path = vp_path / "logs.csv"
df = utils.combine_ocr_logs(video_path, ocr_path, log_path)
- df = df.fillna('')
+ df = df.fillna("")
df["vp_code"] = vp_path.stem
df = utils.calc_levenshtein_distance(df)