diff options
author | Niclas Dobbertin <niclas.dobbertin@stud.tu-darmstadt.de> | 2023-10-04 12:33:39 +0200 |
---|---|---|
committer | Niclas Dobbertin <niclas.dobbertin@stud.tu-darmstadt.de> | 2023-10-04 12:33:39 +0200 |
commit | d52ede0288a5db23563ec29f998a1ad26e8aec28 (patch) | |
tree | e5e73df1e5372ad450db93d48921ecc29d615c3a /bjoern | |
parent | cb7f7b44140728990f99115863922dd1f96c5cfe (diff) |
use LogParser in post_processing
Diffstat (limited to 'bjoern')
-rwxr-xr-x | bjoern/videoanalyse/LogParser.py | 39 | ||||
-rw-r--r-- | bjoern/videoanalyse/post_processing.py | 11 |
2 files changed, 33 insertions, 17 deletions
diff --git a/bjoern/videoanalyse/LogParser.py b/bjoern/videoanalyse/LogParser.py index 448b283..1104e7f 100755 --- a/bjoern/videoanalyse/LogParser.py +++ b/bjoern/videoanalyse/LogParser.py @@ -12,11 +12,10 @@ import codecs from read_sqlite import get_url_from_sqlite
from pathlib import Path
-BROWSER_TITLE_SUFFIX = " - Mozilla Firefox"
-
# takes the log data string and returns a list of activity titles and their time windows
def extract_activities(log_data):
+ BROWSER_TITLE_SUFFIX = " - Mozilla Firefox"
# regex which matches between squared brackets
reg_titles = re.compile("(?<=\[).*?(?=\])")
# regex for total/active time
@@ -86,6 +85,7 @@ def match_urls(history_db, log): entry.append(url)
return log
+
def generate_log(activities: dict):
# For each start time in ascending order, make an entry with title and timestamp
log = []
@@ -101,26 +101,23 @@ def generate_log(activities: dict): smallest_start_time = (title, idx)
log.append(
[
- activities[smallest_start_time[0]][smallest_start_time[1]][0].isoformat(),
- activities[smallest_start_time[0]][smallest_start_time[1]][1].isoformat(),
+ activities[smallest_start_time[0]][smallest_start_time[1]][
+ 0
+ ].isoformat(),
+ activities[smallest_start_time[0]][smallest_start_time[1]][
+ 1
+ ].isoformat(),
smallest_start_time[0],
]
)
del activities[smallest_start_time[0]][smallest_start_time[1]]
if not activities[smallest_start_time[0]]:
del activities[smallest_start_time[0]]
- return(log)
-
-
-for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
- print(vp_dir)
- log = extract_activities(get_log_data(vp_dir))
- log = generate_log(log)
+ return log
- history = get_history_db(vp_dir)
- log = match_urls(history, log)
- path = Path(f"{vp_dir}/{vp_dir}.csv")
+def write_logfile(vp_dir, log):
+ path = Path(f"{vp_dir}/logs.csv")
with open(path, "w") as csvfile:
writer = csv.writer(csvfile, delimiter=",", quoting=csv.QUOTE_NONNUMERIC)
writer.writerow(["Starttime", "Endtime", "Title", "URL"])
@@ -128,4 +125,16 @@ for vp_dir in [f.name for f in os.scandir() if f.is_dir()]: writer.writerow(row)
-input("*Press enter to close*")
+def main():
+ for vp_dir in [f.name for f in os.scandir() if f.is_dir()]:
+ print(vp_dir)
+ log = extract_activities(get_log_data(vp_dir))
+ log = generate_log(log)
+
+ history = get_history_db(vp_dir)
+ log = match_urls(history, log)
+ write_logfile(vp_dir, log)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/bjoern/videoanalyse/post_processing.py b/bjoern/videoanalyse/post_processing.py index e24edcf..bdf76c3 100644 --- a/bjoern/videoanalyse/post_processing.py +++ b/bjoern/videoanalyse/post_processing.py @@ -6,6 +6,7 @@ from pprint import pprint import pandas as pd import utils +import LogParser argparser = argparse.ArgumentParser(description="OCR-Logfile evaluation") argparser.add_argument("vp_dir", help="Directory with all VPs") @@ -17,12 +18,18 @@ all_vp = [x for x in data_path.iterdir() if x.is_dir()] vp_results = [] for vp_path in all_vp: + log = LogParser.extract_activities(LogParser.get_log_data(vp_path)) + log = LogParser.generate_log(log) + history = LogParser.get_history_db(vp_path) + log = LogParser.match_urls(history, log) + LogParser.write_logfile(vp_path, log) + video_path = next(vp_path.glob("*.mkv")) ocr_path = vp_path / "analysis_results.csv" - log_path = vp_path / f"{vp_path.stem}.csv" + log_path = vp_path / "logs.csv" df = utils.combine_ocr_logs(video_path, ocr_path, log_path) - df = df.fillna('') + df = df.fillna("") df["vp_code"] = vp_path.stem df = utils.calc_levenshtein_distance(df) |