#+title: Analysis
#+PROPERTY: header-args:python+ :session *python* :exports both :tangle yes

* Imports
#+begin_src python :results none
import pandas as pd
import pickle
from pathlib import Path
from pprint import pprint
#+end_src

* Constants
#+begin_src python :results none
# Root folder of the experiment data. The import below expects one
# sub-folder per condition, each containing one sub-folder per VP.
data_path = Path("/home/niclas/repos/uni/master_thesis/experiment/data")
# Keys under which correct answers are tallied.
procedures = ["1", "2", "3", "4", "5", "6", "overall"]
#+end_src

* Import Data
#+begin_src python :results none
def unpickle(pkl):
    """Return the object stored in the pickle file at path `pkl`.

    NOTE(review): pickle.load can execute arbitrary code while loading, so
    this must only be used on files written by our own experiment code.
    """
    with open(pkl, "rb") as f:
        return pickle.load(f)
#+end_src

** Conditions
#+begin_src python
# Every directory directly below data_path is one condition. The full
# directory name is used so that it can be joined onto data_path again.
conditions = [x.name for x in data_path.iterdir() if x.is_dir()]
conditions
#+end_src

#+RESULTS:
| random | fixed | blocked |

** Data
#+begin_src python
# data[condition][vp name] -> unpickled content of that VP's "vp.pkl"
data = {}
for condition in conditions:
    data[condition] = {}
    for vp_dir in (data_path / condition).iterdir():
        # Skip stray files; only VP folders contain a "vp.pkl".
        if not vp_dir.is_dir():
            continue
        data[condition][vp_dir.name] = unpickle(vp_dir / "vp.pkl")
#+end_src

#+RESULTS:
: None

* Basic statistics
** Total percent correct
To find out how well the VPs solved the tasks, we calculate the accuracy for the train and test phases.
#+begin_src python :results none
def count_correct(vp, trials):
    """Count the correct answers of one VP, separately for each procedure.

    Args:
        vp: Mapping from sample key to that sample's record. A record holds
            "procedure_order" (the procedures answered in that sample), one
            entry per procedure with an "answer" string, and "water_sample",
            whose entry for a procedure starts with the expected value.
        trials: Iterable of the sample keys to evaluate.

    Returns:
        Dict with one entry per name in the global `procedures`, holding the
        number of correct answers for that procedure.

    Raises:
        KeyError: If a sample lists a procedure that is not in `procedures`.
    """
    trials_correct = {proc: 0 for proc in procedures}
    for sample in trials:
        record = vp[sample]
        for proc in record["procedure_order"]:
            # Answers may contain stray non-digit characters; only the
            # digits are compared with the expected value.
            digits = "".join(c for c in record[proc]["answer"] if c.isdigit())
            if not digits:
                # An answer without any digit cannot be correct (and int("")
                # would raise), so it is counted as wrong.
                continue
            if int(digits) == record["water_sample"][proc][0]:
                trials_correct[proc] += 1
    return trials_correct
#+end_src

#+begin_src python :results none
def total_accuracy(vp, train=None, test=None):
    """Return the overall accuracy of one VP as (train accuracy, test accuracy).

    Args:
        vp: Mapping from sample key to sample record, as for count_correct.
        train: Sample keys of the train phase. Defaults to all keys of `vp`
            that contain "train".
        test: Sample keys of the test phase. Defaults to all keys of `vp`
            that contain "test".

    Returns:
        Tuple (acc_train, acc_test). Each value is the number of correct
        answers divided by the number of answers given in that phase, or NaN
        if the phase has no answers.
    """
    if train is None:
        train = [x for x in vp if "train" in x]
    if test is None:
        test = [x for x in vp if "test" in x]

    def phase_accuracy(samples):
        # Sum over all samples so that samples with a different number of
        # procedures are still weighted correctly.
        total = sum(len(vp[s]["procedure_order"]) for s in samples)
        if total == 0:
            return float("nan")
        return sum(count_correct(vp, samples).values()) / total

    return phase_accuracy(train), phase_accuracy(test)
#+end_src

#+begin_src python
condition = "random"
# Take the sample keys from the first VP of the condition instead of from
# whatever `vp` happens to be bound to in the session.
first_vp = next(iter(data[condition].values()))
train = [x for x in first_vp if "train" in x]
test = [x for x in first_vp if "test" in x]
df = pd.DataFrame(
    [total_accuracy(vp_data, train, test) for vp_data in data[condition].values()],
    index=list(data[condition]),
    columns=["train", "test"],
)
df
#+end_src

#+RESULTS:
#+begin_example
         train      test
vp12  0.822222  0.820000
vp19  0.966667  0.800000
vp15  0.973333  0.980000
vp17  0.911111  0.960000
vp20  0.906667  0.980000
vp10  0.924444  0.943333
vp16  0.957778  0.926667
vp13  0.857778  0.946667
vp18  0.962222  0.970000
vp14  0.982222  0.986667
#+end_example

Most subjects have an accuracy of over 90% in both the training and the test phase. Some, however, are notably lower, falling under 90% in the training phase, the test phase, or both. This could indicate a systematic misunderstanding of specific equations that are present in both phases or in only one of them. To investigate, we look at the per-procedure accuracy of each subject.
#+begin_src python
condition = "random"
proc_accs = []
for vp_data in data[condition].values():
    # Number of correct answers per procedure over all samples of this VP.
    correct = count_correct(vp_data, vp_data.keys())
    # Number of samples in which each procedure was actually presented.
    # Dividing by the total number of samples instead would understate the
    # accuracy of every procedure that is not part of each sample.
    presented = dict.fromkeys(correct, 0)
    for record in vp_data.values():
        for proc in record["procedure_order"]:
            presented[proc] = presented.get(proc, 0) + 1
    proc_accs.append(
        {
            proc: correct[proc] / presented[proc] if presented[proc] else float("nan")
            for proc in correct
        }
    )
df = pd.DataFrame(proc_accs, index=list(data[condition]))
df
#+end_src

#+RESULTS:
#+begin_example
          1      2      3      4      5      6  overall
vp12  0.992  0.592  0.392  0.976  0.960  1.000    0.016
vp19  1.000  0.992  0.000  0.576  0.992  0.992    0.848
vp15  0.992  0.992  0.960  0.392  0.592  1.000    0.928
vp17  0.392  0.968  0.584  1.000  1.000  0.992    0.648
vp20  0.992  0.376  0.952  0.976  0.976  0.560    0.784
vp10  0.968  0.360  0.592  0.984  0.984  0.992    0.712
vp16  0.976  0.600  0.376  0.976  0.992  1.000    0.752
vp13  0.384  0.960  0.928  0.560  0.992  0.968    0.568
vp18  0.976  0.976  0.960  0.392  0.600  0.984    0.904
vp14  0.992  0.976  0.992  0.976  0.400  0.600    0.968
#+end_example

NOTE(review): The table above was recorded while every procedure was still divided by the total number of samples; re-run the block to refresh it. Values of about 0.4 and 0.6 may merely reflect procedures that occur in only a part of the samples, which should be confirmed after re-running.

We can see that most vp have around 2 procedures with accuracy of around 50%