Diffstat (limited to 'experiment/analysis/analysis.org')
-rw-r--r-- | experiment/analysis/analysis.org | 57
1 file changed, 8 insertions(+), 49 deletions(-)
diff --git a/experiment/analysis/analysis.org b/experiment/analysis/analysis.org
index 7f6a58d..e726046 100644
--- a/experiment/analysis/analysis.org
+++ b/experiment/analysis/analysis.org
@@ -4,10 +4,11 @@
 
 * Imports
 #+begin_src python :results none
 import pandas as pd
-import pickle
 from pathlib import Path
 from pprint import pprint
+import tools
+
 #+end_src
 
 * Constants
@@ -18,13 +19,6 @@ procedures = ["1", "2", "3", "4", "5", "6", "overall"]
 #+end_src
 
 * Import Data
-#+begin_src python :results none
-def unpickle(pkl):
-    with open(pkl, "rb") as f:
-        data = pickle.load(f)
-    return data
-#+end_src
-
 ** Conditions
 #+begin_src python
 conditions = [x.stem for x in data_path.iterdir() if x.is_dir()]
@@ -35,59 +29,24 @@ conditions
 | random | fixed | blocked |
 
 ** Data
-#+begin_src python
+#+begin_src python :results none
 data = {}
 for condition in conditions:
     data[condition] = {}
     for vp in (data_path / condition).iterdir():
-        data[condition][vp.stem] = unpickle(vp / "vp.pkl")
-#+end_src
+        data[condition][vp.stem] = tools.unpickle(vp / "vp.pkl")
 
-#+RESULTS:
-: None
+data_train, data_test = tools.train_test_split(data)
+#+end_src
 
 * Basic statistics
 
 ** Total percent correct
 To find out how well the VPs solved the task, we calculate the accuracy for the train and test phases.
 
-#+begin_src python :results none
-def count_correct(vp, trials):
-    trials_correct = {}
-    for proc in procedures:
-        trials_correct[proc] = 0
-    for sample in trials:
-        for proc in vp[sample]["procedure_order"]:
-            vp_ans = vp[sample][proc]["answer"]
-            for c in vp_ans:
-                if not c.isdigit():
-                    vp_ans = vp_ans.replace(c, "")
-            vp_ans = int(vp_ans)
-            if vp_ans == vp[sample]["water_sample"][proc][0]:
-                trials_correct[proc] += 1
-    return trials_correct
-#+end_src
-
-#+begin_src python :results none
-def total_accuracy(vp):
-    train_total = len(train) * len(vp[train[0]]["procedure_order"])
-    test_total = len(test) * len(vp[test[0]]["procedure_order"])
-
-    acc_train = count_correct(vp, train)
-    acc_test = count_correct(vp, test)
-
-    acc_train = sum([acc_train[x] for x in acc_train.keys()]) / train_total
-    acc_test = sum([acc_test[x] for x in acc_test.keys()]) / test_total
-
-    return acc_train, acc_test
-#+end_src
-
 #+begin_src python
-train = [x for x in vp.keys() if "train" in x]
-test = [x for x in vp.keys() if "test" in x]
-
 condition = "random"
-df = pd.DataFrame([total_accuracy(data[condition][vp]) for vp in data[condition].keys()], index=data[condition].keys(), columns=["train", "test"])
+df = pd.DataFrame([tools.total_accuracy(data[condition][vp], procedures) for vp in data[condition].keys()], index=data[condition].keys(), columns=["train", "test"])
 df
 #+end_src
 
@@ -116,7 +75,7 @@ To investigate, we look at the per procedure accuracy per subject.
 #+begin_src python
 condition = "random"
 proc_accs = [
-    count_correct(data[condition][vp], data[condition][vp].keys())
+    tools.count_correct(data[condition][vp], data[condition][vp].keys(), procedures)
     for vp in data[condition].keys()
 ]
 for vp in proc_accs:
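The diff replaces the inline helper definitions with calls into a new `tools` module that is not shown here. For orientation, below is a minimal sketch of what that module presumably contains: `unpickle`, `count_correct`, and `total_accuracy` are reconstructed from the blocks removed above (with `procedures` and the train/test trial lists turned into arguments, as the new call sites require), while `train_test_split` never appeared in the old file, so its body is purely an assumption inferred from its call site `data_train, data_test = tools.train_test_split(data)` and the removed `"train" in x` / `"test" in x` key filters.

#+begin_src python
"""Hypothetical tools.py, sketched from this diff; not the actual module."""
import pickle


def unpickle(pkl):
    # Taken from the removed block: load one pickled VP record from disk.
    with open(pkl, "rb") as f:
        return pickle.load(f)


def count_correct(vp, trials, procedures):
    # As removed above, but `procedures` is now an explicit argument,
    # matching the new call site in the per-procedure accuracy block.
    trials_correct = {proc: 0 for proc in procedures}
    for sample in trials:
        for proc in vp[sample]["procedure_order"]:
            # Keep only the digits of the typed answer before comparing
            # against the true value for this procedure.
            ans = "".join(c for c in vp[sample][proc]["answer"] if c.isdigit())
            if int(ans) == vp[sample]["water_sample"][proc][0]:
                trials_correct[proc] += 1
    return trials_correct


def total_accuracy(vp, procedures):
    # As removed above, except the train/test trial lists are derived here
    # instead of being read from globals defined elsewhere in the org file.
    train = [x for x in vp.keys() if "train" in x]
    test = [x for x in vp.keys() if "test" in x]

    train_total = len(train) * len(vp[train[0]]["procedure_order"])
    test_total = len(test) * len(vp[test[0]]["procedure_order"])

    acc_train = sum(count_correct(vp, train, procedures).values()) / train_total
    acc_test = sum(count_correct(vp, test, procedures).values()) / test_total
    return acc_train, acc_test


def train_test_split(data):
    # Assumed behaviour only: split each VP's samples into train and test
    # dicts with the same condition -> vp -> sample nesting as `data`.
    data_train, data_test = {}, {}
    for condition, vps in data.items():
        data_train[condition], data_test[condition] = {}, {}
        for vp, samples in vps.items():
            data_train[condition][vp] = {k: v for k, v in samples.items() if "train" in k}
            data_test[condition][vp] = {k: v for k, v in samples.items() if "test" in k}
    return data_train, data_test
#+end_src

Under these assumptions, the updated blocks in the org file behave as the old inline versions did, e.g. `tools.total_accuracy(data["random"][vp], procedures)` returns the same `(acc_train, acc_test)` pair that the removed `total_accuracy` produced.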