% experiment/analysis/analysis.tex

% Created 2023-10-28 Sat 19:43
% Intended LaTeX compiler: pdflatex
\documentclass[11pt]{article}

% Encoding and fonts (pdfLaTeX).
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}

% Graphics, tables and floats.
\usepackage{graphicx}
\usepackage{longtable}
\usepackage{wrapfig}
\usepackage{rotating}
\usepackage{capt-of}

% Text decoration; normalem keeps \emph as italics instead of underlining.
\usepackage[normalem]{ulem}

% Mathematics.
\usepackage{amsmath}
\usepackage{amssymb}

% Bibliography. Loaded before hyperref, as the biblatex manual recommends.
% NOTE(review): the resource path is absolute and machine-specific.
\usepackage{biblatex}
\addbibresource{/home/niclas/bib/references.bib}

% hyperref comes last so that it can patch the packages loaded above.
\usepackage{hyperref}

% Document metadata.
\author{Niclas Dobbertin}
\date{\today}
\title{Analysis}
\hypersetup{
 pdfauthor={Niclas Dobbertin},
 pdftitle={Analysis},
 pdfkeywords={},
 pdfsubject={},
 pdfcreator={Emacs 29.1 (Org mode 9.7)},
 pdflang={English}}
\begin{document}

\maketitle
\tableofcontents

\section{Imports}
\label{sec:orgbdc2c77}
\begin{verbatim}
import pandas as pd
from pathlib import Path
from pprint import pprint

import tools

\end{verbatim}
\section{Constants}
\label{sec:orgcb8c537}
\begin{verbatim}
# Directory holding the experiment data (one sub-directory per condition).
data_path = Path("/home/niclas/repos/uni/master_thesis/experiment/data")

# Procedure labels "1" to "6", followed by the "overall" entry.
procedures = [str(number) for number in range(1, 7)] + ["overall"]
\end{verbatim}
\section{Import Data}
\label{sec:org87e67b0}
\subsection{Conditions}
\label{sec:orga12f2b6}
\begin{verbatim}
# Every directory directly below data_path names one condition.
conditions = [
    entry.stem
    for entry in data_path.iterdir()
    if entry.is_dir()
]
conditions
\end{verbatim}

\begin{center}
\begin{tabular}{lll}
random & fixed & blocked\\[0pt]
\end{tabular}
\end{center}
\subsection{Data}
\label{sec:orgcac95cb}
\begin{verbatim}
# Load each participant's "vp.pkl", grouped by condition:
# data[condition][participant] holds whatever tools.unpickle returns.
data = {}
for condition in conditions:
    data[condition] = {}
    for vp in (data_path / condition).iterdir():
        # Skip stray files; only a directory can contain a "vp.pkl".
        if not vp.is_dir():
            continue
        # NOTE(review): tools.unpickle presumably wraps pickle, so only
        # load files from a trusted source.
        data[condition][vp.stem] = tools.unpickle(vp / "vp.pkl")
\end{verbatim}

\begin{verbatim}
None
\end{verbatim}
\subsection{Useful Subdata}
\label{sec:org4384120}
\begin{verbatim}
# Placeholder: a per-condition subset (data_correct) is only sketched in the
# commented-out lines below; this snippet currently does nothing.
# data_correct = {conditions[0]: {}, conditions[1]: {}, conditions[2]: {}}
pass
# for condition in conditions:
#     data_correct[condition] = None
\end{verbatim}

\begin{verbatim}
None
\end{verbatim}
\section{Basic statistics}
\label{sec:org44d0851}
\subsection{Total percent correct}
\label{sec:org461b551}
To find out how well each VP solved the task, we calculate the accuracy for the
training and test phases.

\begin{verbatim}
condition = "random"
# One row per participant; tools.total_accuracy supplies the two values
# shown in the "train" and "test" columns.
df = pd.DataFrame(
    [
        tools.total_accuracy(vp_data, procedures)
        for vp_data in data[condition].values()
    ],
    index=data[condition].keys(),
    columns=["train", "test"],
)
df
\end{verbatim}

\begin{verbatim}
         train      test
vp12  0.822222  0.820000
vp19  0.966667  0.800000
vp15  0.973333  0.980000
vp17  0.911111  0.960000
vp20  0.906667  0.980000
vp10  0.924444  0.943333
vp16  0.957778  0.926667
vp13  0.857778  0.946667
vp18  0.962222  0.970000
vp14  0.982222  0.986667
\end{verbatim}

Most subjects have an accuracy of over 90\% in both the training and the test phase.
Some, however, are notably lower: under 90\% in the training phase, the test
phase, or both.
This could be due to a systematic misunderstanding of specific equations that
are present in both phases or in only one of them.
To investigate, we look at the per-procedure accuracy of each subject.

\begin{verbatim}
condition = "random"
# Per-procedure accuracy: the counts from tools.count_correct divided by the
# number of entries recorded for the same participant.
proc_accs = []
for vp_data in data[condition].values():
    correct = tools.count_correct(vp_data, vp_data.keys(), procedures)
    # Normalise by this participant's own entry count; reusing the first
    # participant's count is only right if all participants have equally many.
    n_entries = len(vp_data.keys())
    proc_accs.append(
        {
            # A participant without entries has no defined accuracy.
            proc: count / n_entries if n_entries else float("nan")
            for proc, count in correct.items()
        }
    )
df = pd.DataFrame(proc_accs, index=data[condition].keys())
df
\end{verbatim}

\begin{verbatim}
          1      2      3      4      5      6  overall
vp12  0.992  0.592  0.392  0.976  0.960  1.000    0.016
vp19  1.000  0.992  0.000  0.576  0.992  0.992    0.848
vp15  0.992  0.992  0.960  0.392  0.592  1.000    0.928
vp17  0.392  0.968  0.584  1.000  1.000  0.992    0.648
vp20  0.992  0.376  0.952  0.976  0.976  0.560    0.784
vp10  0.968  0.360  0.592  0.984  0.984  0.992    0.712
vp16  0.976  0.600  0.376  0.976  0.992  1.000    0.752
vp13  0.384  0.960  0.928  0.560  0.992  0.968    0.568
vp18  0.976  0.976  0.960  0.392  0.600  0.984    0.904
vp14  0.992  0.976  0.992  0.976  0.400  0.600    0.968
\end{verbatim}

We can see that most VPs have around two procedures with an accuracy of around 50\%.
\end{document}