summaryrefslogtreecommitdiff
path: root/experiment/analysis/analysis.tex
diff options
context:
space:
mode:
Diffstat (limited to 'experiment/analysis/analysis.tex')
-rw-r--r--experiment/analysis/analysis.tex116
1 files changed, 59 insertions, 57 deletions
diff --git a/experiment/analysis/analysis.tex b/experiment/analysis/analysis.tex
index 2896b4f..5b56bc8 100644
--- a/experiment/analysis/analysis.tex
+++ b/experiment/analysis/analysis.tex
@@ -1,4 +1,4 @@
-% Created 2023-10-23 Mon 20:13
+% Created 2023-10-28 Sat 19:43
% Intended LaTeX compiler: pdflatex
\documentclass[11pt]{article}
\usepackage[utf8]{inputenc}
@@ -30,30 +30,26 @@
\tableofcontents
\section{Imports}
-\label{sec:orgf19bf7c}
+\label{sec:orgbdc2c77}
\begin{verbatim}
import pandas as pd
-import pickle
from pathlib import Path
+from pprint import pprint
+
+import tools
\end{verbatim}
\section{Constants}
-\label{sec:orgb587203}
+\label{sec:orgcb8c537}
\begin{verbatim}
data_path = Path("/home/niclas/repos/uni/master_thesis/experiment/data")
procedures = ["1", "2", "3", "4", "5", "6", "overall"]
\end{verbatim}
\section{Import Data}
-\label{sec:org3427b7b}
-\begin{verbatim}
-def unpickle(pkl):
- with open(pkl, "rb") as f:
- data = pickle.load(f)
- return data
-\end{verbatim}
+\label{sec:org87e67b0}
\subsection{Conditions}
-\label{sec:org9e15909}
+\label{sec:orga12f2b6}
\begin{verbatim}
conditions = [x.stem for x in data_path.iterdir() if x.is_dir()]
conditions
@@ -65,70 +61,55 @@ random & fixed & blocked\\[0pt]
\end{tabular}
\end{center}
\subsection{Data}
-\label{sec:org65d4664}
+\label{sec:orgcac95cb}
\begin{verbatim}
data = {}
for condition in conditions:
data[condition] = {}
for vp in (data_path / condition).iterdir():
- data[condition][vp.stem] = unpickle(vp / "vp.pkl")
+ data[condition][vp.stem] = tools.unpickle(vp / "vp.pkl")
+\end{verbatim}
+
+\begin{verbatim}
+None
+\end{verbatim}
+\subsection{Useful Subdata}
+\label{sec:org4384120}
+\begin{verbatim}
+# data_correct = {conditons[0]: {}, conditons[1]: {}, conditons[2]: {}}
+pass
+# for condition in conditions:
+# data_correct[condition] = None
\end{verbatim}
\begin{verbatim}
None
\end{verbatim}
\section{Basic statistics}
-\label{sec:orgea2a5f1}
+\label{sec:org44d0851}
\subsection{Total percent correct}
-\label{sec:org2eef721}
+\label{sec:org461b551}
To find out how well the VPs solved the task, we calculate the accuracy for the
training and test phases.
\begin{verbatim}
-def percent_correct(vp):
- train = [x for x in vp.keys() if "train" in x]
- test = [x for x in vp.keys() if "test" in x]
-
- train_total = len(train) * len(vp[train[0]]["procedure_order"])
- test_total = len(test) * len(vp[test[0]]["procedure_order"])
-
- train_correct = 0
- test_correct = 0
-
- def count_correct(trials):
- trials_correct = 0
- for sample in trials:
- for proc in vp[sample]["procedure_order"]:
- vp_ans = vp[sample][proc]["answer"]
- for c in vp_ans:
- if not c.isdigit():
- vp_ans = vp_ans.replace(c, "")
- vp_ans = int(vp_ans)
- if vp_ans == vp[sample]["water_sample"][proc][0]:
- trials_correct += 1
- return trials_correct
-
- return count_correct(train) / train_total, count_correct(test) / test_total
-\end{verbatim}
-
-\begin{verbatim}
condition = "random"
-df = pd.DataFrame([percent_correct(data[condition][vp]) for vp in data[condition].keys()], columns=["train", "test"])
+df = pd.DataFrame([tools.total_accuracy(data[condition][vp], procedures) for vp in data[condition].keys()], index=data[condition].keys(), columns=["train", "test"])
df
\end{verbatim}
\begin{verbatim}
- train test
-0 0.822222 0.820000
-1 0.966667 0.800000
-2 0.973333 0.980000
-3 0.911111 0.960000
-4 0.906667 0.980000
-5 0.924444 0.943333
-6 0.957778 0.926667
-7 0.857778 0.946667
-8 0.962222 0.970000
-9 0.982222 0.986667
+ train test
+vp12 0.822222 0.820000
+vp19 0.966667 0.800000
+vp15 0.973333 0.980000
+vp17 0.911111 0.960000
+vp20 0.906667 0.980000
+vp10 0.924444 0.943333
+vp16 0.957778 0.926667
+vp13 0.857778 0.946667
+vp18 0.962222 0.970000
+vp14 0.982222 0.986667
\end{verbatim}
Most subjects have an accuracy of over 95\% in both training and test phase.
@@ -139,10 +120,31 @@ present in both, or only one of the two phases.
To investigate, we look at the per procedure accuracy per subject.
\begin{verbatim}
-pass
+condition = "random"
+proc_accs = [
+ tools.count_correct(data[condition][vp], data[condition][vp].keys(), procedures)
+ for vp in data[condition].keys()
+]
+for vp in proc_accs:
+ for proc in vp.keys():
+ vp[proc] /= len(next(iter(data[condition].values())).keys())
+df = pd.DataFrame(proc_accs, index=data[condition].keys())
+df
\end{verbatim}
\begin{verbatim}
-None
+ 1 2 3 4 5 6 overall
+vp12 0.992 0.592 0.392 0.976 0.960 1.000 0.016
+vp19 1.000 0.992 0.000 0.576 0.992 0.992 0.848
+vp15 0.992 0.992 0.960 0.392 0.592 1.000 0.928
+vp17 0.392 0.968 0.584 1.000 1.000 0.992 0.648
+vp20 0.992 0.376 0.952 0.976 0.976 0.560 0.784
+vp10 0.968 0.360 0.592 0.984 0.984 0.992 0.712
+vp16 0.976 0.600 0.376 0.976 0.992 1.000 0.752
+vp13 0.384 0.960 0.928 0.560 0.992 0.968 0.568
+vp18 0.976 0.976 0.960 0.392 0.600 0.984 0.904
+vp14 0.992 0.976 0.992 0.976 0.400 0.600 0.968
\end{verbatim}
+
+We can see that most VPs have around two procedures with an accuracy of around 50\%.
\end{document} \ No newline at end of file