Skip to content
Snippets Groups Projects
workbook_su_ol.ipynb 538 KiB
Newer Older
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Workbook for Eye Tracking Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Basics\n",
    "import numpy as np\n",
    "import os\n",
    "import math\n",
    "\n",
    "# Data processing\n",
    "import pandas as pd\n",
    "import awkward as ak\n",
    "\n",
    "# ML\n",
    "import sklearn\n",
    "\n",
    "# Misc\n",
    "from pathlib import Path\n",
    "import pyarrow as pa\n",
    "import urllib.request"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import users and their score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Imports the user score information\n",
    "# The path is built with pathlib so it resolves on every operating system\n",
    "user_data = pd.read_csv(Path(\"data\") / \"scores_WtG_PrePost.csv\", delimiter=\",\", usecols=[\"User\", \"Pre score\", \"Post score\", \"Difference\", \"Group cat\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Defines my directory with the user data\n",
    "# os.path.join picks the separator of the running operating system; the result stays a plain str\n",
    "user_dir = os.path.join('data', 'with ET')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Filters and drops non-relevant users (rows whose \"Group cat\" is missing)\n",
    "# isna() also copes with non-float values, on which math.isnan raises TypeError,\n",
    "# and index labels are collected instead of positions so any index works\n",
    "to_drop = user_data.index[user_data[\"Group cat\"].isna()]\n",
    "user_data = user_data.drop(to_drop)\n",
    "user_data = user_data.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Filters and drops users with no directory\n",
    "has_directory = user_data[\"User\"].map(lambda user: os.path.isdir(os.path.join(user_dir, user)))\n",
    "# Collect index labels (not positions) so the drop is correct for any index\n",
    "not_existing_names = user_data.index[~has_directory.astype(bool)].tolist()\n",
    "user_data = user_data.drop(not_existing_names)\n",
    "user_data = user_data.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "tags": []
   },
Rhino's avatar
Rhino committed
   "outputs": [],
   "source": [
    "# Convert to awkward array (one record field per column of user_data)\n",
    "user_columns = {column: user_data[column] for column in user_data.columns}\n",
    "array_user = ak.zip(user_columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Import Eye Tracking data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Creates dictionary with all the \"graph01-ET_planning\" files for one user\n",
    "file_names = {}\n",
    "for user in user_data[\"User\"]:\n",
    "    available_files_temp = os.listdir(os.path.join(user_dir, user))\n",
    "    # os.listdir returns entries in arbitrary order; sorting makes the positions\n",
    "    # used later (files[0], files[1]) deterministic across machines and runs\n",
    "    # NOTE(review): assumes the file names sort in attempt order -- confirm\n",
    "    available_files = sorted(\n",
    "        file for file in available_files_temp if \"graph01-ET_planning\" in file\n",
    "    )\n",
    "    file_names[user] = available_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "tags": []
   },
Rhino's avatar
Rhino committed
   "outputs": [],
   "source": [
    "# Read each CSV file for one user, stored for each attempt\n",
    "ET_COLUMNS = [\"eyeDataTimestamp\", \"gazePointAOI_target_x\", \"gazePointAOI_target_y\"]\n",
    "\n",
    "\n",
    "def read_attempt(user, file_name):\n",
    "    \"\"\"Load the ET_COLUMNS of one tab-separated attempt file from the user's directory.\"\"\"\n",
    "    return pd.read_csv(os.path.join(user_dir, user, file_name), delimiter=\"\\t\", usecols=ET_COLUMNS)\n",
    "\n",
    "\n",
    "df_attempt1 = []\n",
    "df_attempt2 = []\n",
    "attempt2_mask = []\n",
    "for user in user_data['User']:\n",
    "    files = file_names[user]\n",
    "    if len(files) not in (1, 2):\n",
    "        # Skipping the user would silently misalign these lists with user_data\n",
    "        raise ValueError(f\"Expected 1 or 2 attempt files for user {user!r}, found {len(files)}\")\n",
    "    attempt2_mask.append(len(files) == 2)\n",
    "    df_attempt1.append(read_attempt(user, files[0]))\n",
    "    # With a single file, files[-1] is files[0]: the first attempt is loaded again as a\n",
    "    # placeholder and attempt2_mask records that there is no real second attempt\n",
    "    df_attempt2.append(read_attempt(user, files[-1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add delta t column: difference between each timestamp and the next one.\n",
    "# The last sample has no successor and gets the mean of the other deltas.\n",
    "for attempt in [df_attempt1, df_attempt2]:\n",
    "    for attempt_df in attempt:\n",
    "        # Vectorised replacement for the element-by-element subtraction loop\n",
    "        delta_t = np.diff(attempt_df[\"eyeDataTimestamp\"].to_numpy())\n",
    "        attempt_df[\"deltaTimestamp\"] = np.append(delta_t, np.mean(delta_t))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Convert df_attempts to ak.Array (one array per user and attempt)\n",
    "array_attempt1 = [ak.Array(dict(frame)) for frame in df_attempt1]\n",
    "array_attempt2 = [ak.Array(dict(frame)) for frame in df_attempt2]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data processing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "### Add Eye Tracking Data to user data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def add_column_old_broken(ak_array1, ak_array2, col_name):\n",
    "    \"\"\"Pair the entries of ak_array1 with the items of ak_array2 and add each item under col_name.\n",
    "\n",
    "    Prints every merged entry and the complete list, then returns the list\n",
    "    wrapped in an ak.Array.\n",
    "    NOTE(review): the name suggests this version is superseded -- confirm before reuse.\n",
    "    \"\"\"\n",
    "    entries = []\n",
    "    for entry, dataframe in zip(ak_array1, ak_array2):\n",
    "        # Unpack the entry's keys and add the paired item as an extra key\n",
    "        entry_with_column = {**entry, col_name: dataframe}\n",
    "        print(entry_with_column)\n",
    "        entries.append(entry_with_column)\n",
    "    print(entries)\n",
    "    return ak.Array(entries)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def add_column_old(ak_array1, arrays, col_name):\n",
    "    \"\"\"Zip all fields of ak_array1 together with arrays, stored under col_name.\"\"\"\n",
    "    zipped_fields = {field: ak_array1[field] for field in ak_array1.fields}\n",
    "    zipped_fields[col_name] = arrays\n",
    "    return ak.zip(zipped_fields)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Adds a list of arrays in a new column to an array\n",
    "def add_column(ak_array, arrays, col_name):\n",
    "    \"\"\"Return a new ak.Array whose entries carry one extra field.\n",
    "\n",
    "    ak_array: array with named fields; every field value is copied entry by entry.\n",
    "    arrays: one item per entry of ak_array, stored under col_name.\n",
    "    col_name: name of the added field.\n",
    "\n",
    "    Entries and items are paired with zip, so the result is as long as the shorter input.\n",
    "    \"\"\"\n",
    "    # Look each field up once instead of once per entry\n",
    "    columns = {k: ak_array[k] for k in ak_array.fields}\n",
    "    combined_entries = [\n",
    "        {**{k: column[i] for k, column in columns.items()}, col_name: array}\n",
    "        for i, array in zip(range(len(ak_array)), arrays)\n",
    "    ]\n",
    "    return ak.Array(combined_entries)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
Rhino's avatar
Rhino committed
   "metadata": {},
   "outputs": [],
   "source": [
Rhino's avatar
Rhino committed
    "# Label every user from the score columns and derive one 0/1 flag list per label\n",
    "labels_str = []\n",
    "for pre_score, diff in zip(array_user[\"Pre score\"], array_user[\"Difference\"]):\n",
    "    if pre_score == 2 and diff == 0:\n",
    "        labels_str.append(\"expert\")\n",
    "    elif diff <= 0:\n",
    "        labels_str.append(\"bad\")\n",
    "    else:\n",
    "        labels_str.append(\"good\")\n",
    "\n",
    "labels_int_expert = ak.Array([int(label == \"expert\") for label in labels_str])\n",
    "labels_int_good = ak.Array([int(label == \"good\") for label in labels_str])\n",
    "labels_int_bad = ak.Array([int(label == \"bad\") for label in labels_str])\n",
    "labels_str = ak.Array(labels_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "tags": []
   },
Rhino's avatar
Rhino committed
   "outputs": [],
   "source": [
Rhino's avatar
Rhino committed
    "# Creates array with first and second attempts added as well as the labels arrays\n",
    "array_data = add_column(\n",
    "    add_column(array_user, array_attempt1, 'Attempt1'), array_attempt2, 'Attempt2'\n",
    ")\n",
    "extra_fields = {\n",
    "    'Labels Str': labels_str,\n",
    "    'Labels Expert': labels_int_expert,\n",
    "    'Labels Good': labels_int_good,\n",
    "    'Labels Bad': labels_int_bad,\n",
    "    \"Attempt 2 Mask\": ak.Array(attempt2_mask),\n",
    "}\n",
    "for field_name, field_values in extra_fields.items():\n",
    "    array_data[field_name] = field_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "def minmax(data):\n",
    "    \"\"\"Get the min and max of a non-empty iterable in O(n) time and constant space.\n",
    "\n",
    "    Returns a (min, max) tuple and raises ValueError if data is empty.\n",
    "    Elements for which both comparisons are False (e.g. NaN) are never\n",
    "    selected unless they come first.\n",
    "    \"\"\"\n",
    "    # Iterate instead of slicing data[1:]: a slice copies list inputs and\n",
    "    # restricts the function to sequences\n",
    "    values = iter(data)\n",
    "    try:\n",
    "        minValue = maxValue = next(values)\n",
    "    except StopIteration:\n",
    "        raise ValueError(\"minmax() arg is an empty iterable\") from None\n",
    "    for d in values:\n",
    "        if d < minValue:\n",
    "            minValue = d\n",
    "        if d > maxValue:\n",
    "            maxValue = d\n",
    "    return (minValue, maxValue)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.4961899999999999 0.4198 -0.49335 0.24327\n"
     ]
    }
   ],
   "source": [
    "# Get Range of field of view\n",
    "min_max_x = []\n",
    "min_max_y = []\n",
    "for i in range(len(array_data[\"User\"])):\n",
    "    # The second attempt only counts when the mask marks it as a real attempt\n",
    "    attempt_names = [\"Attempt1\"]\n",
    "    if array_data[\"Attempt 2 Mask\"][i]:\n",
    "        attempt_names.append(\"Attempt2\")\n",
    "    for attempt_name in attempt_names:\n",
    "        gaze = array_data[attempt_name][i]\n",
    "        min_max_x.extend(minmax(gaze[\"gazePointAOI_target_x\"]))\n",
    "        min_max_y.extend(minmax(gaze[\"gazePointAOI_target_y\"]))\n",
    "# Reduce the per-attempt extremes to the overall range\n",
    "min_x, max_x = minmax(min_max_x)\n",
    "min_y, max_y = minmax(min_max_y)\n",
    "print(min_x, max_x, min_y, max_y)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "image/png": "
Loading
Loading full blame...