# Workbook for Eye Tracking Data

## Import Libraries

In [8]:
# Basics
import numpy as np
import os
import math

# Data processing
import pandas as pd
import awkward as ak

# ML
import sklearn

# Misc
from pathlib import Path
import pyarrow as pa
import urllib.request

## Import data

### Import users and their score

In [9]:
# Load the per-user score table, restricted to the listed columns
score_columns = ["User", "Pre score", "Post score", "Difference", "Group cat"]
user_data = pd.read_csv(
    r"wetransfer_wtg-ar-daten_2023-05-31_1333/scores_WtG_PrePost.csv",
    delimiter=",",
    usecols=score_columns,
)

In [10]:
# Root directory of the eye-tracking (ET) data: the cells below expect one
# sub-directory per user (named after the "User" column) containing that
# user's recording files.
user_dir = 'wetransfer_wtg-ar-daten_2023-05-31_1333/WtG_AR_data_cleaned/with ET'

In [11]:
# Drop users that have no value in "Group cat" (non-relevant users).
# dropna selects rows by content, so it does not depend on the index being a
# fresh 0..n-1 range the way dropping enumerate() positions as labels does.
# reset_index(drop=True) renumbers the rows WITHOUT adding the old index as an
# extra "index" column; that column would otherwise end up as a field of
# array_user and make repeated runs of this cell fail.
user_data = user_data.dropna(subset=["Group cat"]).reset_index(drop=True)

In [12]:
# Keep only users that have a data directory below user_dir.
# astype(bool) guarantees a boolean mask even when user_data is empty.
has_directory = (
    user_data["User"]
    .map(lambda user: os.path.isdir(os.path.join(user_dir, user)))
    .astype(bool)
)
# drop=True renumbers the rows without inserting the old index as a column
# (a plain reset_index() would add "index"/"level_0" columns that later show
# up as fields of array_user and break re-running this cell).
user_data = user_data.loc[has_directory].reset_index(drop=True)

In [14]:
# Zip the user_data columns into one awkward array (one field per column)
user_columns = {name: user_data[name] for name in user_data.columns}
array_user = ak.zip(user_columns)
array_user

### Import Eye Tracking data

In [27]:
# Maps every user to the list of files found in that user's directory.
# os.listdir returns entries in arbitrary order, but the reading cell treats
# files[0] as the first and files[1] as the second attempt, so the list is
# sorted to make that assignment deterministic.
# NOTE(review): the file names visible in this notebook start with a
# timestamp, so sorted order is presumably chronological - confirm for all users.
file_names = {
    user: sorted(os.listdir(os.path.join(user_dir, user)))
    for user in user_data["User"]
}

In [35]:
# Read the recording(s) of every user into two parallel lists, one per attempt.
# Entry i of each list belongs to row i of user_data; later cells combine them
# by position, so no user may be skipped here.
et_columns = ["eyeDataTimestamp", "gazePointAOI_target_x", "gazePointAOI_target_y"]

df_attempt1 = []
df_attempt2 = []
for user in user_data['User']:
    files = file_names[user]
    if len(files) not in (1, 2):
        # Silently skipping the user would shorten the attempt lists and
        # attach every following recording to the wrong user.
        raise ValueError(
            f"Expected 1 or 2 recordings for user {user!r}, found {len(files)}: {files}"
        )
    # The files are tab-separated; "\t" is the same delimiter as a literal tab
    # but survives editors that convert tabs to spaces.
    frames = [
        pd.read_csv(os.path.join(user_dir, user, name), delimiter="\t", usecols=et_columns)
        for name in files
    ]
    df_attempt1.append(frames[0])
    # With a single recording the same data serves as both attempts; it is
    # copied instead of being read from disk a second time.
    df_attempt2.append(frames[1] if len(frames) == 2 else frames[0].copy())
print(len(df_attempt1))
print(len(df_attempt2))

36
36


In [30]:
# Read each CSV file in user_dir
#files = []
#dfs = []
#for file in Path(user_dir).glob("**/*.csv"):
#    files.append(file)
#    dfs.append(pd.read_csv(file, delimiter="	", usecols=["eyeDataTimestamp", "gazePointAOI_target_x", "gazePointAOI_target_y"]))
#dfs

In [37]:
# Turn every attempt DataFrame into an awkward array (one field per column)
array_attempt1 = [ak.Array(dict(frame)) for frame in df_attempt1]
array_attempt2 = [ak.Array(dict(frame)) for frame in df_attempt2]
array_attempt1

[<Array [{eyeDataTimestamp: ..., ...}, ...] type='2429 * {eyeDataTimestamp: ...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='445 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='216 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='153 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='364 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='810 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='200 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='316 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='463 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='157 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='745 * {eyeDataTimestamp: i...'>,
 <Array [{eyeDataTimestamp: ..., ...}, ...] type='246 * {eyeDataTimestamp: i...'>,
 <Ar

### Test with single .csv

In [108]:
# Load the two recordings of user 1A as a small, fixed test case
test_columns = ["eyeDataTimestamp", "gazePointAOI_target_x", "gazePointAOI_target_y"]
df1 = pd.read_csv(
    user_dir + r"/1A/2020_11_03-01_58_35-graph01-ET_planning-1A-Graph_Hololens.csv",
    delimiter="\t",
    usecols=test_columns,
)
df2 = pd.read_csv(
    user_dir + r"/1A/2020_11_03-01_59_11-graph01-ET_planning-1A-Graph_Hololens.csv",
    delimiter="\t",
    usecols=test_columns,
)

In [109]:
# Display the first test recording (only the columns selected via usecols)
df1

Unnamed: 0,eyeDataTimestamp,gazePointAOI_target_x,gazePointAOI_target_y
0,1604397462436,,
1,1604397462469,0.13731,-0.23269
2,1604397462502,0.18260,-0.20021
3,1604397462536,,
4,1604397462569,,
...,...,...,...
1468,1604397511370,0.04413,-0.20095
1469,1604397511403,0.04517,-0.20161
1470,1604397511436,0.04554,-0.20434
1471,1604397511470,0.04488,-0.20405


In [250]:
# Convert both test DataFrames to awkward arrays (one field per column)
array_column1, array_column2 = (ak.Array(dict(frame)) for frame in (df1, df2))
array_column1

In [31]:
#array_column2

## Data processing

### Add Eye Tracking Data to user data

In [38]:
def add_column_old_broken(ak_array1, ak_array2, col_name):
    """Superseded attempt at attaching ``ak_array2`` as a new field ``col_name``.

    Pairs the entries of ``ak_array1`` and ``ak_array2`` by position, merges
    each pair into a dict (existing entry plus ``col_name``), prints every
    merged dict and finally the whole list for debugging, and returns
    ``ak.Array`` of that list.

    NOTE(review): the name marks this version as not working; presumably the
    ``**`` unpacking of an awkward record is what fails - confirm before
    reusing it. The notebook calls ``add_column`` instead.
    """
    merged = []
    for record, attempt in zip(ak_array1, ak_array2):
        row = {**record, col_name: attempt}
        print(row)
        merged.append(row)
    print(merged)
    return ak.Array(merged)

In [39]:
def add_column_old(ak_array1, arrays, col_name):
    """Earlier variant: zip the fields of ``ak_array1`` together with ``arrays``.

    Collects every field of ``ak_array1`` as a column, adds ``arrays`` under
    ``col_name`` (replacing a field of the same name, if any) and hands the
    resulting mapping to ``ak.zip``.
    """
    columns = {field: ak_array1[field] for field in ak_array1.fields}
    columns[col_name] = arrays
    return ak.zip(columns)

In [46]:
# Adds a list of arrays in a new column to an array
def add_column(ak_array, arrays, col_name):
    """Return a new awkward array with ``arrays`` attached as field ``col_name``.

    Parameters
    ----------
    ak_array : record array exposing ``.fields``
        Entry ``i`` supplies the existing field values of output record ``i``.
    arrays : iterable
        One element per entry of ``ak_array``; element ``i`` becomes the value
        of ``col_name`` in output record ``i``.
    col_name : str
        Name of the field to add.

    Returns
    -------
    ak.Array
        Built from one dict per entry (existing fields plus ``col_name``).

    Raises
    ------
    ValueError
        If ``arrays`` and ``ak_array`` differ in length. Pairing them anyway
        would silently drop trailing entries and can attach data to the
        wrong rows.
    """
    arrays = list(arrays)  # accept any iterable, but we need its length
    if len(ak_array) != len(arrays):
        raise ValueError(
            f"Cannot add column {col_name!r}: got {len(arrays)} arrays "
            f"for {len(ak_array)} entries"
        )
    # Look every field column up once instead of once per row
    columns = {field: ak_array[field] for field in ak_array.fields}
    combined_entries = [
        {**{field: column[i] for field, column in columns.items()}, col_name: array}
        for i, array in enumerate(arrays)
    ]
    return ak.Array(combined_entries)

In [41]:
#[{k: array_user[i][k] for k in array_user.fields} for i in range(36)]

In [47]:
# Attach the first and then the second attempt of every user to the user array
array_data = add_column(
    add_column(array_user, array_attempt1, 'Attempt1'),
    array_attempt2,
    'Attempt2',
)
array_data

In [45]:
# Spot check: show the Attempt1 data stored for the entry at position 7
array_data['Attempt1'][7]

In [105]:
# Sanity check: are the Attempt1 timestamps of entry 0 and entry 1 element-wise equal?
# NOTE(review): a True result means two different rows carry the same
# timestamps - confirm this is expected and not a sign that recordings were
# attached to the wrong rows.
ak.all(array_data['Attempt1']['eyeDataTimestamp'][0] == array_data['Attempt1']['eyeDataTimestamp'][1])

True