gaitsetpy.dataset.daphnet

Daphnet Dataset Loader and Utils. Maintainer: @aharshit123456

This file contains the Daphnet dataset loader class that inherits from BaseDatasetLoader.

  1'''
  2Daphnet Dataset Loader and Utils.
  3Maintainer: @aharshit123456
  4
  5This file contains the Daphnet dataset loader class that inherits from BaseDatasetLoader.
  6'''
  7
  8import os
  9import pandas as pd
 10import numpy as np
 11from typing import List, Dict, Tuple
 12from glob import glob
 13from ..core.base_classes import BaseDatasetLoader
 14from .utils import download_dataset, extract_dataset, sliding_window
 15
 16
 17class DaphnetLoader(BaseDatasetLoader):
 18    """
 19    Daphnet dataset loader class.
 20    
 21    This class handles loading and processing of the Daphnet dataset for gait analysis.
 22    """
 23    
 24    def __init__(self):
 25        super().__init__(
 26            name="daphnet",
 27            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease"
 28        )
 29        self.metadata = {
 30            'sensors': ['shank', 'thigh', 'trunk'],
 31            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
 32            'sampling_frequency': 64,
 33            'annotations': {
 34                0: 'not_valid',
 35                1: 'no_freeze',
 36                2: 'freeze'
 37            }
 38        }
 39    
 40    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 41        """
 42        Load Daphnet dataset from the specified directory.
 43        
 44        Args:
 45            data_dir: Directory to store/find the dataset
 46            **kwargs: Additional arguments (unused for Daphnet)
 47            
 48        Returns:
 49            Tuple of (data_list, names_list)
 50        """
 51        # Download and extract if needed
 52        download_dataset("daphnet", data_dir)
 53        extract_dataset("daphnet", data_dir)
 54        
 55        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
 56        daphnet_data = []
 57        daphnet_names = []
 58        
 59        # Load all subject files
 60        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
 61            # Extract filename from path
 62            filename = os.path.basename(file)
 63            daphnet_names.append(filename)
 64            
 65            # Load CSV with proper column names
 66            column_names = [
 67                "time", "shank_h_fd", "shank_v", "shank_h_l", 
 68                "thigh_h_fd", "thigh_v", "thigh_h_l", 
 69                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
 70            ]
 71            
 72            df = pd.read_csv(file, sep=" ", names=column_names)
 73            
 74            # Set time as index
 75            df = df.set_index("time")
 76            
 77            # Calculate magnitude for each sensor
 78            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
 79            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
 80            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
 81            
 82            # Reorder columns for consistency
 83            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
 84                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
 85                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
 86            
 87            daphnet_data.append(df)
 88        
 89        # Store loaded data
 90        self.data = daphnet_data
 91        self.names = daphnet_names
 92        
 93        return daphnet_data, daphnet_names
 94    
 95    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 96                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 97        """
 98        Create sliding windows from the Daphnet dataset.
 99        
100        Args:
101            data: List of DataFrames containing Daphnet data
102            names: List of names corresponding to the data
103            window_size: Size of the sliding window (default: 192)
104            step_size: Step size for the sliding window (default: 32)
105            
106        Returns:
107            List of dictionaries containing sliding windows for each DataFrame
108        """
109        windows_data = []
110        
111        for idx, df in enumerate(data):
112            # Filter out invalid data (annotations == 0)
113            df_filtered = df[df.annotations > 0]
114            
115            if df_filtered.empty:
116                continue
117                
118            windows = []
119            processed_columns = set()
120            
121            # Process each sensor column
122            for col in df_filtered.columns:
123                if col != "annotations" and col not in processed_columns:
124                    window_data = sliding_window(df_filtered[col], window_size, step_size)
125                    windows.append({"name": col, "data": window_data})
126                    processed_columns.add(col)
127            
128            # Include annotations separately
129            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
130            windows.append({"name": "annotations", "data": annotations_window})
131            
132            windows_data.append({"name": names[idx], "windows": windows})
133        
134        return windows_data
135    
136    def get_supported_formats(self) -> List[str]:
137        """
138        Get list of supported file formats for Daphnet dataset.
139        
140        Returns:
141            List of supported file extensions
142        """
143        return ['.txt']
144    
145    def get_sensor_info(self) -> Dict[str, List[str]]:
146        """
147        Get information about sensors in the dataset.
148        
149        Returns:
150            Dictionary containing sensor information
151        """
152        return {
153            'sensors': self.metadata['sensors'],
154            'components': self.metadata['components'],
155            'sampling_frequency': self.metadata['sampling_frequency']
156        }
157    
158    def get_annotation_info(self) -> Dict[int, str]:
159        """
160        Get information about annotations in the dataset.
161        
162        Returns:
163            Dictionary mapping annotation values to descriptions
164        """
165        return self.metadata['annotations']
166
167
168# Legacy function wrappers for backward compatibility
def load_daphnet_data(data_dir: str):
    """
    Legacy entry point: load the Daphnet data through a fresh DaphnetLoader.

    Args:
        data_dir: Directory to store the dataset

    Returns:
        Tuple of (data_list, names_list) as returned by DaphnetLoader.load_data
    """
    return DaphnetLoader().load_data(data_dir)
181
182
def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
    """
    Legacy entry point: build sliding windows through a fresh DaphnetLoader.

    Args:
        daphnet: List of dataframes containing Daphnet data
        daphnet_names: List of names of the Daphnet dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    return DaphnetLoader().create_sliding_windows(
        daphnet,
        daphnet_names,
        window_size,
        step_size,
    )
198
199
def plot_dataset_sample():
    """Placeholder for dataset sample plotting; does nothing and returns None."""
    return None
203
204
def plot_sliding_window():
    """Placeholder for sliding window plotting; does nothing and returns None."""
    return None
class DaphnetLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 18class DaphnetLoader(BaseDatasetLoader):
 19    """
 20    Daphnet dataset loader class.
 21    
 22    This class handles loading and processing of the Daphnet dataset for gait analysis.
 23    """
 24    
 25    def __init__(self):
 26        super().__init__(
 27            name="daphnet",
 28            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease"
 29        )
 30        self.metadata = {
 31            'sensors': ['shank', 'thigh', 'trunk'],
 32            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
 33            'sampling_frequency': 64,
 34            'annotations': {
 35                0: 'not_valid',
 36                1: 'no_freeze',
 37                2: 'freeze'
 38            }
 39        }
 40    
 41    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 42        """
 43        Load Daphnet dataset from the specified directory.
 44        
 45        Args:
 46            data_dir: Directory to store/find the dataset
 47            **kwargs: Additional arguments (unused for Daphnet)
 48            
 49        Returns:
 50            Tuple of (data_list, names_list)
 51        """
 52        # Download and extract if needed
 53        download_dataset("daphnet", data_dir)
 54        extract_dataset("daphnet", data_dir)
 55        
 56        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
 57        daphnet_data = []
 58        daphnet_names = []
 59        
 60        # Load all subject files
 61        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
 62            # Extract filename from path
 63            filename = os.path.basename(file)
 64            daphnet_names.append(filename)
 65            
 66            # Load CSV with proper column names
 67            column_names = [
 68                "time", "shank_h_fd", "shank_v", "shank_h_l", 
 69                "thigh_h_fd", "thigh_v", "thigh_h_l", 
 70                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
 71            ]
 72            
 73            df = pd.read_csv(file, sep=" ", names=column_names)
 74            
 75            # Set time as index
 76            df = df.set_index("time")
 77            
 78            # Calculate magnitude for each sensor
 79            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
 80            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
 81            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
 82            
 83            # Reorder columns for consistency
 84            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
 85                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
 86                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
 87            
 88            daphnet_data.append(df)
 89        
 90        # Store loaded data
 91        self.data = daphnet_data
 92        self.names = daphnet_names
 93        
 94        return daphnet_data, daphnet_names
 95    
 96    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 97                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 98        """
 99        Create sliding windows from the Daphnet dataset.
100        
101        Args:
102            data: List of DataFrames containing Daphnet data
103            names: List of names corresponding to the data
104            window_size: Size of the sliding window (default: 192)
105            step_size: Step size for the sliding window (default: 32)
106            
107        Returns:
108            List of dictionaries containing sliding windows for each DataFrame
109        """
110        windows_data = []
111        
112        for idx, df in enumerate(data):
113            # Filter out invalid data (annotations == 0)
114            df_filtered = df[df.annotations > 0]
115            
116            if df_filtered.empty:
117                continue
118                
119            windows = []
120            processed_columns = set()
121            
122            # Process each sensor column
123            for col in df_filtered.columns:
124                if col != "annotations" and col not in processed_columns:
125                    window_data = sliding_window(df_filtered[col], window_size, step_size)
126                    windows.append({"name": col, "data": window_data})
127                    processed_columns.add(col)
128            
129            # Include annotations separately
130            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
131            windows.append({"name": "annotations", "data": annotations_window})
132            
133            windows_data.append({"name": names[idx], "windows": windows})
134        
135        return windows_data
136    
137    def get_supported_formats(self) -> List[str]:
138        """
139        Get list of supported file formats for Daphnet dataset.
140        
141        Returns:
142            List of supported file extensions
143        """
144        return ['.txt']
145    
146    def get_sensor_info(self) -> Dict[str, List[str]]:
147        """
148        Get information about sensors in the dataset.
149        
150        Returns:
151            Dictionary containing sensor information
152        """
153        return {
154            'sensors': self.metadata['sensors'],
155            'components': self.metadata['components'],
156            'sampling_frequency': self.metadata['sampling_frequency']
157        }
158    
159    def get_annotation_info(self) -> Dict[int, str]:
160        """
161        Get information about annotations in the dataset.
162        
163        Returns:
164            Dictionary mapping annotation values to descriptions
165        """
166        return self.metadata['annotations']

Daphnet dataset loader class.

This class handles loading and processing of the Daphnet dataset for gait analysis.

DaphnetLoader()
25    def __init__(self):
26        super().__init__(
27            name="daphnet",
28            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease"
29        )
30        self.metadata = {
31            'sensors': ['shank', 'thigh', 'trunk'],
32            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
33            'sampling_frequency': 64,
34            'annotations': {
35                0: 'not_valid',
36                1: 'no_freeze',
37                2: 'freeze'
38            }
39        }

Initialize the dataset loader.

Args:
    name: Name of the dataset
    description: Description of the dataset

metadata
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
41    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
42        """
43        Load Daphnet dataset from the specified directory.
44        
45        Args:
46            data_dir: Directory to store/find the dataset
47            **kwargs: Additional arguments (unused for Daphnet)
48            
49        Returns:
50            Tuple of (data_list, names_list)
51        """
52        # Download and extract if needed
53        download_dataset("daphnet", data_dir)
54        extract_dataset("daphnet", data_dir)
55        
56        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
57        daphnet_data = []
58        daphnet_names = []
59        
60        # Load all subject files
61        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
62            # Extract filename from path
63            filename = os.path.basename(file)
64            daphnet_names.append(filename)
65            
66            # Load CSV with proper column names
67            column_names = [
68                "time", "shank_h_fd", "shank_v", "shank_h_l", 
69                "thigh_h_fd", "thigh_v", "thigh_h_l", 
70                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
71            ]
72            
73            df = pd.read_csv(file, sep=" ", names=column_names)
74            
75            # Set time as index
76            df = df.set_index("time")
77            
78            # Calculate magnitude for each sensor
79            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
80            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
81            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
82            
83            # Reorder columns for consistency
84            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
85                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
86                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
87            
88            daphnet_data.append(df)
89        
90        # Store loaded data
91        self.data = daphnet_data
92        self.names = daphnet_names
93        
94        return daphnet_data, daphnet_names

Load Daphnet dataset from the specified directory.

Args:
    data_dir: Directory to store/find the dataset
    **kwargs: Additional arguments (unused for Daphnet)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
 96    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 97                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 98        """
 99        Create sliding windows from the Daphnet dataset.
100        
101        Args:
102            data: List of DataFrames containing Daphnet data
103            names: List of names corresponding to the data
104            window_size: Size of the sliding window (default: 192)
105            step_size: Step size for the sliding window (default: 32)
106            
107        Returns:
108            List of dictionaries containing sliding windows for each DataFrame
109        """
110        windows_data = []
111        
112        for idx, df in enumerate(data):
113            # Filter out invalid data (annotations == 0)
114            df_filtered = df[df.annotations > 0]
115            
116            if df_filtered.empty:
117                continue
118                
119            windows = []
120            processed_columns = set()
121            
122            # Process each sensor column
123            for col in df_filtered.columns:
124                if col != "annotations" and col not in processed_columns:
125                    window_data = sliding_window(df_filtered[col], window_size, step_size)
126                    windows.append({"name": col, "data": window_data})
127                    processed_columns.add(col)
128            
129            # Include annotations separately
130            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
131            windows.append({"name": "annotations", "data": annotations_window})
132            
133            windows_data.append({"name": names[idx], "windows": windows})
134        
135        return windows_data

Create sliding windows from the Daphnet dataset.

Args:
    data: List of DataFrames containing Daphnet data
    names: List of names corresponding to the data
    window_size: Size of the sliding window (default: 192)
    step_size: Step size for the sliding window (default: 32)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
137    def get_supported_formats(self) -> List[str]:
138        """
139        Get list of supported file formats for Daphnet dataset.
140        
141        Returns:
142            List of supported file extensions
143        """
144        return ['.txt']

Get list of supported file formats for Daphnet dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
146    def get_sensor_info(self) -> Dict[str, List[str]]:
147        """
148        Get information about sensors in the dataset.
149        
150        Returns:
151            Dictionary containing sensor information
152        """
153        return {
154            'sensors': self.metadata['sensors'],
155            'components': self.metadata['components'],
156            'sampling_frequency': self.metadata['sampling_frequency']
157        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_annotation_info(self) -> Dict[int, str]:
159    def get_annotation_info(self) -> Dict[int, str]:
160        """
161        Get information about annotations in the dataset.
162        
163        Returns:
164            Dictionary mapping annotation values to descriptions
165        """
166        return self.metadata['annotations']

Get information about annotations in the dataset.

Returns: Dictionary mapping annotation values to descriptions

def load_daphnet_data(data_dir: str):
def load_daphnet_data(data_dir: str):
    """
    Legacy entry point: load the Daphnet data through a fresh DaphnetLoader.

    Args:
        data_dir: Directory to store the dataset

    Returns:
        Tuple of (data_list, names_list) as returned by DaphnetLoader.load_data
    """
    return DaphnetLoader().load_data(data_dir)

Legacy function for loading Daphnet data.

Args:
    data_dir: Directory to store the dataset

Returns: Tuple of (data_list, names_list)

def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
    """
    Legacy entry point: build sliding windows through a fresh DaphnetLoader.

    Args:
        daphnet: List of dataframes containing Daphnet data
        daphnet_names: List of names of the Daphnet dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    return DaphnetLoader().create_sliding_windows(
        daphnet,
        daphnet_names,
        window_size,
        step_size,
    )

Legacy function for creating sliding windows.

Args:
    daphnet: List of dataframes containing Daphnet data
    daphnet_names: List of names of the Daphnet dataframes
    window_size: Size of the sliding window
    step_size: Step size for the sliding window

Returns: List of dictionaries containing sliding windows for each DataFrame

def plot_dataset_sample():
def plot_dataset_sample():
    """Placeholder for dataset sample plotting; does nothing and returns None."""
    return None

Placeholder for dataset sample plotting.

def plot_sliding_window():
def plot_sliding_window():
    """Placeholder for sliding window plotting; does nothing and returns None."""
    return None

Placeholder for sliding window plotting.