gaitsetpy.dataset

dataset: Handles loading and processing of supported datasets.

This module provides both the new class-based dataset loaders and the legacy function-based API. All dataset loaders inherit from BaseDatasetLoader and are registered with the DatasetManager.

Supported datasets:

  • Daphnet: Freezing of Gait dataset
  • MobiFall: Fall detection dataset
  • Arduous: Daily activity recognition dataset
  • PhysioNet: VGRF dataset for Parkinson's disease gait analysis
  • HAR-UP: Multimodal System for Fall Detection and Human Activity Recognition
  • UrFall: University of Rzeszow Fall Detection Dataset with multimodal data
  1"""
  2dataset: Handles loading and processing of supported datasets.
  3
  4This module provides both the new class-based dataset loaders and legacy function-based API.
  5All dataset loaders inherit from BaseDatasetLoader and are registered with the DatasetManager.
  6
  7Supported datasets:
  8- Daphnet: Freezing of Gait dataset
  9- MobiFall: Fall detection dataset
 10- Arduous: Daily activity recognition dataset
 11- PhysioNet: VGRF dataset for Parkinson's disease gait analysis
 12- HAR-UP: Multimodal System for Fall Detection and Human Activity Recognition
 13- UrFall: University of Rzeszow Fall Detection Dataset with multimodal data
 14
 15"""
 16
 17# Import the new class-based loaders
 18from .daphnet import DaphnetLoader
 19from .mobifall import MobiFallLoader
 20from .arduous import ArduousLoader
 21from .physionet import PhysioNetLoader
 22from .harup import HARUPLoader
 23from .urfall import UrFallLoader
 24
 25# Import legacy functions for backward compatibility
 26from .daphnet import load_daphnet_data, create_sliding_windows
 27from .mobifall import load_mobifall_data
 28from .arduous import load_arduous_data
 29from .physionet import load_physionet_data, create_physionet_windows
 30from .harup import load_harup_data, create_harup_windows, extract_harup_features
 31from .urfall import load_urfall_data, create_urfall_windows
 32from .utils import download_dataset, extract_dataset, sliding_window
 33
 34# Import managers
 35from ..core.managers import DatasetManager
 36
 37# Register all dataset loaders with the manager
 38def _register_datasets():
 39    """Register all available dataset loaders with the DatasetManager."""
 40    manager = DatasetManager()
 41    manager.register_dataset("daphnet", DaphnetLoader)
 42    manager.register_dataset("mobifall", MobiFallLoader)
 43    manager.register_dataset("arduous", ArduousLoader)
 44    manager.register_dataset("physionet", PhysioNetLoader)
 45    manager.register_dataset("harup", HARUPLoader)
 46    manager.register_dataset("urfall", UrFallLoader)
 47
 48# Auto-register datasets when module is imported
 49_register_datasets()
 50
 51# Convenient access to the dataset manager
 52def get_dataset_manager():
 53    """Get the singleton DatasetManager instance."""
 54    return DatasetManager()
 55
 56# Helper function to get available datasets
 57def get_available_datasets():
 58    """Get list of available dataset names."""
 59    return DatasetManager().get_available_components()
 60
 61# Helper function to load dataset using manager
 62def load_dataset(name: str, data_dir: str, **kwargs):
 63    """
 64    Load a dataset using the DatasetManager.
 65    
 66    Args:
 67        name: Name of the dataset loader
 68        data_dir: Directory containing the dataset
 69        **kwargs: Additional arguments for the loader
 70        
 71    Returns:
 72        Dataset loader instance with loaded data
 73    """
 74    return DatasetManager().load_dataset(name, data_dir, **kwargs)
 75
 76__all__ = [
 77    # New class-based loaders
 78    'DaphnetLoader',
 79    'MobiFallLoader', 
 80    'ArduousLoader',
 81    'PhysioNetLoader',
 82    'HARUPLoader',
 83    'UrFallLoader',
 84    # Legacy functions for backward compatibility
 85    'load_daphnet_data',
 86    'create_sliding_windows',
 87    'load_mobifall_data',
 88    'load_arduous_data',
 89    'load_physionet_data',
 90    'create_physionet_windows',
 91    'load_harup_data',
 92    'create_harup_windows',
 93    'extract_harup_features',
 94    'load_urfall_data',
 95    'create_urfall_windows',
 96    'download_dataset',
 97    'extract_dataset',
 98    'sliding_window',
 99    # Manager functions
100    'get_dataset_manager',
101    'get_available_datasets',
102    'load_dataset'
103]
class DaphnetLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 18class DaphnetLoader(BaseDatasetLoader):
 19    """
 20    Daphnet dataset loader class.
 21    
 22    This class handles loading and processing of the Daphnet dataset for gait analysis.
 23    """
 24    
 25    def __init__(self):
 26        super().__init__(
 27            name="daphnet",
 28            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease"
 29        )
 30        self.metadata = {
 31            'sensors': ['shank', 'thigh', 'trunk'],
 32            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
 33            'sampling_frequency': 64,
 34            'annotations': {
 35                0: 'not_valid',
 36                1: 'no_freeze',
 37                2: 'freeze'
 38            }
 39        }
 40    
 41    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 42        """
 43        Load Daphnet dataset from the specified directory.
 44        
 45        Args:
 46            data_dir: Directory to store/find the dataset
 47            **kwargs: Additional arguments (unused for Daphnet)
 48            
 49        Returns:
 50            Tuple of (data_list, names_list)
 51        """
 52        # Download and extract if needed
 53        download_dataset("daphnet", data_dir)
 54        extract_dataset("daphnet", data_dir)
 55        
 56        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
 57        daphnet_data = []
 58        daphnet_names = []
 59        
 60        # Load all subject files
 61        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
 62            # Extract filename from path
 63            filename = os.path.basename(file)
 64            daphnet_names.append(filename)
 65            
 66            # Load CSV with proper column names
 67            column_names = [
 68                "time", "shank_h_fd", "shank_v", "shank_h_l", 
 69                "thigh_h_fd", "thigh_v", "thigh_h_l", 
 70                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
 71            ]
 72            
 73            df = pd.read_csv(file, sep=" ", names=column_names)
 74            
 75            # Set time as index
 76            df = df.set_index("time")
 77            
 78            # Calculate magnitude for each sensor
 79            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
 80            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
 81            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
 82            
 83            # Reorder columns for consistency
 84            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
 85                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
 86                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
 87            
 88            daphnet_data.append(df)
 89        
 90        # Store loaded data
 91        self.data = daphnet_data
 92        self.names = daphnet_names
 93        
 94        return daphnet_data, daphnet_names
 95    
 96    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 97                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 98        """
 99        Create sliding windows from the Daphnet dataset.
100        
101        Args:
102            data: List of DataFrames containing Daphnet data
103            names: List of names corresponding to the data
104            window_size: Size of the sliding window (default: 192)
105            step_size: Step size for the sliding window (default: 32)
106            
107        Returns:
108            List of dictionaries containing sliding windows for each DataFrame
109        """
110        windows_data = []
111        
112        for idx, df in enumerate(data):
113            # Filter out invalid data (annotations == 0)
114            df_filtered = df[df.annotations > 0]
115            
116            if df_filtered.empty:
117                continue
118                
119            windows = []
120            processed_columns = set()
121            
122            # Process each sensor column
123            for col in df_filtered.columns:
124                if col != "annotations" and col not in processed_columns:
125                    window_data = sliding_window(df_filtered[col], window_size, step_size)
126                    windows.append({"name": col, "data": window_data})
127                    processed_columns.add(col)
128            
129            # Include annotations separately
130            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
131            windows.append({"name": "annotations", "data": annotations_window})
132            
133            windows_data.append({"name": names[idx], "windows": windows})
134        
135        return windows_data
136    
137    def get_supported_formats(self) -> List[str]:
138        """
139        Get list of supported file formats for Daphnet dataset.
140        
141        Returns:
142            List of supported file extensions
143        """
144        return ['.txt']
145    
146    def get_sensor_info(self) -> Dict[str, List[str]]:
147        """
148        Get information about sensors in the dataset.
149        
150        Returns:
151            Dictionary containing sensor information
152        """
153        return {
154            'sensors': self.metadata['sensors'],
155            'components': self.metadata['components'],
156            'sampling_frequency': self.metadata['sampling_frequency']
157        }
158    
159    def get_annotation_info(self) -> Dict[int, str]:
160        """
161        Get information about annotations in the dataset.
162        
163        Returns:
164            Dictionary mapping annotation values to descriptions
165        """
166        return self.metadata['annotations']

Daphnet dataset loader class.

This class handles loading and processing of the Daphnet dataset for gait analysis.

DaphnetLoader()
25    def __init__(self):
26        super().__init__(
27            name="daphnet",
28            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease"
29        )
30        self.metadata = {
31            'sensors': ['shank', 'thigh', 'trunk'],
32            'components': ['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
33            'sampling_frequency': 64,
34            'annotations': {
35                0: 'not_valid',
36                1: 'no_freeze',
37                2: 'freeze'
38            }
39        }

Initialize the dataset loader.

Args: name: Name of the dataset description: Description of the dataset

metadata
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
41    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
42        """
43        Load Daphnet dataset from the specified directory.
44        
45        Args:
46            data_dir: Directory to store/find the dataset
47            **kwargs: Additional arguments (unused for Daphnet)
48            
49        Returns:
50            Tuple of (data_list, names_list)
51        """
52        # Download and extract if needed
53        download_dataset("daphnet", data_dir)
54        extract_dataset("daphnet", data_dir)
55        
56        file_path = os.path.join(data_dir, "dataset_fog_release/dataset")
57        daphnet_data = []
58        daphnet_names = []
59        
60        # Load all subject files
61        for file in sorted(glob(os.path.join(file_path, "S*.txt"))):
62            # Extract filename from path
63            filename = os.path.basename(file)
64            daphnet_names.append(filename)
65            
66            # Load CSV with proper column names
67            column_names = [
68                "time", "shank_h_fd", "shank_v", "shank_h_l", 
69                "thigh_h_fd", "thigh_v", "thigh_h_l", 
70                "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"
71            ]
72            
73            df = pd.read_csv(file, sep=" ", names=column_names)
74            
75            # Set time as index
76            df = df.set_index("time")
77            
78            # Calculate magnitude for each sensor
79            df["thigh"] = np.sqrt(df["thigh_h_l"]**2 + df["thigh_v"]**2 + df["thigh_h_fd"]**2)
80            df["shank"] = np.sqrt(df["shank_h_l"]**2 + df["shank_v"]**2 + df["shank_h_fd"]**2)
81            df["trunk"] = np.sqrt(df["trunk_h_l"]**2 + df["trunk_v"]**2 + df["trunk_h_fd"]**2)
82            
83            # Reorder columns for consistency
84            df = df[["shank", "shank_h_fd", "shank_v", "shank_h_l", 
85                    "thigh", "thigh_h_fd", "thigh_v", "thigh_h_l", 
86                    "trunk", "trunk_h_fd", "trunk_v", "trunk_h_l", "annotations"]]
87            
88            daphnet_data.append(df)
89        
90        # Store loaded data
91        self.data = daphnet_data
92        self.names = daphnet_names
93        
94        return daphnet_data, daphnet_names

Load Daphnet dataset from the specified directory.

Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Daphnet)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
 96    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 97                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 98        """
 99        Create sliding windows from the Daphnet dataset.
100        
101        Args:
102            data: List of DataFrames containing Daphnet data
103            names: List of names corresponding to the data
104            window_size: Size of the sliding window (default: 192)
105            step_size: Step size for the sliding window (default: 32)
106            
107        Returns:
108            List of dictionaries containing sliding windows for each DataFrame
109        """
110        windows_data = []
111        
112        for idx, df in enumerate(data):
113            # Filter out invalid data (annotations == 0)
114            df_filtered = df[df.annotations > 0]
115            
116            if df_filtered.empty:
117                continue
118                
119            windows = []
120            processed_columns = set()
121            
122            # Process each sensor column
123            for col in df_filtered.columns:
124                if col != "annotations" and col not in processed_columns:
125                    window_data = sliding_window(df_filtered[col], window_size, step_size)
126                    windows.append({"name": col, "data": window_data})
127                    processed_columns.add(col)
128            
129            # Include annotations separately
130            annotations_window = sliding_window(df_filtered["annotations"], window_size, step_size)
131            windows.append({"name": "annotations", "data": annotations_window})
132            
133            windows_data.append({"name": names[idx], "windows": windows})
134        
135        return windows_data

Create sliding windows from the Daphnet dataset.

Args: data: List of DataFrames containing Daphnet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
137    def get_supported_formats(self) -> List[str]:
138        """
139        Get list of supported file formats for Daphnet dataset.
140        
141        Returns:
142            List of supported file extensions
143        """
144        return ['.txt']

Get list of supported file formats for Daphnet dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
146    def get_sensor_info(self) -> Dict[str, List[str]]:
147        """
148        Get information about sensors in the dataset.
149        
150        Returns:
151            Dictionary containing sensor information
152        """
153        return {
154            'sensors': self.metadata['sensors'],
155            'components': self.metadata['components'],
156            'sampling_frequency': self.metadata['sampling_frequency']
157        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_annotation_info(self) -> Dict[int, str]:
159    def get_annotation_info(self) -> Dict[int, str]:
160        """
161        Get information about annotations in the dataset.
162        
163        Returns:
164            Dictionary mapping annotation values to descriptions
165        """
166        return self.metadata['annotations']

Get information about annotations in the dataset.

Returns: Dictionary mapping annotation values to descriptions

class MobiFallLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 17class MobiFallLoader(BaseDatasetLoader):
 18    """
 19    MobiFall dataset loader class.
 20    
 21    This class handles loading and processing of the MobiFall dataset for gait analysis.
 22    """
 23    
 24    def __init__(self):
 25        super().__init__(
 26            name="mobifall",
 27            description="MobiFall Dataset - Contains accelerometer and gyroscope data for fall detection"
 28        )
 29        self.metadata = {
 30            'sensors': ['accelerometer', 'gyroscope'],
 31            'components': ['x', 'y', 'z'],
 32            'sampling_frequency': 100,  # Typical for MobiFall
 33            'activities': ['ADL', 'FALL']  # Activities of Daily Living and Falls
 34        }
 35    
 36    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 37        """
 38        Load MobiFall dataset from the specified directory.
 39        
 40        Args:
 41            data_dir: Directory to store/find the dataset
 42            **kwargs: Additional arguments (unused for MobiFall)
 43            
 44        Returns:
 45            Tuple of (data_list, names_list)
 46        """
 47        # TODO: Implement MobiFall data loading
 48        # This is a placeholder implementation
 49        print("MobiFall data loading is not yet implemented")
 50        return [], []
 51    
 52    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 53                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 54        """
 55        Create sliding windows from the MobiFall dataset.
 56        
 57        Args:
 58            data: List of DataFrames containing MobiFall data
 59            names: List of names corresponding to the data
 60            window_size: Size of the sliding window (default: 192)
 61            step_size: Step size for the sliding window (default: 32)
 62            
 63        Returns:
 64            List of dictionaries containing sliding windows for each DataFrame
 65        """
 66        # TODO: Implement MobiFall sliding window creation
 67        # This is a placeholder implementation
 68        print("MobiFall sliding window creation is not yet implemented")
 69        return []
 70    
 71    def get_supported_formats(self) -> List[str]:
 72        """
 73        Get list of supported file formats for MobiFall dataset.
 74        
 75        Returns:
 76            List of supported file extensions
 77        """
 78        return ['.csv', '.txt']
 79    
 80    def get_sensor_info(self) -> Dict[str, List[str]]:
 81        """
 82        Get information about sensors in the dataset.
 83        
 84        Returns:
 85            Dictionary containing sensor information
 86        """
 87        return {
 88            'sensors': self.metadata['sensors'],
 89            'components': self.metadata['components'],
 90            'sampling_frequency': self.metadata['sampling_frequency']
 91        }
 92    
 93    def get_activity_info(self) -> List[str]:
 94        """
 95        Get information about activities in the dataset.
 96        
 97        Returns:
 98            List of activity types
 99        """
100        return self.metadata['activities']

MobiFall dataset loader class.

This class handles loading and processing of the MobiFall dataset for gait analysis.

MobiFallLoader()
24    def __init__(self):
25        super().__init__(
26            name="mobifall",
27            description="MobiFall Dataset - Contains accelerometer and gyroscope data for fall detection"
28        )
29        self.metadata = {
30            'sensors': ['accelerometer', 'gyroscope'],
31            'components': ['x', 'y', 'z'],
32            'sampling_frequency': 100,  # Typical for MobiFall
33            'activities': ['ADL', 'FALL']  # Activities of Daily Living and Falls
34        }

Initialize the dataset loader.

Args: name: Name of the dataset description: Description of the dataset

metadata
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
36    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
37        """
38        Load MobiFall dataset from the specified directory.
39        
40        Args:
41            data_dir: Directory to store/find the dataset
42            **kwargs: Additional arguments (unused for MobiFall)
43            
44        Returns:
45            Tuple of (data_list, names_list)
46        """
47        # TODO: Implement MobiFall data loading
48        # This is a placeholder implementation
49        print("MobiFall data loading is not yet implemented")
50        return [], []

Load MobiFall dataset from the specified directory.

Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for MobiFall)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
52    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
53                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
54        """
55        Create sliding windows from the MobiFall dataset.
56        
57        Args:
58            data: List of DataFrames containing MobiFall data
59            names: List of names corresponding to the data
60            window_size: Size of the sliding window (default: 192)
61            step_size: Step size for the sliding window (default: 32)
62            
63        Returns:
64            List of dictionaries containing sliding windows for each DataFrame
65        """
66        # TODO: Implement MobiFall sliding window creation
67        # This is a placeholder implementation
68        print("MobiFall sliding window creation is not yet implemented")
69        return []

Create sliding windows from the MobiFall dataset.

Args: data: List of DataFrames containing MobiFall data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
71    def get_supported_formats(self) -> List[str]:
72        """
73        Get list of supported file formats for MobiFall dataset.
74        
75        Returns:
76            List of supported file extensions
77        """
78        return ['.csv', '.txt']

Get list of supported file formats for MobiFall dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
80    def get_sensor_info(self) -> Dict[str, List[str]]:
81        """
82        Get information about sensors in the dataset.
83        
84        Returns:
85            Dictionary containing sensor information
86        """
87        return {
88            'sensors': self.metadata['sensors'],
89            'components': self.metadata['components'],
90            'sampling_frequency': self.metadata['sampling_frequency']
91        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> List[str]:
 93    def get_activity_info(self) -> List[str]:
 94        """
 95        Get information about activities in the dataset.
 96        
 97        Returns:
 98            List of activity types
 99        """
100        return self.metadata['activities']

Get information about activities in the dataset.

Returns: List of activity types

class ArduousLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 17class ArduousLoader(BaseDatasetLoader):
 18    """
 19    Arduous dataset loader class.
 20    
 21    This class handles loading and processing of the Arduous dataset for gait analysis.
 22    """
 23    
 24    def __init__(self):
 25        super().__init__(
 26            name="arduous",
 27            description="Arduous Dataset - Contains multi-sensor wearable data for daily activity recognition"
 28        )
 29        self.metadata = {
 30            'sensors': ['accelerometer', 'gyroscope', 'magnetometer'],
 31            'components': ['x', 'y', 'z'],
 32            'sampling_frequency': 50,  # Typical for Arduous
 33            'activities': ['walking', 'running', 'sitting', 'standing', 'lying']
 34        }
 35    
 36    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 37        """
 38        Load Arduous dataset from the specified directory.
 39        
 40        Args:
 41            data_dir: Directory to store/find the dataset
 42            **kwargs: Additional arguments (unused for Arduous)
 43            
 44        Returns:
 45            Tuple of (data_list, names_list)
 46        """
 47        # TODO: Implement Arduous data loading
 48        # This is a placeholder implementation
 49        print("Arduous data loading is not yet implemented")
 50        return [], []
 51    
 52    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
 53                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
 54        """
 55        Create sliding windows from the Arduous dataset.
 56        
 57        Args:
 58            data: List of DataFrames containing Arduous data
 59            names: List of names corresponding to the data
 60            window_size: Size of the sliding window (default: 192)
 61            step_size: Step size for the sliding window (default: 32)
 62            
 63        Returns:
 64            List of dictionaries containing sliding windows for each DataFrame
 65        """
 66        # TODO: Implement Arduous sliding window creation
 67        # This is a placeholder implementation
 68        print("Arduous sliding window creation is not yet implemented")
 69        return []
 70    
 71    def get_supported_formats(self) -> List[str]:
 72        """
 73        Get list of supported file formats for Arduous dataset.
 74        
 75        Returns:
 76            List of supported file extensions
 77        """
 78        return ['.csv', '.txt']
 79    
 80    def get_sensor_info(self) -> Dict[str, List[str]]:
 81        """
 82        Get information about sensors in the dataset.
 83        
 84        Returns:
 85            Dictionary containing sensor information
 86        """
 87        return {
 88            'sensors': self.metadata['sensors'],
 89            'components': self.metadata['components'],
 90            'sampling_frequency': self.metadata['sampling_frequency']
 91        }
 92    
 93    def get_activity_info(self) -> List[str]:
 94        """
 95        Get information about activities in the dataset.
 96        
 97        Returns:
 98            List of activity types
 99        """
100        return self.metadata['activities']

Arduous dataset loader class.

This class handles loading and processing of the Arduous dataset for gait analysis.

ArduousLoader()
24    def __init__(self):
25        super().__init__(
26            name="arduous",
27            description="Arduous Dataset - Contains multi-sensor wearable data for daily activity recognition"
28        )
29        self.metadata = {
30            'sensors': ['accelerometer', 'gyroscope', 'magnetometer'],
31            'components': ['x', 'y', 'z'],
32            'sampling_frequency': 50,  # Typical for Arduous
33            'activities': ['walking', 'running', 'sitting', 'standing', 'lying']
34        }

Initialize the dataset loader.

Args: name: Name of the dataset description: Description of the dataset

metadata
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
36    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
37        """
38        Load Arduous dataset from the specified directory.
39        
40        Args:
41            data_dir: Directory to store/find the dataset
42            **kwargs: Additional arguments (unused for Arduous)
43            
44        Returns:
45            Tuple of (data_list, names_list)
46        """
47        # TODO: Implement Arduous data loading
48        # This is a placeholder implementation
49        print("Arduous data loading is not yet implemented")
50        return [], []

Load Arduous dataset from the specified directory.

Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Arduous)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 192, step_size: int = 32) -> List[Dict]:
52    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
53                             window_size: int = 192, step_size: int = 32) -> List[Dict]:
54        """
55        Create sliding windows from the Arduous dataset.
56        
57        Args:
58            data: List of DataFrames containing Arduous data
59            names: List of names corresponding to the data
60            window_size: Size of the sliding window (default: 192)
61            step_size: Step size for the sliding window (default: 32)
62            
63        Returns:
64            List of dictionaries containing sliding windows for each DataFrame
65        """
66        # TODO: Implement Arduous sliding window creation
67        # This is a placeholder implementation
68        print("Arduous sliding window creation is not yet implemented")
69        return []

Create sliding windows from the Arduous dataset.

Args: data: List of DataFrames containing Arduous data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
71    def get_supported_formats(self) -> List[str]:
72        """
73        Get list of supported file formats for Arduous dataset.
74        
75        Returns:
76            List of supported file extensions
77        """
78        return ['.csv', '.txt']

Get list of supported file formats for Arduous dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
80    def get_sensor_info(self) -> Dict[str, List[str]]:
81        """
82        Get information about sensors in the dataset.
83        
84        Returns:
85            Dictionary containing sensor information
86        """
87        return {
88            'sensors': self.metadata['sensors'],
89            'components': self.metadata['components'],
90            'sampling_frequency': self.metadata['sampling_frequency']
91        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> List[str]:
 93    def get_activity_info(self) -> List[str]:
 94        """
 95        Get information about activities in the dataset.
 96        
 97        Returns:
 98            List of activity types
 99        """
100        return self.metadata['activities']

Get information about activities in the dataset.

Returns: List of activity types

class PhysioNetLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 25class PhysioNetLoader(BaseDatasetLoader):
 26    """
 27    PhysioNet VGRF dataset loader class.
 28    
 29    This class handles loading and processing of the PhysioNet Gait in Parkinson's Disease dataset.
 30    The dataset contains vertical ground reaction force (VGRF) data from subjects with Parkinson's 
 31    disease and healthy controls.
 32    """
 33    
 34    def __init__(self):
 35        super().__init__(
 36            name="physionet",
 37            description="PhysioNet Gait in Parkinson's Disease Dataset - Contains VGRF data from subjects with Parkinson's disease and healthy controls"
 38        )
 39        self.metadata = {
 40            'sensors': ['VGRF_L1', 'VGRF_L2', 'VGRF_L3', 'VGRF_L4', 'VGRF_L5', 'VGRF_L6', 'VGRF_L7', 'VGRF_L8',
 41                       'VGRF_R1', 'VGRF_R2', 'VGRF_R3', 'VGRF_R4', 'VGRF_R5', 'VGRF_R6', 'VGRF_R7', 'VGRF_R8'],
 42            'sampling_frequency': 100,  # 100 Hz sampling frequency
 43            'subjects': {
 44                'Co': 'Control subjects',
 45                'Pt': 'Parkinson\'s disease patients'
 46            },
 47            'window_size': 600,  # 6 seconds at 100 Hz
 48            'url': 'https://physionet.org/files/gaitpdb/1.0.0/'
 49        }
 50        self.labels = []
 51        self.subject_types = []
 52    
 53    def _download_physionet_data(self, data_dir: str) -> str:
 54        """
 55        Download PhysioNet dataset if not already present.
 56        
 57        Args:
 58            data_dir: Directory to store the dataset
 59            
 60        Returns:
 61            Path to the downloaded/existing dataset directory
 62        """
 63        dataset_path = os.path.join(data_dir, "physionet_gaitpdb")
 64        
 65        if os.path.exists(dataset_path) and len(os.listdir(dataset_path)) > 0:
 66            print(f"PhysioNet dataset already exists at: {dataset_path}")
 67            return dataset_path
 68        
 69        os.makedirs(dataset_path, exist_ok=True)
 70        
 71        # Download the dataset files
 72        base_url = "https://physionet.org/files/gaitpdb/1.0.0/"
 73        
 74        # Get list of files (basic file names based on the reference)
 75        file_patterns = [
 76            # Control subjects - Ga prefix
 77            *[f"GaCo{i:02d}_{j:02d}.txt" for i in range(1, 18) for j in range(1, 3)],
 78            "GaCo22_01.txt", "GaCo22_10.txt",
 79            
 80            # Parkinson's patients - Ga prefix
 81            *[f"GaPt{i:02d}_{j:02d}.txt" for i in range(3, 10) for j in range(1, 3)],
 82            *[f"GaPt{i:02d}_{j:02d}.txt" for i in range(12, 34) for j in range(1, 3)],
 83            *[f"GaPt{i:02d}_10.txt" for i in range(13, 34)],
 84            
 85            # Control subjects - Ju prefix
 86            *[f"JuCo{i:02d}_01.txt" for i in range(1, 27)],
 87            
 88            # Parkinson's patients - Ju prefix
 89            *[f"JuPt{i:02d}_{j:02d}.txt" for i in range(1, 30) for j in range(1, 8)],
 90            
 91            # Control subjects - Si prefix
 92            *[f"SiCo{i:02d}_01.txt" for i in range(1, 31)],
 93            
 94            # Parkinson's patients - Si prefix
 95            *[f"SiPt{i:02d}_01.txt" for i in range(2, 41)]
 96        ]
 97        
 98        print(f"Downloading PhysioNet dataset to {dataset_path}")
 99        for filename in tqdm(file_patterns, desc="Downloading files"):
100            file_url = base_url + filename
101            file_path = os.path.join(dataset_path, filename)
102            
103            if os.path.exists(file_path):
104                continue
105                
106            try:
107                response = requests.get(file_url, stream=True)
108                if response.status_code == 200:
109                    with open(file_path, 'wb') as f:
110                        for chunk in response.iter_content(chunk_size=8192):
111                            f.write(chunk)
112                else:
113                    print(f"Could not download {filename} (status: {response.status_code})")
114            except Exception as e:
115                print(f"Error downloading {filename}: {e}")
116        
117        return dataset_path
118    
119    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
120        """
121        Load PhysioNet VGRF dataset from the specified directory.
122        
123        Args:
124            data_dir: Directory to store/find the dataset
125            **kwargs: Additional arguments (unused for PhysioNet)
126            
127        Returns:
128            Tuple of (data_list, names_list)
129        """
130        # Download dataset if needed
131        dataset_path = self._download_physionet_data(data_dir)
132        
133        physionet_data = []
134        physionet_names = []
135        self.labels = []
136        self.subject_types = []
137        
138        # Load all available files
139        for filepath in sorted(glob(os.path.join(dataset_path, "Ga*.txt"))):
140            filename = os.path.basename(filepath)
141            
142            # Extract subject type from filename
143            if 'Co' in filename:
144                subject_type = 'Control'
145                label = 'Co'
146            elif 'Pt' in filename:
147                subject_type = 'Patient'
148                label = 'Pt'
149            else:
150                continue  # Skip files that don't match expected pattern
151            
152            try:
153                # Read the file - PhysioNet files are tab-delimited with variable columns
154                # Column 0: time, Columns 1-16: VGRF sensors, additional columns may exist
155                df = pd.read_csv(filepath, delimiter='\t', header=None)
156                
157                # Handle variable number of columns
158                n_cols = min(df.shape[1], 19)  # Limit to 19 columns max
159                df = df.iloc[:, :n_cols]
160                
161                # Create column names
162                col_names = ['time']
163                for i in range(1, n_cols):
164                    if i <= 8:
165                        col_names.append(f'VGRF_L{i}')
166                    elif i <= 16:
167                        col_names.append(f'VGRF_R{i-8}')
168                    else:
169                        col_names.append(f'sensor_{i}')
170                
171                df.columns = col_names
172                
173                # Set time as index
174                df = df.set_index('time')
175                
176                # Add subject metadata
177                df['subject_type'] = subject_type
178                df['label'] = label
179                
180                physionet_data.append(df)
181                physionet_names.append(filename)
182                self.labels.append(label)
183                self.subject_types.append(subject_type)
184                
185            except Exception as e:
186                print(f"Error loading {filename}: {e}")
187                continue
188        
189        # Store loaded data
190        self.data = physionet_data
191        self.names = physionet_names
192        
193        print(f"Loaded {len(physionet_data)} PhysioNet files")
194        print(f"Subject distribution: {dict(zip(*np.unique(self.subject_types, return_counts=True)))}")
195        
196        return physionet_data, physionet_names
197    
198    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
199                             window_size: int = 600, step_size: int = 100) -> List[Dict]:
200        """
201        Create sliding windows from the PhysioNet dataset.
202        
203        Args:
204            data: List of DataFrames containing PhysioNet data
205            names: List of names corresponding to the data
206            window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz)
207            step_size: Step size for the sliding window (default: 100)
208            
209        Returns:
210            List of dictionaries containing sliding windows for each DataFrame
211        """
212        windows_data = []
213        
214        for idx, df in enumerate(data):
215            # Remove metadata columns for windowing
216            sensor_columns = [col for col in df.columns if col.startswith('VGRF_') or col.startswith('sensor_')]
217            df_sensors = df[sensor_columns]
218            
219            if df_sensors.empty or len(df_sensors) < window_size:
220                continue
221                
222            windows = []
223            
224            # Create windows for each sensor
225            for col in sensor_columns:
226                try:
227                    window_data = sliding_window(df_sensors[col].values, window_size, step_size)
228                    windows.append({"name": col, "data": window_data})
229                except Exception as e:
230                    print(f"Error creating windows for {col} in {names[idx]}: {e}")
231                    continue
232            
233            if windows:
234                windows_data.append({
235                    "name": names[idx],
236                    "windows": windows,
237                    "metadata": {
238                        "subject_type": df['subject_type'].iloc[0] if 'subject_type' in df.columns else 'Unknown',
239                        "label": df['label'].iloc[0] if 'label' in df.columns else 'Unknown',
240                        "window_size": window_size,
241                        "step_size": step_size,
242                        "num_windows": len(windows[0]["data"]) if windows else 0
243                    }
244                })
245        
246        return windows_data
247    
248    def get_supported_formats(self) -> List[str]:
249        """
250        Get list of supported file formats for PhysioNet dataset.
251        
252        Returns:
253            List of supported file extensions
254        """
255        return ['.txt']
256    
257    def get_sensor_info(self) -> Dict[str, List[str]]:
258        """
259        Get information about sensors in the dataset.
260        
261        Returns:
262            Dictionary containing sensor information
263        """
264        return {
265            'sensors': self.metadata['sensors'],
266            'sampling_frequency': self.metadata['sampling_frequency'],
267            'window_size': self.metadata['window_size']
268        }
269    
270    def get_subject_info(self) -> Dict[str, str]:
271        """
272        Get information about subjects in the dataset.
273        
274        Returns:
275            Dictionary containing subject information
276        """
277        return self.metadata['subjects']
278    
279    def get_labels(self) -> List[str]:
280        """
281        Get labels for loaded data.
282        
283        Returns:
284            List of labels corresponding to loaded data
285        """
286        return self.labels
287    
288    def filter_by_subject_type(self, subject_type: str) -> Tuple[List[pd.DataFrame], List[str]]:
289        """
290        Filter loaded data by subject type.
291        
292        Args:
293            subject_type: 'Control' or 'Patient'
294            
295        Returns:
296            Tuple of (filtered_data, filtered_names)
297        """
298        if not self.data:
299            raise ValueError("No data loaded. Call load_data() first.")
300        
301        filtered_data = []
302        filtered_names = []
303        
304        for i, df in enumerate(self.data):
305            if df['subject_type'].iloc[0] == subject_type:
306                filtered_data.append(df)
307                filtered_names.append(self.names[i])
308        
309        return filtered_data, filtered_names

PhysioNet VGRF dataset loader class.

This class handles loading and processing of the PhysioNet Gait in Parkinson's Disease dataset. The dataset contains vertical ground reaction force (VGRF) data from subjects with Parkinson's disease and healthy controls.

PhysioNetLoader()
34    def __init__(self):
35        super().__init__(
36            name="physionet",
37            description="PhysioNet Gait in Parkinson's Disease Dataset - Contains VGRF data from subjects with Parkinson's disease and healthy controls"
38        )
39        self.metadata = {
40            'sensors': ['VGRF_L1', 'VGRF_L2', 'VGRF_L3', 'VGRF_L4', 'VGRF_L5', 'VGRF_L6', 'VGRF_L7', 'VGRF_L8',
41                       'VGRF_R1', 'VGRF_R2', 'VGRF_R3', 'VGRF_R4', 'VGRF_R5', 'VGRF_R6', 'VGRF_R7', 'VGRF_R8'],
42            'sampling_frequency': 100,  # 100 Hz sampling frequency
43            'subjects': {
44                'Co': 'Control subjects',
45                'Pt': 'Parkinson\'s disease patients'
46            },
47            'window_size': 600,  # 6 seconds at 100 Hz
48            'url': 'https://physionet.org/files/gaitpdb/1.0.0/'
49        }
50        self.labels = []
51        self.subject_types = []

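Initialize the dataset loader.

Takes no arguments: the dataset name ("physionet") and its description are fixed and passed to the base-class initializer, whose parameters are name (name of the dataset) and description (description of the dataset).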

metadata
labels
subject_types
def load_data( self, data_dir: str, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
119    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
120        """
121        Load PhysioNet VGRF dataset from the specified directory.
122        
123        Args:
124            data_dir: Directory to store/find the dataset
125            **kwargs: Additional arguments (unused for PhysioNet)
126            
127        Returns:
128            Tuple of (data_list, names_list)
129        """
130        # Download dataset if needed
131        dataset_path = self._download_physionet_data(data_dir)
132        
133        physionet_data = []
134        physionet_names = []
135        self.labels = []
136        self.subject_types = []
137        
138        # Load all available files
139        for filepath in sorted(glob(os.path.join(dataset_path, "Ga*.txt"))):
140            filename = os.path.basename(filepath)
141            
142            # Extract subject type from filename
143            if 'Co' in filename:
144                subject_type = 'Control'
145                label = 'Co'
146            elif 'Pt' in filename:
147                subject_type = 'Patient'
148                label = 'Pt'
149            else:
150                continue  # Skip files that don't match expected pattern
151            
152            try:
153                # Read the file - PhysioNet files are tab-delimited with variable columns
154                # Column 0: time, Columns 1-16: VGRF sensors, additional columns may exist
155                df = pd.read_csv(filepath, delimiter='\t', header=None)
156                
157                # Handle variable number of columns
158                n_cols = min(df.shape[1], 19)  # Limit to 19 columns max
159                df = df.iloc[:, :n_cols]
160                
161                # Create column names
162                col_names = ['time']
163                for i in range(1, n_cols):
164                    if i <= 8:
165                        col_names.append(f'VGRF_L{i}')
166                    elif i <= 16:
167                        col_names.append(f'VGRF_R{i-8}')
168                    else:
169                        col_names.append(f'sensor_{i}')
170                
171                df.columns = col_names
172                
173                # Set time as index
174                df = df.set_index('time')
175                
176                # Add subject metadata
177                df['subject_type'] = subject_type
178                df['label'] = label
179                
180                physionet_data.append(df)
181                physionet_names.append(filename)
182                self.labels.append(label)
183                self.subject_types.append(subject_type)
184                
185            except Exception as e:
186                print(f"Error loading {filename}: {e}")
187                continue
188        
189        # Store loaded data
190        self.data = physionet_data
191        self.names = physionet_names
192        
193        print(f"Loaded {len(physionet_data)} PhysioNet files")
194        print(f"Subject distribution: {dict(zip(*np.unique(self.subject_types, return_counts=True)))}")
195        
196        return physionet_data, physionet_names

Load PhysioNet VGRF dataset from the specified directory.

Args:
- data_dir: Directory to store/find the dataset
- **kwargs: Additional arguments (unused for PhysioNet)

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 600, step_size: int = 100) -> List[Dict]:
198    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
199                             window_size: int = 600, step_size: int = 100) -> List[Dict]:
200        """
201        Create sliding windows from the PhysioNet dataset.
202        
203        Args:
204            data: List of DataFrames containing PhysioNet data
205            names: List of names corresponding to the data
206            window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz)
207            step_size: Step size for the sliding window (default: 100)
208            
209        Returns:
210            List of dictionaries containing sliding windows for each DataFrame
211        """
212        windows_data = []
213        
214        for idx, df in enumerate(data):
215            # Remove metadata columns for windowing
216            sensor_columns = [col for col in df.columns if col.startswith('VGRF_') or col.startswith('sensor_')]
217            df_sensors = df[sensor_columns]
218            
219            if df_sensors.empty or len(df_sensors) < window_size:
220                continue
221                
222            windows = []
223            
224            # Create windows for each sensor
225            for col in sensor_columns:
226                try:
227                    window_data = sliding_window(df_sensors[col].values, window_size, step_size)
228                    windows.append({"name": col, "data": window_data})
229                except Exception as e:
230                    print(f"Error creating windows for {col} in {names[idx]}: {e}")
231                    continue
232            
233            if windows:
234                windows_data.append({
235                    "name": names[idx],
236                    "windows": windows,
237                    "metadata": {
238                        "subject_type": df['subject_type'].iloc[0] if 'subject_type' in df.columns else 'Unknown',
239                        "label": df['label'].iloc[0] if 'label' in df.columns else 'Unknown',
240                        "window_size": window_size,
241                        "step_size": step_size,
242                        "num_windows": len(windows[0]["data"]) if windows else 0
243                    }
244                })
245        
246        return windows_data

Create sliding windows from the PhysioNet dataset.

Args:
- data: List of DataFrames containing PhysioNet data
- names: List of names corresponding to the data
- window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz)
- step_size: Step size for the sliding window (default: 100)

Returns: List of dictionaries containing sliding windows for each DataFrame

def get_supported_formats(self) -> List[str]:
248    def get_supported_formats(self) -> List[str]:
249        """
250        Get list of supported file formats for PhysioNet dataset.
251        
252        Returns:
253            List of supported file extensions
254        """
255        return ['.txt']

Get list of supported file formats for PhysioNet dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
257    def get_sensor_info(self) -> Dict[str, List[str]]:
258        """
259        Get information about sensors in the dataset.
260        
261        Returns:
262            Dictionary containing sensor information
263        """
264        return {
265            'sensors': self.metadata['sensors'],
266            'sampling_frequency': self.metadata['sampling_frequency'],
267            'window_size': self.metadata['window_size']
268        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_subject_info(self) -> Dict[str, str]:
270    def get_subject_info(self) -> Dict[str, str]:
271        """
272        Get information about subjects in the dataset.
273        
274        Returns:
275            Dictionary containing subject information
276        """
277        return self.metadata['subjects']

Get information about subjects in the dataset.

Returns: Dictionary containing subject information

def get_labels(self) -> List[str]:
279    def get_labels(self) -> List[str]:
280        """
281        Get labels for loaded data.
282        
283        Returns:
284            List of labels corresponding to loaded data
285        """
286        return self.labels

Get labels for loaded data.

Returns: List of labels corresponding to loaded data

def filter_by_subject_type( self, subject_type: str) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
288    def filter_by_subject_type(self, subject_type: str) -> Tuple[List[pd.DataFrame], List[str]]:
289        """
290        Filter loaded data by subject type.
291        
292        Args:
293            subject_type: 'Control' or 'Patient'
294            
295        Returns:
296            Tuple of (filtered_data, filtered_names)
297        """
298        if not self.data:
299            raise ValueError("No data loaded. Call load_data() first.")
300        
301        filtered_data = []
302        filtered_names = []
303        
304        for i, df in enumerate(self.data):
305            if df['subject_type'].iloc[0] == subject_type:
306                filtered_data.append(df)
307                filtered_names.append(self.names[i])
308        
309        return filtered_data, filtered_names

Filter loaded data by subject type.

Args: subject_type: 'Control' or 'Patient'

Returns: Tuple of (filtered_data, filtered_names)

class HARUPLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 26class HARUPLoader(BaseDatasetLoader):
 27    """
 28    HAR-UP dataset loader class.
 29    
 30    This class handles loading and processing of the HAR-UP dataset for human activity recognition
 31    and fall detection analysis.
 32    """
 33    
 34    def __init__(self):
 35        super().__init__(
 36            name="harup",
 37            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition"
 38        )
 39        self.metadata = {
 40            'sensors': [
 41                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
 42                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
 43                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
 44                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
 45                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
 46                'BrainSensor', 'Infrared'
 47            ],
 48            'components': {
 49                'Accelerometer': ['x', 'y', 'z'],
 50                'AngularVelocity': ['x', 'y', 'z'],
 51                'Luminosity': ['illuminance'],
 52                'BrainSensor': ['value'],
 53                'Infrared': ['value']
 54            },
 55            'sampling_frequency': 100,  # Hz
 56            'activities': {
 57                1: 'Walking',
 58                2: 'Walking upstairs',
 59                3: 'Walking downstairs',
 60                4: 'Sitting',
 61                5: 'Standing',
 62                6: 'Lying',
 63                7: 'Falling forward using hands',
 64                8: 'Falling forward using knees',
 65                9: 'Falling backwards',
 66                10: 'Falling sideward',
 67                11: 'Falling sitting in empty chair'
 68            }
 69        }
 70        
 71        # Features used in HAR-UP
 72        self.features = [
 73            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
 74            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
 75            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
 76            'Energy'
 77        ]
 78    
 79    def download_harup_data(self, data_dir: str) -> Optional[str]:
 80        """
 81        Download HAR-UP dataset if not already present.
 82        
 83        Args:
 84            data_dir: Directory to store the dataset
 85            
 86        Returns:
 87            Path to the extracted dataset or None if not found
 88        """
 89        # Use the utility function to download and extract the dataset
 90        download_dataset("harup", data_dir)
 91        extract_dataset("harup", data_dir)
 92        
 93        # Check if dataset exists after download attempt
 94        dataset_path = os.path.join(data_dir, "DataSet")
 95        if not os.path.exists(dataset_path):
 96            print("HAR-UP dataset not found after download attempt.")
 97            print("Please ensure the dataset is organized in the following structure:")
 98            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
 99            return None
100        
101        return dataset_path
102    
103    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
104                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
105                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
106        """
107        Load HAR-UP dataset from the specified directory.
108        Args:
109            data_dir: Directory containing the dataset
110            subjects: List of subject IDs to load (default: all subjects)
111            activities: List of activity IDs to load (default: all activities)
112            trials: List of trial IDs to load (default: all trials)
113            **kwargs: Additional arguments
114        Returns:
115            Tuple of (data_list, names_list)
116        """
117        import re
118        import os
119        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
120        if subjects is None:
121            subjects = list(range(1, 5))  # 4 subjects
122        if activities is None:
123            activities = list(range(1, 12))  # 11 activities
124        if trials is None:
125            trials = list(range(1, 4))  # 3 trials
126
127        # Column names as per official HAR-UP documentation
128        columns = [
129            "Timestamp",
130            "EEG_NeuroSky",
131            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
132            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
133            "Belt_Luminosity",
134            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
135            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
136            "Neck_Luminosity",
137            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
138            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
139            "Pocket_Luminosity",
140            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
141            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
142            "Wrist_Luminosity",
143            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
144        ]
145
146        # If data_dir does not exist, trigger interactive download
147        if not os.path.exists(data_dir):
148            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
149            self.download_harup_data(data_dir)
150        # If still doesn't exist, error out
151        if not os.path.exists(data_dir):
152            print(f"Failed to create or download dataset directory: {data_dir}")
153            return [], []
154
155        # Find the UP_Fall_Detection_Dataset directory
156        dataset_path = None
157        for entry in os.listdir(data_dir):
158            entry_path = os.path.join(data_dir, entry)
159            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
160                dataset_path = entry_path
161                break
162        if dataset_path is None:
163            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
164            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
165            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
166            return [], []
167
168        harup_data = []
169        harup_names = []
170
171        # Iterate over subjects
172        for subject_id in subjects:
173            subject_folder = f"Subject_{subject_id:02d}"
174            subject_path = os.path.join(dataset_path, subject_folder)
175            if not os.path.isdir(subject_path):
176                continue
177            
178            # Initialize empty DataFrame for this subject
179            subject_df = pd.DataFrame()
180            
181            # Iterate over activities in order
182            for activity_id in sorted(activities):
183                activity_folder = f"A{activity_id:02d}"
184                activity_path = os.path.join(subject_path, activity_folder)
185                if not os.path.isdir(activity_path):
186                    continue
187                
188                # Iterate over trials in order
189                for trial_id in sorted(trials):
190                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
191                    file_path = os.path.join(activity_path, file_name)
192                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
193                    
194                    try:
195                        df = pd.read_csv(file_path, header=0)
196                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
197                        df['subject_id'] = subject_id
198                        df['activity_id'] = activity_id 
199                        df['trial_id'] = trial_id
200                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
201                        
202                        # Concatenate to subject's DataFrame
203                        subject_df = pd.concat([subject_df, df], ignore_index=True)
204                        harup_names.append(name)
205                        
206                    except Exception as e:
207                        print(f"Error loading {file_path}: {e}")
208            
209            # Add complete subject DataFrame to data list
210            if not subject_df.empty:
211                harup_data.append(subject_df)
212                
213        self.data = harup_data
214        self.names = harup_names
215
216        return harup_data, harup_names
217    
218    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
219                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
220        """
221        Create sliding windows from the HAR-UP dataset.
222        
223        Args:
224            data: List of DataFrames containing HAR-UP data
225            names: List of names corresponding to the data
226            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
227            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
228            
229        Returns:
230            List of dictionaries containing sliding windows for each DataFrame
231        """
232        windows_data = []
233        
234        for idx, df in enumerate(data):
235            if df.empty:
236                continue
237                
238            windows = []
239            processed_columns = set()
240            
241            # Only use numeric columns (skip TIME and any non-numeric)
242            sensor_columns = [col for col in df.columns if col not in 
243                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
244                             and pd.api.types.is_numeric_dtype(df[col])]
245            
246
247            # Process each sensor column
248            for col in sensor_columns:
249                if col not in processed_columns:
250                    
251                    window_data = sliding_window(df[col], window_size, step_size)
252                    windows.append({"name": col, "data": window_data})
253                    processed_columns.add(col)
254            
255            # Include activity ID for each window
256            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
257            windows.append({"name": "activity_id", "data": activity_windows})
258            
259            # For each window, take the most common activity ID as the label
260            labels = []
261            for window in activity_windows:
262                # Get most common activity in this window
263                unique_vals, counts = np.unique(window, return_counts=True)
264                most_common_idx = np.argmax(counts)
265                labels.append(unique_vals[most_common_idx])
266            
267            windows.append({"name": "labels", "data": np.array(labels)})
268            
269            windows_data.append({"name": names[idx], "windows": windows})
270        
271        return windows_data
272    
273    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
274                       freq_domain_features: bool = True) -> List[Dict]:
275        """
276        Extract features from sliding windows using HAR-UP feature extraction methods.
277        Args:
278            windows_data: List of dictionaries containing sliding windows
279            time_domain_features: Whether to extract time domain features
280            freq_domain_features: Whether to extract frequency domain features
281        Returns:
282            List of dictionaries containing extracted features
283        """
284        # Mapping from original sensor names to actual CSV column names
285        sensor_map = {
286            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
287            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
288            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
289            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
290            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
291            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
292            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
293            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
294            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
295            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
296            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
297            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
298            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
299            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
300            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
301            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
302            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
303            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
304            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
305            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
306            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
307            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
308            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
309            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
310            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
311            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
312            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
313            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
314            'BrainSensor': 'HELMET_RAW',
315            'Infrared1': 'IR_1',
316            'Infrared2': 'IR_2',
317            'Infrared3': 'IR_3',
318            'Infrared4': 'IR_4',
319        }
320        extractor = HARUPFeatureExtractor(verbose=True)
321        extractor.config['time_domain'] = time_domain_features
322        extractor.config['frequency_domain'] = freq_domain_features
323        all_features = []
324        for window_dict in windows_data:
325            name = window_dict["name"]
326            windows = window_dict["windows"]
327            labels = None
328            for window in windows:
329                if window["name"] == "labels":
330                    labels = window["data"]
331                    break
332            if labels is None:
333                print(f"No labels found for {name}, skipping feature extraction")
334                continue
335            filtered_windows = []
336            missing = []
337            for orig_sensor, csv_col in sensor_map.items():
338                found = False
339                for window in windows:
340                    if window["name"] == csv_col:
341                        filtered_windows.append(window)
342                        found = True
343                        break
344                if not found:
345                    missing.append((orig_sensor, csv_col))
346            if missing:
347                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
348            for window in windows:
349                if window["name"] == "activity_id" or window["name"] == "labels":
350                    filtered_windows.append(window)
351            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
352            for i, feature in enumerate(features):
353                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
354                if window_idx < len(labels):
355                    feature["label"] = labels[window_idx]
356            all_features.append({"name": name, "features": features})
357        return all_features
358    
359    def get_supported_formats(self) -> List[str]:
360        """
361        Get list of supported file formats for HAR-UP dataset.
362        
363        Returns:
364            List of supported file extensions
365        """
366        return ['.csv']
367    
368    def get_sensor_info(self) -> Dict[str, List[str]]:
369        """
370        Get information about sensors in the dataset.
371        
372        Returns:
373            Dictionary containing sensor information
374        """
375        return {
376            'sensors': self.metadata['sensors'],
377            'components': self.metadata['components'],
378            'sampling_frequency': self.metadata['sampling_frequency']
379        }
380    
381    def get_activity_info(self) -> Dict[int, str]:
382        """
383        Get information about activities in the dataset.
384        
385        Returns:
386            Dictionary mapping activity IDs to descriptions
387        """
388        return self.metadata['activities']

HAR-UP dataset loader class.

This class handles loading and processing of the HAR-UP dataset for human activity recognition and fall detection analysis.

HARUPLoader()
34    def __init__(self):
35        super().__init__(
36            name="harup",
37            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition"
38        )
39        self.metadata = {
40            'sensors': [
41                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
42                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
43                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
44                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
45                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
46                'BrainSensor', 'Infrared'
47            ],
48            'components': {
49                'Accelerometer': ['x', 'y', 'z'],
50                'AngularVelocity': ['x', 'y', 'z'],
51                'Luminosity': ['illuminance'],
52                'BrainSensor': ['value'],
53                'Infrared': ['value']
54            },
55            'sampling_frequency': 100,  # Hz
56            'activities': {
57                1: 'Walking',
58                2: 'Walking upstairs',
59                3: 'Walking downstairs',
60                4: 'Sitting',
61                5: 'Standing',
62                6: 'Lying',
63                7: 'Falling forward using hands',
64                8: 'Falling forward using knees',
65                9: 'Falling backwards',
66                10: 'Falling sideward',
67                11: 'Falling sitting in empty chair'
68            }
69        }
70        
71        # Features used in HAR-UP
72        self.features = [
73            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
74            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
75            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
76            'Energy'
77        ]

Initialize the dataset loader.

Args: name: Name of the dataset description: Description of the dataset

metadata
features
def download_harup_data(self, data_dir: str) -> Optional[str]:
 79    def download_harup_data(self, data_dir: str) -> Optional[str]:
 80        """
 81        Download HAR-UP dataset if not already present.
 82        
 83        Args:
 84            data_dir: Directory to store the dataset
 85            
 86        Returns:
 87            Path to the extracted dataset or None if not found
 88        """
 89        # Use the utility function to download and extract the dataset
 90        download_dataset("harup", data_dir)
 91        extract_dataset("harup", data_dir)
 92        
 93        # Check if dataset exists after download attempt
 94        dataset_path = os.path.join(data_dir, "DataSet")
 95        if not os.path.exists(dataset_path):
 96            print("HAR-UP dataset not found after download attempt.")
 97            print("Please ensure the dataset is organized in the following structure:")
 98            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
 99            return None
100        
101        return dataset_path

Download HAR-UP dataset if not already present.

Args: data_dir: Directory to store the dataset

Returns: Path to the extracted dataset or None if not found

def load_data( self, data_dir: str, subjects: Optional[List[int]] = None, activities: Optional[List[int]] = None, trials: Optional[List[int]] = None, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
103    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
104                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
105                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
106        """
107        Load HAR-UP dataset from the specified directory.
108        Args:
109            data_dir: Directory containing the dataset
110            subjects: List of subject IDs to load (default: all subjects)
111            activities: List of activity IDs to load (default: all activities)
112            trials: List of trial IDs to load (default: all trials)
113            **kwargs: Additional arguments
114        Returns:
115            Tuple of (data_list, names_list)
116        """
117        import re
118        import os
119        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
120        if subjects is None:
121            subjects = list(range(1, 5))  # 4 subjects
122        if activities is None:
123            activities = list(range(1, 12))  # 11 activities
124        if trials is None:
125            trials = list(range(1, 4))  # 3 trials
126
127        # Column names as per official HAR-UP documentation
128        columns = [
129            "Timestamp",
130            "EEG_NeuroSky",
131            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
132            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
133            "Belt_Luminosity",
134            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
135            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
136            "Neck_Luminosity",
137            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
138            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
139            "Pocket_Luminosity",
140            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
141            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
142            "Wrist_Luminosity",
143            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
144        ]
145
146        # If data_dir does not exist, trigger interactive download
147        if not os.path.exists(data_dir):
148            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
149            self.download_harup_data(data_dir)
150        # If still doesn't exist, error out
151        if not os.path.exists(data_dir):
152            print(f"Failed to create or download dataset directory: {data_dir}")
153            return [], []
154
155        # Find the UP_Fall_Detection_Dataset directory
156        dataset_path = None
157        for entry in os.listdir(data_dir):
158            entry_path = os.path.join(data_dir, entry)
159            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
160                dataset_path = entry_path
161                break
162        if dataset_path is None:
163            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
164            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
165            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
166            return [], []
167
168        harup_data = []
169        harup_names = []
170
171        # Iterate over subjects
172        for subject_id in subjects:
173            subject_folder = f"Subject_{subject_id:02d}"
174            subject_path = os.path.join(dataset_path, subject_folder)
175            if not os.path.isdir(subject_path):
176                continue
177            
178            # Initialize empty DataFrame for this subject
179            subject_df = pd.DataFrame()
180            
181            # Iterate over activities in order
182            for activity_id in sorted(activities):
183                activity_folder = f"A{activity_id:02d}"
184                activity_path = os.path.join(subject_path, activity_folder)
185                if not os.path.isdir(activity_path):
186                    continue
187                
188                # Iterate over trials in order
189                for trial_id in sorted(trials):
190                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
191                    file_path = os.path.join(activity_path, file_name)
192                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
193                    
194                    try:
195                        df = pd.read_csv(file_path, header=0)
196                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
197                        df['subject_id'] = subject_id
198                        df['activity_id'] = activity_id 
199                        df['trial_id'] = trial_id
200                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
201                        
202                        # Concatenate to subject's DataFrame
203                        subject_df = pd.concat([subject_df, df], ignore_index=True)
204                        harup_names.append(name)
205                        
206                    except Exception as e:
207                        print(f"Error loading {file_path}: {e}")
208            
209            # Add complete subject DataFrame to data list
210            if not subject_df.empty:
211                harup_data.append(subject_df)
212                
213        self.data = harup_data
214        self.names = harup_names
215
216        return harup_data, harup_names

Load HAR-UP dataset from the specified directory.

Args: data_dir: Directory containing the dataset; subjects: List of subject IDs to load (default: all subjects); activities: List of activity IDs to load (default: all activities); trials: List of trial IDs to load (default: all trials); **kwargs: Additional arguments

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 100, step_size: int = 50) -> List[Dict]:
218    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
219                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
220        """
221        Create sliding windows from the HAR-UP dataset.
222        
223        Args:
224            data: List of DataFrames containing HAR-UP data
225            names: List of names corresponding to the data
226            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
227            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
228            
229        Returns:
230            List of dictionaries containing sliding windows for each DataFrame
231        """
232        windows_data = []
233        
234        for idx, df in enumerate(data):
235            if df.empty:
236                continue
237                
238            windows = []
239            processed_columns = set()
240            
241            # Only use numeric columns (skip TIME and any non-numeric)
242            sensor_columns = [col for col in df.columns if col not in 
243                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
244                             and pd.api.types.is_numeric_dtype(df[col])]
245            
246
247            # Process each sensor column
248            for col in sensor_columns:
249                if col not in processed_columns:
250                    
251                    window_data = sliding_window(df[col], window_size, step_size)
252                    windows.append({"name": col, "data": window_data})
253                    processed_columns.add(col)
254            
255            # Include activity ID for each window
256            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
257            windows.append({"name": "activity_id", "data": activity_windows})
258            
259            # For each window, take the most common activity ID as the label
260            labels = []
261            for window in activity_windows:
262                # Get most common activity in this window
263                unique_vals, counts = np.unique(window, return_counts=True)
264                most_common_idx = np.argmax(counts)
265                labels.append(unique_vals[most_common_idx])
266            
267            windows.append({"name": "labels", "data": np.array(labels)})
268            
269            windows_data.append({"name": names[idx], "windows": windows})
270        
271        return windows_data

Create sliding windows from the HAR-UP dataset.

Args: data: List of DataFrames containing HAR-UP data names: List of names corresponding to the data window_size: Size of the sliding window (default: 100 = 1 second at 100Hz) step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)

Returns: List of dictionaries containing sliding windows for each DataFrame

def extract_features( self, windows_data: List[Dict], time_domain_features: bool = True, freq_domain_features: bool = True) -> List[Dict]:
273    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
274                       freq_domain_features: bool = True) -> List[Dict]:
275        """
276        Extract features from sliding windows using HAR-UP feature extraction methods.
277        Args:
278            windows_data: List of dictionaries containing sliding windows
279            time_domain_features: Whether to extract time domain features
280            freq_domain_features: Whether to extract frequency domain features
281        Returns:
282            List of dictionaries containing extracted features
283        """
284        # Mapping from original sensor names to actual CSV column names
285        sensor_map = {
286            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
287            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
288            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
289            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
290            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
291            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
292            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
293            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
294            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
295            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
296            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
297            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
298            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
299            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
300            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
301            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
302            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
303            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
304            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
305            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
306            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
307            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
308            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
309            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
310            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
311            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
312            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
313            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
314            'BrainSensor': 'HELMET_RAW',
315            'Infrared1': 'IR_1',
316            'Infrared2': 'IR_2',
317            'Infrared3': 'IR_3',
318            'Infrared4': 'IR_4',
319        }
320        extractor = HARUPFeatureExtractor(verbose=True)
321        extractor.config['time_domain'] = time_domain_features
322        extractor.config['frequency_domain'] = freq_domain_features
323        all_features = []
324        for window_dict in windows_data:
325            name = window_dict["name"]
326            windows = window_dict["windows"]
327            labels = None
328            for window in windows:
329                if window["name"] == "labels":
330                    labels = window["data"]
331                    break
332            if labels is None:
333                print(f"No labels found for {name}, skipping feature extraction")
334                continue
335            filtered_windows = []
336            missing = []
337            for orig_sensor, csv_col in sensor_map.items():
338                found = False
339                for window in windows:
340                    if window["name"] == csv_col:
341                        filtered_windows.append(window)
342                        found = True
343                        break
344                if not found:
345                    missing.append((orig_sensor, csv_col))
346            if missing:
347                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
348            for window in windows:
349                if window["name"] == "activity_id" or window["name"] == "labels":
350                    filtered_windows.append(window)
351            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
352            for i, feature in enumerate(features):
353                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
354                if window_idx < len(labels):
355                    feature["label"] = labels[window_idx]
356            all_features.append({"name": name, "features": features})
357        return all_features

Extract features from sliding windows using HAR-UP feature extraction methods.

Args: windows_data: List of dictionaries containing sliding windows; time_domain_features: Whether to extract time domain features; freq_domain_features: Whether to extract frequency domain features

Returns: List of dictionaries containing extracted features

def get_supported_formats(self) -> List[str]:
359    def get_supported_formats(self) -> List[str]:
360        """
361        Get list of supported file formats for HAR-UP dataset.
362        
363        Returns:
364            List of supported file extensions
365        """
366        return ['.csv']

Get list of supported file formats for HAR-UP dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
368    def get_sensor_info(self) -> Dict[str, List[str]]:
369        """
370        Get information about sensors in the dataset.
371        
372        Returns:
373            Dictionary containing sensor information
374        """
375        return {
376            'sensors': self.metadata['sensors'],
377            'components': self.metadata['components'],
378            'sampling_frequency': self.metadata['sampling_frequency']
379        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> Dict[int, str]:
381    def get_activity_info(self) -> Dict[int, str]:
382        """
383        Get information about activities in the dataset.
384        
385        Returns:
386            Dictionary mapping activity IDs to descriptions
387        """
388        return self.metadata['activities']

Get information about activities in the dataset.

Returns: Dictionary mapping activity IDs to descriptions

class UrFallLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 24class UrFallLoader(BaseDatasetLoader):
 25    """
 26    UrFall dataset loader class.
 27    
 28    This class handles loading and processing of the UrFall dataset for fall detection.
 29    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
 30    and pre-extracted features from depth maps.
 31    """
 32    
 33    def __init__(self):
 34        super().__init__(
 35            name="urfall",
 36            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data"
 37        )
 38        self.metadata = {
 39            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
 40            'camera': 'cam0',  # Front camera
 41            'sampling_frequency': 30,  # Depth/RGB camera fps
 42            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
 43            'activities': {
 44                -1: 'Not lying (standing/walking)',
 45                0: 'Falling (transient)',
 46                1: 'Lying on ground'
 47            },
 48            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
 49            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
 50            'feature_columns': [
 51                'sequence_name',
 52                'frame_number',
 53                'label',
 54                'HeightWidthRatio',
 55                'MajorMinorRatio',
 56                'BoundingBoxOccupancy',
 57                'MaxStdXZ',
 58                'HHmaxRatio',
 59                'H',
 60                'D',
 61                'P40'
 62            ],
 63            'feature_descriptions': {
 64                'HeightWidthRatio': 'Bounding box height to width ratio',
 65                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
 66                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
 67                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
 68                'HHmaxRatio': 'Human height in frame to standing height ratio',
 69                'H': 'Actual height in mm',
 70                'D': 'Distance of person center to floor in mm',
 71                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
 72            }
 73        }
 74    
 75    def load_data(self, data_dir: str, 
 76                  data_types: Optional[List[str]] = None,
 77                  sequences: Optional[List[str]] = None,
 78                  use_falls: bool = True,
 79                  use_adls: bool = True,
 80                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 81        """
 82        Load UrFall dataset from the specified directory.
 83        
 84        Args:
 85            data_dir: Directory containing the dataset
 86            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 87                       'synchronization', 'video', 'features' (default: ['features'])
 88            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 89                      If None, loads all based on use_falls and use_adls
 90            use_falls: Whether to load fall sequences (default: True)
 91            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 92            **kwargs: Additional arguments
 93            
 94        Returns:
 95            Tuple of (data_list, names_list)
 96        """
 97        # Default to loading pre-extracted features if not specified
 98        if data_types is None:
 99            data_types = ['features']
100        
101        # Validate data types
102        valid_types = set(self.metadata['data_types'])
103        requested_types = set(data_types)
104        invalid_types = requested_types - valid_types
105        if invalid_types:
106            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
107        
108        # Create directory if it doesn't exist
109        os.makedirs(data_dir, exist_ok=True)
110        
111        data_list = []
112        names_list = []
113        
114        # Load pre-extracted features (CSV files)
115        if 'features' in data_types:
116            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
117            data_list.extend(features_data)
118            names_list.extend(features_names)
119        
120        # Load raw accelerometer data
121        if 'accelerometer' in data_types:
122            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
123            data_list.extend(accel_data)
124            names_list.extend(accel_names)
125        
126        # Load synchronization data
127        if 'synchronization' in data_types:
128            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
129            data_list.extend(sync_data)
130            names_list.extend(sync_names)
131        
132        # Note: Depth, RGB, and Video data are image/video files
133        # These would require specialized loading and are not typically loaded into DataFrames
134        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
135            print("Note: Depth, RGB, and Video data types contain image/video files.")
136            print("These are not loaded into DataFrames but their paths can be accessed.")
137            print("Use the get_file_paths() method to retrieve paths to these files.")
138        
139        self.data = data_list
140        return data_list, names_list
141    
142    def _load_features(self, data_dir: str, sequences: Optional[List[str]], 
143                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
144        """
145        Load pre-extracted features from CSV files.
146        
147        Args:
148            data_dir: Directory containing the dataset
149            sequences: Specific sequences to load
150            use_falls: Whether to include fall sequences
151            use_adls: Whether to include ADL sequences
152            
153        Returns:
154            Tuple of (data_list, names_list)
155        """
156        data_list = []
157        names_list = []
158        
159        # Load falls features
160        if use_falls:
161            falls_csv = os.path.join(data_dir, "urfall-cam0-falls.csv")
162            if os.path.exists(falls_csv):
163                df = pd.read_csv(falls_csv, header=None, names=self.metadata['feature_columns'])
164                
165                # Filter by specific sequences if provided
166                if sequences is not None:
167                    fall_sequences = [s for s in sequences if s.startswith('fall-')]
168                    if fall_sequences:
169                        df = df[df['sequence_name'].isin(fall_sequences)]
170                
171                # Add metadata columns
172                df['activity_type'] = 'fall'
173                df['activity_id'] = 1  # Falls are labeled as 1
174                
175                data_list.append(df)
176                names_list.append("urfall-cam0-falls")
177            else:
178                print(f"Warning: Falls features file not found at {falls_csv}")
179        
180        # Load ADLs features
181        if use_adls:
182            adls_csv = os.path.join(data_dir, "urfall-cam0-adls.csv")
183            if os.path.exists(adls_csv):
184                df = pd.read_csv(adls_csv, header=None, names=self.metadata['feature_columns'])
185                
186                # Filter by specific sequences if provided
187                if sequences is not None:
188                    adl_sequences = [s for s in sequences if s.startswith('adl-')]
189                    if adl_sequences:
190                        df = df[df['sequence_name'].isin(adl_sequences)]
191                
192                # Add metadata columns
193                df['activity_type'] = 'adl'
194                df['activity_id'] = 0  # ADLs are labeled as 0
195                
196                data_list.append(df)
197                names_list.append("urfall-cam0-adls")
198            else:
199                print(f"Warning: ADLs features file not found at {adls_csv}")
200        
201        return data_list, names_list
202    
203    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
204                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
205        """
206        Load accelerometer CSV data files.
207        
208        Args:
209            data_dir: Directory containing the dataset
210            sequences: Specific sequences to load
211            use_falls: Whether to include fall sequences
212            use_adls: Whether to include ADL sequences
213            
214        Returns:
215            Tuple of (data_list, names_list)
216        """
217        data_list = []
218        names_list = []
219        
220        # Determine which sequences to load
221        seq_list = []
222        if sequences is not None:
223            seq_list = sequences
224        else:
225            if use_falls:
226                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
227            if use_adls:
228                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
229        
230        # Load accelerometer data for each sequence
231        for seq in seq_list:
232            accel_file = os.path.join(data_dir, f"{seq}-acc.csv")
233            if os.path.exists(accel_file):
234                try:
235                    df = pd.read_csv(accel_file)
236                    df['sequence_name'] = seq
237                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
238                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
239                    data_list.append(df)
240                    names_list.append(f"{seq}-accelerometer")
241                except Exception as e:
242                    print(f"Warning: Could not load accelerometer data from {accel_file}: {e}")
243        
244        return data_list, names_list
245    
246    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
247                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
248        """
249        Load synchronization CSV data files.
250        
251        Args:
252            data_dir: Directory containing the dataset
253            sequences: Specific sequences to load
254            use_falls: Whether to include fall sequences
255            use_adls: Whether to include ADL sequences
256            
257        Returns:
258            Tuple of (data_list, names_list)
259        """
260        data_list = []
261        names_list = []
262        
263        # Determine which sequences to load
264        seq_list = []
265        if sequences is not None:
266            seq_list = sequences
267        else:
268            if use_falls:
269                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
270            if use_adls:
271                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
272        
273        # Load synchronization data for each sequence
274        for seq in seq_list:
275            sync_file = os.path.join(data_dir, f"{seq}-data.csv")
276            if os.path.exists(sync_file):
277                try:
278                    df = pd.read_csv(sync_file)
279                    df['sequence_name'] = seq
280                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
281                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
282                    data_list.append(df)
283                    names_list.append(f"{seq}-synchronization")
284                except Exception as e:
285                    print(f"Warning: Could not load synchronization data from {sync_file}: {e}")
286        
287        return data_list, names_list
288    
289    def get_file_paths(self, data_dir: str, data_type: str, 
290                       sequences: Optional[List[str]] = None,
291                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
292        """
293        Get file paths for image/video data types (depth, RGB, video).
294        
295        Args:
296            data_dir: Directory containing the dataset
297            data_type: Type of data ('depth', 'rgb', 'video')
298            sequences: Specific sequences to get paths for
299            use_falls: Whether to include fall sequences
300            use_adls: Whether to include ADL sequences
301            
302        Returns:
303            Dictionary mapping sequence names to file paths
304        """
305        if data_type not in ['depth', 'rgb', 'video']:
306            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
307        
308        file_paths = {}
309        
310        # Determine which sequences to include
311        seq_list = []
312        if sequences is not None:
313            seq_list = sequences
314        else:
315            if use_falls:
316                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
317            if use_adls:
318                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
319        
320        # Map data type to file extension
321        extension_map = {
322            'depth': '-cam0-d.zip',
323            'rgb': '-cam0-rgb.zip',
324            'video': '-cam0.mp4'
325        }
326        
327        ext = extension_map[data_type]
328        
329        for seq in seq_list:
330            file_path = os.path.join(data_dir, f"{seq}{ext}")
331            if os.path.exists(file_path):
332                file_paths[seq] = file_path
333        
334        return file_paths
335    
336    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
337                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
338        """
339        Create sliding windows from the loaded data.
340        
341        Args:
342            data: List of DataFrames containing the dataset
343            names: List of names corresponding to each DataFrame
344            window_size: Size of the sliding window (default: 30 frames for depth features)
345            step_size: Step size for sliding window (default: 15 frames)
346            
347        Returns:
348            List of dictionaries containing windowed data
349        """
350        windows_data = []
351        
352        for idx, df in enumerate(data):
353            if df.empty:
354                continue
355            
356            # Get numeric feature columns (exclude metadata columns)
357            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
358            feature_cols = [col for col in df.columns 
359                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
360            
361            if not feature_cols:
362                continue
363            
364            windows = []
365            
366            # Create windows for each feature column
367            for col in feature_cols:
368                win = sliding_window(df[col].values, window_size, step_size)
369                windows.append({"name": col, "data": win})
370            
371            # Create windows for labels if present
372            if 'label' in df.columns:
373                label_windows = sliding_window(df['label'].values, window_size, step_size)
374                # Majority voting for each window
375                labels = []
376                for w in label_windows:
377                    vals, counts = np.unique(w, return_counts=True)
378                    labels.append(vals[np.argmax(counts)])
379                windows.append({"name": "labels", "data": np.array(labels)})
380            
381            # Create activity_id windows
382            if 'activity_id' in df.columns:
383                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
384                windows.append({"name": "activity_id", "data": activity_windows})
385            
386            windows_data.append({"name": names[idx], "windows": windows})
387        
388        return windows_data
389    
390    def get_supported_formats(self) -> List[str]:
391        """
392        Get list of supported file formats for UrFall dataset.
393        
394        Returns:
395            List of supported file extensions
396        """
397        return ['.csv', '.zip', '.mp4']
398    
399    def get_sensor_info(self) -> Dict[str, any]:
400        """
401        Get information about sensors in the dataset.
402        
403        Returns:
404            Dictionary containing sensor information
405        """
406        return {
407            'data_types': self.metadata['data_types'],
408            'camera': self.metadata['camera'],
409            'sampling_frequency': self.metadata['sampling_frequency'],
410            'accelerometer_frequency': self.metadata['accelerometer_frequency']
411        }
412    
413    def get_activity_info(self) -> Dict[int, str]:
414        """
415        Get information about activities in the dataset.
416        
417        Returns:
418            Dictionary mapping activity IDs to labels
419        """
420        return self.metadata['activities']
421    
422    def get_feature_info(self) -> Dict[str, str]:
423        """
424        Get information about pre-extracted features.
425        
426        Returns:
427            Dictionary mapping feature names to descriptions
428        """
429        return self.metadata['feature_descriptions']

UrFall dataset loader class.

This class handles loading and processing of the UrFall dataset for fall detection. Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video, and pre-extracted features from depth maps.

UrFallLoader()
33    def __init__(self):
34        super().__init__(
35            name="urfall",
36            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data"
37        )
38        self.metadata = {
39            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
40            'camera': 'cam0',  # Front camera
41            'sampling_frequency': 30,  # Depth/RGB camera fps
42            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
43            'activities': {
44                -1: 'Not lying (standing/walking)',
45                0: 'Falling (transient)',
46                1: 'Lying on ground'
47            },
48            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
49            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
50            'feature_columns': [
51                'sequence_name',
52                'frame_number',
53                'label',
54                'HeightWidthRatio',
55                'MajorMinorRatio',
56                'BoundingBoxOccupancy',
57                'MaxStdXZ',
58                'HHmaxRatio',
59                'H',
60                'D',
61                'P40'
62            ],
63            'feature_descriptions': {
64                'HeightWidthRatio': 'Bounding box height to width ratio',
65                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
66                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
67                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
68                'HHmaxRatio': 'Human height in frame to standing height ratio',
69                'H': 'Actual height in mm',
70                'D': 'Distance of person center to floor in mm',
71                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
72            }
73        }

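Initialize the UrFall dataset loader.

Takes no arguments: the loader name ("urfall") and its description are fixed and passed on to the base class, and the dataset metadata (data types, camera, sampling frequencies, activity labels, sequence numbers, feature columns and feature descriptions) is filled in.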

metadata
def load_data( self, data_dir: str, data_types: Optional[List[str]] = None, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
 75    def load_data(self, data_dir: str, 
 76                  data_types: Optional[List[str]] = None,
 77                  sequences: Optional[List[str]] = None,
 78                  use_falls: bool = True,
 79                  use_adls: bool = True,
 80                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 81        """
 82        Load UrFall dataset from the specified directory.
 83        
 84        Args:
 85            data_dir: Directory containing the dataset
 86            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 87                       'synchronization', 'video', 'features' (default: ['features'])
 88            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 89                      If None, loads all based on use_falls and use_adls
 90            use_falls: Whether to load fall sequences (default: True)
 91            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 92            **kwargs: Additional arguments
 93            
 94        Returns:
 95            Tuple of (data_list, names_list)
 96        """
 97        # Default to loading pre-extracted features if not specified
 98        if data_types is None:
 99            data_types = ['features']
100        
101        # Validate data types
102        valid_types = set(self.metadata['data_types'])
103        requested_types = set(data_types)
104        invalid_types = requested_types - valid_types
105        if invalid_types:
106            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
107        
108        # Create directory if it doesn't exist
109        os.makedirs(data_dir, exist_ok=True)
110        
111        data_list = []
112        names_list = []
113        
114        # Load pre-extracted features (CSV files)
115        if 'features' in data_types:
116            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
117            data_list.extend(features_data)
118            names_list.extend(features_names)
119        
120        # Load raw accelerometer data
121        if 'accelerometer' in data_types:
122            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
123            data_list.extend(accel_data)
124            names_list.extend(accel_names)
125        
126        # Load synchronization data
127        if 'synchronization' in data_types:
128            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
129            data_list.extend(sync_data)
130            names_list.extend(sync_names)
131        
132        # Note: Depth, RGB, and Video data are image/video files
133        # These would require specialized loading and are not typically loaded into DataFrames
134        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
135            print("Note: Depth, RGB, and Video data types contain image/video files.")
136            print("These are not loaded into DataFrames but their paths can be accessed.")
137            print("Use the get_file_paths() method to retrieve paths to these files.")
138        
139        self.data = data_list
140        return data_list, names_list

Load UrFall dataset from the specified directory.

Args:
- data_dir: Directory containing the dataset
- data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features'])
- sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']). If None, loads all based on use_falls and use_adls
- use_falls: Whether to load fall sequences (default: True)
- use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
- **kwargs: Additional arguments

Returns: Tuple of (data_list, names_list)

def get_file_paths( self, data_dir: str, data_type: str, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
289    def get_file_paths(self, data_dir: str, data_type: str, 
290                       sequences: Optional[List[str]] = None,
291                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
292        """
293        Get file paths for image/video data types (depth, RGB, video).
294        
295        Args:
296            data_dir: Directory containing the dataset
297            data_type: Type of data ('depth', 'rgb', 'video')
298            sequences: Specific sequences to get paths for
299            use_falls: Whether to include fall sequences
300            use_adls: Whether to include ADL sequences
301            
302        Returns:
303            Dictionary mapping sequence names to file paths
304        """
305        if data_type not in ['depth', 'rgb', 'video']:
306            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
307        
308        file_paths = {}
309        
310        # Determine which sequences to include
311        seq_list = []
312        if sequences is not None:
313            seq_list = sequences
314        else:
315            if use_falls:
316                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
317            if use_adls:
318                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
319        
320        # Map data type to file extension
321        extension_map = {
322            'depth': '-cam0-d.zip',
323            'rgb': '-cam0-rgb.zip',
324            'video': '-cam0.mp4'
325        }
326        
327        ext = extension_map[data_type]
328        
329        for seq in seq_list:
330            file_path = os.path.join(data_dir, f"{seq}{ext}")
331            if os.path.exists(file_path):
332                file_paths[seq] = file_path
333        
334        return file_paths

Get file paths for image/video data types (depth, RGB, video).

Args:
- data_dir: Directory containing the dataset
- data_type: Type of data ('depth', 'rgb', 'video')
- sequences: Specific sequences to get paths for
- use_falls: Whether to include fall sequences
- use_adls: Whether to include ADL sequences

Returns: Dictionary mapping sequence names to file paths

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 30, step_size: int = 15) -> List[Dict]:
336    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
337                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
338        """
339        Create sliding windows from the loaded data.
340        
341        Args:
342            data: List of DataFrames containing the dataset
343            names: List of names corresponding to each DataFrame
344            window_size: Size of the sliding window (default: 30 frames for depth features)
345            step_size: Step size for sliding window (default: 15 frames)
346            
347        Returns:
348            List of dictionaries containing windowed data
349        """
350        windows_data = []
351        
352        for idx, df in enumerate(data):
353            if df.empty:
354                continue
355            
356            # Get numeric feature columns (exclude metadata columns)
357            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
358            feature_cols = [col for col in df.columns 
359                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
360            
361            if not feature_cols:
362                continue
363            
364            windows = []
365            
366            # Create windows for each feature column
367            for col in feature_cols:
368                win = sliding_window(df[col].values, window_size, step_size)
369                windows.append({"name": col, "data": win})
370            
371            # Create windows for labels if present
372            if 'label' in df.columns:
373                label_windows = sliding_window(df['label'].values, window_size, step_size)
374                # Majority voting for each window
375                labels = []
376                for w in label_windows:
377                    vals, counts = np.unique(w, return_counts=True)
378                    labels.append(vals[np.argmax(counts)])
379                windows.append({"name": "labels", "data": np.array(labels)})
380            
381            # Create activity_id windows
382            if 'activity_id' in df.columns:
383                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
384                windows.append({"name": "activity_id", "data": activity_windows})
385            
386            windows_data.append({"name": names[idx], "windows": windows})
387        
388        return windows_data

Create sliding windows from the loaded data.

Args:
- data: List of DataFrames containing the dataset
- names: List of names corresponding to each DataFrame
- window_size: Size of the sliding window (default: 30 frames for depth features)
- step_size: Step size for sliding window (default: 15 frames)

Returns: List of dictionaries containing windowed data

def get_supported_formats(self) -> List[str]:
390    def get_supported_formats(self) -> List[str]:
391        """
392        Get list of supported file formats for UrFall dataset.
393        
394        Returns:
395            List of supported file extensions
396        """
397        return ['.csv', '.zip', '.mp4']

Get list of supported file formats for UrFall dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, <built-in function any>]:
399    def get_sensor_info(self) -> Dict[str, any]:
400        """
401        Get information about sensors in the dataset.
402        
403        Returns:
404            Dictionary containing sensor information
405        """
406        return {
407            'data_types': self.metadata['data_types'],
408            'camera': self.metadata['camera'],
409            'sampling_frequency': self.metadata['sampling_frequency'],
410            'accelerometer_frequency': self.metadata['accelerometer_frequency']
411        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> Dict[int, str]:
413    def get_activity_info(self) -> Dict[int, str]:
414        """
415        Get information about activities in the dataset.
416        
417        Returns:
418            Dictionary mapping activity IDs to labels
419        """
420        return self.metadata['activities']

Get information about activities in the dataset.

Returns: Dictionary mapping activity IDs to labels

def get_feature_info(self) -> Dict[str, str]:
422    def get_feature_info(self) -> Dict[str, str]:
423        """
424        Get information about pre-extracted features.
425        
426        Returns:
427            Dictionary mapping feature names to descriptions
428        """
429        return self.metadata['feature_descriptions']

Get information about pre-extracted features.

Returns: Dictionary mapping feature names to descriptions

def load_daphnet_data(data_dir: str):
def load_daphnet_data(data_dir: str):
    """
    Load Daphnet data through the legacy function interface.

    A new DaphnetLoader is created on every call and the work is delegated
    to its ``load_data`` method.

    Args:
        data_dir: Directory handed on to ``DaphnetLoader.load_data``

    Returns:
        The result of ``DaphnetLoader.load_data``, documented as a tuple of
        (data_list, names_list)
    """
    return DaphnetLoader().load_data(data_dir)

Legacy function for loading Daphnet data.

Args: data_dir: Directory to store the dataset

Returns: Tuple of (data_list, names_list)

def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
    """
    Build sliding windows from Daphnet data through the legacy interface.

    A new DaphnetLoader is created on every call; all four arguments are
    forwarded positionally to its ``create_sliding_windows`` method.

    Args:
        daphnet: List of dataframes containing Daphnet data
        daphnet_names: List of names of the Daphnet dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        The loader's result, documented as a list of dictionaries containing
        sliding windows for each DataFrame
    """
    window_args = (daphnet, daphnet_names, window_size, step_size)
    return DaphnetLoader().create_sliding_windows(*window_args)

Legacy function for creating sliding windows.

Args: daphnet: List of dataframes containing Daphnet data; daphnet_names: List of names of the Daphnet dataframes; window_size: Size of the sliding window; step_size: Step size for the sliding window

Returns: List of dictionaries containing sliding windows for each DataFrame

def load_mobifall_data():
def load_mobifall_data():
    """
    Load MobiFall data through the legacy function interface.

    A new MobiFallLoader is created on every call and its ``load_data``
    method is invoked with an empty string as the data directory.

    Returns:
        The loader's result, documented as a tuple of (data_list, names_list)
    """
    return MobiFallLoader().load_data("")

Legacy function for loading MobiFall data.

Returns: Tuple of (data_list, names_list)

def load_arduous_data():
def load_arduous_data():
    """
    Load Arduous data through the legacy function interface.

    A new ArduousLoader is created on every call and its ``load_data``
    method is invoked with an empty string as the data directory.

    Returns:
        The loader's result, documented as a tuple of (data_list, names_list)
    """
    return ArduousLoader().load_data("")

Legacy function for loading Arduous data.

Returns: Tuple of (data_list, names_list)

def load_physionet_data(data_dir: str) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
def load_physionet_data(data_dir: str) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load PhysioNet data through the legacy function interface.

    A new PhysioNetLoader is created on every call and the work is
    delegated to its ``load_data`` method.

    Args:
        data_dir: Directory containing the dataset

    Returns:
        Tuple of (data_list, names_list) as produced by the loader
    """
    return PhysioNetLoader().load_data(data_dir)

Legacy function to load PhysioNet data.

Args: data_dir: Directory containing the dataset

Returns: Tuple of (data_list, names_list)

def create_physionet_windows( data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 600, step_size: int = 100) -> List[Dict]:
def create_physionet_windows(
    data: List[pd.DataFrame],
    names: List[str],
    window_size: int = 600,
    step_size: int = 100,
) -> List[Dict]:
    """
    Build sliding windows from PhysioNet data through the legacy interface.

    A new PhysioNetLoader is created on every call; all four arguments are
    forwarded positionally to its ``create_sliding_windows`` method.

    Args:
        data: List of DataFrames
        names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of sliding window dictionaries as produced by the loader
    """
    window_args = (data, names, window_size, step_size)
    return PhysioNetLoader().create_sliding_windows(*window_args)

Legacy function to create sliding windows from PhysioNet data.

Args: data: List of DataFrames; names: List of names; window_size: Size of sliding window; step_size: Step size for sliding window

Returns: List of sliding window dictionaries

def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
    """
    Load HAR-UP data through the legacy function interface.

    A new HARUPLoader is created on every call; the directory and the three
    optional filters are forwarded positionally to its ``load_data`` method.

    Args:
        data_dir: Directory containing the dataset
        subjects: List of subject IDs to load (default: all subjects)
        activities: List of activity IDs to load (default: all activities)
        trials: List of trial IDs to load (default: all trials)

    Returns:
        The loader's result, documented as a tuple of (data_list, names_list)
    """
    filters = (subjects, activities, trials)
    return HARUPLoader().load_data(data_dir, *filters)

Legacy function for loading HAR-UP data.

Args: data_dir: Directory containing the dataset; subjects: List of subject IDs to load (default: all subjects); activities: List of activity IDs to load (default: all activities); trials: List of trial IDs to load (default: all trials)

Returns: Tuple of (data_list, names_list)

def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
    """
    Build sliding windows from HAR-UP data through the legacy interface.

    A new HARUPLoader is created on every call; all four arguments are
    forwarded positionally to its ``create_sliding_windows`` method.

    Args:
        harup_data: List of dataframes containing HAR-UP data
        harup_names: List of names of the HAR-UP dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        The loader's result, documented as a list of dictionaries containing
        sliding windows for each DataFrame
    """
    window_args = (harup_data, harup_names, window_size, step_size)
    return HARUPLoader().create_sliding_windows(*window_args)

Legacy function for creating sliding windows from HAR-UP data.

Args: harup_data: List of dataframes containing HAR-UP data; harup_names: List of names of the HAR-UP dataframes; window_size: Size of the sliding window; step_size: Step size for the sliding window

Returns: List of dictionaries containing sliding windows for each DataFrame

def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
    """
    Extract features from HAR-UP windows through the legacy interface.

    A new HARUPLoader is created on every call; the three arguments are
    forwarded positionally to its ``extract_features`` method.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain: Whether to extract time domain features
        freq_domain: Whether to extract frequency domain features

    Returns:
        The loader's result, documented as a list of dictionaries containing
        extracted features
    """
    domain_flags = (time_domain, freq_domain)
    return HARUPLoader().extract_features(windows_data, *domain_flags)

Legacy function for extracting features from HAR-UP windows.

Args: windows_data: List of dictionaries containing sliding windows; time_domain: Whether to extract time domain features; freq_domain: Whether to extract frequency domain features

Returns: List of dictionaries containing extracted features

def load_urfall_data( data_dir: str, data_types: Optional[List[str]] = None, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True):
def load_urfall_data(
    data_dir: str,
    data_types: Optional[List[str]] = None,
    sequences: Optional[List[str]] = None,
    use_falls: bool = True,
    use_adls: bool = True,
):
    """
    Load the UrFall dataset through the legacy function interface.

    A new UrFallLoader is created on every call. ``data_dir`` is passed
    positionally to its ``load_data`` method and the remaining options are
    passed by keyword under their own names.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        The loader's result, documented as a tuple of (data_list, names_list)
    """
    options = {
        "data_types": data_types,
        "sequences": sequences,
        "use_falls": use_falls,
        "use_adls": use_adls,
    }
    return UrFallLoader().load_data(data_dir, **options)

Load UrFall dataset using the legacy function interface.

Args: data_dir: Directory containing the dataset; data_types: List of data types to load; sequences: List of specific sequences to load; use_falls: Whether to load fall sequences; use_adls: Whether to load ADL sequences

Returns: Tuple of (data_list, names_list)

def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Build sliding windows from UrFall data through the legacy interface.

    A new UrFallLoader is created on every call; all four arguments are
    forwarded positionally to its ``create_sliding_windows`` method.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        The loader's result, documented as a list of dictionaries containing
        windowed data
    """
    window_args = (urfall_data, urfall_names, window_size, step_size)
    return UrFallLoader().create_sliding_windows(*window_args)

Create sliding windows from UrFall data using the legacy function interface.

Args: urfall_data: List of DataFrames; urfall_names: List of names; window_size: Size of sliding window; step_size: Step size for sliding window

Returns: List of dictionaries containing windowed data

def download_dataset(dataset_name, data_dir):
def download_dataset(dataset_name, data_dir):
    """Download the dataset.

    Args:
        dataset_name: One of "daphnet", "mobifall", "arduous", "harup",
            "urfall" or "physionet"
        data_dir: Directory passed to the dataset-specific download helper

    Raises:
        ValueError: If ``dataset_name`` is none of the names listed above
    """
    # Ordered (name, action) pairs. Each action is a lambda so that a helper
    # is only looked up and called once its dataset name has matched.
    actions = (
        ("daphnet", lambda: download_daphnet_data(data_dir)),
        ("mobifall", lambda: download_mobifall_data(data_dir)),
        ("arduous", lambda: download_arduous_data(data_dir)),
        ("harup", lambda: download_harup_data(data_dir)),
        ("urfall", lambda: download_urfall_data(data_dir)),
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        ("physionet", lambda: None),
    )
    for supported_name, action in actions:
        if dataset_name == supported_name:
            action()
            return
    raise ValueError(f"Dataset {dataset_name} not supported.")

Download the dataset.

def extract_dataset(dataset_name, data_dir):
def extract_dataset(dataset_name, data_dir):
    """Extract the dataset.

    Args:
        dataset_name: One of "daphnet", "mobifall", "arduous", "harup",
            "urfall" or "physionet"
        data_dir: Directory passed to the dataset-specific extraction helper

    Raises:
        ValueError: If ``dataset_name`` is none of the names listed above
    """
    # Ordered (name, action) pairs. Each action is a lambda so that a helper
    # is only looked up and called once its dataset name has matched.
    actions = (
        ("daphnet", lambda: extract_daphnet_data(data_dir)),
        ("mobifall", lambda: extract_mobifall_data(data_dir)),
        ("arduous", lambda: extract_arduous_data(data_dir)),
        ("harup", lambda: extract_harup_data(data_dir)),
        ("urfall", lambda: extract_urfall_data(data_dir)),
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        ("physionet", lambda: None),
    )
    for supported_name, action in actions:
        if dataset_name == supported_name:
            action()
            return
    raise ValueError(f"Dataset {dataset_name} not supported.")

Extract the dataset.

def sliding_window(data, window_size, step_size):
def sliding_window(data, window_size, step_size):
    """
    Cut ``data`` into fixed-length, possibly overlapping slices.

    Window ``i`` is ``data[i * step_size : i * step_size + window_size]``.
    Only full-length windows are produced; a trailing remainder shorter than
    ``window_size`` is dropped.

    Args:
        data: Any object supporting ``len()`` and slicing
        window_size: Number of elements per window
        step_size: Offset between the starts of consecutive windows

    Returns:
        List of slices of ``data``; empty when ``data`` is shorter than
        ``window_size``
    """
    window_count = (len(data) - window_size) // step_size + 1
    return [
        data[index * step_size:index * step_size + window_size]
        for index in range(window_count)
    ]
def get_dataset_manager():
def get_dataset_manager():
    """Get the singleton DatasetManager instance.

    Returns:
        The object produced by calling ``DatasetManager()``
    """
    manager = DatasetManager()
    return manager

Get the singleton DatasetManager instance.

def get_available_datasets():
def get_available_datasets():
    """Get list of available dataset names.

    Returns:
        The result of ``DatasetManager().get_available_components()``
    """
    manager = DatasetManager()
    return manager.get_available_components()

Get list of available dataset names.

def load_dataset(name: str, data_dir: str, **kwargs):
def load_dataset(name: str, data_dir: str, **kwargs):
    """
    Load a dataset by delegating to the DatasetManager.

    Args:
        name: Name of the dataset loader
        data_dir: Directory containing the dataset
        **kwargs: Additional arguments forwarded unchanged to
            ``DatasetManager.load_dataset``

    Returns:
        The manager's result, documented as the dataset loader instance with
        loaded data
    """
    manager = DatasetManager()
    return manager.load_dataset(name, data_dir, **kwargs)

Load a dataset using the DatasetManager.

Args: name: Name of the dataset loader; data_dir: Directory containing the dataset; **kwargs: Additional arguments for the loader

Returns: Dataset loader instance with loaded data