gaitsetpy.dataset
dataset: Handles loading and processing of supported datasets.
This module provides both the new class-based dataset loaders and the legacy function-based API. All dataset loaders inherit from BaseDatasetLoader and are registered with the DatasetManager.
Supported datasets:
- Daphnet: Freezing of Gait dataset
- MobiFall: Fall detection dataset
- Arduous: Daily activity recognition dataset
- PhysioNet: VGRF dataset for Parkinson's disease gait analysis
- HAR-UP: Multimodal System for Fall Detection and Human Activity Recognition
- UrFall: University of Rzeszow Fall Detection Dataset with multimodal data
"""
dataset: Handles loading and processing of supported datasets.

This package exposes two APIs side by side: class-based loaders (all derived
from BaseDatasetLoader and registered with the DatasetManager) and the legacy
function-based helpers that are kept for backward compatibility.

Supported datasets:
- Daphnet: Freezing of Gait dataset
- MobiFall: Fall detection dataset
- Arduous: Daily activity recognition dataset
- PhysioNet: VGRF dataset for Parkinson's disease gait analysis
- HAR-UP: Multimodal System for Fall Detection and Human Activity Recognition
- UrFall: University of Rzeszow Fall Detection Dataset with multimodal data

"""

# Loader classes and their legacy function counterparts, one module per dataset.
from .daphnet import DaphnetLoader, load_daphnet_data, create_sliding_windows
from .mobifall import MobiFallLoader, load_mobifall_data
from .arduous import ArduousLoader, load_arduous_data
from .physionet import PhysioNetLoader, load_physionet_data, create_physionet_windows
from .harup import HARUPLoader, load_harup_data, create_harup_windows, extract_harup_features
from .urfall import UrFallLoader, load_urfall_data, create_urfall_windows
from .utils import download_dataset, extract_dataset, sliding_window

# Manager used as the registry for loader classes.
from ..core.managers import DatasetManager


def _register_datasets():
    """Register every bundled loader class with the DatasetManager under its short name."""
    registry = DatasetManager()
    # Registration order is kept identical to the order datasets are listed above.
    bundled_loaders = (
        ("daphnet", DaphnetLoader),
        ("mobifall", MobiFallLoader),
        ("arduous", ArduousLoader),
        ("physionet", PhysioNetLoader),
        ("harup", HARUPLoader),
        ("urfall", UrFallLoader),
    )
    for dataset_name, loader_cls in bundled_loaders:
        registry.register_dataset(dataset_name, loader_cls)


# Import-time side effect: make all loaders discoverable as soon as the package loads.
_register_datasets()


def get_dataset_manager():
    """Return ``DatasetManager()`` (described upstream as the singleton manager instance)."""
    manager = DatasetManager()
    return manager


def get_available_datasets():
    """Return whatever ``DatasetManager().get_available_components()`` reports as available."""
    manager = DatasetManager()
    return manager.get_available_components()


def load_dataset(name: str, data_dir: str, **kwargs):
    """
    Load a dataset by delegating to ``DatasetManager().load_dataset``.

    Args:
        name: Name of the dataset loader
        data_dir: Directory containing the dataset
        **kwargs: Extra arguments forwarded unchanged to the manager

    Returns:
        Whatever the manager returns (documented upstream as the loader
        instance with its data loaded)
    """
    manager = DatasetManager()
    return manager.load_dataset(name, data_dir, **kwargs)


__all__ = [
    # New class-based loaders
    'DaphnetLoader',
    'MobiFallLoader',
    'ArduousLoader',
    'PhysioNetLoader',
    'HARUPLoader',
    'UrFallLoader',
    # Legacy functions for backward compatibility
    'load_daphnet_data',
    'create_sliding_windows',
    'load_mobifall_data',
    'load_arduous_data',
    'load_physionet_data',
    'create_physionet_windows',
    'load_harup_data',
    'create_harup_windows',
    'extract_harup_features',
    'load_urfall_data',
    'create_urfall_windows',
    'download_dataset',
    'extract_dataset',
    'sliding_window',
    # Manager functions
    'get_dataset_manager',
    'get_available_datasets',
    'load_dataset'
]
class DaphnetLoader(BaseDatasetLoader):
    """
    Loader for the Daphnet Freezing of Gait dataset.

    Reads the per-subject text files, derives one magnitude column per sensor
    and can cut the recordings into sliding windows.
    """

    def __init__(self):
        super().__init__(
            name="daphnet",
            description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease"
        )
        # Meaning of the integer values found in the "annotations" column.
        annotation_labels = {0: 'not_valid', 1: 'no_freeze', 2: 'freeze'}
        self.metadata = dict(
            sensors=['shank', 'thigh', 'trunk'],
            components=['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
            sampling_frequency=64,
            annotations=annotation_labels,
        )

    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load every ``S*.txt`` subject file of the Daphnet dataset.

        Args:
            data_dir: Directory to store/find the dataset
            **kwargs: Accepted for interface compatibility; not used here

        Returns:
            Tuple of (data_list, names_list); the same two lists are also
            stored on ``self.data`` and ``self.names``
        """
        # Both helpers are invoked unconditionally before reading.
        download_dataset("daphnet", data_dir)
        extract_dataset("daphnet", data_dir)

        sensors = ("shank", "thigh", "trunk")
        # Column layout of the raw files: time, 3 axes per sensor, annotations.
        raw_columns = (
            ["time"]
            + [f"{sensor}_{axis}" for sensor in sensors for axis in ("h_fd", "v", "h_l")]
            + ["annotations"]
        )
        # Output layout: each sensor's magnitude followed by its three axes.
        ordered_columns = [
            column
            for sensor in sensors
            for column in (sensor, f"{sensor}_h_fd", f"{sensor}_v", f"{sensor}_h_l")
        ] + ["annotations"]

        subject_dir = os.path.join(data_dir, "dataset_fog_release/dataset")
        frames = []
        file_names = []

        for subject_path in sorted(glob(os.path.join(subject_dir, "S*.txt"))):
            file_names.append(os.path.basename(subject_path))

            frame = pd.read_csv(subject_path, sep=" ", names=raw_columns).set_index("time")

            # Euclidean magnitude per sensor (same summation order as before).
            for sensor in ("thigh", "shank", "trunk"):
                frame[sensor] = np.sqrt(
                    frame[f"{sensor}_h_l"]**2 + frame[f"{sensor}_v"]**2 + frame[f"{sensor}_h_fd"]**2
                )

            frames.append(frame[ordered_columns])

        self.data = frames
        self.names = file_names

        return frames, file_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Cut each recording into sliding windows, one window set per column.

        Args:
            data: List of DataFrames containing Daphnet data
            names: List of names corresponding to the data
            window_size: Size of the sliding window (default: 192)
            step_size: Step size for the sliding window (default: 32)

        Returns:
            One ``{"name", "windows"}`` dict per DataFrame that still has rows
            after dropping annotation 0; DataFrames left empty are skipped
        """
        results = []

        for position, frame in enumerate(data):
            # Rows annotated 0 are dropped before windowing.
            valid_mask = frame.annotations > 0
            usable = frame[valid_mask]
            if usable.empty:
                continue

            # dict.fromkeys keeps first-seen order and drops repeated labels.
            signal_columns = dict.fromkeys(
                column for column in usable.columns if column != "annotations"
            )
            per_column = [
                {"name": column, "data": sliding_window(usable[column], window_size, step_size)}
                for column in signal_columns
            ]
            # Annotations are windowed last so they close the list.
            per_column.append({
                "name": "annotations",
                "data": sliding_window(usable["annotations"], window_size, step_size),
            })

            results.append({"name": names[position], "windows": per_column})

        return results

    def get_supported_formats(self) -> List[str]:
        """
        Report the file extensions this loader reads.

        Returns:
            List of supported file extensions
        """
        extensions = ['.txt']
        return extensions

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Return the sensor-related entries of ``self.metadata``.

        Returns:
            Dictionary with the 'sensors', 'components' and
            'sampling_frequency' metadata values
        """
        wanted = ('sensors', 'components', 'sampling_frequency')
        return {key: self.metadata[key] for key in wanted}

    def get_annotation_info(self) -> Dict[int, str]:
        """
        Return the annotation mapping stored in ``self.metadata``.

        Returns:
            Dictionary mapping annotation values to descriptions
        """
        annotation_map = self.metadata['annotations']
        return annotation_map
Daphnet dataset loader class.
This class handles loading and processing of the Daphnet dataset for gait analysis.
def __init__(self):
    """Set the loader's name/description and fill in the static Daphnet metadata."""
    super().__init__(
        name="daphnet",
        description="Daphnet Freezing of Gait Dataset - Contains accelerometer data from subjects with Parkinson's disease"
    )
    # Meaning of the integer values found in the "annotations" column.
    annotation_labels = {0: 'not_valid', 1: 'no_freeze', 2: 'freeze'}
    self.metadata = dict(
        sensors=['shank', 'thigh', 'trunk'],
        components=['h_fd', 'v', 'h_l'],  # horizontal forward, vertical, horizontal lateral
        sampling_frequency=64,
        annotations=annotation_labels,
    )
Initialize the dataset loader.
Args: name: Name of the dataset description: Description of the dataset
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load every ``S*.txt`` subject file of the Daphnet dataset.

    Args:
        data_dir: Directory to store/find the dataset
        **kwargs: Accepted for interface compatibility; not used here

    Returns:
        Tuple of (data_list, names_list); the same two lists are also
        stored on ``self.data`` and ``self.names``
    """
    # Both helpers are invoked unconditionally before reading.
    download_dataset("daphnet", data_dir)
    extract_dataset("daphnet", data_dir)

    sensors = ("shank", "thigh", "trunk")
    # Column layout of the raw files: time, 3 axes per sensor, annotations.
    raw_columns = (
        ["time"]
        + [f"{sensor}_{axis}" for sensor in sensors for axis in ("h_fd", "v", "h_l")]
        + ["annotations"]
    )
    # Output layout: each sensor's magnitude followed by its three axes.
    ordered_columns = [
        column
        for sensor in sensors
        for column in (sensor, f"{sensor}_h_fd", f"{sensor}_v", f"{sensor}_h_l")
    ] + ["annotations"]

    subject_dir = os.path.join(data_dir, "dataset_fog_release/dataset")
    frames = []
    file_names = []

    for subject_path in sorted(glob(os.path.join(subject_dir, "S*.txt"))):
        file_names.append(os.path.basename(subject_path))

        frame = pd.read_csv(subject_path, sep=" ", names=raw_columns).set_index("time")

        # Euclidean magnitude per sensor (same summation order as before).
        for sensor in ("thigh", "shank", "trunk"):
            frame[sensor] = np.sqrt(
                frame[f"{sensor}_h_l"]**2 + frame[f"{sensor}_v"]**2 + frame[f"{sensor}_h_fd"]**2
            )

        frames.append(frame[ordered_columns])

    self.data = frames
    self.names = file_names

    return frames, file_names
Load Daphnet dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Daphnet)
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 192, step_size: int = 32) -> List[Dict]:
    """
    Cut each recording into sliding windows, one window set per column.

    Args:
        data: List of DataFrames containing Daphnet data
        names: List of names corresponding to the data
        window_size: Size of the sliding window (default: 192)
        step_size: Step size for the sliding window (default: 32)

    Returns:
        One ``{"name", "windows"}`` dict per DataFrame that still has rows
        after dropping annotation 0; DataFrames left empty are skipped
    """
    results = []

    for position, frame in enumerate(data):
        # Rows annotated 0 are dropped before windowing.
        valid_mask = frame.annotations > 0
        usable = frame[valid_mask]
        if usable.empty:
            continue

        # dict.fromkeys keeps first-seen order and drops repeated labels.
        signal_columns = dict.fromkeys(
            column for column in usable.columns if column != "annotations"
        )
        per_column = [
            {"name": column, "data": sliding_window(usable[column], window_size, step_size)}
            for column in signal_columns
        ]
        # Annotations are windowed last so they close the list.
        per_column.append({
            "name": "annotations",
            "data": sliding_window(usable["annotations"], window_size, step_size),
        })

        results.append({"name": names[position], "windows": per_column})

    return results
Create sliding windows from the Daphnet dataset.
Args: data: List of DataFrames containing Daphnet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)
Returns: List of dictionaries containing sliding windows for each DataFrame
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions this loader reads.

    Returns:
        List of supported file extensions
    """
    extensions = ['.txt']
    return extensions
Get list of supported file formats for Daphnet dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Return the sensor-related entries of ``self.metadata``.

    Returns:
        Dictionary with the 'sensors', 'components' and
        'sampling_frequency' metadata values
    """
    wanted = ('sensors', 'components', 'sampling_frequency')
    return {key: self.metadata[key] for key in wanted}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_annotation_info(self) -> Dict[int, str]:
    """
    Return the annotation mapping stored in ``self.metadata``.

    Returns:
        Dictionary mapping annotation values to descriptions
    """
    annotation_map = self.metadata['annotations']
    return annotation_map
Get information about annotations in the dataset.
Returns: Dictionary mapping annotation values to descriptions
Inherited Members
class MobiFallLoader(BaseDatasetLoader):
    """
    Loader for the MobiFall dataset.

    Data loading and windowing are placeholders for now: both print a notice
    and return empty results. Only the metadata accessors return real values.
    """

    def __init__(self):
        super().__init__(
            name="mobifall",
            description="MobiFall Dataset - Contains accelerometer and gyroscope data for fall detection"
        )
        self.metadata = dict(
            sensors=['accelerometer', 'gyroscope'],
            components=['x', 'y', 'z'],
            sampling_frequency=100,  # Typical for MobiFall
            activities=['ADL', 'FALL'],  # Activities of Daily Living and Falls
        )

    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Placeholder: announce that loading is unimplemented and return nothing.

        Args:
            data_dir: Directory to store/find the dataset (currently ignored)
            **kwargs: Additional arguments (currently ignored)

        Returns:
            Tuple of two empty lists (data_list, names_list)
        """
        # TODO: Implement MobiFall data loading
        notice = "MobiFall data loading is not yet implemented"
        print(notice)
        no_frames, no_names = [], []
        return no_frames, no_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Placeholder: announce that windowing is unimplemented and return nothing.

        Args:
            data: List of DataFrames containing MobiFall data (currently ignored)
            names: List of names corresponding to the data (currently ignored)
            window_size: Size of the sliding window (default: 192)
            step_size: Step size for the sliding window (default: 32)

        Returns:
            An empty list
        """
        # TODO: Implement MobiFall sliding window creation
        notice = "MobiFall sliding window creation is not yet implemented"
        print(notice)
        no_windows = []
        return no_windows

    def get_supported_formats(self) -> List[str]:
        """
        Report the file extensions this loader declares support for.

        Returns:
            List of supported file extensions
        """
        extensions = ['.csv', '.txt']
        return extensions

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Return the sensor-related entries of ``self.metadata``.

        Returns:
            Dictionary with the 'sensors', 'components' and
            'sampling_frequency' metadata values
        """
        wanted = ('sensors', 'components', 'sampling_frequency')
        return {key: self.metadata[key] for key in wanted}

    def get_activity_info(self) -> List[str]:
        """
        Return the activity list stored in ``self.metadata``.

        Returns:
            List of activity types
        """
        activity_types = self.metadata['activities']
        return activity_types
MobiFall dataset loader class.
This class handles loading and processing of the MobiFall dataset for gait analysis.
def __init__(self):
    """Set the loader's name/description and fill in the static MobiFall metadata."""
    super().__init__(
        name="mobifall",
        description="MobiFall Dataset - Contains accelerometer and gyroscope data for fall detection"
    )
    self.metadata = dict(
        sensors=['accelerometer', 'gyroscope'],
        components=['x', 'y', 'z'],
        sampling_frequency=100,  # Typical for MobiFall
        activities=['ADL', 'FALL'],  # Activities of Daily Living and Falls
    )
Initialize the dataset loader.
Args: name: Name of the dataset description: Description of the dataset
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Placeholder: announce that loading is unimplemented and return nothing.

    Args:
        data_dir: Directory to store/find the dataset (currently ignored)
        **kwargs: Additional arguments (currently ignored)

    Returns:
        Tuple of two empty lists (data_list, names_list)
    """
    # TODO: Implement MobiFall data loading
    notice = "MobiFall data loading is not yet implemented"
    print(notice)
    no_frames, no_names = [], []
    return no_frames, no_names
Load MobiFall dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for MobiFall)
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 192, step_size: int = 32) -> List[Dict]:
    """
    Placeholder: announce that windowing is unimplemented and return nothing.

    Args:
        data: List of DataFrames containing MobiFall data (currently ignored)
        names: List of names corresponding to the data (currently ignored)
        window_size: Size of the sliding window (default: 192)
        step_size: Step size for the sliding window (default: 32)

    Returns:
        An empty list
    """
    # TODO: Implement MobiFall sliding window creation
    notice = "MobiFall sliding window creation is not yet implemented"
    print(notice)
    no_windows = []
    return no_windows
Create sliding windows from the MobiFall dataset.
Args: data: List of DataFrames containing MobiFall data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)
Returns: List of dictionaries containing sliding windows for each DataFrame
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions this loader declares support for.

    Returns:
        List of supported file extensions
    """
    extensions = ['.csv', '.txt']
    return extensions
Get list of supported file formats for MobiFall dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Return the sensor-related entries of ``self.metadata``.

    Returns:
        Dictionary with the 'sensors', 'components' and
        'sampling_frequency' metadata values
    """
    wanted = ('sensors', 'components', 'sampling_frequency')
    return {key: self.metadata[key] for key in wanted}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> List[str]:
    """
    Return the activity list stored in ``self.metadata``.

    Returns:
        List of activity types
    """
    activity_types = self.metadata['activities']
    return activity_types
Get information about activities in the dataset.
Returns: List of activity types
Inherited Members
class ArduousLoader(BaseDatasetLoader):
    """
    Loader for the Arduous dataset.

    Data loading and windowing are placeholders for now: both print a notice
    and return empty results. Only the metadata accessors return real values.
    """

    def __init__(self):
        super().__init__(
            name="arduous",
            description="Arduous Dataset - Contains multi-sensor wearable data for daily activity recognition"
        )
        self.metadata = dict(
            sensors=['accelerometer', 'gyroscope', 'magnetometer'],
            components=['x', 'y', 'z'],
            sampling_frequency=50,  # Typical for Arduous
            activities=['walking', 'running', 'sitting', 'standing', 'lying'],
        )

    def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Placeholder: announce that loading is unimplemented and return nothing.

        Args:
            data_dir: Directory to store/find the dataset (currently ignored)
            **kwargs: Additional arguments (currently ignored)

        Returns:
            Tuple of two empty lists (data_list, names_list)
        """
        # TODO: Implement Arduous data loading
        notice = "Arduous data loading is not yet implemented"
        print(notice)
        no_frames, no_names = [], []
        return no_frames, no_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 192, step_size: int = 32) -> List[Dict]:
        """
        Placeholder: announce that windowing is unimplemented and return nothing.

        Args:
            data: List of DataFrames containing Arduous data (currently ignored)
            names: List of names corresponding to the data (currently ignored)
            window_size: Size of the sliding window (default: 192)
            step_size: Step size for the sliding window (default: 32)

        Returns:
            An empty list
        """
        # TODO: Implement Arduous sliding window creation
        notice = "Arduous sliding window creation is not yet implemented"
        print(notice)
        no_windows = []
        return no_windows

    def get_supported_formats(self) -> List[str]:
        """
        Report the file extensions this loader declares support for.

        Returns:
            List of supported file extensions
        """
        extensions = ['.csv', '.txt']
        return extensions

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Return the sensor-related entries of ``self.metadata``.

        Returns:
            Dictionary with the 'sensors', 'components' and
            'sampling_frequency' metadata values
        """
        wanted = ('sensors', 'components', 'sampling_frequency')
        return {key: self.metadata[key] for key in wanted}

    def get_activity_info(self) -> List[str]:
        """
        Return the activity list stored in ``self.metadata``.

        Returns:
            List of activity types
        """
        activity_types = self.metadata['activities']
        return activity_types
Arduous dataset loader class.
This class handles loading and processing of the Arduous dataset for gait analysis.
def __init__(self):
    """Set the loader's name/description and fill in the static Arduous metadata."""
    super().__init__(
        name="arduous",
        description="Arduous Dataset - Contains multi-sensor wearable data for daily activity recognition"
    )
    self.metadata = dict(
        sensors=['accelerometer', 'gyroscope', 'magnetometer'],
        components=['x', 'y', 'z'],
        sampling_frequency=50,  # Typical for Arduous
        activities=['walking', 'running', 'sitting', 'standing', 'lying'],
    )
Initialize the dataset loader.
Args: name: Name of the dataset description: Description of the dataset
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Placeholder: announce that loading is unimplemented and return nothing.

    Args:
        data_dir: Directory to store/find the dataset (currently ignored)
        **kwargs: Additional arguments (currently ignored)

    Returns:
        Tuple of two empty lists (data_list, names_list)
    """
    # TODO: Implement Arduous data loading
    notice = "Arduous data loading is not yet implemented"
    print(notice)
    no_frames, no_names = [], []
    return no_frames, no_names
Load Arduous dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for Arduous)
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 192, step_size: int = 32) -> List[Dict]:
    """
    Placeholder: announce that windowing is unimplemented and return nothing.

    Args:
        data: List of DataFrames containing Arduous data (currently ignored)
        names: List of names corresponding to the data (currently ignored)
        window_size: Size of the sliding window (default: 192)
        step_size: Step size for the sliding window (default: 32)

    Returns:
        An empty list
    """
    # TODO: Implement Arduous sliding window creation
    notice = "Arduous sliding window creation is not yet implemented"
    print(notice)
    no_windows = []
    return no_windows
Create sliding windows from the Arduous dataset.
Args: data: List of DataFrames containing Arduous data names: List of names corresponding to the data window_size: Size of the sliding window (default: 192) step_size: Step size for the sliding window (default: 32)
Returns: List of dictionaries containing sliding windows for each DataFrame
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions this loader declares support for.

    Returns:
        List of supported file extensions
    """
    extensions = ['.csv', '.txt']
    return extensions
Get list of supported file formats for Arduous dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Return the sensor-related entries of ``self.metadata``.

    Returns:
        Dictionary with the 'sensors', 'components' and
        'sampling_frequency' metadata values
    """
    wanted = ('sensors', 'components', 'sampling_frequency')
    return {key: self.metadata[key] for key in wanted}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> List[str]:
    """
    Return the activity list stored in ``self.metadata``.

    Returns:
        List of activity types
    """
    activity_types = self.metadata['activities']
    return activity_types
Get information about activities in the dataset.
Returns: List of activity types
Inherited Members
25class PhysioNetLoader(BaseDatasetLoader): 26 """ 27 PhysioNet VGRF dataset loader class. 28 29 This class handles loading and processing of the PhysioNet Gait in Parkinson's Disease dataset. 30 The dataset contains vertical ground reaction force (VGRF) data from subjects with Parkinson's 31 disease and healthy controls. 32 """ 33 34 def __init__(self): 35 super().__init__( 36 name="physionet", 37 description="PhysioNet Gait in Parkinson's Disease Dataset - Contains VGRF data from subjects with Parkinson's disease and healthy controls" 38 ) 39 self.metadata = { 40 'sensors': ['VGRF_L1', 'VGRF_L2', 'VGRF_L3', 'VGRF_L4', 'VGRF_L5', 'VGRF_L6', 'VGRF_L7', 'VGRF_L8', 41 'VGRF_R1', 'VGRF_R2', 'VGRF_R3', 'VGRF_R4', 'VGRF_R5', 'VGRF_R6', 'VGRF_R7', 'VGRF_R8'], 42 'sampling_frequency': 100, # 100 Hz sampling frequency 43 'subjects': { 44 'Co': 'Control subjects', 45 'Pt': 'Parkinson\'s disease patients' 46 }, 47 'window_size': 600, # 6 seconds at 100 Hz 48 'url': 'https://physionet.org/files/gaitpdb/1.0.0/' 49 } 50 self.labels = [] 51 self.subject_types = [] 52 53 def _download_physionet_data(self, data_dir: str) -> str: 54 """ 55 Download PhysioNet dataset if not already present. 
56 57 Args: 58 data_dir: Directory to store the dataset 59 60 Returns: 61 Path to the downloaded/existing dataset directory 62 """ 63 dataset_path = os.path.join(data_dir, "physionet_gaitpdb") 64 65 if os.path.exists(dataset_path) and len(os.listdir(dataset_path)) > 0: 66 print(f"PhysioNet dataset already exists at: {dataset_path}") 67 return dataset_path 68 69 os.makedirs(dataset_path, exist_ok=True) 70 71 # Download the dataset files 72 base_url = "https://physionet.org/files/gaitpdb/1.0.0/" 73 74 # Get list of files (basic file names based on the reference) 75 file_patterns = [ 76 # Control subjects - Ga prefix 77 *[f"GaCo{i:02d}_{j:02d}.txt" for i in range(1, 18) for j in range(1, 3)], 78 "GaCo22_01.txt", "GaCo22_10.txt", 79 80 # Parkinson's patients - Ga prefix 81 *[f"GaPt{i:02d}_{j:02d}.txt" for i in range(3, 10) for j in range(1, 3)], 82 *[f"GaPt{i:02d}_{j:02d}.txt" for i in range(12, 34) for j in range(1, 3)], 83 *[f"GaPt{i:02d}_10.txt" for i in range(13, 34)], 84 85 # Control subjects - Ju prefix 86 *[f"JuCo{i:02d}_01.txt" for i in range(1, 27)], 87 88 # Parkinson's patients - Ju prefix 89 *[f"JuPt{i:02d}_{j:02d}.txt" for i in range(1, 30) for j in range(1, 8)], 90 91 # Control subjects - Si prefix 92 *[f"SiCo{i:02d}_01.txt" for i in range(1, 31)], 93 94 # Parkinson's patients - Si prefix 95 *[f"SiPt{i:02d}_01.txt" for i in range(2, 41)] 96 ] 97 98 print(f"Downloading PhysioNet dataset to {dataset_path}") 99 for filename in tqdm(file_patterns, desc="Downloading files"): 100 file_url = base_url + filename 101 file_path = os.path.join(dataset_path, filename) 102 103 if os.path.exists(file_path): 104 continue 105 106 try: 107 response = requests.get(file_url, stream=True) 108 if response.status_code == 200: 109 with open(file_path, 'wb') as f: 110 for chunk in response.iter_content(chunk_size=8192): 111 f.write(chunk) 112 else: 113 print(f"Could not download {filename} (status: {response.status_code})") 114 except Exception as e: 115 print(f"Error downloading 
{filename}: {e}") 116 117 return dataset_path 118 119 def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]: 120 """ 121 Load PhysioNet VGRF dataset from the specified directory. 122 123 Args: 124 data_dir: Directory to store/find the dataset 125 **kwargs: Additional arguments (unused for PhysioNet) 126 127 Returns: 128 Tuple of (data_list, names_list) 129 """ 130 # Download dataset if needed 131 dataset_path = self._download_physionet_data(data_dir) 132 133 physionet_data = [] 134 physionet_names = [] 135 self.labels = [] 136 self.subject_types = [] 137 138 # Load all available files 139 for filepath in sorted(glob(os.path.join(dataset_path, "Ga*.txt"))): 140 filename = os.path.basename(filepath) 141 142 # Extract subject type from filename 143 if 'Co' in filename: 144 subject_type = 'Control' 145 label = 'Co' 146 elif 'Pt' in filename: 147 subject_type = 'Patient' 148 label = 'Pt' 149 else: 150 continue # Skip files that don't match expected pattern 151 152 try: 153 # Read the file - PhysioNet files are tab-delimited with variable columns 154 # Column 0: time, Columns 1-16: VGRF sensors, additional columns may exist 155 df = pd.read_csv(filepath, delimiter='\t', header=None) 156 157 # Handle variable number of columns 158 n_cols = min(df.shape[1], 19) # Limit to 19 columns max 159 df = df.iloc[:, :n_cols] 160 161 # Create column names 162 col_names = ['time'] 163 for i in range(1, n_cols): 164 if i <= 8: 165 col_names.append(f'VGRF_L{i}') 166 elif i <= 16: 167 col_names.append(f'VGRF_R{i-8}') 168 else: 169 col_names.append(f'sensor_{i}') 170 171 df.columns = col_names 172 173 # Set time as index 174 df = df.set_index('time') 175 176 # Add subject metadata 177 df['subject_type'] = subject_type 178 df['label'] = label 179 180 physionet_data.append(df) 181 physionet_names.append(filename) 182 self.labels.append(label) 183 self.subject_types.append(subject_type) 184 185 except Exception as e: 186 print(f"Error loading {filename}: {e}") 
187 continue 188 189 # Store loaded data 190 self.data = physionet_data 191 self.names = physionet_names 192 193 print(f"Loaded {len(physionet_data)} PhysioNet files") 194 print(f"Subject distribution: {dict(zip(*np.unique(self.subject_types, return_counts=True)))}") 195 196 return physionet_data, physionet_names 197 198 def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 199 window_size: int = 600, step_size: int = 100) -> List[Dict]: 200 """ 201 Create sliding windows from the PhysioNet dataset. 202 203 Args: 204 data: List of DataFrames containing PhysioNet data 205 names: List of names corresponding to the data 206 window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz) 207 step_size: Step size for the sliding window (default: 100) 208 209 Returns: 210 List of dictionaries containing sliding windows for each DataFrame 211 """ 212 windows_data = [] 213 214 for idx, df in enumerate(data): 215 # Remove metadata columns for windowing 216 sensor_columns = [col for col in df.columns if col.startswith('VGRF_') or col.startswith('sensor_')] 217 df_sensors = df[sensor_columns] 218 219 if df_sensors.empty or len(df_sensors) < window_size: 220 continue 221 222 windows = [] 223 224 # Create windows for each sensor 225 for col in sensor_columns: 226 try: 227 window_data = sliding_window(df_sensors[col].values, window_size, step_size) 228 windows.append({"name": col, "data": window_data}) 229 except Exception as e: 230 print(f"Error creating windows for {col} in {names[idx]}: {e}") 231 continue 232 233 if windows: 234 windows_data.append({ 235 "name": names[idx], 236 "windows": windows, 237 "metadata": { 238 "subject_type": df['subject_type'].iloc[0] if 'subject_type' in df.columns else 'Unknown', 239 "label": df['label'].iloc[0] if 'label' in df.columns else 'Unknown', 240 "window_size": window_size, 241 "step_size": step_size, 242 "num_windows": len(windows[0]["data"]) if windows else 0 243 } 244 }) 245 246 return windows_data 
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions this loader reads.

    Returns:
        A list holding the single extension '.txt'
    """
    extensions = ['.txt']
    return extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Summarise the sensor-related entries of ``self.metadata``.

    Returns:
        Dictionary with the 'sensors', 'sampling_frequency' and
        'window_size' entries copied from ``self.metadata``
    """
    wanted_keys = ('sensors', 'sampling_frequency', 'window_size')
    return {key: self.metadata[key] for key in wanted_keys}

def get_subject_info(self) -> Dict[str, str]:
    """
    Return the subject-group description stored in the metadata.

    Returns:
        The ``self.metadata['subjects']`` dictionary
    """
    subjects = self.metadata['subjects']
    return subjects

def get_labels(self) -> List[str]:
    """
    Return the labels recorded for the loaded data.

    Returns:
        The ``self.labels`` list
    """
    recorded_labels = self.labels
    return recorded_labels

def filter_by_subject_type(self, subject_type: str) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Select the loaded DataFrames whose first 'subject_type' value matches.

    Args:
        subject_type: 'Control' or 'Patient'

    Returns:
        Tuple of (filtered_data, filtered_names)

    Raises:
        ValueError: If ``self.data`` is empty or otherwise falsy.
    """
    if not self.data:
        raise ValueError("No data loaded. Call load_data() first.")

    # Pair every matching frame with the name stored at the same position.
    matches = [
        (frame, self.names[position])
        for position, frame in enumerate(self.data)
        if frame['subject_type'].iloc[0] == subject_type
    ]
    filtered_data = [frame for frame, _ in matches]
    filtered_names = [file_name for _, file_name in matches]
    return filtered_data, filtered_names
PhysioNet VGRF dataset loader class.
This class handles loading and processing of the PhysioNet Gait in Parkinson's Disease dataset. The dataset contains vertical ground reaction force (VGRF) data from subjects with Parkinson's disease and healthy controls.
def __init__(self):
    """
    Set up the PhysioNet loader: register name/description with the base
    class and fill in the static metadata and empty label bookkeeping.
    """
    super().__init__(
        name="physionet",
        description="PhysioNet Gait in Parkinson's Disease Dataset - Contains VGRF data from subjects with Parkinson's disease and healthy controls"
    )

    # VGRF_L1..VGRF_L8 followed by VGRF_R1..VGRF_R8.
    sensor_names = [f'VGRF_{side}{number}' for side in ('L', 'R') for number in range(1, 9)]
    subject_groups = {
        'Co': 'Control subjects',
        'Pt': "Parkinson's disease patients",
    }

    self.metadata = {
        'sensors': sensor_names,
        'sampling_frequency': 100,  # 100 Hz sampling frequency
        'subjects': subject_groups,
        'window_size': 600,  # 6 seconds at 100 Hz
        'url': 'https://physionet.org/files/gaitpdb/1.0.0/',
    }

    # Filled by load_data(), one entry per successfully loaded file.
    self.labels = []
    self.subject_types = []
Initialize the dataset loader.
Args: name: Name of the dataset description: Description of the dataset
def load_data(self, data_dir: str, **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load PhysioNet VGRF dataset from the specified directory.

    Every ``Ga*.txt`` file found in the dataset directory is read as a
    headerless tab-delimited table. At most 19 columns are kept: the first
    becomes the ``time`` index, columns 1-8 are named ``VGRF_L1..L8``,
    columns 9-16 ``VGRF_R1..R8`` and any further ones ``sensor_<i>``.
    ``subject_type`` and ``label`` columns are added to each DataFrame.

    Side effects: resets and fills ``self.labels`` and
    ``self.subject_types``, stores the results in ``self.data`` and
    ``self.names`` and prints a short summary.

    Args:
        data_dir: Directory to store/find the dataset
        **kwargs: Additional arguments (unused for PhysioNet)

    Returns:
        Tuple of (data_list, names_list); both lists are aligned and sorted
        by file path.
    """
    # Local import: Counter is not among the imports visible for this file.
    from collections import Counter

    # Download dataset if needed
    # NOTE(review): assumes _download_physionet_data returns a usable
    # directory path - confirm it never returns None.
    dataset_path = self._download_physionet_data(data_dir)

    physionet_data = []
    physionet_names = []
    self.labels = []
    self.subject_types = []

    # Load all available files
    for filepath in sorted(glob(os.path.join(dataset_path, "Ga*.txt"))):
        filename = os.path.basename(filepath)

        # Extract subject type from filename ('Co' is checked first)
        if 'Co' in filename:
            subject_type, label = 'Control', 'Co'
        elif 'Pt' in filename:
            subject_type, label = 'Patient', 'Pt'
        else:
            continue  # Skip files that don't match expected pattern

        try:
            # Tab-delimited, no header row, variable number of columns.
            df = pd.read_csv(filepath, delimiter='\t', header=None)

            # Keep at most 19 columns (time + 16 VGRF sensors + 2 extra).
            n_cols = min(df.shape[1], 19)
            df = df.iloc[:, :n_cols]

            col_names = ['time']
            for i in range(1, n_cols):
                if i <= 8:
                    col_names.append(f'VGRF_L{i}')
                elif i <= 16:
                    col_names.append(f'VGRF_R{i - 8}')
                else:
                    col_names.append(f'sensor_{i}')
            df.columns = col_names

            # Set time as index
            df = df.set_index('time')

            # Add subject metadata
            df['subject_type'] = subject_type
            df['label'] = label
        except Exception as e:
            # Best-effort loading: report the broken file and keep going.
            print(f"Error loading {filename}: {e}")
            continue

        physionet_data.append(df)
        physionet_names.append(filename)
        self.labels.append(label)
        self.subject_types.append(subject_type)

    # Store loaded data
    self.data = physionet_data
    self.names = physionet_names

    # Count with plain Python objects: a dict built from np.unique output
    # prints as {np.str_('Control'): np.int64(1)} under NumPy >= 2.
    distribution = dict(sorted(Counter(self.subject_types).items()))

    print(f"Loaded {len(physionet_data)} PhysioNet files")
    print(f"Subject distribution: {distribution}")

    return physionet_data, physionet_names
Load PhysioNet VGRF dataset from the specified directory.
Args: data_dir: Directory to store/find the dataset **kwargs: Additional arguments (unused for PhysioNet)
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 600, step_size: int = 100) -> List[Dict]:
    """
    Cut every sensor column of each PhysioNet DataFrame into sliding windows.

    Only columns whose name starts with 'VGRF_' or 'sensor_' are windowed.
    DataFrames with no such columns, or with fewer rows than
    ``window_size``, are skipped.

    Args:
        data: List of DataFrames containing PhysioNet data
        names: List of names corresponding to the data
        window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz)
        step_size: Step size for the sliding window (default: 100)

    Returns:
        List of dictionaries, one per windowed DataFrame, with the keys
        'name', 'windows' (a list of {'name', 'data'} entries per sensor)
        and 'metadata'
    """
    sensor_prefixes = ('VGRF_', 'sensor_')
    results = []

    for position, frame in enumerate(data):
        # Metadata columns are left out of the windowing.
        sensor_columns = [column for column in frame.columns if column.startswith(sensor_prefixes)]
        sensor_frame = frame[sensor_columns]

        if sensor_frame.empty or len(sensor_frame) < window_size:
            continue

        per_sensor = []
        for column in sensor_columns:
            try:
                chunks = sliding_window(sensor_frame[column].values, window_size, step_size)
                per_sensor.append({"name": column, "data": chunks})
            except Exception as e:
                print(f"Error creating windows for {column} in {names[position]}: {e}")
                continue

        if not per_sensor:
            continue

        if 'subject_type' in frame.columns:
            subject_type = frame['subject_type'].iloc[0]
        else:
            subject_type = 'Unknown'
        if 'label' in frame.columns:
            label = frame['label'].iloc[0]
        else:
            label = 'Unknown'

        results.append({
            "name": names[position],
            "windows": per_sensor,
            "metadata": {
                "subject_type": subject_type,
                "label": label,
                "window_size": window_size,
                "step_size": step_size,
                # Window count is taken from the first sensor's windows.
                "num_windows": len(per_sensor[0]["data"]),
            },
        })

    return results
Create sliding windows from the PhysioNet dataset.
Args: data: List of DataFrames containing PhysioNet data names: List of names corresponding to the data window_size: Size of the sliding window (default: 600 for 6 seconds at 100Hz) step_size: Step size for the sliding window (default: 100)
Returns: List of dictionaries containing sliding windows for each DataFrame
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions this loader reads.

    Returns:
        A list holding the single extension '.txt'
    """
    extensions = ['.txt']
    return extensions
Get list of supported file formats for PhysioNet dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Summarise the sensor-related entries of ``self.metadata``.

    Returns:
        Dictionary with the 'sensors', 'sampling_frequency' and
        'window_size' entries copied from ``self.metadata``
    """
    wanted_keys = ('sensors', 'sampling_frequency', 'window_size')
    return {key: self.metadata[key] for key in wanted_keys}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_subject_info(self) -> Dict[str, str]:
    """
    Return the subject-group description stored in the metadata.

    Returns:
        The ``self.metadata['subjects']`` dictionary
    """
    subjects = self.metadata['subjects']
    return subjects
Get information about subjects in the dataset.
Returns: Dictionary containing subject information
def get_labels(self) -> List[str]:
    """
    Return the labels recorded for the loaded data.

    Returns:
        The ``self.labels`` list
    """
    recorded_labels = self.labels
    return recorded_labels
Get labels for loaded data.
Returns: List of labels corresponding to loaded data
def filter_by_subject_type(self, subject_type: str) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Select the loaded DataFrames whose first 'subject_type' value matches.

    Args:
        subject_type: 'Control' or 'Patient'

    Returns:
        Tuple of (filtered_data, filtered_names)

    Raises:
        ValueError: If ``self.data`` is empty or otherwise falsy.
    """
    if not self.data:
        raise ValueError("No data loaded. Call load_data() first.")

    # Pair every matching frame with the name stored at the same position.
    matches = [
        (frame, self.names[position])
        for position, frame in enumerate(self.data)
        if frame['subject_type'].iloc[0] == subject_type
    ]
    filtered_data = [frame for frame, _ in matches]
    filtered_names = [file_name for _, file_name in matches]
    return filtered_data, filtered_names
Filter loaded data by subject type.
Args: subject_type: 'Control' or 'Patient'
Returns: Tuple of (filtered_data, filtered_names)
Inherited Members
class HARUPLoader(BaseDatasetLoader):
    """
    HAR-UP dataset loader class.

    This class handles loading and processing of the HAR-UP dataset for human activity recognition
    and fall detection analysis: it reads the per-trial CSV files, cuts them into sliding windows
    and passes the windows to ``HARUPFeatureExtractor``.
    """

    def __init__(self):
        """Register name/description with the base class and set the static metadata."""
        super().__init__(
            name="harup",
            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition"
        )
        # NOTE(review): the sensor list and the activity ID -> label mapping
        # are reproduced unchanged; confirm them against the official HAR-UP
        # documentation.
        self.metadata = {
            'sensors': [
                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
                'BrainSensor', 'Infrared'
            ],
            'components': {
                'Accelerometer': ['x', 'y', 'z'],
                'AngularVelocity': ['x', 'y', 'z'],
                'Luminosity': ['illuminance'],
                'BrainSensor': ['value'],
                'Infrared': ['value']
            },
            'sampling_frequency': 100,  # Hz
            'activities': {
                1: 'Walking',
                2: 'Walking upstairs',
                3: 'Walking downstairs',
                4: 'Sitting',
                5: 'Standing',
                6: 'Lying',
                7: 'Falling forward using hands',
                8: 'Falling forward using knees',
                9: 'Falling backwards',
                10: 'Falling sideward',
                11: 'Falling sitting in empty chair'
            }
        }

        # Features used in HAR-UP
        self.features = [
            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
            'Energy'
        ]

    def download_harup_data(self, data_dir: str) -> Optional[str]:
        """
        Download HAR-UP dataset if not already present.

        Args:
            data_dir: Directory to store the dataset

        Returns:
            Path to ``<data_dir>/DataSet`` if it exists after the download and
            extraction attempt, otherwise None
        """
        # Use the utility function to download and extract the dataset
        download_dataset("harup", data_dir)
        extract_dataset("harup", data_dir)

        # NOTE(review): this checks for "DataSet" while load_data() searches
        # for a folder starting with "UP_Fall_Detection_Dataset" - confirm
        # which layout extract_dataset actually produces.
        dataset_path = os.path.join(data_dir, "DataSet")
        if not os.path.exists(dataset_path):
            print("HAR-UP dataset not found after download attempt.")
            print("Please ensure the dataset is organized in the following structure:")
            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
            return None

        return dataset_path

    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None,
                  activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load HAR-UP dataset from the specified directory.

        All trial files of one subject are concatenated into a single
        DataFrame, with ``subject_id``, ``activity_id``, ``trial_id`` and
        ``activity_label`` columns added. The returned names are aligned with
        the returned DataFrames: one name (the subject folder, e.g.
        ``Subject_01``) per DataFrame.

        Args:
            data_dir: Directory containing the dataset
            subjects: List of subject IDs to load (default: 1-4)
            activities: List of activity IDs to load (default: 1-11)
            trials: List of trial IDs to load (default: 1-3)
            **kwargs: Additional arguments (unused)

        Returns:
            Tuple of (data_list, names_list); both are empty when the dataset
            directory cannot be found.
        """
        import os

        # NOTE(review): the defaults assume 4 subjects, 11 activities and 3
        # trials - confirm the subject count against the dataset.
        if subjects is None:
            subjects = list(range(1, 5))
        if activities is None:
            activities = list(range(1, 12))
        if trials is None:
            trials = list(range(1, 4))

        # If data_dir does not exist, trigger interactive download
        if not os.path.exists(data_dir):
            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
            self.download_harup_data(data_dir)
            # If still doesn't exist, error out
            if not os.path.exists(data_dir):
                print(f"Failed to create or download dataset directory: {data_dir}")
                return [], []

        # Find the UP_Fall_Detection_Dataset directory
        dataset_path = None
        for entry in os.listdir(data_dir):
            entry_path = os.path.join(data_dir, entry)
            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
                dataset_path = entry_path
                break
        if dataset_path is None:
            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
            return [], []

        harup_data = []
        harup_names = []

        for subject_id in subjects:
            subject_folder = f"Subject_{subject_id:02d}"
            subject_path = os.path.join(dataset_path, subject_folder)
            if not os.path.isdir(subject_path):
                continue

            # Collect the trial frames and concatenate once per subject
            # instead of growing a DataFrame inside the loop.
            trial_frames = []

            for activity_id in sorted(activities):
                activity_folder = f"A{activity_id:02d}"
                activity_path = os.path.join(subject_path, activity_folder)
                if not os.path.isdir(activity_path):
                    continue

                for trial_id in sorted(trials):
                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
                    file_path = os.path.join(activity_path, file_name)

                    try:
                        df = pd.read_csv(file_path, header=0)
                    except Exception as e:
                        # Best-effort: a missing or broken trial is reported and skipped.
                        print(f"Error loading {file_path}: {e}")
                        continue

                    print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
                    df['subject_id'] = subject_id
                    df['activity_id'] = activity_id
                    df['trial_id'] = trial_id
                    df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
                    trial_frames.append(df)

            if not trial_frames:
                continue

            subject_df = pd.concat(trial_frames, ignore_index=True)
            if not subject_df.empty:
                harup_data.append(subject_df)
                # One name per DataFrame keeps names[i] aligned with data[i],
                # which create_sliding_windows() relies on.
                harup_names.append(subject_folder)

        self.data = harup_data
        self.names = harup_names

        return harup_data, harup_names

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 100, step_size: int = 50) -> List[Dict]:
        """
        Create sliding windows from the HAR-UP dataset.

        Every numeric column except the bookkeeping columns and 'TIME' is
        windowed. Two extra entries are appended per DataFrame: the windowed
        'activity_id' column and 'labels', the most frequent activity ID of
        each window.

        Args:
            data: List of DataFrames containing HAR-UP data
            names: List of names corresponding to the data
            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)

        Returns:
            List of dictionaries containing sliding windows for each non-empty DataFrame
        """
        excluded = ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
        windows_data = []

        for idx, df in enumerate(data):
            if df.empty:
                continue

            # Only use numeric columns (skip TIME and any non-numeric)
            sensor_columns = [
                col for col in df.columns
                if col not in excluded and pd.api.types.is_numeric_dtype(df[col])
            ]

            windows = []
            processed_columns = set()
            for col in sensor_columns:
                # Guard against duplicated column names.
                if col in processed_columns:
                    continue
                windows.append({"name": col, "data": sliding_window(df[col], window_size, step_size)})
                processed_columns.add(col)

            # Include activity ID for each window
            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
            windows.append({"name": "activity_id", "data": activity_windows})

            # For each window, take the most common activity ID as the label
            labels = []
            for window in activity_windows:
                unique_vals, counts = np.unique(window, return_counts=True)
                labels.append(unique_vals[np.argmax(counts)])
            windows.append({"name": "labels", "data": np.array(labels)})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
                         freq_domain_features: bool = True) -> List[Dict]:
        """
        Extract features from sliding windows using HAR-UP feature extraction methods.

        Entries without a 'labels' window, or without any of the mapped sensor
        columns, are reported and skipped.

        Args:
            windows_data: List of dictionaries containing sliding windows
            time_domain_features: Whether to extract time domain features
            freq_domain_features: Whether to extract frequency domain features

        Returns:
            List of {'name', 'features'} dictionaries, one per processed entry
        """
        # Mapping from original sensor names to actual CSV column names
        sensor_map = {
            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
            'BrainSensor': 'HELMET_RAW',
            'Infrared1': 'IR_1',
            'Infrared2': 'IR_2',
            'Infrared3': 'IR_3',
            'Infrared4': 'IR_4',
        }
        extractor = HARUPFeatureExtractor(verbose=True)
        extractor.config['time_domain'] = time_domain_features
        extractor.config['frequency_domain'] = freq_domain_features

        all_features = []
        for window_dict in windows_data:
            name = window_dict["name"]
            windows = window_dict["windows"]

            # Index windows by name; the first occurrence wins.
            by_name = {}
            for window in windows:
                by_name.setdefault(window["name"], window)

            labels = by_name["labels"]["data"] if "labels" in by_name else None
            if labels is None:
                print(f"No labels found for {name}, skipping feature extraction")
                continue

            sensor_windows = [by_name[col] for col in sensor_map.values() if col in by_name]
            missing = [col for col in sensor_map.values() if col not in by_name]
            if missing:
                print(f"[HARUP] Missing columns for {name}: {missing}")

            # Without sensor windows there is nothing to extract, and the
            # label assignment below would divide by zero.
            if not sensor_windows:
                print(f"[HARUP] No sensor windows found for {name}, skipping feature extraction")
                continue

            bookkeeping = [w for w in windows if w["name"] in ("activity_id", "labels")]
            filtered_windows = sensor_windows + bookkeeping
            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])

            # NOTE(review): assumes the extractor returns its results in
            # groups of one entry per sensor window - confirm against
            # HARUPFeatureExtractor.
            sensor_count = len(sensor_windows)
            for i, feature in enumerate(features):
                window_idx = i // sensor_count
                if window_idx < len(labels):
                    feature["label"] = labels[window_idx]
            all_features.append({"name": name, "features": features})
        return all_features

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for HAR-UP dataset.

        Returns:
            List of supported file extensions
        """
        return ['.csv']

    def get_sensor_info(self) -> Dict[str, List[str]]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary with the 'sensors', 'components' and
            'sampling_frequency' entries of ``self.metadata``
        """
        return {
            'sensors': self.metadata['sensors'],
            'components': self.metadata['components'],
            'sampling_frequency': self.metadata['sampling_frequency']
        }

    def get_activity_info(self) -> Dict[int, str]:
        """
        Get information about activities in the dataset.

        Returns:
            Dictionary mapping activity IDs to descriptions
        """
        return self.metadata['activities']
HAR-UP dataset loader class.
This class handles loading and processing of the HAR-UP dataset for human activity recognition and fall detection analysis.
def __init__(self):
    """
    Set up the HAR-UP loader: register name/description with the base class
    and fill in the static metadata and the feature-name list.
    """
    super().__init__(
        name="harup",
        description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition"
    )

    # Five wearable placements, each with the same three sensor kinds,
    # followed by the two stand-alone sensors.
    placements = ('Ankle', 'RightPocket', 'Belt', 'Neck', 'Wrist')
    sensor_kinds = ('Accelerometer', 'AngularVelocity', 'Luminosity')
    sensor_names = [place + kind for place in placements for kind in sensor_kinds]
    sensor_names += ['BrainSensor', 'Infrared']

    activity_names = [
        'Walking',
        'Walking upstairs',
        'Walking downstairs',
        'Sitting',
        'Standing',
        'Lying',
        'Falling forward using hands',
        'Falling forward using knees',
        'Falling backwards',
        'Falling sideward',
        'Falling sitting in empty chair',
    ]

    self.metadata = {
        'sensors': sensor_names,
        'components': {
            'Accelerometer': ['x', 'y', 'z'],
            'AngularVelocity': ['x', 'y', 'z'],
            'Luminosity': ['illuminance'],
            'BrainSensor': ['value'],
            'Infrared': ['value'],
        },
        'sampling_frequency': 100,  # Hz
        # Activity IDs are numbered from 1 in list order.
        'activities': dict(enumerate(activity_names, start=1)),
    }

    # Features used in HAR-UP
    self.features = [
        'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
        'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
        'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
        'Energy',
    ]
Initialize the dataset loader.
Args: name: Name of the dataset description: Description of the dataset
def download_harup_data(self, data_dir: str) -> Optional[str]:
    """
    Run the download and extraction helpers for HAR-UP and locate the result.

    Args:
        data_dir: Directory to store the dataset

    Returns:
        Path to ``<data_dir>/DataSet`` if it exists after the attempt,
        otherwise None
    """
    # Both helpers are invoked unconditionally.
    download_dataset("harup", data_dir)
    extract_dataset("harup", data_dir)

    dataset_path = os.path.join(data_dir, "DataSet")
    if os.path.exists(dataset_path):
        return dataset_path

    # Nothing usable was produced: explain the expected layout.
    for message in (
        "HAR-UP dataset not found after download attempt.",
        "Please ensure the dataset is organized in the following structure:",
        "DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv",
    ):
        print(message)
    return None
Download HAR-UP dataset if not already present.
Args: data_dir: Directory to store the dataset
Returns: Path to the extracted dataset or None if not found
def load_data(self, data_dir: str, subjects: Optional[List[int]] = None,
              activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
              **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load HAR-UP dataset from the specified directory.

    All trial files of one subject are concatenated into a single DataFrame,
    with ``subject_id``, ``activity_id``, ``trial_id`` and ``activity_label``
    columns added. The returned names are aligned with the returned
    DataFrames: one name (the subject folder, e.g. ``Subject_01``) per
    DataFrame.

    Args:
        data_dir: Directory containing the dataset
        subjects: List of subject IDs to load (default: 1-4)
        activities: List of activity IDs to load (default: 1-11)
        trials: List of trial IDs to load (default: 1-3)
        **kwargs: Additional arguments (unused)

    Returns:
        Tuple of (data_list, names_list); both are empty when the dataset
        directory cannot be found.
    """
    import os

    # NOTE(review): the defaults assume 4 subjects, 11 activities and 3
    # trials - confirm the subject count against the dataset.
    if subjects is None:
        subjects = list(range(1, 5))
    if activities is None:
        activities = list(range(1, 12))
    if trials is None:
        trials = list(range(1, 4))

    # If data_dir does not exist, trigger interactive download
    if not os.path.exists(data_dir):
        print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
        self.download_harup_data(data_dir)
        # If still doesn't exist, error out
        if not os.path.exists(data_dir):
            print(f"Failed to create or download dataset directory: {data_dir}")
            return [], []

    # Find the UP_Fall_Detection_Dataset directory
    dataset_path = None
    for entry in os.listdir(data_dir):
        entry_path = os.path.join(data_dir, entry)
        if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
            dataset_path = entry_path
            break
    if dataset_path is None:
        print("UP_Fall_Detection_Dataset directory not found in", data_dir)
        print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
        print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
        return [], []

    harup_data = []
    harup_names = []

    for subject_id in subjects:
        subject_folder = f"Subject_{subject_id:02d}"
        subject_path = os.path.join(dataset_path, subject_folder)
        if not os.path.isdir(subject_path):
            continue

        # Collect the trial frames and concatenate once per subject instead
        # of growing a DataFrame inside the loop.
        trial_frames = []

        for activity_id in sorted(activities):
            activity_folder = f"A{activity_id:02d}"
            activity_path = os.path.join(subject_path, activity_folder)
            if not os.path.isdir(activity_path):
                continue

            for trial_id in sorted(trials):
                file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
                file_path = os.path.join(activity_path, file_name)

                try:
                    df = pd.read_csv(file_path, header=0)
                except Exception as e:
                    # Best-effort: a missing or broken trial is reported and skipped.
                    print(f"Error loading {file_path}: {e}")
                    continue

                print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
                df['subject_id'] = subject_id
                df['activity_id'] = activity_id
                df['trial_id'] = trial_id
                df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
                trial_frames.append(df)

        if not trial_frames:
            continue

        subject_df = pd.concat(trial_frames, ignore_index=True)
        if not subject_df.empty:
            harup_data.append(subject_df)
            # One name per DataFrame keeps names[i] aligned with data[i];
            # previously one name per trial file was appended.
            harup_names.append(subject_folder)

    self.data = harup_data
    self.names = harup_names

    return harup_data, harup_names
Load HAR-UP dataset from the specified directory. Args: data_dir: Directory containing the dataset subjects: List of subject IDs to load (default: all subjects) activities: List of activity IDs to load (default: all activities) trials: List of trial IDs to load (default: all trials) **kwargs: Additional arguments Returns: Tuple of (data_list, names_list)
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 100, step_size: int = 50) -> List[Dict]:
    """
    Cut each HAR-UP DataFrame into sliding windows, column by column.

    Every numeric column except the bookkeeping columns and 'TIME' is
    windowed. Two extra entries follow per DataFrame: the windowed
    'activity_id' column and 'labels', the most frequent activity ID of each
    window. Empty DataFrames are skipped.

    Args:
        data: List of DataFrames containing HAR-UP data
        names: List of names corresponding to the data
        window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
        step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    excluded = ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
    results = []

    for position, frame in enumerate(data):
        if frame.empty:
            continue

        numeric_sensor_columns = [
            column for column in frame.columns
            if column not in excluded and pd.api.types.is_numeric_dtype(frame[column])
        ]

        entries = []
        seen = set()
        for column in numeric_sensor_columns:
            # Each column name is windowed at most once.
            if column in seen:
                continue
            chunks = sliding_window(frame[column], window_size, step_size)
            entries.append({"name": column, "data": chunks})
            seen.add(column)

        activity_windows = sliding_window(frame["activity_id"], window_size, step_size)
        entries.append({"name": "activity_id", "data": activity_windows})

        # Majority vote over the activity IDs inside each window.
        majority = []
        for chunk in activity_windows:
            values, frequencies = np.unique(chunk, return_counts=True)
            majority.append(values[np.argmax(frequencies)])
        entries.append({"name": "labels", "data": np.array(majority)})

        results.append({"name": names[position], "windows": entries})

    return results
Create sliding windows from the HAR-UP dataset.
Args: data: List of DataFrames containing HAR-UP data names: List of names corresponding to the data window_size: Size of the sliding window (default: 100 = 1 second at 100Hz) step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
Returns: List of dictionaries containing sliding windows for each DataFrame
def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
                     freq_domain_features: bool = True) -> List[Dict]:
    """
    Extract features from sliding windows using HAR-UP feature extraction methods.

    Args:
        windows_data: List of dictionaries containing sliding windows; each has
            a ``"name"`` and a ``"windows"`` list of ``{"name", "data"}`` entries
        time_domain_features: Whether to extract time domain features
        freq_domain_features: Whether to extract frequency domain features

    Returns:
        List of ``{"name", "features"}`` dictionaries, one per input entry that
        has a ``"labels"`` window. Entries without labels are skipped.
    """
    # Mapping from original sensor names to actual CSV column names
    sensor_map = {
        'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
        'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
        'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
        'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
        'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
        'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
        'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
        'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
        'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
        'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
        'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
        'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
        'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
        'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
        'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
        'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
        'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
        'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
        'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
        'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
        'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
        'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
        'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
        'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
        'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
        'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
        'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
        'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
        'BrainSensor': 'HELMET_RAW',
        'Infrared1': 'IR_1',
        'Infrared2': 'IR_2',
        'Infrared3': 'IR_3',
        'Infrared4': 'IR_4',
    }
    label_names = ("activity_id", "labels")

    extractor = HARUPFeatureExtractor(verbose=True)
    extractor.config['time_domain'] = time_domain_features
    extractor.config['frequency_domain'] = freq_domain_features

    all_features = []
    for window_dict in windows_data:
        name = window_dict["name"]
        windows = window_dict["windows"]

        # Index windows by name once; the first occurrence of a name wins
        first_by_name = {}
        for window in windows:
            first_by_name.setdefault(window["name"], window)

        labels_window = first_by_name.get("labels")
        labels = None if labels_window is None else labels_window["data"]
        if labels is None:
            print(f"No labels found for {name}, skipping feature extraction")
            continue

        # Sensor windows in sensor_map order; report mapped columns that are absent
        csv_columns = list(sensor_map.values())
        sensor_windows = [first_by_name[col] for col in csv_columns if col in first_by_name]
        missing = [col for col in csv_columns if col not in first_by_name]
        if missing:
            print(f"[HARUP] Missing columns for {name}: {missing}")

        # The label-carrying windows are passed to the extractor after the sensors
        label_windows = [window for window in windows if window["name"] in label_names]
        filtered_windows = sensor_windows + label_windows

        features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])

        # Count the sensor windows directly instead of assuming that exactly two
        # label windows were appended; this also avoids dividing by zero when no
        # mapped sensor column is present.
        n_sensors = len(sensor_windows)
        if n_sensors == 0:
            print(f"[HARUP] No sensor columns found for {name}; features left unlabelled")
        else:
            # NOTE(review): assumes the extractor emits n_sensors consecutive
            # entries per window, grouped by window -- confirm against
            # HARUPFeatureExtractor.extract_features.
            for i, feature in enumerate(features):
                window_idx = i // n_sensors
                if window_idx < len(labels):
                    feature["label"] = labels[window_idx]

        all_features.append({"name": name, "features": features})
    return all_features
Extract features from sliding windows using HAR-UP feature extraction methods. Args: windows_data: List of dictionaries containing sliding windows time_domain_features: Whether to extract time domain features freq_domain_features: Whether to extract frequency domain features Returns: List of dictionaries containing extracted features
def get_supported_formats(self) -> List[str]:
    """
    Report which file extensions the HAR-UP loader reads.

    Returns:
        List of supported file extensions (CSV only)
    """
    csv_extension = '.csv'
    return [csv_extension]
Get list of supported file formats for HAR-UP dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, List[str]]:
    """
    Summarise the sensor-related entries of the loader metadata.

    Returns:
        Dictionary with the ``'sensors'``, ``'components'`` and
        ``'sampling_frequency'`` values copied from ``self.metadata``
    """
    reported_keys = ('sensors', 'components', 'sampling_frequency')
    return {key: self.metadata[key] for key in reported_keys}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> Dict[int, str]:
    """
    Look up the activity table stored in the loader metadata.

    Returns:
        Dictionary mapping activity IDs to descriptions (the same object that
        is stored under ``self.metadata['activities']``, not a copy)
    """
    activities = self.metadata['activities']
    return activities
Get information about activities in the dataset.
Returns: Dictionary mapping activity IDs to descriptions
Inherited Members
class UrFallLoader(BaseDatasetLoader):
    """
    UrFall dataset loader class.

    This class handles loading and processing of the UrFall dataset for fall detection.
    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
    and pre-extracted features from depth maps.
    """

    def __init__(self):
        """Register the loader name/description and build the dataset metadata."""
        super().__init__(
            name="urfall",
            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data"
        )
        self.metadata = {
            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
            'camera': 'cam0',  # Front camera
            'sampling_frequency': 30,  # Depth/RGB camera fps
            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
            'activities': {
                -1: 'Not lying (standing/walking)',
                0: 'Falling (transient)',
                1: 'Lying on ground'
            },
            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
            'feature_columns': [
                'sequence_name',
                'frame_number',
                'label',
                'HeightWidthRatio',
                'MajorMinorRatio',
                'BoundingBoxOccupancy',
                'MaxStdXZ',
                'HHmaxRatio',
                'H',
                'D',
                'P40'
            ],
            'feature_descriptions': {
                'HeightWidthRatio': 'Bounding box height to width ratio',
                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
                'HHmaxRatio': 'Human height in frame to standing height ratio',
                'H': 'Actual height in mm',
                'D': 'Distance of person center to floor in mm',
                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
            }
        }

    def load_data(self, data_dir: str,
                  data_types: Optional[List[str]] = None,
                  sequences: Optional[List[str]] = None,
                  use_falls: bool = True,
                  use_adls: bool = True,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load UrFall dataset from the specified directory.

        Args:
            data_dir: Directory containing the dataset (created if missing)
            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
                       'synchronization', 'video', 'features' (default: ['features'])
            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
                      If None, loads all based on use_falls and use_adls
            use_falls: Whether to load fall sequences (default: True)
            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
            **kwargs: Additional arguments (accepted but not used here)

        Returns:
            Tuple of (data_list, names_list)

        Raises:
            ValueError: If ``data_types`` contains an unknown data type
        """
        # Default to loading pre-extracted features if not specified
        if data_types is None:
            data_types = ['features']

        # Validate data types
        valid_types = set(self.metadata['data_types'])
        invalid_types = set(data_types) - valid_types
        if invalid_types:
            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)

        data_list = []
        names_list = []

        # Tabular data types, always processed in this fixed order
        tabular_loaders = (
            ('features', self._load_features),
            ('accelerometer', self._load_accelerometer),
            ('synchronization', self._load_synchronization),
        )
        for type_name, loader in tabular_loaders:
            if type_name in data_types:
                loaded, loaded_names = loader(data_dir, sequences, use_falls, use_adls)
                data_list.extend(loaded)
                names_list.extend(loaded_names)

        # Depth, RGB, and Video data are image/video files and are not loaded
        # into DataFrames; only their paths are exposed via get_file_paths().
        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
            print("Note: Depth, RGB, and Video data types contain image/video files.")
            print("These are not loaded into DataFrames but their paths can be accessed.")
            print("Use the get_file_paths() method to retrieve paths to these files.")

        self.data = data_list
        return data_list, names_list

    def _resolve_sequences(self, sequences: Optional[List[str]],
                           use_falls: bool, use_adls: bool) -> List[str]:
        """Return the explicit ``sequences``, or every fall/ADL sequence name enabled by the flags."""
        if sequences is not None:
            return list(sequences)
        resolved = []
        if use_falls:
            resolved.extend(f"fall-{i:02d}" for i in self.metadata['fall_sequences'])
        if use_adls:
            resolved.extend(f"adl-{i:02d}" for i in self.metadata['adl_sequences'])
        return resolved

    def _load_features(self, data_dir: str, sequences: Optional[List[str]],
                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load pre-extracted features from CSV files.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load. When given, only rows of the
                listed sequences are kept, and a features file is skipped
                entirely if none of the listed sequences belongs to it.
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        data_list = []
        names_list = []

        # (enabled, file stem, sequence prefix, activity_type, activity_id, warning label)
        feature_files = (
            (use_falls, "urfall-cam0-falls", "fall-", "fall", 1, "Falls"),
            (use_adls, "urfall-cam0-adls", "adl-", "adl", 0, "ADLs"),
        )

        for enabled, stem, prefix, activity_type, activity_id, label in feature_files:
            if not enabled:
                continue

            wanted = None
            if sequences is not None:
                wanted = [s for s in sequences if s.startswith(prefix)]
                if not wanted:
                    # An explicit selection with nothing from this file must not
                    # fall back to loading the whole file.
                    continue

            csv_path = os.path.join(data_dir, f"{stem}.csv")
            if not os.path.exists(csv_path):
                print(f"Warning: {label} features file not found at {csv_path}")
                continue

            df = pd.read_csv(csv_path, header=None, names=self.metadata['feature_columns'])
            if wanted is not None:
                # copy() so the column assignments below do not write to a slice
                df = df[df['sequence_name'].isin(wanted)].copy()

            # Add metadata columns
            df['activity_type'] = activity_type
            df['activity_id'] = activity_id

            data_list.append(df)
            names_list.append(stem)

        return data_list, names_list

    def _load_sequence_csvs(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool,
                            file_suffix: str, kind: str) -> Tuple[List[pd.DataFrame], List[str]]:
        """Load one ``<sequence><file_suffix>`` CSV per sequence and tag it with activity metadata."""
        data_list = []
        names_list = []

        for seq in self._resolve_sequences(sequences, use_falls, use_adls):
            csv_path = os.path.join(data_dir, f"{seq}{file_suffix}")
            if not os.path.exists(csv_path):
                continue
            try:
                df = pd.read_csv(csv_path)
            except Exception as e:
                # Best effort: an unreadable file is reported and skipped
                print(f"Warning: Could not load {kind} data from {csv_path}: {e}")
                continue

            is_fall = seq.startswith('fall-')
            df['sequence_name'] = seq
            df['activity_type'] = 'fall' if is_fall else 'adl'
            df['activity_id'] = 1 if is_fall else 0
            data_list.append(df)
            names_list.append(f"{seq}-{kind}")

        return data_list, names_list

    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load accelerometer CSV data files (``<sequence>-acc.csv``).

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        return self._load_sequence_csvs(data_dir, sequences, use_falls, use_adls,
                                        "-acc.csv", "accelerometer")

    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load synchronization CSV data files (``<sequence>-data.csv``).

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        return self._load_sequence_csvs(data_dir, sequences, use_falls, use_adls,
                                        "-data.csv", "synchronization")

    def get_file_paths(self, data_dir: str, data_type: str,
                       sequences: Optional[List[str]] = None,
                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
        """
        Get file paths for image/video data types (depth, RGB, video).

        Args:
            data_dir: Directory containing the dataset
            data_type: Type of data ('depth', 'rgb', 'video')
            sequences: Specific sequences to get paths for
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Dictionary mapping sequence names to file paths (existing files only)

        Raises:
            ValueError: If ``data_type`` is not 'depth', 'rgb' or 'video'
        """
        # Map data type to file name suffix
        extension_map = {
            'depth': '-cam0-d.zip',
            'rgb': '-cam0-rgb.zip',
            'video': '-cam0.mp4'
        }
        if data_type not in extension_map:
            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

        ext = extension_map[data_type]
        file_paths = {}
        for seq in self._resolve_sequences(sequences, use_falls, use_adls):
            file_path = os.path.join(data_dir, f"{seq}{ext}")
            if os.path.exists(file_path):
                file_paths[seq] = file_path

        return file_paths

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
        """
        Create sliding windows from the loaded data.

        Args:
            data: List of DataFrames containing the dataset
            names: List of names corresponding to each DataFrame
            window_size: Size of the sliding window (default: 30 frames for depth features)
            step_size: Step size for sliding window (default: 15 frames)

        Returns:
            List of dictionaries containing windowed data; DataFrames that are
            empty or have no numeric feature column are skipped
        """
        windows_data = []
        # Metadata columns are never windowed as features
        exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']

        for idx, df in enumerate(data):
            if df.empty:
                continue

            feature_cols = [col for col in df.columns
                            if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
            if not feature_cols:
                continue

            # Create windows for each feature column
            windows = [
                {"name": col, "data": sliding_window(df[col].values, window_size, step_size)}
                for col in feature_cols
            ]

            # Create windows for labels if present (majority voting per window)
            if 'label' in df.columns:
                label_windows = sliding_window(df['label'].values, window_size, step_size)
                labels = []
                for w in label_windows:
                    vals, counts = np.unique(w, return_counts=True)
                    labels.append(vals[np.argmax(counts)])
                windows.append({"name": "labels", "data": np.array(labels)})

            # Create activity_id windows
            if 'activity_id' in df.columns:
                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
                windows.append({"name": "activity_id", "data": activity_windows})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for UrFall dataset.

        Returns:
            List of supported file extensions
        """
        return ['.csv', '.zip', '.mp4']

    def get_sensor_info(self) -> Dict[str, object]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary containing sensor information (data types, camera and
            the two sampling frequencies from the metadata)
        """
        return {
            'data_types': self.metadata['data_types'],
            'camera': self.metadata['camera'],
            'sampling_frequency': self.metadata['sampling_frequency'],
            'accelerometer_frequency': self.metadata['accelerometer_frequency']
        }

    def get_activity_info(self) -> Dict[int, str]:
        """
        Get information about activities in the dataset.

        Returns:
            Dictionary mapping activity IDs to labels
        """
        return self.metadata['activities']

    def get_feature_info(self) -> Dict[str, str]:
        """
        Get information about pre-extracted features.

        Returns:
            Dictionary mapping feature names to descriptions
        """
        return self.metadata['feature_descriptions']
UrFall dataset loader class.
This class handles loading and processing of the UrFall dataset for fall detection. Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video, and pre-extracted features from depth maps.
def __init__(self):
    """Register the loader as "urfall" with the base class and build its metadata table."""
    super().__init__(
        description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
        name="urfall",
    )

    # Descriptions of the pre-extracted depth features, in CSV column order
    feature_descriptions = {
        'HeightWidthRatio': 'Bounding box height to width ratio',
        'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
        'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
        'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
        'HHmaxRatio': 'Human height in frame to standing height ratio',
        'H': 'Actual height in mm',
        'D': 'Distance of person center to floor in mm',
        'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid',
    }
    # The feature CSV columns are three bookkeeping columns followed by the features
    bookkeeping_columns = ['sequence_name', 'frame_number', 'label']

    metadata = {}
    metadata['data_types'] = ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features']
    metadata['camera'] = 'cam0'  # Front camera
    metadata['sampling_frequency'] = 30  # Depth/RGB camera fps
    metadata['accelerometer_frequency'] = 100  # Accelerometer sampling frequency (typical)
    metadata['activities'] = {
        -1: 'Not lying (standing/walking)',
        0: 'Falling (transient)',
        1: 'Lying on ground',
    }
    metadata['fall_sequences'] = list(range(1, 31))  # fall-01 to fall-30
    metadata['adl_sequences'] = list(range(1, 21))  # adl-01 to adl-20
    metadata['feature_columns'] = bookkeeping_columns + list(feature_descriptions)
    metadata['feature_descriptions'] = feature_descriptions
    self.metadata = metadata
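Initialize the UrFall dataset loader.
Args: none. The constructor takes no arguments; it forwards the fixed name "urfall" and a fixed description to the base loader (name: Name of the dataset; description: Description of the dataset).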
def load_data(self, data_dir: str,
              data_types: Optional[List[str]] = None,
              sequences: Optional[List[str]] = None,
              use_falls: bool = True,
              use_adls: bool = True,
              **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Read the requested UrFall data types from ``data_dir`` into DataFrames.

    Args:
        data_dir: Directory containing the dataset (created if it does not exist)
        data_types: Data types to load. Options: 'depth', 'rgb', 'accelerometer',
                   'synchronization', 'video', 'features' (default: ['features'])
        sequences: Specific sequences to load (e.g., ['fall-01', 'adl-01']);
                  passed through to the per-type loaders
        use_falls: Whether to load fall sequences (default: True)
        use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
        **kwargs: Additional arguments (accepted, not used in this method)

    Returns:
        Tuple of (data_list, names_list); ``data_list`` is also stored on ``self.data``

    Raises:
        ValueError: If an entry of ``data_types`` is not a known data type
    """
    # Pre-extracted features are the default selection
    requested = ['features'] if data_types is None else data_types

    # Reject unknown data types before touching the file system
    valid_types = set(self.metadata['data_types'])
    invalid_types = set(requested).difference(valid_types)
    if invalid_types:
        raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

    os.makedirs(data_dir, exist_ok=True)

    frames = []
    frame_names = []

    # Tabular types are loaded in this fixed order, whatever order was requested.
    # Loader methods are looked up by name only when their type is requested.
    tabular_loaders = (
        ('features', '_load_features'),
        ('accelerometer', '_load_accelerometer'),
        ('synchronization', '_load_synchronization'),
    )
    for type_name, method_name in tabular_loaders:
        if type_name not in requested:
            continue
        loaded, loaded_names = getattr(self, method_name)(data_dir, sequences, use_falls, use_adls)
        frames.extend(loaded)
        frame_names.extend(loaded_names)

    # Image/video types are never loaded into DataFrames; just tell the user
    if any(media in requested for media in ('depth', 'rgb', 'video')):
        print("Note: Depth, RGB, and Video data types contain image/video files.")
        print("These are not loaded into DataFrames but their paths can be accessed.")
        print("Use the get_file_paths() method to retrieve paths to these files.")

    self.data = frames
    return frames, frame_names
Load UrFall dataset from the specified directory.
Args: data_dir: Directory containing the dataset data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']) sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']) If None, loads all based on use_falls and use_adls use_falls: Whether to load fall sequences (default: True) use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True) **kwargs: Additional arguments
Returns: Tuple of (data_list, names_list)
def get_file_paths(self, data_dir: str, data_type: str,
                   sequences: Optional[List[str]] = None,
                   use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
    """
    Locate the image/video archives of the requested sequences on disk.

    Args:
        data_dir: Directory containing the dataset
        data_type: Type of data ('depth', 'rgb', 'video')
        sequences: Specific sequences to get paths for; when given, the
            ``use_falls`` / ``use_adls`` flags are not consulted
        use_falls: Whether to include fall sequences (fall-01 .. fall-30)
        use_adls: Whether to include ADL sequences (adl-01 .. adl-20)

    Returns:
        Dictionary mapping sequence names to file paths; sequences whose file
        does not exist are left out

    Raises:
        ValueError: If ``data_type`` is not 'depth', 'rgb' or 'video'
    """
    # File name suffix used by each data type
    suffix_by_type = {
        'depth': '-cam0-d.zip',
        'rgb': '-cam0-rgb.zip',
        'video': '-cam0.mp4',
    }
    if data_type not in suffix_by_type:
        raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
    suffix = suffix_by_type[data_type]

    # Explicit sequences win; otherwise enumerate the enabled groups
    if sequences is not None:
        candidates = sequences
    else:
        candidates = []
        if use_falls:
            candidates += [f"fall-{number:02d}" for number in range(1, 31)]
        if use_adls:
            candidates += [f"adl-{number:02d}" for number in range(1, 21)]

    located = {}
    for sequence in candidates:
        candidate_path = os.path.join(data_dir, f"{sequence}{suffix}")
        if not os.path.exists(candidate_path):
            continue
        located[sequence] = candidate_path
    return located
Get file paths for image/video data types (depth, RGB, video).
Args: data_dir: Directory containing the dataset data_type: Type of data ('depth', 'rgb', 'video') sequences: Specific sequences to get paths for use_falls: Whether to include fall sequences use_adls: Whether to include ADL sequences
Returns: Dictionary mapping sequence names to file paths
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 30, step_size: int = 15) -> List[Dict]:
    """
    Window every numeric feature column of the loaded UrFall DataFrames.

    Args:
        data: List of DataFrames containing the dataset
        names: List of names, parallel to ``data``
        window_size: Size of the sliding window (default: 30 frames for depth features)
        step_size: Step size for sliding window (default: 15 frames)

    Returns:
        One ``{"name", "windows"}`` dictionary per DataFrame that is non-empty
        and has at least one numeric feature column. ``"windows"`` lists the
        feature windows first, then a majority-voted ``"labels"`` array (if a
        ``label`` column exists) and the ``"activity_id"`` windows (if present).
    """
    metadata_columns = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
    results = []

    for position, frame in enumerate(data):
        if frame.empty:
            continue

        # Numeric columns that are not bookkeeping/metadata
        feature_names = []
        for column in frame.columns:
            if column in metadata_columns:
                continue
            if pd.api.types.is_numeric_dtype(frame[column]):
                feature_names.append(column)
        if len(feature_names) == 0:
            continue

        entries = []
        for column in feature_names:
            windowed = sliding_window(frame[column].values, window_size, step_size)
            entries.append({"name": column, "data": windowed})

        if 'label' in frame.columns:
            # One label per window: the most frequent value inside it
            voted = []
            for chunk in sliding_window(frame['label'].values, window_size, step_size):
                values, frequencies = np.unique(chunk, return_counts=True)
                voted.append(values[np.argmax(frequencies)])
            entries.append({"name": "labels", "data": np.array(voted)})

        if 'activity_id' in frame.columns:
            windowed_ids = sliding_window(frame['activity_id'].values, window_size, step_size)
            entries.append({"name": "activity_id", "data": windowed_ids})

        results.append({"name": names[position], "windows": entries})

    return results
Create sliding windows from the loaded data.
Args: data: List of DataFrames containing the dataset names: List of names corresponding to each DataFrame window_size: Size of the sliding window (default: 30 frames for depth features) step_size: Step size for sliding window (default: 15 frames)
Returns: List of dictionaries containing windowed data
def get_supported_formats(self) -> List[str]:
    """
    Report which file extensions the UrFall loader deals with.

    Returns:
        List of supported file extensions
    """
    extensions = ('.csv', '.zip', '.mp4')
    return list(extensions)
Get list of supported file formats for UrFall dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, object]:
    """
    Get information about sensors in the dataset.

    The values are of mixed types, so the return annotation uses ``object``.
    The previous ``any`` named the builtin function rather than a type.

    Returns:
        Dictionary with the ``'data_types'``, ``'camera'``,
        ``'sampling_frequency'`` and ``'accelerometer_frequency'`` entries of
        ``self.metadata``
    """
    return {
        'data_types': self.metadata['data_types'],
        'camera': self.metadata['camera'],
        'sampling_frequency': self.metadata['sampling_frequency'],
        'accelerometer_frequency': self.metadata['accelerometer_frequency']
    }
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> Dict[int, str]:
    """
    Look up the activity table stored in the loader metadata.

    Returns:
        Dictionary mapping activity IDs to labels (the object stored under
        ``self.metadata['activities']``, not a copy)
    """
    activities = self.metadata['activities']
    return activities
Get information about activities in the dataset.
Returns: Dictionary mapping activity IDs to labels
def get_feature_info(self) -> Dict[str, str]:
    """
    Look up the descriptions of the pre-extracted features.

    Returns:
        Dictionary mapping feature names to descriptions (the object stored
        under ``self.metadata['feature_descriptions']``, not a copy)
    """
    descriptions = self.metadata['feature_descriptions']
    return descriptions
Get information about pre-extracted features.
Returns: Dictionary mapping feature names to descriptions
Inherited Members
def load_daphnet_data(data_dir: str):
    """
    Legacy entry point: load Daphnet data through a fresh DaphnetLoader.

    Args:
        data_dir: Directory to store the dataset

    Returns:
        Tuple of (data_list, names_list)
    """
    return DaphnetLoader().load_data(data_dir)
Legacy function for loading Daphnet data.
Args: data_dir: Directory to store the dataset
Returns: Tuple of (data_list, names_list)
def create_sliding_windows(daphnet, daphnet_names, window_size=192, step_size=32):
    """
    Legacy entry point: window Daphnet data through a fresh DaphnetLoader.

    Args:
        daphnet: List of dataframes containing Daphnet data
        daphnet_names: List of names of the Daphnet dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame
    """
    return DaphnetLoader().create_sliding_windows(daphnet, daphnet_names, window_size, step_size)
Legacy function for creating sliding windows.
Args: daphnet: List of dataframes containing Daphnet data daphnet_names: List of names of the Daphnet dataframes window_size: Size of the sliding window step_size: Step size for the sliding window
Returns: List of dictionaries containing sliding windows for each DataFrame
def load_mobifall_data():
    """
    Legacy entry point: load MobiFall data through a fresh MobiFallLoader.

    The loader is called with an empty string as its data directory.

    Returns:
        Tuple of (data_list, names_list)
    """
    return MobiFallLoader().load_data("")
Legacy function for loading MobiFall data.
Returns: Tuple of (data_list, names_list)
def load_arduous_data():
    """
    Legacy entry point: load Arduous data through a fresh ArduousLoader.

    The loader is called with an empty string as its data directory.

    Returns:
        Tuple of (data_list, names_list)
    """
    return ArduousLoader().load_data("")
Legacy function for loading Arduous data.
Returns: Tuple of (data_list, names_list)
def load_physionet_data(data_dir: str) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Legacy entry point: load PhysioNet data through a fresh PhysioNetLoader.

    Args:
        data_dir: Directory containing the dataset

    Returns:
        Tuple of (data_list, names_list)
    """
    return PhysioNetLoader().load_data(data_dir)
Legacy function to load PhysioNet data.
Args: data_dir: Directory containing the dataset
Returns: Tuple of (data_list, names_list)
def create_physionet_windows(data: List[pd.DataFrame], names: List[str],
                             window_size: int = 600, step_size: int = 100) -> List[Dict]:
    """
    Legacy entry point: window PhysioNet data through a fresh PhysioNetLoader.

    Args:
        data: List of DataFrames
        names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of sliding window dictionaries
    """
    return PhysioNetLoader().create_sliding_windows(data, names, window_size, step_size)
Legacy function to create sliding windows from PhysioNet data.
Args: data: List of DataFrames names: List of names window_size: Size of sliding window step_size: Step size for sliding window
Returns: List of sliding window dictionaries
def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
    """
    Legacy wrapper: load HAR-UP data.

    Forwards all arguments positionally to ``HARUPLoader.load_data`` on a
    newly created loader. ``None`` filters are passed through unchanged;
    the upstream docs describe ``None`` as "load all".

    Args:
        data_dir: Directory containing the dataset
        subjects: List of subject IDs to load, or None
        activities: List of activity IDs to load, or None
        trials: List of trial IDs to load, or None

    Returns:
        Result of ``HARUPLoader.load_data``
        (documented upstream as a tuple of (data_list, names_list))
    """
    return HARUPLoader().load_data(
        data_dir,
        subjects,
        activities,
        trials,
    )
Legacy function for loading HAR-UP data.
Args: data_dir: Directory containing the dataset subjects: List of subject IDs to load (default: all subjects) activities: List of activity IDs to load (default: all activities) trials: List of trial IDs to load (default: all trials)
Returns: Tuple of (data_list, names_list)
def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
    """
    Legacy wrapper: build sliding windows from HAR-UP data.

    Forwards all arguments positionally to
    ``HARUPLoader.create_sliding_windows`` on a newly created loader.

    Args:
        harup_data: List of dataframes containing HAR-UP data
        harup_names: List of names matching the entries of ``harup_data``
        window_size: Size of the sliding window (default 100)
        step_size: Step size between consecutive windows (default 50)

    Returns:
        Whatever ``HARUPLoader.create_sliding_windows`` returns
        (documented upstream as a list of dictionaries, one per DataFrame)
    """
    harup_loader = HARUPLoader()
    windows = harup_loader.create_sliding_windows(
        harup_data, harup_names, window_size, step_size
    )
    return windows
Legacy function for creating sliding windows from HAR-UP data.
Args: harup_data: List of dataframes containing HAR-UP data harup_names: List of names of the HAR-UP dataframes window_size: Size of the sliding window step_size: Step size for the sliding window
Returns: List of dictionaries containing sliding windows for each DataFrame
def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
    """
    Legacy wrapper: extract features from HAR-UP windows.

    Forwards all arguments positionally to ``HARUPLoader.extract_features``
    on a newly created loader.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain: Whether to extract time domain features
        freq_domain: Whether to extract frequency domain features

    Returns:
        Whatever ``HARUPLoader.extract_features`` returns
        (documented upstream as a list of feature dictionaries)
    """
    return HARUPLoader().extract_features(
        windows_data,
        time_domain,
        freq_domain,
    )
Legacy function for extracting features from HAR-UP windows.
Args: windows_data: List of dictionaries containing sliding windows time_domain: Whether to extract time domain features freq_domain: Whether to extract frequency domain features
Returns: List of dictionaries containing extracted features
def load_urfall_data(
    data_dir: str,
    data_types: Optional[List[str]] = None,
    sequences: Optional[List[str]] = None,
    use_falls: bool = True,
    use_adls: bool = True,
):
    """
    Legacy wrapper: load the UrFall dataset.

    Creates a ``UrFallLoader`` and calls ``load_data`` with ``data_dir``
    positionally and every other option as a keyword argument.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load, or None
        sequences: List of specific sequences to load, or None
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Result of ``UrFallLoader.load_data``
        (documented upstream as a tuple of (data_list, names_list))
    """
    urfall_loader = UrFallLoader()
    loaded = urfall_loader.load_data(
        data_dir,
        data_types=data_types,
        sequences=sequences,
        use_falls=use_falls,
        use_adls=use_adls,
    )
    return loaded
Load UrFall dataset using the legacy function interface.
Args: data_dir: Directory containing the dataset data_types: List of data types to load sequences: List of specific sequences to load use_falls: Whether to load fall sequences use_adls: Whether to load ADL sequences
Returns: Tuple of (data_list, names_list)
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Legacy wrapper: build sliding windows from UrFall data.

    Forwards all arguments positionally to
    ``UrFallLoader.create_sliding_windows`` on a newly created loader.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names matching the entries of ``urfall_data``
        window_size: Size of the sliding window (default 30)
        step_size: Step size between consecutive windows (default 15)

    Returns:
        Whatever ``UrFallLoader.create_sliding_windows`` returns
        (documented upstream as a list of dictionaries of windowed data)
    """
    return UrFallLoader().create_sliding_windows(
        urfall_data,
        urfall_names,
        window_size,
        step_size,
    )
Create sliding windows from UrFall data using the legacy function interface.
Args: urfall_data: List of DataFrames urfall_names: List of names window_size: Size of sliding window step_size: Step size for sliding window
Returns: List of dictionaries containing windowed data
def download_dataset(dataset_name, data_dir):
    """
    Download the named dataset into ``data_dir``.

    Dispatches on ``dataset_name`` to the matching ``download_*_data``
    helper. ``"physionet"`` is accepted but does nothing here, because
    that dataset is handled by the PhysioNetLoader itself.

    Args:
        dataset_name: One of "daphnet", "mobifall", "arduous", "harup",
            "urfall" or "physionet"
        data_dir: Target directory handed to the download helper

    Returns:
        None

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names
    """
    if dataset_name == "daphnet":
        download_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        download_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        download_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        download_harup_data(data_dir)
        return
    if dataset_name == "urfall":
        download_urfall_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")
Download the dataset.
def extract_dataset(dataset_name, data_dir):
    """
    Extract the named dataset inside ``data_dir``.

    Dispatches on ``dataset_name`` to the matching ``extract_*_data``
    helper. ``"physionet"`` is accepted but does nothing here, because
    that dataset is handled by the PhysioNetLoader itself.

    Args:
        dataset_name: One of "daphnet", "mobifall", "arduous", "harup",
            "urfall" or "physionet"
        data_dir: Directory handed to the extraction helper

    Returns:
        None

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names
    """
    if dataset_name == "daphnet":
        extract_daphnet_data(data_dir)
        return
    if dataset_name == "mobifall":
        extract_mobifall_data(data_dir)
        return
    if dataset_name == "arduous":
        extract_arduous_data(data_dir)
        return
    if dataset_name == "harup":
        extract_harup_data(data_dir)
        return
    if dataset_name == "urfall":
        extract_urfall_data(data_dir)
        return
    if dataset_name == "physionet":
        # PhysioNet dataset is handled by the PhysioNetLoader itself
        return
    raise ValueError(f"Dataset {dataset_name} not supported.")
Extract the dataset.
def get_dataset_manager():
    """
    Return a ``DatasetManager`` obtained by calling its constructor.

    NOTE(review): the original docs describe DatasetManager as a singleton,
    so repeated calls are expected to yield the same instance — confirm
    against ``core.managers``.
    """
    manager = DatasetManager()
    return manager
Get the singleton DatasetManager instance.
def get_available_datasets():
    """
    Return the available dataset names.

    Delegates to ``DatasetManager().get_available_components()``.
    """
    manager = DatasetManager()
    return manager.get_available_components()
Get list of available dataset names.
def load_dataset(name: str, data_dir: str, **kwargs):
    """
    Load a dataset through the DatasetManager.

    Args:
        name: Name of the dataset loader
        data_dir: Directory containing the dataset
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``DatasetManager.load_dataset``

    Returns:
        Whatever ``DatasetManager.load_dataset`` returns (documented
        upstream as the dataset loader instance with loaded data)
    """
    manager = DatasetManager()
    return manager.load_dataset(name, data_dir, **kwargs)
Load a dataset using the DatasetManager.
Args: name: Name of the dataset loader data_dir: Directory containing the dataset **kwargs: Additional arguments for the loader
Returns: Dataset loader instance with loaded data