gaitsetpy.dataset.urfall
UrFall Dataset Loader and Utils. Maintainer: @aharshit123456
This file contains the UrFall dataset loader class that inherits from BaseDatasetLoader. UrFall is a fall detection dataset with multimodal data including depth, RGB, accelerometer, and pre-extracted features from depth maps.
Reference:
- Website: https://fenix.ur.edu.pl/~mkepski/ds/uf.html
- Dataset: University of Rzeszow Fall Detection Dataset
'''
UrFall Dataset Loader and Utils.
Maintainer: @aharshit123456

This file contains the UrFall dataset loader class that inherits from BaseDatasetLoader.
UrFall is a fall detection dataset with multimodal data including depth, RGB, accelerometer,
and pre-extracted features from depth maps.

Reference:
- Website: https://fenix.ur.edu.pl/~mkepski/ds/uf.html
- Dataset: University of Rzeszow Fall Detection Dataset
'''

import os
import pandas as pd
import numpy as np
from typing import List, Dict, Tuple, Optional, Set
from glob import glob
from ..core.base_classes import BaseDatasetLoader
from .utils import download_dataset, extract_dataset, sliding_window


class UrFallLoader(BaseDatasetLoader):
    """
    UrFall dataset loader class.

    This class handles loading and processing of the UrFall dataset for fall detection.
    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
    and pre-extracted features from depth maps.
    """

    def __init__(self):
        """Register the loader under the name "urfall" and populate ``self.metadata``."""
        super().__init__(
            name="urfall",
            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data"
        )
        self.metadata = {
            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
            'camera': 'cam0',  # Front camera
            'sampling_frequency': 30,  # Depth/RGB camera fps
            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
            # NOTE(review): these keys look like the values of the 'label' feature
            # column, not the 'activity_id' column added by the loaders - confirm.
            'activities': {
                -1: 'Not lying (standing/walking)',
                0: 'Falling (transient)',
                1: 'Lying on ground'
            },
            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
            'feature_columns': [
                'sequence_name',
                'frame_number',
                'label',
                'HeightWidthRatio',
                'MajorMinorRatio',
                'BoundingBoxOccupancy',
                'MaxStdXZ',
                'HHmaxRatio',
                'H',
                'D',
                'P40'
            ],
            'feature_descriptions': {
                'HeightWidthRatio': 'Bounding box height to width ratio',
                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
                'HHmaxRatio': 'Human height in frame to standing height ratio',
                'H': 'Actual height in mm',
                'D': 'Distance of person center to floor in mm',
                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
            }
        }

    def load_data(self, data_dir: str,
                  data_types: Optional[List[str]] = None,
                  sequences: Optional[List[str]] = None,
                  use_falls: bool = True,
                  use_adls: bool = True,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load UrFall dataset from the specified directory.

        Tabular data types are always loaded in the fixed order features,
        accelerometer, synchronization, regardless of the order in ``data_types``.
        The loaded frames are also stored on ``self.data``.

        Args:
            data_dir: Directory containing the dataset (created if missing)
            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
                       'synchronization', 'video', 'features' (default: ['features']).
                       A single type may also be given as a plain string.
            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
                      If None, loads all based on use_falls and use_adls
            use_falls: Whether to load fall sequences (default: True)
            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
            **kwargs: Additional arguments (accepted but not used here)

        Returns:
            Tuple of (data_list, names_list)

        Raises:
            ValueError: If ``data_types`` contains an unknown data type
        """
        # Default to loading pre-extracted features if not specified
        if data_types is None:
            data_types = ['features']
        elif isinstance(data_types, str):
            # set('features') would split the string into characters and
            # reject every one of them; treat a bare string as one type.
            data_types = [data_types]

        # Validate data types
        valid_types = set(self.metadata['data_types'])
        requested_types = set(data_types)
        invalid_types = requested_types - valid_types
        if invalid_types:
            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)

        data_list = []
        names_list = []

        tabular_loaders = (
            ('features', self._load_features),
            ('accelerometer', self._load_accelerometer),
            ('synchronization', self._load_synchronization),
        )
        for type_name, loader in tabular_loaders:
            if type_name in requested_types:
                frames, frame_names = loader(data_dir, sequences, use_falls, use_adls)
                data_list.extend(frames)
                names_list.extend(frame_names)

        # Depth, RGB, and Video data are image/video files; they are not
        # loaded into DataFrames, only pointed at via get_file_paths().
        if requested_types & {'depth', 'rgb', 'video'}:
            print("Note: Depth, RGB, and Video data types contain image/video files.")
            print("These are not loaded into DataFrames but their paths can be accessed.")
            print("Use the get_file_paths() method to retrieve paths to these files.")

        self.data = data_list
        return data_list, names_list

    def _resolve_sequences(self, sequences: Optional[List[str]],
                           use_falls: bool, use_adls: bool) -> List[str]:
        """Return the explicit ``sequences`` if given, else all names enabled by the flags."""
        if sequences is not None:
            # Explicit selection wins; the flags only apply to the default set.
            return list(sequences)

        resolved = []
        if use_falls:
            resolved.extend(f"fall-{i:02d}" for i in self.metadata['fall_sequences'])
        if use_adls:
            resolved.extend(f"adl-{i:02d}" for i in self.metadata['adl_sequences'])
        return resolved

    def _load_feature_csv(self, data_dir: str, file_stem: str, seq_prefix: str,
                          label: str, activity_type: str, activity_id: int,
                          sequences: Optional[List[str]]) -> Optional[pd.DataFrame]:
        """Read one features CSV, filter it by ``sequences`` and tag it; None if skipped."""
        wanted = None
        if sequences is not None:
            wanted = [s for s in sequences if s.startswith(seq_prefix)]
            if not wanted:
                # Specific sequences were requested and none belong to this
                # group: loading the whole file would return unrequested data.
                return None

        csv_path = os.path.join(data_dir, f"{file_stem}.csv")
        if not os.path.exists(csv_path):
            print(f"Warning: {label} features file not found at {csv_path}")
            return None

        df = pd.read_csv(csv_path, header=None, names=self.metadata['feature_columns'])
        if wanted is not None:
            # .copy() so the column assignments below do not write into a slice.
            df = df[df['sequence_name'].isin(wanted)].copy()

        df['activity_type'] = activity_type
        df['activity_id'] = activity_id
        return df

    def _load_features(self, data_dir: str, sequences: Optional[List[str]],
                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load pre-extracted features from CSV files.

        Reads "urfall-cam0-falls.csv" and/or "urfall-cam0-adls.csv" from
        ``data_dir``. Each frame gets 'activity_type' ('fall'/'adl') and
        'activity_id' (1/0) columns. When ``sequences`` is given, each file is
        restricted to the requested sequences of its group, and a group with no
        requested sequence is skipped.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        groups = []
        if use_falls:
            groups.append(("urfall-cam0-falls", "fall-", "Falls", "fall", 1))
        if use_adls:
            groups.append(("urfall-cam0-adls", "adl-", "ADLs", "adl", 0))

        data_list = []
        names_list = []
        for file_stem, seq_prefix, label, activity_type, activity_id in groups:
            df = self._load_feature_csv(data_dir, file_stem, seq_prefix, label,
                                        activity_type, activity_id, sequences)
            if df is not None:
                data_list.append(df)
                names_list.append(file_stem)

        return data_list, names_list

    def _load_sequence_csvs(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool,
                            suffix: str, kind: str) -> Tuple[List[pd.DataFrame], List[str]]:
        """Load "<sequence><suffix>" CSVs for every resolved sequence; missing files are skipped."""
        data_list = []
        names_list = []

        for seq in self._resolve_sequences(sequences, use_falls, use_adls):
            csv_path = os.path.join(data_dir, f"{seq}{suffix}")
            if not os.path.exists(csv_path):
                continue
            try:
                # NOTE(review): read_csv treats the first row as a header here;
                # confirm the per-sequence files actually carry one.
                df = pd.read_csv(csv_path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Warning: Could not load {kind} data from {csv_path}: {e}")
                continue

            is_fall = seq.startswith('fall-')
            df['sequence_name'] = seq
            df['activity_type'] = 'fall' if is_fall else 'adl'
            df['activity_id'] = 1 if is_fall else 0
            data_list.append(df)
            names_list.append(f"{seq}-{kind}")

        return data_list, names_list

    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load accelerometer CSV data files ("<sequence>-acc.csv").

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        return self._load_sequence_csvs(data_dir, sequences, use_falls, use_adls,
                                        "-acc.csv", "accelerometer")

    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load synchronization CSV data files ("<sequence>-data.csv").

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        return self._load_sequence_csvs(data_dir, sequences, use_falls, use_adls,
                                        "-data.csv", "synchronization")

    def get_file_paths(self, data_dir: str, data_type: str,
                       sequences: Optional[List[str]] = None,
                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
        """
        Get file paths for image/video data types (depth, RGB, video).

        Only files that exist on disk are included in the result.

        Args:
            data_dir: Directory containing the dataset
            data_type: Type of data ('depth', 'rgb', 'video')
            sequences: Specific sequences to get paths for
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Dictionary mapping sequence names to file paths

        Raises:
            ValueError: If ``data_type`` is not 'depth', 'rgb' or 'video'
        """
        # Map data type to the file-name suffix appended to the sequence name
        extension_map = {
            'depth': '-cam0-d.zip',
            'rgb': '-cam0-rgb.zip',
            'video': '-cam0.mp4'
        }
        if data_type not in extension_map:
            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

        ext = extension_map[data_type]
        file_paths = {}
        for seq in self._resolve_sequences(sequences, use_falls, use_adls):
            file_path = os.path.join(data_dir, f"{seq}{ext}")
            if os.path.exists(file_path):
                file_paths[seq] = file_path

        return file_paths

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
        """
        Create sliding windows from the loaded data.

        Empty frames and frames without numeric feature columns are skipped.
        Each result entry is ``{"name": ..., "windows": [...]}`` where "windows"
        holds one ``{"name", "data"}`` dict per feature column, plus "labels"
        (majority label per window) and "activity_id" windows when those
        columns are present.

        Args:
            data: List of DataFrames containing the dataset
            names: List of names corresponding to each DataFrame
            window_size: Size of the sliding window (default: 30 frames for depth features)
            step_size: Step size for sliding window (default: 15 frames)

        Returns:
            List of dictionaries containing windowed data
        """
        # Metadata columns are never windowed as features.
        exclude_cols = {'sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id'}
        windows_data = []

        for idx, df in enumerate(data):
            if df.empty:
                continue

            feature_cols = [col for col in df.columns
                            if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
            if not feature_cols:
                continue

            windows = [
                {"name": col, "data": sliding_window(df[col].values, window_size, step_size)}
                for col in feature_cols
            ]

            if 'label' in df.columns:
                label_windows = sliding_window(df['label'].values, window_size, step_size)
                # Majority voting: the most frequent label represents the window
                labels = []
                for w in label_windows:
                    vals, counts = np.unique(w, return_counts=True)
                    labels.append(vals[np.argmax(counts)])
                windows.append({"name": "labels", "data": np.array(labels)})

            if 'activity_id' in df.columns:
                # Kept as raw windows (no voting), unlike the labels above.
                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
                windows.append({"name": "activity_id", "data": activity_windows})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for UrFall dataset.

        Returns:
            List of supported file extensions
        """
        return ['.csv', '.zip', '.mp4']

    def get_sensor_info(self) -> Dict[str, object]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary containing sensor information
        """
        keys = ('data_types', 'camera', 'sampling_frequency', 'accelerometer_frequency')
        return {key: self.metadata[key] for key in keys}

    def get_activity_info(self) -> Dict[int, str]:
        """
        Get information about activities in the dataset.

        Returns:
            Dictionary mapping activity IDs to labels
        """
        return self.metadata['activities']

    def get_feature_info(self) -> Dict[str, str]:
        """
        Get information about pre-extracted features.

        Returns:
            Dictionary mapping feature names to descriptions
        """
        return self.metadata['feature_descriptions']


# Legacy function wrappers for backward compatibility
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Load UrFall dataset using the legacy function interface.

    Creates a fresh UrFallLoader and delegates to its load_data().

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list)
    """
    loader = UrFallLoader()
    return loader.load_data(data_dir, data_types=data_types, sequences=sequences,
                            use_falls=use_falls, use_adls=use_adls)


def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Create sliding windows from UrFall data using the legacy function interface.

    Creates a fresh UrFallLoader and delegates to its create_sliding_windows().

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data
    """
    loader = UrFallLoader()
    return loader.create_sliding_windows(urfall_data, urfall_names, window_size, step_size)
class UrFallLoader(BaseDatasetLoader):
    """
    UrFall dataset loader class.

    This class handles loading and processing of the UrFall dataset for fall detection.
    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
    and pre-extracted features from depth maps.
    """

    def __init__(self):
        """Register the loader under the name "urfall" and populate ``self.metadata``."""
        super().__init__(
            name="urfall",
            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data"
        )
        self.metadata = {
            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
            'camera': 'cam0',  # Front camera
            'sampling_frequency': 30,  # Depth/RGB camera fps
            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
            # NOTE(review): these keys look like the values of the 'label' feature
            # column, not the 'activity_id' column added by the loaders - confirm.
            'activities': {
                -1: 'Not lying (standing/walking)',
                0: 'Falling (transient)',
                1: 'Lying on ground'
            },
            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
            'feature_columns': [
                'sequence_name',
                'frame_number',
                'label',
                'HeightWidthRatio',
                'MajorMinorRatio',
                'BoundingBoxOccupancy',
                'MaxStdXZ',
                'HHmaxRatio',
                'H',
                'D',
                'P40'
            ],
            'feature_descriptions': {
                'HeightWidthRatio': 'Bounding box height to width ratio',
                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
                'HHmaxRatio': 'Human height in frame to standing height ratio',
                'H': 'Actual height in mm',
                'D': 'Distance of person center to floor in mm',
                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
            }
        }

    def load_data(self, data_dir: str,
                  data_types: Optional[List[str]] = None,
                  sequences: Optional[List[str]] = None,
                  use_falls: bool = True,
                  use_adls: bool = True,
                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load UrFall dataset from the specified directory.

        Tabular data types are always loaded in the fixed order features,
        accelerometer, synchronization, regardless of the order in ``data_types``.
        The loaded frames are also stored on ``self.data``.

        Args:
            data_dir: Directory containing the dataset (created if missing)
            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
                       'synchronization', 'video', 'features' (default: ['features']).
                       A single type may also be given as a plain string.
            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
                      If None, loads all based on use_falls and use_adls
            use_falls: Whether to load fall sequences (default: True)
            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
            **kwargs: Additional arguments (accepted but not used here)

        Returns:
            Tuple of (data_list, names_list)

        Raises:
            ValueError: If ``data_types`` contains an unknown data type
        """
        # Default to loading pre-extracted features if not specified
        if data_types is None:
            data_types = ['features']
        elif isinstance(data_types, str):
            # set('features') would split the string into characters and
            # reject every one of them; treat a bare string as one type.
            data_types = [data_types]

        # Validate data types
        valid_types = set(self.metadata['data_types'])
        requested_types = set(data_types)
        invalid_types = requested_types - valid_types
        if invalid_types:
            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

        # Create directory if it doesn't exist
        os.makedirs(data_dir, exist_ok=True)

        data_list = []
        names_list = []

        tabular_loaders = (
            ('features', self._load_features),
            ('accelerometer', self._load_accelerometer),
            ('synchronization', self._load_synchronization),
        )
        for type_name, loader in tabular_loaders:
            if type_name in requested_types:
                frames, frame_names = loader(data_dir, sequences, use_falls, use_adls)
                data_list.extend(frames)
                names_list.extend(frame_names)

        # Depth, RGB, and Video data are image/video files; they are not
        # loaded into DataFrames, only pointed at via get_file_paths().
        if requested_types & {'depth', 'rgb', 'video'}:
            print("Note: Depth, RGB, and Video data types contain image/video files.")
            print("These are not loaded into DataFrames but their paths can be accessed.")
            print("Use the get_file_paths() method to retrieve paths to these files.")

        self.data = data_list
        return data_list, names_list

    def _resolve_sequences(self, sequences: Optional[List[str]],
                           use_falls: bool, use_adls: bool) -> List[str]:
        """Return the explicit ``sequences`` if given, else all names enabled by the flags."""
        if sequences is not None:
            # Explicit selection wins; the flags only apply to the default set.
            return list(sequences)

        resolved = []
        if use_falls:
            resolved.extend(f"fall-{i:02d}" for i in self.metadata['fall_sequences'])
        if use_adls:
            resolved.extend(f"adl-{i:02d}" for i in self.metadata['adl_sequences'])
        return resolved

    def _load_feature_csv(self, data_dir: str, file_stem: str, seq_prefix: str,
                          label: str, activity_type: str, activity_id: int,
                          sequences: Optional[List[str]]) -> Optional[pd.DataFrame]:
        """Read one features CSV, filter it by ``sequences`` and tag it; None if skipped."""
        wanted = None
        if sequences is not None:
            wanted = [s for s in sequences if s.startswith(seq_prefix)]
            if not wanted:
                # Specific sequences were requested and none belong to this
                # group: loading the whole file would return unrequested data.
                return None

        csv_path = os.path.join(data_dir, f"{file_stem}.csv")
        if not os.path.exists(csv_path):
            print(f"Warning: {label} features file not found at {csv_path}")
            return None

        df = pd.read_csv(csv_path, header=None, names=self.metadata['feature_columns'])
        if wanted is not None:
            # .copy() so the column assignments below do not write into a slice.
            df = df[df['sequence_name'].isin(wanted)].copy()

        df['activity_type'] = activity_type
        df['activity_id'] = activity_id
        return df

    def _load_features(self, data_dir: str, sequences: Optional[List[str]],
                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load pre-extracted features from CSV files.

        Reads "urfall-cam0-falls.csv" and/or "urfall-cam0-adls.csv" from
        ``data_dir``. Each frame gets 'activity_type' ('fall'/'adl') and
        'activity_id' (1/0) columns. When ``sequences`` is given, each file is
        restricted to the requested sequences of its group, and a group with no
        requested sequence is skipped.

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        groups = []
        if use_falls:
            groups.append(("urfall-cam0-falls", "fall-", "Falls", "fall", 1))
        if use_adls:
            groups.append(("urfall-cam0-adls", "adl-", "ADLs", "adl", 0))

        data_list = []
        names_list = []
        for file_stem, seq_prefix, label, activity_type, activity_id in groups:
            df = self._load_feature_csv(data_dir, file_stem, seq_prefix, label,
                                        activity_type, activity_id, sequences)
            if df is not None:
                data_list.append(df)
                names_list.append(file_stem)

        return data_list, names_list

    def _load_sequence_csvs(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool,
                            suffix: str, kind: str) -> Tuple[List[pd.DataFrame], List[str]]:
        """Load "<sequence><suffix>" CSVs for every resolved sequence; missing files are skipped."""
        data_list = []
        names_list = []

        for seq in self._resolve_sequences(sequences, use_falls, use_adls):
            csv_path = os.path.join(data_dir, f"{seq}{suffix}")
            if not os.path.exists(csv_path):
                continue
            try:
                # NOTE(review): read_csv treats the first row as a header here;
                # confirm the per-sequence files actually carry one.
                df = pd.read_csv(csv_path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole load.
                print(f"Warning: Could not load {kind} data from {csv_path}: {e}")
                continue

            is_fall = seq.startswith('fall-')
            df['sequence_name'] = seq
            df['activity_type'] = 'fall' if is_fall else 'adl'
            df['activity_id'] = 1 if is_fall else 0
            data_list.append(df)
            names_list.append(f"{seq}-{kind}")

        return data_list, names_list

    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load accelerometer CSV data files ("<sequence>-acc.csv").

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        return self._load_sequence_csvs(data_dir, sequences, use_falls, use_adls,
                                        "-acc.csv", "accelerometer")

    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
        """
        Load synchronization CSV data files ("<sequence>-data.csv").

        Args:
            data_dir: Directory containing the dataset
            sequences: Specific sequences to load
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Tuple of (data_list, names_list)
        """
        return self._load_sequence_csvs(data_dir, sequences, use_falls, use_adls,
                                        "-data.csv", "synchronization")

    def get_file_paths(self, data_dir: str, data_type: str,
                       sequences: Optional[List[str]] = None,
                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
        """
        Get file paths for image/video data types (depth, RGB, video).

        Only files that exist on disk are included in the result.

        Args:
            data_dir: Directory containing the dataset
            data_type: Type of data ('depth', 'rgb', 'video')
            sequences: Specific sequences to get paths for
            use_falls: Whether to include fall sequences
            use_adls: Whether to include ADL sequences

        Returns:
            Dictionary mapping sequence names to file paths

        Raises:
            ValueError: If ``data_type`` is not 'depth', 'rgb' or 'video'
        """
        # Map data type to the file-name suffix appended to the sequence name
        extension_map = {
            'depth': '-cam0-d.zip',
            'rgb': '-cam0-rgb.zip',
            'video': '-cam0.mp4'
        }
        if data_type not in extension_map:
            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

        ext = extension_map[data_type]
        file_paths = {}
        for seq in self._resolve_sequences(sequences, use_falls, use_adls):
            file_path = os.path.join(data_dir, f"{seq}{ext}")
            if os.path.exists(file_path):
                file_paths[seq] = file_path

        return file_paths

    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
        """
        Create sliding windows from the loaded data.

        Empty frames and frames without numeric feature columns are skipped.
        Each result entry is ``{"name": ..., "windows": [...]}`` where "windows"
        holds one ``{"name", "data"}`` dict per feature column, plus "labels"
        (majority label per window) and "activity_id" windows when those
        columns are present.

        Args:
            data: List of DataFrames containing the dataset
            names: List of names corresponding to each DataFrame
            window_size: Size of the sliding window (default: 30 frames for depth features)
            step_size: Step size for sliding window (default: 15 frames)

        Returns:
            List of dictionaries containing windowed data
        """
        # Metadata columns are never windowed as features.
        exclude_cols = {'sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id'}
        windows_data = []

        for idx, df in enumerate(data):
            if df.empty:
                continue

            feature_cols = [col for col in df.columns
                            if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
            if not feature_cols:
                continue

            windows = [
                {"name": col, "data": sliding_window(df[col].values, window_size, step_size)}
                for col in feature_cols
            ]

            if 'label' in df.columns:
                label_windows = sliding_window(df['label'].values, window_size, step_size)
                # Majority voting: the most frequent label represents the window
                labels = []
                for w in label_windows:
                    vals, counts = np.unique(w, return_counts=True)
                    labels.append(vals[np.argmax(counts)])
                windows.append({"name": "labels", "data": np.array(labels)})

            if 'activity_id' in df.columns:
                # Kept as raw windows (no voting), unlike the labels above.
                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
                windows.append({"name": "activity_id", "data": activity_windows})

            windows_data.append({"name": names[idx], "windows": windows})

        return windows_data

    def get_supported_formats(self) -> List[str]:
        """
        Get list of supported file formats for UrFall dataset.

        Returns:
            List of supported file extensions
        """
        return ['.csv', '.zip', '.mp4']

    def get_sensor_info(self) -> Dict[str, object]:
        """
        Get information about sensors in the dataset.

        Returns:
            Dictionary containing sensor information
        """
        keys = ('data_types', 'camera', 'sampling_frequency', 'accelerometer_frequency')
        return {key: self.metadata[key] for key in keys}

    def get_activity_info(self) -> Dict[int, str]:
        """
        Get information about activities in the dataset.

        Returns:
            Dictionary mapping activity IDs to labels
        """
        return self.metadata['activities']

    def get_feature_info(self) -> Dict[str, str]:
        """
        Get information about pre-extracted features.

        Returns:
            Dictionary mapping feature names to descriptions
        """
        return self.metadata['feature_descriptions']
UrFall dataset loader class.
This class handles loading and processing of the UrFall dataset for fall detection. Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video, and pre-extracted features from depth maps.
def __init__(self):
    """Set up the "urfall" loader and fill ``self.metadata`` with dataset constants."""
    super().__init__(
        name="urfall",
        description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data",
    )

    # One description per pre-extracted depth feature; the insertion order
    # here is also the order of the feature columns below.
    feature_descriptions = {
        'HeightWidthRatio': 'Bounding box height to width ratio',
        'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
        'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
        'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
        'HHmaxRatio': 'Human height in frame to standing height ratio',
        'H': 'Actual height in mm',
        'D': 'Distance of person center to floor in mm',
        'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid',
    }

    activities = {
        -1: 'Not lying (standing/walking)',
        0: 'Falling (transient)',
        1: 'Lying on ground',
    }

    # Three bookkeeping columns followed by the described features.
    feature_columns = ['sequence_name', 'frame_number', 'label', *feature_descriptions]

    self.metadata = {
        'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
        'camera': 'cam0',                  # Front camera
        'sampling_frequency': 30,          # Depth/RGB camera fps
        'accelerometer_frequency': 100,    # Accelerometer sampling frequency (typical)
        'activities': activities,
        'fall_sequences': [*range(1, 31)],  # fall-01 to fall-30
        'adl_sequences': [*range(1, 21)],   # adl-01 to adl-20
        'feature_columns': feature_columns,
        'feature_descriptions': feature_descriptions,
    }
Initialize the dataset loader.
Args: name: Name of the dataset description: Description of the dataset
def load_data(self, data_dir: str,
              data_types: Optional[List[str]] = None,
              sequences: Optional[List[str]] = None,
              use_falls: bool = True,
              use_adls: bool = True,
              **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
    """
    Load UrFall dataset from the specified directory.

    Tabular data types are always loaded in the fixed order features,
    accelerometer, synchronization, regardless of the order in ``data_types``.
    The loaded frames are also stored on ``self.data``.

    Args:
        data_dir: Directory containing the dataset (created if missing)
        data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
                   'synchronization', 'video', 'features' (default: ['features']).
                   A single type may also be given as a plain string.
        sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
                  If None, loads all based on use_falls and use_adls
        use_falls: Whether to load fall sequences (default: True)
        use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
        **kwargs: Additional arguments (accepted but not used here)

    Returns:
        Tuple of (data_list, names_list)

    Raises:
        ValueError: If ``data_types`` contains an unknown data type
    """
    # Default to loading pre-extracted features if not specified
    if data_types is None:
        data_types = ['features']
    elif isinstance(data_types, str):
        # set('features') would split the string into characters and
        # reject every one of them; treat a bare string as one type.
        data_types = [data_types]

    # Validate data types
    valid_types = set(self.metadata['data_types'])
    requested_types = set(data_types)
    invalid_types = requested_types - valid_types
    if invalid_types:
        raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")

    # Create directory if it doesn't exist
    os.makedirs(data_dir, exist_ok=True)

    data_list = []
    names_list = []

    # Load pre-extracted features (CSV files)
    if 'features' in requested_types:
        features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
        data_list.extend(features_data)
        names_list.extend(features_names)

    # Load raw accelerometer data
    if 'accelerometer' in requested_types:
        accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
        data_list.extend(accel_data)
        names_list.extend(accel_names)

    # Load synchronization data
    if 'synchronization' in requested_types:
        sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
        data_list.extend(sync_data)
        names_list.extend(sync_names)

    # Depth, RGB, and Video data are image/video files; they are not
    # loaded into DataFrames, only pointed at via get_file_paths().
    if requested_types & {'depth', 'rgb', 'video'}:
        print("Note: Depth, RGB, and Video data types contain image/video files.")
        print("These are not loaded into DataFrames but their paths can be accessed.")
        print("Use the get_file_paths() method to retrieve paths to these files.")

    self.data = data_list
    return data_list, names_list
Load UrFall dataset from the specified directory.
Args: data_dir: Directory containing the dataset data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']) sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']) If None, loads all based on use_falls and use_adls use_falls: Whether to load fall sequences (default: True) use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True) **kwargs: Additional arguments
Returns: Tuple of (data_list, names_list)
def get_file_paths(self, data_dir: str, data_type: str,
                   sequences: Optional[List[str]] = None,
                   use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
    """
    Locate the image/video files of one data type inside ``data_dir``.

    Args:
        data_dir: Directory containing the dataset
        data_type: One of 'depth', 'rgb' or 'video'
        sequences: Explicit sequence names to look up; when given,
            use_falls and use_adls are ignored
        use_falls: Include fall-01 .. fall-30 when no sequences are given
        use_adls: Include adl-01 .. adl-20 when no sequences are given

    Returns:
        Dictionary mapping each sequence name whose file exists on disk
        to the path of that file

    Raises:
        ValueError: If data_type is not 'depth', 'rgb' or 'video'
    """
    if data_type not in ('depth', 'rgb', 'video'):
        raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")

    # File-name suffix appended to the sequence name for each data type.
    suffix_by_type = {
        'depth': '-cam0-d.zip',
        'rgb': '-cam0-rgb.zip',
        'video': '-cam0.mp4',
    }
    suffix = suffix_by_type[data_type]

    # An explicit sequence list wins over the fall/ADL switches.
    if sequences is not None:
        wanted = sequences
    else:
        wanted = []
        if use_falls:
            wanted += [f"fall-{number:02d}" for number in range(1, 31)]
        if use_adls:
            wanted += [f"adl-{number:02d}" for number in range(1, 21)]

    candidates = ((name, os.path.join(data_dir, f"{name}{suffix}")) for name in wanted)
    # Only report files that are actually present.
    return {name: path for name, path in candidates if os.path.exists(path)}
Get file paths for image/video data types (depth, RGB, video).
Args: data_dir: Directory containing the dataset data_type: Type of data ('depth', 'rgb', 'video') sequences: Specific sequences to get paths for use_falls: Whether to include fall sequences use_adls: Whether to include ADL sequences
Returns: Dictionary mapping sequence names to file paths
def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
                           window_size: int = 30, step_size: int = 15) -> List[Dict]:
    """
    Cut every loaded DataFrame into sliding windows, column by column.

    Args:
        data: List of DataFrames containing the dataset
        names: List of names corresponding to each DataFrame
        window_size: Size of the sliding window (default: 30 frames)
        step_size: Step size for sliding window (default: 15 frames)

    Returns:
        List of dictionaries, one per DataFrame that is non-empty and has at
        least one numeric feature column. Each has the form
        {"name": <name>, "windows": [{"name": <column>, "data": <windows>}, ...]}.
        A "labels" entry (majority label per window) and an "activity_id"
        entry are added when those columns are present.
    """
    # Bookkeeping columns that are never treated as features.
    metadata_cols = ('sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id')
    result = []

    for position, frame in enumerate(data):
        if frame.empty:
            continue

        numeric_features = [
            column for column in frame.columns
            if column not in metadata_cols and pd.api.types.is_numeric_dtype(frame[column])
        ]
        if not numeric_features:
            continue

        # One windowed entry per numeric feature column.
        per_column = [
            {"name": column, "data": sliding_window(frame[column].values, window_size, step_size)}
            for column in numeric_features
        ]

        if 'label' in frame.columns:
            # Collapse each label window to its most frequent value.
            voted = []
            for chunk in sliding_window(frame['label'].values, window_size, step_size):
                values, counts = np.unique(chunk, return_counts=True)
                voted.append(values[np.argmax(counts)])
            per_column.append({"name": "labels", "data": np.array(voted)})

        if 'activity_id' in frame.columns:
            # Activity ids are windowed as-is, without voting.
            id_windows = sliding_window(frame['activity_id'].values, window_size, step_size)
            per_column.append({"name": "activity_id", "data": id_windows})

        result.append({"name": names[position], "windows": per_column})

    return result
Create sliding windows from the loaded data.
Args: data: List of DataFrames containing the dataset names: List of names corresponding to each DataFrame window_size: Size of the sliding window (default: 30 frames for depth features) step_size: Step size for sliding window (default: 15 frames)
Returns: List of dictionaries containing windowed data
def get_supported_formats(self) -> List[str]:
    """
    Report the file extensions used by the UrFall dataset.

    Returns:
        A new list with the supported file extensions
    """
    tabular_ext, archive_ext, video_ext = '.csv', '.zip', '.mp4'
    return [tabular_ext, archive_ext, video_ext]
Get list of supported file formats for UrFall dataset.
Returns: List of supported file extensions
def get_sensor_info(self) -> Dict[str, object]:
    """
    Get information about sensors in the dataset.

    Returns:
        Dictionary with the 'data_types', 'camera', 'sampling_frequency' and
        'accelerometer_frequency' entries copied from self.metadata. The
        values are the same objects held by the metadata, not copies.
    """
    # The value type is annotated as `object`: the previous annotation used
    # the builtin function `any`, which is not a type.
    sensor_keys = ('data_types', 'camera', 'sampling_frequency', 'accelerometer_frequency')
    return {key: self.metadata[key] for key in sensor_keys}
Get information about sensors in the dataset.
Returns: Dictionary containing sensor information
def get_activity_info(self) -> Dict[int, str]:
    """
    Look up the activity labels stored in the loader metadata.

    Returns:
        The metadata's own 'activities' dictionary (activity ID -> label)
    """
    activity_labels = self.metadata['activities']
    return activity_labels
Get information about activities in the dataset.
Returns: Dictionary mapping activity IDs to labels
def get_feature_info(self) -> Dict[str, str]:
    """
    Look up the descriptions of the pre-extracted features.

    Returns:
        The metadata's own 'feature_descriptions' dictionary
        (feature name -> description)
    """
    descriptions = self.metadata['feature_descriptions']
    return descriptions
Get information about pre-extracted features.
Returns: Dictionary mapping feature names to descriptions
Inherited Members
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Legacy function-style entry point for loading the UrFall dataset.

    Builds a fresh UrFallLoader and forwards every argument to its
    load_data() method.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list)
    """
    selection = {
        'data_types': data_types,
        'sequences': sequences,
        'use_falls': use_falls,
        'use_adls': use_adls,
    }
    return UrFallLoader().load_data(data_dir, **selection)
Load UrFall dataset using the legacy function interface.
Args: data_dir: Directory containing the dataset data_types: List of data types to load sequences: List of specific sequences to load use_falls: Whether to load fall sequences use_adls: Whether to load ADL sequences
Returns: Tuple of (data_list, names_list)
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Legacy function-style entry point for windowing UrFall data.

    Builds a fresh UrFallLoader and forwards every argument to its
    create_sliding_windows() method.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data
    """
    return UrFallLoader().create_sliding_windows(
        urfall_data, urfall_names, window_size, step_size
    )
Create sliding windows from UrFall data using the legacy function interface.
Args: urfall_data: List of DataFrames urfall_names: List of names window_size: Size of sliding window step_size: Step size for sliding window
Returns: List of dictionaries containing windowed data