gaitsetpy.dataset.urfall

UrFall Dataset Loader and Utils. Maintainer: @aharshit123456

This file contains the UrFall dataset loader class that inherits from BaseDatasetLoader. UrFall is a fall detection dataset with multimodal data including depth, RGB, accelerometer, and pre-extracted features from depth maps.

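Reference:

- Website: https://fenix.ur.edu.pl/~mkepski/ds/uf.html
- Dataset: University of Rzeszow Fall Detection Dataset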

  1'''
  2UrFall Dataset Loader and Utils.
  3Maintainer: @aharshit123456
  4
  5This file contains the UrFall dataset loader class that inherits from BaseDatasetLoader.
  6UrFall is a fall detection dataset with multimodal data including depth, RGB, accelerometer,
  7and pre-extracted features from depth maps.
  8
  9Reference:
 10- Website: https://fenix.ur.edu.pl/~mkepski/ds/uf.html
 11- Dataset: University of Rzeszow Fall Detection Dataset
 12'''
 13
 14import os
 15import pandas as pd
 16import numpy as np
 17from typing import List, Dict, Tuple, Optional, Set
 18from glob import glob
 19from ..core.base_classes import BaseDatasetLoader
 20from .utils import download_dataset, extract_dataset, sliding_window
 21
 22
 23class UrFallLoader(BaseDatasetLoader):
 24    """
 25    UrFall dataset loader class.
 26    
 27    This class handles loading and processing of the UrFall dataset for fall detection.
 28    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
 29    and pre-extracted features from depth maps.
 30    """
 31    
 32    def __init__(self):
 33        super().__init__(
 34            name="urfall",
 35            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data"
 36        )
 37        self.metadata = {
 38            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
 39            'camera': 'cam0',  # Front camera
 40            'sampling_frequency': 30,  # Depth/RGB camera fps
 41            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
 42            'activities': {
 43                -1: 'Not lying (standing/walking)',
 44                0: 'Falling (transient)',
 45                1: 'Lying on ground'
 46            },
 47            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
 48            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
 49            'feature_columns': [
 50                'sequence_name',
 51                'frame_number',
 52                'label',
 53                'HeightWidthRatio',
 54                'MajorMinorRatio',
 55                'BoundingBoxOccupancy',
 56                'MaxStdXZ',
 57                'HHmaxRatio',
 58                'H',
 59                'D',
 60                'P40'
 61            ],
 62            'feature_descriptions': {
 63                'HeightWidthRatio': 'Bounding box height to width ratio',
 64                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
 65                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
 66                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
 67                'HHmaxRatio': 'Human height in frame to standing height ratio',
 68                'H': 'Actual height in mm',
 69                'D': 'Distance of person center to floor in mm',
 70                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
 71            }
 72        }
 73    
 74    def load_data(self, data_dir: str, 
 75                  data_types: Optional[List[str]] = None,
 76                  sequences: Optional[List[str]] = None,
 77                  use_falls: bool = True,
 78                  use_adls: bool = True,
 79                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 80        """
 81        Load UrFall dataset from the specified directory.
 82        
 83        Args:
 84            data_dir: Directory containing the dataset
 85            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 86                       'synchronization', 'video', 'features' (default: ['features'])
 87            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 88                      If None, loads all based on use_falls and use_adls
 89            use_falls: Whether to load fall sequences (default: True)
 90            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 91            **kwargs: Additional arguments
 92            
 93        Returns:
 94            Tuple of (data_list, names_list)
 95        """
 96        # Default to loading pre-extracted features if not specified
 97        if data_types is None:
 98            data_types = ['features']
 99        
100        # Validate data types
101        valid_types = set(self.metadata['data_types'])
102        requested_types = set(data_types)
103        invalid_types = requested_types - valid_types
104        if invalid_types:
105            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
106        
107        # Create directory if it doesn't exist
108        os.makedirs(data_dir, exist_ok=True)
109        
110        data_list = []
111        names_list = []
112        
113        # Load pre-extracted features (CSV files)
114        if 'features' in data_types:
115            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
116            data_list.extend(features_data)
117            names_list.extend(features_names)
118        
119        # Load raw accelerometer data
120        if 'accelerometer' in data_types:
121            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
122            data_list.extend(accel_data)
123            names_list.extend(accel_names)
124        
125        # Load synchronization data
126        if 'synchronization' in data_types:
127            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
128            data_list.extend(sync_data)
129            names_list.extend(sync_names)
130        
131        # Note: Depth, RGB, and Video data are image/video files
132        # These would require specialized loading and are not typically loaded into DataFrames
133        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
134            print("Note: Depth, RGB, and Video data types contain image/video files.")
135            print("These are not loaded into DataFrames but their paths can be accessed.")
136            print("Use the get_file_paths() method to retrieve paths to these files.")
137        
138        self.data = data_list
139        return data_list, names_list
140    
141    def _load_features(self, data_dir: str, sequences: Optional[List[str]], 
142                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
143        """
144        Load pre-extracted features from CSV files.
145        
146        Args:
147            data_dir: Directory containing the dataset
148            sequences: Specific sequences to load
149            use_falls: Whether to include fall sequences
150            use_adls: Whether to include ADL sequences
151            
152        Returns:
153            Tuple of (data_list, names_list)
154        """
155        data_list = []
156        names_list = []
157        
158        # Load falls features
159        if use_falls:
160            falls_csv = os.path.join(data_dir, "urfall-cam0-falls.csv")
161            if os.path.exists(falls_csv):
162                df = pd.read_csv(falls_csv, header=None, names=self.metadata['feature_columns'])
163                
164                # Filter by specific sequences if provided
165                if sequences is not None:
166                    fall_sequences = [s for s in sequences if s.startswith('fall-')]
167                    if fall_sequences:
168                        df = df[df['sequence_name'].isin(fall_sequences)]
169                
170                # Add metadata columns
171                df['activity_type'] = 'fall'
172                df['activity_id'] = 1  # Falls are labeled as 1
173                
174                data_list.append(df)
175                names_list.append("urfall-cam0-falls")
176            else:
177                print(f"Warning: Falls features file not found at {falls_csv}")
178        
179        # Load ADLs features
180        if use_adls:
181            adls_csv = os.path.join(data_dir, "urfall-cam0-adls.csv")
182            if os.path.exists(adls_csv):
183                df = pd.read_csv(adls_csv, header=None, names=self.metadata['feature_columns'])
184                
185                # Filter by specific sequences if provided
186                if sequences is not None:
187                    adl_sequences = [s for s in sequences if s.startswith('adl-')]
188                    if adl_sequences:
189                        df = df[df['sequence_name'].isin(adl_sequences)]
190                
191                # Add metadata columns
192                df['activity_type'] = 'adl'
193                df['activity_id'] = 0  # ADLs are labeled as 0
194                
195                data_list.append(df)
196                names_list.append("urfall-cam0-adls")
197            else:
198                print(f"Warning: ADLs features file not found at {adls_csv}")
199        
200        return data_list, names_list
201    
202    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
203                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
204        """
205        Load accelerometer CSV data files.
206        
207        Args:
208            data_dir: Directory containing the dataset
209            sequences: Specific sequences to load
210            use_falls: Whether to include fall sequences
211            use_adls: Whether to include ADL sequences
212            
213        Returns:
214            Tuple of (data_list, names_list)
215        """
216        data_list = []
217        names_list = []
218        
219        # Determine which sequences to load
220        seq_list = []
221        if sequences is not None:
222            seq_list = sequences
223        else:
224            if use_falls:
225                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
226            if use_adls:
227                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
228        
229        # Load accelerometer data for each sequence
230        for seq in seq_list:
231            accel_file = os.path.join(data_dir, f"{seq}-acc.csv")
232            if os.path.exists(accel_file):
233                try:
234                    df = pd.read_csv(accel_file)
235                    df['sequence_name'] = seq
236                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
237                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
238                    data_list.append(df)
239                    names_list.append(f"{seq}-accelerometer")
240                except Exception as e:
241                    print(f"Warning: Could not load accelerometer data from {accel_file}: {e}")
242        
243        return data_list, names_list
244    
245    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
246                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
247        """
248        Load synchronization CSV data files.
249        
250        Args:
251            data_dir: Directory containing the dataset
252            sequences: Specific sequences to load
253            use_falls: Whether to include fall sequences
254            use_adls: Whether to include ADL sequences
255            
256        Returns:
257            Tuple of (data_list, names_list)
258        """
259        data_list = []
260        names_list = []
261        
262        # Determine which sequences to load
263        seq_list = []
264        if sequences is not None:
265            seq_list = sequences
266        else:
267            if use_falls:
268                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
269            if use_adls:
270                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
271        
272        # Load synchronization data for each sequence
273        for seq in seq_list:
274            sync_file = os.path.join(data_dir, f"{seq}-data.csv")
275            if os.path.exists(sync_file):
276                try:
277                    df = pd.read_csv(sync_file)
278                    df['sequence_name'] = seq
279                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
280                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
281                    data_list.append(df)
282                    names_list.append(f"{seq}-synchronization")
283                except Exception as e:
284                    print(f"Warning: Could not load synchronization data from {sync_file}: {e}")
285        
286        return data_list, names_list
287    
288    def get_file_paths(self, data_dir: str, data_type: str, 
289                       sequences: Optional[List[str]] = None,
290                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
291        """
292        Get file paths for image/video data types (depth, RGB, video).
293        
294        Args:
295            data_dir: Directory containing the dataset
296            data_type: Type of data ('depth', 'rgb', 'video')
297            sequences: Specific sequences to get paths for
298            use_falls: Whether to include fall sequences
299            use_adls: Whether to include ADL sequences
300            
301        Returns:
302            Dictionary mapping sequence names to file paths
303        """
304        if data_type not in ['depth', 'rgb', 'video']:
305            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
306        
307        file_paths = {}
308        
309        # Determine which sequences to include
310        seq_list = []
311        if sequences is not None:
312            seq_list = sequences
313        else:
314            if use_falls:
315                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
316            if use_adls:
317                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
318        
319        # Map data type to file extension
320        extension_map = {
321            'depth': '-cam0-d.zip',
322            'rgb': '-cam0-rgb.zip',
323            'video': '-cam0.mp4'
324        }
325        
326        ext = extension_map[data_type]
327        
328        for seq in seq_list:
329            file_path = os.path.join(data_dir, f"{seq}{ext}")
330            if os.path.exists(file_path):
331                file_paths[seq] = file_path
332        
333        return file_paths
334    
335    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
336                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
337        """
338        Create sliding windows from the loaded data.
339        
340        Args:
341            data: List of DataFrames containing the dataset
342            names: List of names corresponding to each DataFrame
343            window_size: Size of the sliding window (default: 30 frames for depth features)
344            step_size: Step size for sliding window (default: 15 frames)
345            
346        Returns:
347            List of dictionaries containing windowed data
348        """
349        windows_data = []
350        
351        for idx, df in enumerate(data):
352            if df.empty:
353                continue
354            
355            # Get numeric feature columns (exclude metadata columns)
356            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
357            feature_cols = [col for col in df.columns 
358                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
359            
360            if not feature_cols:
361                continue
362            
363            windows = []
364            
365            # Create windows for each feature column
366            for col in feature_cols:
367                win = sliding_window(df[col].values, window_size, step_size)
368                windows.append({"name": col, "data": win})
369            
370            # Create windows for labels if present
371            if 'label' in df.columns:
372                label_windows = sliding_window(df['label'].values, window_size, step_size)
373                # Majority voting for each window
374                labels = []
375                for w in label_windows:
376                    vals, counts = np.unique(w, return_counts=True)
377                    labels.append(vals[np.argmax(counts)])
378                windows.append({"name": "labels", "data": np.array(labels)})
379            
380            # Create activity_id windows
381            if 'activity_id' in df.columns:
382                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
383                windows.append({"name": "activity_id", "data": activity_windows})
384            
385            windows_data.append({"name": names[idx], "windows": windows})
386        
387        return windows_data
388    
389    def get_supported_formats(self) -> List[str]:
390        """
391        Get list of supported file formats for UrFall dataset.
392        
393        Returns:
394            List of supported file extensions
395        """
396        return ['.csv', '.zip', '.mp4']
397    
398    def get_sensor_info(self) -> Dict[str, any]:
399        """
400        Get information about sensors in the dataset.
401        
402        Returns:
403            Dictionary containing sensor information
404        """
405        return {
406            'data_types': self.metadata['data_types'],
407            'camera': self.metadata['camera'],
408            'sampling_frequency': self.metadata['sampling_frequency'],
409            'accelerometer_frequency': self.metadata['accelerometer_frequency']
410        }
411    
412    def get_activity_info(self) -> Dict[int, str]:
413        """
414        Get information about activities in the dataset.
415        
416        Returns:
417            Dictionary mapping activity IDs to labels
418        """
419        return self.metadata['activities']
420    
421    def get_feature_info(self) -> Dict[str, str]:
422        """
423        Get information about pre-extracted features.
424        
425        Returns:
426            Dictionary mapping feature names to descriptions
427        """
428        return self.metadata['feature_descriptions']
429
430
431# Legacy function wrappers for backward compatibility
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Legacy functional entry point for loading the UrFall dataset.

    Builds a fresh UrFallLoader and forwards every argument to its load_data method.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list)
    """
    # Collect the keyword options once so the delegation below stays on one line.
    options = {
        "data_types": data_types,
        "sequences": sequences,
        "use_falls": use_falls,
        "use_adls": use_adls,
    }
    return UrFallLoader().load_data(data_dir, **options)
451
452
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Legacy functional entry point for windowing UrFall data.

    Builds a fresh UrFallLoader and delegates to its create_sliding_windows method.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data
    """
    windower = UrFallLoader()
    windowed = windower.create_sliding_windows(
        urfall_data, urfall_names, window_size, step_size
    )
    return windowed
class UrFallLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 24class UrFallLoader(BaseDatasetLoader):
 25    """
 26    UrFall dataset loader class.
 27    
 28    This class handles loading and processing of the UrFall dataset for fall detection.
 29    Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video,
 30    and pre-extracted features from depth maps.
 31    """
 32    
 33    def __init__(self):
 34        super().__init__(
 35            name="urfall",
 36            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data"
 37        )
 38        self.metadata = {
 39            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
 40            'camera': 'cam0',  # Front camera
 41            'sampling_frequency': 30,  # Depth/RGB camera fps
 42            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
 43            'activities': {
 44                -1: 'Not lying (standing/walking)',
 45                0: 'Falling (transient)',
 46                1: 'Lying on ground'
 47            },
 48            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
 49            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
 50            'feature_columns': [
 51                'sequence_name',
 52                'frame_number',
 53                'label',
 54                'HeightWidthRatio',
 55                'MajorMinorRatio',
 56                'BoundingBoxOccupancy',
 57                'MaxStdXZ',
 58                'HHmaxRatio',
 59                'H',
 60                'D',
 61                'P40'
 62            ],
 63            'feature_descriptions': {
 64                'HeightWidthRatio': 'Bounding box height to width ratio',
 65                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
 66                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
 67                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
 68                'HHmaxRatio': 'Human height in frame to standing height ratio',
 69                'H': 'Actual height in mm',
 70                'D': 'Distance of person center to floor in mm',
 71                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
 72            }
 73        }
 74    
 75    def load_data(self, data_dir: str, 
 76                  data_types: Optional[List[str]] = None,
 77                  sequences: Optional[List[str]] = None,
 78                  use_falls: bool = True,
 79                  use_adls: bool = True,
 80                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 81        """
 82        Load UrFall dataset from the specified directory.
 83        
 84        Args:
 85            data_dir: Directory containing the dataset
 86            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 87                       'synchronization', 'video', 'features' (default: ['features'])
 88            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 89                      If None, loads all based on use_falls and use_adls
 90            use_falls: Whether to load fall sequences (default: True)
 91            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 92            **kwargs: Additional arguments
 93            
 94        Returns:
 95            Tuple of (data_list, names_list)
 96        """
 97        # Default to loading pre-extracted features if not specified
 98        if data_types is None:
 99            data_types = ['features']
100        
101        # Validate data types
102        valid_types = set(self.metadata['data_types'])
103        requested_types = set(data_types)
104        invalid_types = requested_types - valid_types
105        if invalid_types:
106            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
107        
108        # Create directory if it doesn't exist
109        os.makedirs(data_dir, exist_ok=True)
110        
111        data_list = []
112        names_list = []
113        
114        # Load pre-extracted features (CSV files)
115        if 'features' in data_types:
116            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
117            data_list.extend(features_data)
118            names_list.extend(features_names)
119        
120        # Load raw accelerometer data
121        if 'accelerometer' in data_types:
122            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
123            data_list.extend(accel_data)
124            names_list.extend(accel_names)
125        
126        # Load synchronization data
127        if 'synchronization' in data_types:
128            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
129            data_list.extend(sync_data)
130            names_list.extend(sync_names)
131        
132        # Note: Depth, RGB, and Video data are image/video files
133        # These would require specialized loading and are not typically loaded into DataFrames
134        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
135            print("Note: Depth, RGB, and Video data types contain image/video files.")
136            print("These are not loaded into DataFrames but their paths can be accessed.")
137            print("Use the get_file_paths() method to retrieve paths to these files.")
138        
139        self.data = data_list
140        return data_list, names_list
141    
142    def _load_features(self, data_dir: str, sequences: Optional[List[str]], 
143                       use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
144        """
145        Load pre-extracted features from CSV files.
146        
147        Args:
148            data_dir: Directory containing the dataset
149            sequences: Specific sequences to load
150            use_falls: Whether to include fall sequences
151            use_adls: Whether to include ADL sequences
152            
153        Returns:
154            Tuple of (data_list, names_list)
155        """
156        data_list = []
157        names_list = []
158        
159        # Load falls features
160        if use_falls:
161            falls_csv = os.path.join(data_dir, "urfall-cam0-falls.csv")
162            if os.path.exists(falls_csv):
163                df = pd.read_csv(falls_csv, header=None, names=self.metadata['feature_columns'])
164                
165                # Filter by specific sequences if provided
166                if sequences is not None:
167                    fall_sequences = [s for s in sequences if s.startswith('fall-')]
168                    if fall_sequences:
169                        df = df[df['sequence_name'].isin(fall_sequences)]
170                
171                # Add metadata columns
172                df['activity_type'] = 'fall'
173                df['activity_id'] = 1  # Falls are labeled as 1
174                
175                data_list.append(df)
176                names_list.append("urfall-cam0-falls")
177            else:
178                print(f"Warning: Falls features file not found at {falls_csv}")
179        
180        # Load ADLs features
181        if use_adls:
182            adls_csv = os.path.join(data_dir, "urfall-cam0-adls.csv")
183            if os.path.exists(adls_csv):
184                df = pd.read_csv(adls_csv, header=None, names=self.metadata['feature_columns'])
185                
186                # Filter by specific sequences if provided
187                if sequences is not None:
188                    adl_sequences = [s for s in sequences if s.startswith('adl-')]
189                    if adl_sequences:
190                        df = df[df['sequence_name'].isin(adl_sequences)]
191                
192                # Add metadata columns
193                df['activity_type'] = 'adl'
194                df['activity_id'] = 0  # ADLs are labeled as 0
195                
196                data_list.append(df)
197                names_list.append("urfall-cam0-adls")
198            else:
199                print(f"Warning: ADLs features file not found at {adls_csv}")
200        
201        return data_list, names_list
202    
203    def _load_accelerometer(self, data_dir: str, sequences: Optional[List[str]],
204                            use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
205        """
206        Load accelerometer CSV data files.
207        
208        Args:
209            data_dir: Directory containing the dataset
210            sequences: Specific sequences to load
211            use_falls: Whether to include fall sequences
212            use_adls: Whether to include ADL sequences
213            
214        Returns:
215            Tuple of (data_list, names_list)
216        """
217        data_list = []
218        names_list = []
219        
220        # Determine which sequences to load
221        seq_list = []
222        if sequences is not None:
223            seq_list = sequences
224        else:
225            if use_falls:
226                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
227            if use_adls:
228                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
229        
230        # Load accelerometer data for each sequence
231        for seq in seq_list:
232            accel_file = os.path.join(data_dir, f"{seq}-acc.csv")
233            if os.path.exists(accel_file):
234                try:
235                    df = pd.read_csv(accel_file)
236                    df['sequence_name'] = seq
237                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
238                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
239                    data_list.append(df)
240                    names_list.append(f"{seq}-accelerometer")
241                except Exception as e:
242                    print(f"Warning: Could not load accelerometer data from {accel_file}: {e}")
243        
244        return data_list, names_list
245    
246    def _load_synchronization(self, data_dir: str, sequences: Optional[List[str]],
247                              use_falls: bool, use_adls: bool) -> Tuple[List[pd.DataFrame], List[str]]:
248        """
249        Load synchronization CSV data files.
250        
251        Args:
252            data_dir: Directory containing the dataset
253            sequences: Specific sequences to load
254            use_falls: Whether to include fall sequences
255            use_adls: Whether to include ADL sequences
256            
257        Returns:
258            Tuple of (data_list, names_list)
259        """
260        data_list = []
261        names_list = []
262        
263        # Determine which sequences to load
264        seq_list = []
265        if sequences is not None:
266            seq_list = sequences
267        else:
268            if use_falls:
269                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
270            if use_adls:
271                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
272        
273        # Load synchronization data for each sequence
274        for seq in seq_list:
275            sync_file = os.path.join(data_dir, f"{seq}-data.csv")
276            if os.path.exists(sync_file):
277                try:
278                    df = pd.read_csv(sync_file)
279                    df['sequence_name'] = seq
280                    df['activity_type'] = 'fall' if seq.startswith('fall-') else 'adl'
281                    df['activity_id'] = 1 if seq.startswith('fall-') else 0
282                    data_list.append(df)
283                    names_list.append(f"{seq}-synchronization")
284                except Exception as e:
285                    print(f"Warning: Could not load synchronization data from {sync_file}: {e}")
286        
287        return data_list, names_list
288    
289    def get_file_paths(self, data_dir: str, data_type: str, 
290                       sequences: Optional[List[str]] = None,
291                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
292        """
293        Get file paths for image/video data types (depth, RGB, video).
294        
295        Args:
296            data_dir: Directory containing the dataset
297            data_type: Type of data ('depth', 'rgb', 'video')
298            sequences: Specific sequences to get paths for
299            use_falls: Whether to include fall sequences
300            use_adls: Whether to include ADL sequences
301            
302        Returns:
303            Dictionary mapping sequence names to file paths
304        """
305        if data_type not in ['depth', 'rgb', 'video']:
306            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
307        
308        file_paths = {}
309        
310        # Determine which sequences to include
311        seq_list = []
312        if sequences is not None:
313            seq_list = sequences
314        else:
315            if use_falls:
316                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
317            if use_adls:
318                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
319        
320        # Map data type to file extension
321        extension_map = {
322            'depth': '-cam0-d.zip',
323            'rgb': '-cam0-rgb.zip',
324            'video': '-cam0.mp4'
325        }
326        
327        ext = extension_map[data_type]
328        
329        for seq in seq_list:
330            file_path = os.path.join(data_dir, f"{seq}{ext}")
331            if os.path.exists(file_path):
332                file_paths[seq] = file_path
333        
334        return file_paths
335    
336    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
337                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
338        """
339        Create sliding windows from the loaded data.
340        
341        Args:
342            data: List of DataFrames containing the dataset
343            names: List of names corresponding to each DataFrame
344            window_size: Size of the sliding window (default: 30 frames for depth features)
345            step_size: Step size for sliding window (default: 15 frames)
346            
347        Returns:
348            List of dictionaries containing windowed data
349        """
350        windows_data = []
351        
352        for idx, df in enumerate(data):
353            if df.empty:
354                continue
355            
356            # Get numeric feature columns (exclude metadata columns)
357            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
358            feature_cols = [col for col in df.columns 
359                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
360            
361            if not feature_cols:
362                continue
363            
364            windows = []
365            
366            # Create windows for each feature column
367            for col in feature_cols:
368                win = sliding_window(df[col].values, window_size, step_size)
369                windows.append({"name": col, "data": win})
370            
371            # Create windows for labels if present
372            if 'label' in df.columns:
373                label_windows = sliding_window(df['label'].values, window_size, step_size)
374                # Majority voting for each window
375                labels = []
376                for w in label_windows:
377                    vals, counts = np.unique(w, return_counts=True)
378                    labels.append(vals[np.argmax(counts)])
379                windows.append({"name": "labels", "data": np.array(labels)})
380            
381            # Create activity_id windows
382            if 'activity_id' in df.columns:
383                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
384                windows.append({"name": "activity_id", "data": activity_windows})
385            
386            windows_data.append({"name": names[idx], "windows": windows})
387        
388        return windows_data
389    
390    def get_supported_formats(self) -> List[str]:
391        """
392        Get list of supported file formats for UrFall dataset.
393        
394        Returns:
395            List of supported file extensions
396        """
397        return ['.csv', '.zip', '.mp4']
398    
399    def get_sensor_info(self) -> Dict[str, any]:
400        """
401        Get information about sensors in the dataset.
402        
403        Returns:
404            Dictionary containing sensor information
405        """
406        return {
407            'data_types': self.metadata['data_types'],
408            'camera': self.metadata['camera'],
409            'sampling_frequency': self.metadata['sampling_frequency'],
410            'accelerometer_frequency': self.metadata['accelerometer_frequency']
411        }
412    
413    def get_activity_info(self) -> Dict[int, str]:
414        """
415        Get information about activities in the dataset.
416        
417        Returns:
418            Dictionary mapping activity IDs to labels
419        """
420        return self.metadata['activities']
421    
422    def get_feature_info(self) -> Dict[str, str]:
423        """
424        Get information about pre-extracted features.
425        
426        Returns:
427            Dictionary mapping feature names to descriptions
428        """
429        return self.metadata['feature_descriptions']

UrFall dataset loader class.

This class handles loading and processing of the UrFall dataset for fall detection. Supports multiple data types: Depth, RGB, Accelerometer, Synchronization, Video, and pre-extracted features from depth maps.

UrFallLoader()
33    def __init__(self):
34        super().__init__(
35            name="urfall",
36            description="UrFall Dataset - University of Rzeszow Fall Detection Dataset with multimodal data"
37        )
38        self.metadata = {
39            'data_types': ['depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features'],
40            'camera': 'cam0',  # Front camera
41            'sampling_frequency': 30,  # Depth/RGB camera fps
42            'accelerometer_frequency': 100,  # Accelerometer sampling frequency (typical)
43            'activities': {
44                -1: 'Not lying (standing/walking)',
45                0: 'Falling (transient)',
46                1: 'Lying on ground'
47            },
48            'fall_sequences': list(range(1, 31)),  # fall-01 to fall-30
49            'adl_sequences': list(range(1, 21)),  # adl-01 to adl-20
50            'feature_columns': [
51                'sequence_name',
52                'frame_number',
53                'label',
54                'HeightWidthRatio',
55                'MajorMinorRatio',
56                'BoundingBoxOccupancy',
57                'MaxStdXZ',
58                'HHmaxRatio',
59                'H',
60                'D',
61                'P40'
62            ],
63            'feature_descriptions': {
64                'HeightWidthRatio': 'Bounding box height to width ratio',
65                'MajorMinorRatio': 'Major to minor axis ratio from BLOB segmentation',
66                'BoundingBoxOccupancy': 'Ratio of bounding box occupied by person pixels',
67                'MaxStdXZ': 'Standard deviation of pixels from centroid (X and Z axis)',
68                'HHmaxRatio': 'Human height in frame to standing height ratio',
69                'H': 'Actual height in mm',
70                'D': 'Distance of person center to floor in mm',
71                'P40': 'Ratio of point clouds in 40cm cuboid to full height cuboid'
72            }
73        }

Initialize the dataset loader.

Args: name: Name of the dataset description: Description of the dataset

metadata
def load_data( self, data_dir: str, data_types: Optional[List[str]] = None, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
 75    def load_data(self, data_dir: str, 
 76                  data_types: Optional[List[str]] = None,
 77                  sequences: Optional[List[str]] = None,
 78                  use_falls: bool = True,
 79                  use_adls: bool = True,
 80                  **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
 81        """
 82        Load UrFall dataset from the specified directory.
 83        
 84        Args:
 85            data_dir: Directory containing the dataset
 86            data_types: List of data types to load. Options: 'depth', 'rgb', 'accelerometer',
 87                       'synchronization', 'video', 'features' (default: ['features'])
 88            sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01'])
 89                      If None, loads all based on use_falls and use_adls
 90            use_falls: Whether to load fall sequences (default: True)
 91            use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True)
 92            **kwargs: Additional arguments
 93            
 94        Returns:
 95            Tuple of (data_list, names_list)
 96        """
 97        # Default to loading pre-extracted features if not specified
 98        if data_types is None:
 99            data_types = ['features']
100        
101        # Validate data types
102        valid_types = set(self.metadata['data_types'])
103        requested_types = set(data_types)
104        invalid_types = requested_types - valid_types
105        if invalid_types:
106            raise ValueError(f"Invalid data types: {invalid_types}. Valid types: {valid_types}")
107        
108        # Create directory if it doesn't exist
109        os.makedirs(data_dir, exist_ok=True)
110        
111        data_list = []
112        names_list = []
113        
114        # Load pre-extracted features (CSV files)
115        if 'features' in data_types:
116            features_data, features_names = self._load_features(data_dir, sequences, use_falls, use_adls)
117            data_list.extend(features_data)
118            names_list.extend(features_names)
119        
120        # Load raw accelerometer data
121        if 'accelerometer' in data_types:
122            accel_data, accel_names = self._load_accelerometer(data_dir, sequences, use_falls, use_adls)
123            data_list.extend(accel_data)
124            names_list.extend(accel_names)
125        
126        # Load synchronization data
127        if 'synchronization' in data_types:
128            sync_data, sync_names = self._load_synchronization(data_dir, sequences, use_falls, use_adls)
129            data_list.extend(sync_data)
130            names_list.extend(sync_names)
131        
132        # Note: Depth, RGB, and Video data are image/video files
133        # These would require specialized loading and are not typically loaded into DataFrames
134        if 'depth' in data_types or 'rgb' in data_types or 'video' in data_types:
135            print("Note: Depth, RGB, and Video data types contain image/video files.")
136            print("These are not loaded into DataFrames but their paths can be accessed.")
137            print("Use the get_file_paths() method to retrieve paths to these files.")
138        
139        self.data = data_list
140        return data_list, names_list

Load UrFall dataset from the specified directory.

Args: data_dir: Directory containing the dataset; data_types: List of data types to load, one or more of 'depth', 'rgb', 'accelerometer', 'synchronization', 'video', 'features' (default: ['features']); sequences: List of specific sequences to load (e.g., ['fall-01', 'adl-01']) — if None, loads all based on use_falls and use_adls; use_falls: Whether to load fall sequences (default: True); use_adls: Whether to load ADL (Activities of Daily Living) sequences (default: True); **kwargs: Additional arguments

Returns: Tuple of (data_list, names_list)

def get_file_paths( self, data_dir: str, data_type: str, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
289    def get_file_paths(self, data_dir: str, data_type: str, 
290                       sequences: Optional[List[str]] = None,
291                       use_falls: bool = True, use_adls: bool = True) -> Dict[str, str]:
292        """
293        Get file paths for image/video data types (depth, RGB, video).
294        
295        Args:
296            data_dir: Directory containing the dataset
297            data_type: Type of data ('depth', 'rgb', 'video')
298            sequences: Specific sequences to get paths for
299            use_falls: Whether to include fall sequences
300            use_adls: Whether to include ADL sequences
301            
302        Returns:
303            Dictionary mapping sequence names to file paths
304        """
305        if data_type not in ['depth', 'rgb', 'video']:
306            raise ValueError(f"data_type must be one of: 'depth', 'rgb', 'video'. Got: {data_type}")
307        
308        file_paths = {}
309        
310        # Determine which sequences to include
311        seq_list = []
312        if sequences is not None:
313            seq_list = sequences
314        else:
315            if use_falls:
316                seq_list.extend([f"fall-{i:02d}" for i in range(1, 31)])
317            if use_adls:
318                seq_list.extend([f"adl-{i:02d}" for i in range(1, 21)])
319        
320        # Map data type to file extension
321        extension_map = {
322            'depth': '-cam0-d.zip',
323            'rgb': '-cam0-rgb.zip',
324            'video': '-cam0.mp4'
325        }
326        
327        ext = extension_map[data_type]
328        
329        for seq in seq_list:
330            file_path = os.path.join(data_dir, f"{seq}{ext}")
331            if os.path.exists(file_path):
332                file_paths[seq] = file_path
333        
334        return file_paths

Get file paths for image/video data types (depth, RGB, video).

Args: data_dir: Directory containing the dataset data_type: Type of data ('depth', 'rgb', 'video') sequences: Specific sequences to get paths for use_falls: Whether to include fall sequences use_adls: Whether to include ADL sequences

Returns: Dictionary mapping sequence names to file paths

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 30, step_size: int = 15) -> List[Dict]:
336    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str],
337                               window_size: int = 30, step_size: int = 15) -> List[Dict]:
338        """
339        Create sliding windows from the loaded data.
340        
341        Args:
342            data: List of DataFrames containing the dataset
343            names: List of names corresponding to each DataFrame
344            window_size: Size of the sliding window (default: 30 frames for depth features)
345            step_size: Step size for sliding window (default: 15 frames)
346            
347        Returns:
348            List of dictionaries containing windowed data
349        """
350        windows_data = []
351        
352        for idx, df in enumerate(data):
353            if df.empty:
354                continue
355            
356            # Get numeric feature columns (exclude metadata columns)
357            exclude_cols = ['sequence_name', 'frame_number', 'label', 'activity_type', 'activity_id']
358            feature_cols = [col for col in df.columns 
359                          if col not in exclude_cols and pd.api.types.is_numeric_dtype(df[col])]
360            
361            if not feature_cols:
362                continue
363            
364            windows = []
365            
366            # Create windows for each feature column
367            for col in feature_cols:
368                win = sliding_window(df[col].values, window_size, step_size)
369                windows.append({"name": col, "data": win})
370            
371            # Create windows for labels if present
372            if 'label' in df.columns:
373                label_windows = sliding_window(df['label'].values, window_size, step_size)
374                # Majority voting for each window
375                labels = []
376                for w in label_windows:
377                    vals, counts = np.unique(w, return_counts=True)
378                    labels.append(vals[np.argmax(counts)])
379                windows.append({"name": "labels", "data": np.array(labels)})
380            
381            # Create activity_id windows
382            if 'activity_id' in df.columns:
383                activity_windows = sliding_window(df['activity_id'].values, window_size, step_size)
384                windows.append({"name": "activity_id", "data": activity_windows})
385            
386            windows_data.append({"name": names[idx], "windows": windows})
387        
388        return windows_data

Create sliding windows from the loaded data.

Args: data: List of DataFrames containing the dataset; names: List of names corresponding to each DataFrame; window_size: Size of the sliding window (default: 30 frames for depth features); step_size: Step size for sliding window (default: 15 frames)

Returns: List of dictionaries containing windowed data

def get_supported_formats(self) -> List[str]:
390    def get_supported_formats(self) -> List[str]:
391        """
392        Get list of supported file formats for UrFall dataset.
393        
394        Returns:
395            List of supported file extensions
396        """
397        return ['.csv', '.zip', '.mp4']

Get list of supported file formats for UrFall dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, object]:
399    def get_sensor_info(self) -> Dict[str, any]:
400        """
401        Get information about sensors in the dataset.
402        
403        Returns:
404            Dictionary containing sensor information
405        """
406        return {
407            'data_types': self.metadata['data_types'],
408            'camera': self.metadata['camera'],
409            'sampling_frequency': self.metadata['sampling_frequency'],
410            'accelerometer_frequency': self.metadata['accelerometer_frequency']
411        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> Dict[int, str]:
413    def get_activity_info(self) -> Dict[int, str]:
414        """
415        Get information about activities in the dataset.
416        
417        Returns:
418            Dictionary mapping activity IDs to labels
419        """
420        return self.metadata['activities']

Get information about activities in the dataset.

Returns: Dictionary mapping activity IDs to labels

def get_feature_info(self) -> Dict[str, str]:
422    def get_feature_info(self) -> Dict[str, str]:
423        """
424        Get information about pre-extracted features.
425        
426        Returns:
427            Dictionary mapping feature names to descriptions
428        """
429        return self.metadata['feature_descriptions']

Get information about pre-extracted features.

Returns: Dictionary mapping feature names to descriptions

def load_urfall_data( data_dir: str, data_types: Optional[List[str]] = None, sequences: Optional[List[str]] = None, use_falls: bool = True, use_adls: bool = True):
def load_urfall_data(data_dir: str, data_types: Optional[List[str]] = None,
                     sequences: Optional[List[str]] = None,
                     use_falls: bool = True, use_adls: bool = True):
    """
    Legacy function interface: load UrFall data through a fresh UrFallLoader.

    Args:
        data_dir: Directory containing the dataset
        data_types: List of data types to load
        sequences: List of specific sequences to load
        use_falls: Whether to load fall sequences
        use_adls: Whether to load ADL sequences

    Returns:
        Tuple of (data_list, names_list), exactly as returned by
        UrFallLoader.load_data
    """
    options = {
        'data_types': data_types,
        'sequences': sequences,
        'use_falls': use_falls,
        'use_adls': use_adls,
    }
    return UrFallLoader().load_data(data_dir, **options)

Load UrFall dataset using the legacy function interface.

Args: data_dir: Directory containing the dataset data_types: List of data types to load sequences: List of specific sequences to load use_falls: Whether to load fall sequences use_adls: Whether to load ADL sequences

Returns: Tuple of (data_list, names_list)

def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
def create_urfall_windows(urfall_data, urfall_names, window_size=30, step_size=15):
    """
    Legacy function interface: window UrFall data through a fresh UrFallLoader.

    Args:
        urfall_data: List of DataFrames
        urfall_names: List of names
        window_size: Size of sliding window
        step_size: Step size for sliding window

    Returns:
        List of dictionaries containing windowed data, exactly as returned by
        UrFallLoader.create_sliding_windows
    """
    return UrFallLoader().create_sliding_windows(
        urfall_data, urfall_names, window_size, step_size
    )

Create sliding windows from UrFall data using the legacy function interface.

Args: urfall_data: List of DataFrames urfall_names: List of names window_size: Size of sliding window step_size: Step size for sliding window

Returns: List of dictionaries containing windowed data