gaitsetpy.dataset.harup

HAR-UP Dataset Loader and Utils. Maintainer: @aharshit123456

This file contains the HAR-UP dataset loader class that inherits from BaseDatasetLoader. HAR-UP is a multimodal dataset for human activity recognition and fall detection.

Reference:

- Website: https://sites.google.com/up.edu.mx/har-up/
- GitHub: https://github.com/jpnm561/HAR-UP

  1'''
  2HAR-UP Dataset Loader and Utils.
  3Maintainer: @aharshit123456
  4
  5This file contains the HAR-UP dataset loader class that inherits from BaseDatasetLoader.
  6HAR-UP is a multimodal dataset for human activity recognition and fall detection.
  7
  8Reference:
  9- Website: https://sites.google.com/up.edu.mx/har-up/
 10- GitHub: https://github.com/jpnm561/HAR-UP
 11'''
 12
 13import os
 14import pandas as pd
 15import numpy as np
 16from typing import List, Dict, Tuple, Optional
 17from glob import glob
 18import datetime
 19from tqdm import tqdm
 20from ..core.base_classes import BaseDatasetLoader
 21from .utils import download_dataset, extract_dataset, sliding_window
 22from ..features.harup_features import HARUPFeatureExtractor
 23
 24
 25class HARUPLoader(BaseDatasetLoader):
 26    """
 27    HAR-UP dataset loader class.
 28    
 29    This class handles loading and processing of the HAR-UP dataset for human activity recognition
 30    and fall detection analysis.
 31    """
 32    
 33    def __init__(self):
 34        super().__init__(
 35            name="harup",
 36            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition"
 37        )
 38        self.metadata = {
 39            'sensors': [
 40                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
 41                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
 42                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
 43                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
 44                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
 45                'BrainSensor', 'Infrared'
 46            ],
 47            'components': {
 48                'Accelerometer': ['x', 'y', 'z'],
 49                'AngularVelocity': ['x', 'y', 'z'],
 50                'Luminosity': ['illuminance'],
 51                'BrainSensor': ['value'],
 52                'Infrared': ['value']
 53            },
 54            'sampling_frequency': 100,  # Hz
 55            'activities': {
 56                1: 'Walking',
 57                2: 'Walking upstairs',
 58                3: 'Walking downstairs',
 59                4: 'Sitting',
 60                5: 'Standing',
 61                6: 'Lying',
 62                7: 'Falling forward using hands',
 63                8: 'Falling forward using knees',
 64                9: 'Falling backwards',
 65                10: 'Falling sideward',
 66                11: 'Falling sitting in empty chair'
 67            }
 68        }
 69        
 70        # Features used in HAR-UP
 71        self.features = [
 72            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
 73            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
 74            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
 75            'Energy'
 76        ]
 77    
 78    def download_harup_data(self, data_dir: str) -> Optional[str]:
 79        """
 80        Download HAR-UP dataset if not already present.
 81        
 82        Args:
 83            data_dir: Directory to store the dataset
 84            
 85        Returns:
 86            Path to the extracted dataset or None if not found
 87        """
 88        # Use the utility function to download and extract the dataset
 89        download_dataset("harup", data_dir)
 90        extract_dataset("harup", data_dir)
 91        
 92        # Check if dataset exists after download attempt
 93        dataset_path = os.path.join(data_dir, "DataSet")
 94        if not os.path.exists(dataset_path):
 95            print("HAR-UP dataset not found after download attempt.")
 96            print("Please ensure the dataset is organized in the following structure:")
 97            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
 98            return None
 99        
100        return dataset_path
101    
102    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
103                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
104                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
105        """
106        Load HAR-UP dataset from the specified directory.
107        Args:
108            data_dir: Directory containing the dataset
109            subjects: List of subject IDs to load (default: all subjects)
110            activities: List of activity IDs to load (default: all activities)
111            trials: List of trial IDs to load (default: all trials)
112            **kwargs: Additional arguments
113        Returns:
114            Tuple of (data_list, names_list)
115        """
116        import re
117        import os
118        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
119        if subjects is None:
120            subjects = list(range(1, 5))  # 4 subjects
121        if activities is None:
122            activities = list(range(1, 12))  # 11 activities
123        if trials is None:
124            trials = list(range(1, 4))  # 3 trials
125
126        # Column names as per official HAR-UP documentation
127        columns = [
128            "Timestamp",
129            "EEG_NeuroSky",
130            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
131            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
132            "Belt_Luminosity",
133            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
134            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
135            "Neck_Luminosity",
136            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
137            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
138            "Pocket_Luminosity",
139            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
140            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
141            "Wrist_Luminosity",
142            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
143        ]
144
145        # If data_dir does not exist, trigger interactive download
146        if not os.path.exists(data_dir):
147            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
148            self.download_harup_data(data_dir)
149        # If still doesn't exist, error out
150        if not os.path.exists(data_dir):
151            print(f"Failed to create or download dataset directory: {data_dir}")
152            return [], []
153
154        # Find the UP_Fall_Detection_Dataset directory
155        dataset_path = None
156        for entry in os.listdir(data_dir):
157            entry_path = os.path.join(data_dir, entry)
158            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
159                dataset_path = entry_path
160                break
161        if dataset_path is None:
162            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
163            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
164            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
165            return [], []
166
167        harup_data = []
168        harup_names = []
169
170        # Iterate over subjects
171        for subject_id in subjects:
172            subject_folder = f"Subject_{subject_id:02d}"
173            subject_path = os.path.join(dataset_path, subject_folder)
174            if not os.path.isdir(subject_path):
175                continue
176            
177            # Initialize empty DataFrame for this subject
178            subject_df = pd.DataFrame()
179            
180            # Iterate over activities in order
181            for activity_id in sorted(activities):
182                activity_folder = f"A{activity_id:02d}"
183                activity_path = os.path.join(subject_path, activity_folder)
184                if not os.path.isdir(activity_path):
185                    continue
186                
187                # Iterate over trials in order
188                for trial_id in sorted(trials):
189                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
190                    file_path = os.path.join(activity_path, file_name)
191                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
192                    
193                    try:
194                        df = pd.read_csv(file_path, header=0)
195                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
196                        df['subject_id'] = subject_id
197                        df['activity_id'] = activity_id 
198                        df['trial_id'] = trial_id
199                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
200                        
201                        # Concatenate to subject's DataFrame
202                        subject_df = pd.concat([subject_df, df], ignore_index=True)
203                        harup_names.append(name)
204                        
205                    except Exception as e:
206                        print(f"Error loading {file_path}: {e}")
207            
208            # Add complete subject DataFrame to data list
209            if not subject_df.empty:
210                harup_data.append(subject_df)
211                
212        self.data = harup_data
213        self.names = harup_names
214
215        return harup_data, harup_names
216    
217    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
218                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
219        """
220        Create sliding windows from the HAR-UP dataset.
221        
222        Args:
223            data: List of DataFrames containing HAR-UP data
224            names: List of names corresponding to the data
225            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
226            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
227            
228        Returns:
229            List of dictionaries containing sliding windows for each DataFrame
230        """
231        windows_data = []
232        
233        for idx, df in enumerate(data):
234            if df.empty:
235                continue
236                
237            windows = []
238            processed_columns = set()
239            
240            # Only use numeric columns (skip TIME and any non-numeric)
241            sensor_columns = [col for col in df.columns if col not in 
242                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
243                             and pd.api.types.is_numeric_dtype(df[col])]
244            
245
246            # Process each sensor column
247            for col in sensor_columns:
248                if col not in processed_columns:
249                    
250                    window_data = sliding_window(df[col], window_size, step_size)
251                    windows.append({"name": col, "data": window_data})
252                    processed_columns.add(col)
253            
254            # Include activity ID for each window
255            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
256            windows.append({"name": "activity_id", "data": activity_windows})
257            
258            # For each window, take the most common activity ID as the label
259            labels = []
260            for window in activity_windows:
261                # Get most common activity in this window
262                unique_vals, counts = np.unique(window, return_counts=True)
263                most_common_idx = np.argmax(counts)
264                labels.append(unique_vals[most_common_idx])
265            
266            windows.append({"name": "labels", "data": np.array(labels)})
267            
268            windows_data.append({"name": names[idx], "windows": windows})
269        
270        return windows_data
271    
272    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
273                       freq_domain_features: bool = True) -> List[Dict]:
274        """
275        Extract features from sliding windows using HAR-UP feature extraction methods.
276        Args:
277            windows_data: List of dictionaries containing sliding windows
278            time_domain_features: Whether to extract time domain features
279            freq_domain_features: Whether to extract frequency domain features
280        Returns:
281            List of dictionaries containing extracted features
282        """
283        # Mapping from original sensor names to actual CSV column names
284        sensor_map = {
285            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
286            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
287            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
288            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
289            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
290            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
291            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
292            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
293            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
294            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
295            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
296            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
297            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
298            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
299            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
300            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
301            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
302            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
303            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
304            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
305            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
306            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
307            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
308            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
309            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
310            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
311            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
312            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
313            'BrainSensor': 'HELMET_RAW',
314            'Infrared1': 'IR_1',
315            'Infrared2': 'IR_2',
316            'Infrared3': 'IR_3',
317            'Infrared4': 'IR_4',
318        }
319        extractor = HARUPFeatureExtractor(verbose=True)
320        extractor.config['time_domain'] = time_domain_features
321        extractor.config['frequency_domain'] = freq_domain_features
322        all_features = []
323        for window_dict in windows_data:
324            name = window_dict["name"]
325            windows = window_dict["windows"]
326            labels = None
327            for window in windows:
328                if window["name"] == "labels":
329                    labels = window["data"]
330                    break
331            if labels is None:
332                print(f"No labels found for {name}, skipping feature extraction")
333                continue
334            filtered_windows = []
335            missing = []
336            for orig_sensor, csv_col in sensor_map.items():
337                found = False
338                for window in windows:
339                    if window["name"] == csv_col:
340                        filtered_windows.append(window)
341                        found = True
342                        break
343                if not found:
344                    missing.append((orig_sensor, csv_col))
345            if missing:
346                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
347            for window in windows:
348                if window["name"] == "activity_id" or window["name"] == "labels":
349                    filtered_windows.append(window)
350            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
351            for i, feature in enumerate(features):
352                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
353                if window_idx < len(labels):
354                    feature["label"] = labels[window_idx]
355            all_features.append({"name": name, "features": features})
356        return all_features
357    
358    def get_supported_formats(self) -> List[str]:
359        """
360        Get list of supported file formats for HAR-UP dataset.
361        
362        Returns:
363            List of supported file extensions
364        """
365        return ['.csv']
366    
367    def get_sensor_info(self) -> Dict[str, List[str]]:
368        """
369        Get information about sensors in the dataset.
370        
371        Returns:
372            Dictionary containing sensor information
373        """
374        return {
375            'sensors': self.metadata['sensors'],
376            'components': self.metadata['components'],
377            'sampling_frequency': self.metadata['sampling_frequency']
378        }
379    
380    def get_activity_info(self) -> Dict[int, str]:
381        """
382        Get information about activities in the dataset.
383        
384        Returns:
385            Dictionary mapping activity IDs to descriptions
386        """
387        return self.metadata['activities']
388
389
390# Legacy function wrappers for backward compatibility
def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
    """
    Legacy wrapper: load HAR-UP data through a fresh ``HARUPLoader``.

    Args:
        data_dir: Directory containing the dataset
        subjects: Subject IDs to load (None lets the loader apply its default)
        activities: Activity IDs to load (None lets the loader apply its default)
        trials: Trial IDs to load (None lets the loader apply its default)

    Returns:
        Tuple of (data_list, names_list) as returned by ``HARUPLoader.load_data``
    """
    return HARUPLoader().load_data(
        data_dir,
        subjects=subjects,
        activities=activities,
        trials=trials,
    )
406
407
def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
    """
    Legacy wrapper: build sliding windows through a fresh ``HARUPLoader``.

    Args:
        harup_data: List of dataframes containing HAR-UP data
        harup_names: List of names of the HAR-UP dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame,
        as returned by ``HARUPLoader.create_sliding_windows``
    """
    return HARUPLoader().create_sliding_windows(
        data=harup_data,
        names=harup_names,
        window_size=window_size,
        step_size=step_size,
    )
423
424
def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
    """
    Legacy wrapper: extract HAR-UP features through a fresh ``HARUPLoader``.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain: Whether to extract time domain features
        freq_domain: Whether to extract frequency domain features

    Returns:
        List of dictionaries containing extracted features, as returned by
        ``HARUPLoader.extract_features``
    """
    return HARUPLoader().extract_features(
        windows_data,
        time_domain_features=time_domain,
        freq_domain_features=freq_domain,
    )
class HARUPLoader(gaitsetpy.core.base_classes.BaseDatasetLoader):
 26class HARUPLoader(BaseDatasetLoader):
 27    """
 28    HAR-UP dataset loader class.
 29    
 30    This class handles loading and processing of the HAR-UP dataset for human activity recognition
 31    and fall detection analysis.
 32    """
 33    
 34    def __init__(self):
 35        super().__init__(
 36            name="harup",
 37            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition"
 38        )
 39        self.metadata = {
 40            'sensors': [
 41                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
 42                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
 43                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
 44                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
 45                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
 46                'BrainSensor', 'Infrared'
 47            ],
 48            'components': {
 49                'Accelerometer': ['x', 'y', 'z'],
 50                'AngularVelocity': ['x', 'y', 'z'],
 51                'Luminosity': ['illuminance'],
 52                'BrainSensor': ['value'],
 53                'Infrared': ['value']
 54            },
 55            'sampling_frequency': 100,  # Hz
 56            'activities': {
 57                1: 'Walking',
 58                2: 'Walking upstairs',
 59                3: 'Walking downstairs',
 60                4: 'Sitting',
 61                5: 'Standing',
 62                6: 'Lying',
 63                7: 'Falling forward using hands',
 64                8: 'Falling forward using knees',
 65                9: 'Falling backwards',
 66                10: 'Falling sideward',
 67                11: 'Falling sitting in empty chair'
 68            }
 69        }
 70        
 71        # Features used in HAR-UP
 72        self.features = [
 73            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
 74            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
 75            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
 76            'Energy'
 77        ]
 78    
 79    def download_harup_data(self, data_dir: str) -> Optional[str]:
 80        """
 81        Download HAR-UP dataset if not already present.
 82        
 83        Args:
 84            data_dir: Directory to store the dataset
 85            
 86        Returns:
 87            Path to the extracted dataset or None if not found
 88        """
 89        # Use the utility function to download and extract the dataset
 90        download_dataset("harup", data_dir)
 91        extract_dataset("harup", data_dir)
 92        
 93        # Check if dataset exists after download attempt
 94        dataset_path = os.path.join(data_dir, "DataSet")
 95        if not os.path.exists(dataset_path):
 96            print("HAR-UP dataset not found after download attempt.")
 97            print("Please ensure the dataset is organized in the following structure:")
 98            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
 99            return None
100        
101        return dataset_path
102    
103    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
104                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
105                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
106        """
107        Load HAR-UP dataset from the specified directory.
108        Args:
109            data_dir: Directory containing the dataset
110            subjects: List of subject IDs to load (default: all subjects)
111            activities: List of activity IDs to load (default: all activities)
112            trials: List of trial IDs to load (default: all trials)
113            **kwargs: Additional arguments
114        Returns:
115            Tuple of (data_list, names_list)
116        """
117        import re
118        import os
119        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
120        if subjects is None:
121            subjects = list(range(1, 5))  # 4 subjects
122        if activities is None:
123            activities = list(range(1, 12))  # 11 activities
124        if trials is None:
125            trials = list(range(1, 4))  # 3 trials
126
127        # Column names as per official HAR-UP documentation
128        columns = [
129            "Timestamp",
130            "EEG_NeuroSky",
131            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
132            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
133            "Belt_Luminosity",
134            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
135            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
136            "Neck_Luminosity",
137            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
138            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
139            "Pocket_Luminosity",
140            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
141            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
142            "Wrist_Luminosity",
143            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
144        ]
145
146        # If data_dir does not exist, trigger interactive download
147        if not os.path.exists(data_dir):
148            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
149            self.download_harup_data(data_dir)
150        # If still doesn't exist, error out
151        if not os.path.exists(data_dir):
152            print(f"Failed to create or download dataset directory: {data_dir}")
153            return [], []
154
155        # Find the UP_Fall_Detection_Dataset directory
156        dataset_path = None
157        for entry in os.listdir(data_dir):
158            entry_path = os.path.join(data_dir, entry)
159            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
160                dataset_path = entry_path
161                break
162        if dataset_path is None:
163            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
164            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
165            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
166            return [], []
167
168        harup_data = []
169        harup_names = []
170
171        # Iterate over subjects
172        for subject_id in subjects:
173            subject_folder = f"Subject_{subject_id:02d}"
174            subject_path = os.path.join(dataset_path, subject_folder)
175            if not os.path.isdir(subject_path):
176                continue
177            
178            # Initialize empty DataFrame for this subject
179            subject_df = pd.DataFrame()
180            
181            # Iterate over activities in order
182            for activity_id in sorted(activities):
183                activity_folder = f"A{activity_id:02d}"
184                activity_path = os.path.join(subject_path, activity_folder)
185                if not os.path.isdir(activity_path):
186                    continue
187                
188                # Iterate over trials in order
189                for trial_id in sorted(trials):
190                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
191                    file_path = os.path.join(activity_path, file_name)
192                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
193                    
194                    try:
195                        df = pd.read_csv(file_path, header=0)
196                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
197                        df['subject_id'] = subject_id
198                        df['activity_id'] = activity_id 
199                        df['trial_id'] = trial_id
200                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
201                        
202                        # Concatenate to subject's DataFrame
203                        subject_df = pd.concat([subject_df, df], ignore_index=True)
204                        harup_names.append(name)
205                        
206                    except Exception as e:
207                        print(f"Error loading {file_path}: {e}")
208            
209            # Add complete subject DataFrame to data list
210            if not subject_df.empty:
211                harup_data.append(subject_df)
212                
213        self.data = harup_data
214        self.names = harup_names
215
216        return harup_data, harup_names
217    
218    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
219                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
220        """
221        Create sliding windows from the HAR-UP dataset.
222        
223        Args:
224            data: List of DataFrames containing HAR-UP data
225            names: List of names corresponding to the data
226            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
227            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
228            
229        Returns:
230            List of dictionaries containing sliding windows for each DataFrame
231        """
232        windows_data = []
233        
234        for idx, df in enumerate(data):
235            if df.empty:
236                continue
237                
238            windows = []
239            processed_columns = set()
240            
241            # Only use numeric columns (skip TIME and any non-numeric)
242            sensor_columns = [col for col in df.columns if col not in 
243                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
244                             and pd.api.types.is_numeric_dtype(df[col])]
245            
246
247            # Process each sensor column
248            for col in sensor_columns:
249                if col not in processed_columns:
250                    
251                    window_data = sliding_window(df[col], window_size, step_size)
252                    windows.append({"name": col, "data": window_data})
253                    processed_columns.add(col)
254            
255            # Include activity ID for each window
256            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
257            windows.append({"name": "activity_id", "data": activity_windows})
258            
259            # For each window, take the most common activity ID as the label
260            labels = []
261            for window in activity_windows:
262                # Get most common activity in this window
263                unique_vals, counts = np.unique(window, return_counts=True)
264                most_common_idx = np.argmax(counts)
265                labels.append(unique_vals[most_common_idx])
266            
267            windows.append({"name": "labels", "data": np.array(labels)})
268            
269            windows_data.append({"name": names[idx], "windows": windows})
270        
271        return windows_data
272    
273    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
274                       freq_domain_features: bool = True) -> List[Dict]:
275        """
276        Extract features from sliding windows using HAR-UP feature extraction methods.
277        Args:
278            windows_data: List of dictionaries containing sliding windows
279            time_domain_features: Whether to extract time domain features
280            freq_domain_features: Whether to extract frequency domain features
281        Returns:
282            List of dictionaries containing extracted features
283        """
284        # Mapping from original sensor names to actual CSV column names
285        sensor_map = {
286            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
287            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
288            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
289            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
290            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
291            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
292            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
293            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
294            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
295            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
296            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
297            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
298            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
299            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
300            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
301            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
302            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
303            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
304            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
305            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
306            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
307            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
308            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
309            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
310            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
311            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
312            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
313            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
314            'BrainSensor': 'HELMET_RAW',
315            'Infrared1': 'IR_1',
316            'Infrared2': 'IR_2',
317            'Infrared3': 'IR_3',
318            'Infrared4': 'IR_4',
319        }
320        extractor = HARUPFeatureExtractor(verbose=True)
321        extractor.config['time_domain'] = time_domain_features
322        extractor.config['frequency_domain'] = freq_domain_features
323        all_features = []
324        for window_dict in windows_data:
325            name = window_dict["name"]
326            windows = window_dict["windows"]
327            labels = None
328            for window in windows:
329                if window["name"] == "labels":
330                    labels = window["data"]
331                    break
332            if labels is None:
333                print(f"No labels found for {name}, skipping feature extraction")
334                continue
335            filtered_windows = []
336            missing = []
337            for orig_sensor, csv_col in sensor_map.items():
338                found = False
339                for window in windows:
340                    if window["name"] == csv_col:
341                        filtered_windows.append(window)
342                        found = True
343                        break
344                if not found:
345                    missing.append((orig_sensor, csv_col))
346            if missing:
347                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
348            for window in windows:
349                if window["name"] == "activity_id" or window["name"] == "labels":
350                    filtered_windows.append(window)
351            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
352            for i, feature in enumerate(features):
353                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
354                if window_idx < len(labels):
355                    feature["label"] = labels[window_idx]
356            all_features.append({"name": name, "features": features})
357        return all_features
358    
359    def get_supported_formats(self) -> List[str]:
360        """
361        Get list of supported file formats for HAR-UP dataset.
362        
363        Returns:
364            List of supported file extensions
365        """
366        return ['.csv']
367    
368    def get_sensor_info(self) -> Dict[str, List[str]]:
369        """
370        Get information about sensors in the dataset.
371        
372        Returns:
373            Dictionary containing sensor information
374        """
375        return {
376            'sensors': self.metadata['sensors'],
377            'components': self.metadata['components'],
378            'sampling_frequency': self.metadata['sampling_frequency']
379        }
380    
381    def get_activity_info(self) -> Dict[int, str]:
382        """
383        Get information about activities in the dataset.
384        
385        Returns:
386            Dictionary mapping activity IDs to descriptions
387        """
388        return self.metadata['activities']

HAR-UP dataset loader class.

This class handles loading and processing of the HAR-UP dataset for human activity recognition and fall detection analysis.

HARUPLoader()
34    def __init__(self):
35        super().__init__(
36            name="harup",
37            description="HAR-UP Dataset - Multimodal System for Fall Detection and Human Activity Recognition"
38        )
39        self.metadata = {
40            'sensors': [
41                'AnkleAccelerometer', 'AnkleAngularVelocity', 'AnkleLuminosity',
42                'RightPocketAccelerometer', 'RightPocketAngularVelocity', 'RightPocketLuminosity',
43                'BeltAccelerometer', 'BeltAngularVelocity', 'BeltLuminosity',
44                'NeckAccelerometer', 'NeckAngularVelocity', 'NeckLuminosity',
45                'WristAccelerometer', 'WristAngularVelocity', 'WristLuminosity',
46                'BrainSensor', 'Infrared'
47            ],
48            'components': {
49                'Accelerometer': ['x', 'y', 'z'],
50                'AngularVelocity': ['x', 'y', 'z'],
51                'Luminosity': ['illuminance'],
52                'BrainSensor': ['value'],
53                'Infrared': ['value']
54            },
55            'sampling_frequency': 100,  # Hz
56            'activities': {
57                1: 'Walking',
58                2: 'Walking upstairs',
59                3: 'Walking downstairs',
60                4: 'Sitting',
61                5: 'Standing',
62                6: 'Lying',
63                7: 'Falling forward using hands',
64                8: 'Falling forward using knees',
65                9: 'Falling backwards',
66                10: 'Falling sideward',
67                11: 'Falling sitting in empty chair'
68            }
69        }
70        
71        # Features used in HAR-UP
72        self.features = [
73            'Mean', 'StandardDeviation', 'RootMeanSquare', 'MaximalAmplitude',
74            'MinimalAmplitude', 'Median', 'Number of zero-crossing', 'Skewness',
75            'Kurtosis', 'First Quartile', 'Third Quartile', 'Autocorrelation',
76            'Energy'
77        ]

Initialize the dataset loader.

Args:
- name: Name of the dataset
- description: Description of the dataset

metadata
features
def download_harup_data(self, data_dir: str) -> Optional[str]:
 79    def download_harup_data(self, data_dir: str) -> Optional[str]:
 80        """
 81        Download HAR-UP dataset if not already present.
 82        
 83        Args:
 84            data_dir: Directory to store the dataset
 85            
 86        Returns:
 87            Path to the extracted dataset or None if not found
 88        """
 89        # Use the utility function to download and extract the dataset
 90        download_dataset("harup", data_dir)
 91        extract_dataset("harup", data_dir)
 92        
 93        # Check if dataset exists after download attempt
 94        dataset_path = os.path.join(data_dir, "DataSet")
 95        if not os.path.exists(dataset_path):
 96            print("HAR-UP dataset not found after download attempt.")
 97            print("Please ensure the dataset is organized in the following structure:")
 98            print("DataSet/Subject{i}/Activity{j}/Trial{k}/Subject{i}Activity{j}Trial{k}.csv")
 99            return None
100        
101        return dataset_path

Download HAR-UP dataset if not already present.

Args: data_dir: Directory to store the dataset

Returns: Path to the extracted dataset or None if not found

def load_data( self, data_dir: str, subjects: Optional[List[int]] = None, activities: Optional[List[int]] = None, trials: Optional[List[int]] = None, **kwargs) -> Tuple[List[pandas.core.frame.DataFrame], List[str]]:
103    def load_data(self, data_dir: str, subjects: Optional[List[int]] = None, 
104                activities: Optional[List[int]] = None, trials: Optional[List[int]] = None,
105                **kwargs) -> Tuple[List[pd.DataFrame], List[str]]:
106        """
107        Load HAR-UP dataset from the specified directory.
108        Args:
109            data_dir: Directory containing the dataset
110            subjects: List of subject IDs to load (default: all subjects)
111            activities: List of activity IDs to load (default: all activities)
112            trials: List of trial IDs to load (default: all trials)
113            **kwargs: Additional arguments
114        Returns:
115            Tuple of (data_list, names_list)
116        """
117        import re
118        import os
119        # Set default values if not provided (HAR-UP: 4 subjects, 11 activities, 3 trials)
120        if subjects is None:
121            subjects = list(range(1, 5))  # 4 subjects
122        if activities is None:
123            activities = list(range(1, 12))  # 11 activities
124        if trials is None:
125            trials = list(range(1, 4))  # 3 trials
126
127        # Column names as per official HAR-UP documentation
128        columns = [
129            "Timestamp",
130            "EEG_NeuroSky",
131            "Belt_Acc_X", "Belt_Acc_Y", "Belt_Acc_Z",
132            "Belt_Gyro_X", "Belt_Gyro_Y", "Belt_Gyro_Z",
133            "Belt_Luminosity",
134            "Neck_Acc_X", "Neck_Acc_Y", "Neck_Acc_Z",
135            "Neck_Gyro_X", "Neck_Gyro_Y", "Neck_Gyro_Z",
136            "Neck_Luminosity",
137            "Pocket_Acc_X", "Pocket_Acc_Y", "Pocket_Acc_Z",
138            "Pocket_Gyro_X", "Pocket_Gyro_Y", "Pocket_Gyro_Z",
139            "Pocket_Luminosity",
140            "Wrist_Acc_X", "Wrist_Acc_Y", "Wrist_Acc_Z",
141            "Wrist_Gyro_X", "Wrist_Gyro_Y", "Wrist_Gyro_Z",
142            "Wrist_Luminosity",
143            "Infrared_1", "Infrared_2", "Infrared_3", "Infrared_4"
144        ]
145
146        # If data_dir does not exist, trigger interactive download
147        if not os.path.exists(data_dir):
148            print(f"Directory {data_dir} does not exist. Attempting to download HAR-UP dataset...")
149            self.download_harup_data(data_dir)
150        # If still doesn't exist, error out
151        if not os.path.exists(data_dir):
152            print(f"Failed to create or download dataset directory: {data_dir}")
153            return [], []
154
155        # Find the UP_Fall_Detection_Dataset directory
156        dataset_path = None
157        for entry in os.listdir(data_dir):
158            entry_path = os.path.join(data_dir, entry)
159            if os.path.isdir(entry_path) and entry.startswith("UP_Fall_Detection_Dataset"):
160                dataset_path = entry_path
161                break
162        if dataset_path is None:
163            print("UP_Fall_Detection_Dataset directory not found in", data_dir)
164            print("No data loaded. Please make sure you've downloaded the HAR-UP dataset.")
165            print("Visit https://sites.google.com/up.edu.mx/har-up/ to download the dataset.")
166            return [], []
167
168        harup_data = []
169        harup_names = []
170
171        # Iterate over subjects
172        for subject_id in subjects:
173            subject_folder = f"Subject_{subject_id:02d}"
174            subject_path = os.path.join(dataset_path, subject_folder)
175            if not os.path.isdir(subject_path):
176                continue
177            
178            # Initialize empty DataFrame for this subject
179            subject_df = pd.DataFrame()
180            
181            # Iterate over activities in order
182            for activity_id in sorted(activities):
183                activity_folder = f"A{activity_id:02d}"
184                activity_path = os.path.join(subject_path, activity_folder)
185                if not os.path.isdir(activity_path):
186                    continue
187                
188                # Iterate over trials in order
189                for trial_id in sorted(trials):
190                    file_name = f"S{subject_id:02d}_A{activity_id:02d}_T{trial_id:02d}.csv"
191                    file_path = os.path.join(activity_path, file_name)
192                    name = f"{subject_folder}_{activity_folder}_T{trial_id:02d}"
193                    
194                    try:
195                        df = pd.read_csv(file_path, header=0)
196                        print(f"[HARUP] Loaded columns for {file_name}: {list(df.columns)}")
197                        df['subject_id'] = subject_id
198                        df['activity_id'] = activity_id 
199                        df['trial_id'] = trial_id
200                        df['activity_label'] = self.metadata['activities'].get(activity_id, f"A{activity_id:02d}")
201                        
202                        # Concatenate to subject's DataFrame
203                        subject_df = pd.concat([subject_df, df], ignore_index=True)
204                        harup_names.append(name)
205                        
206                    except Exception as e:
207                        print(f"Error loading {file_path}: {e}")
208            
209            # Add complete subject DataFrame to data list
210            if not subject_df.empty:
211                harup_data.append(subject_df)
212                
213        self.data = harup_data
214        self.names = harup_names
215
216        return harup_data, harup_names

Load HAR-UP dataset from the specified directory.

Args:
- data_dir: Directory containing the dataset
- subjects: List of subject IDs to load (default: all subjects)
- activities: List of activity IDs to load (default: all activities)
- trials: List of trial IDs to load (default: all trials)
- **kwargs: Additional arguments

Returns: Tuple of (data_list, names_list)

def create_sliding_windows( self, data: List[pandas.core.frame.DataFrame], names: List[str], window_size: int = 100, step_size: int = 50) -> List[Dict]:
218    def create_sliding_windows(self, data: List[pd.DataFrame], names: List[str], 
219                             window_size: int = 100, step_size: int = 50) -> List[Dict]:
220        """
221        Create sliding windows from the HAR-UP dataset.
222        
223        Args:
224            data: List of DataFrames containing HAR-UP data
225            names: List of names corresponding to the data
226            window_size: Size of the sliding window (default: 100 = 1 second at 100Hz)
227            step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)
228            
229        Returns:
230            List of dictionaries containing sliding windows for each DataFrame
231        """
232        windows_data = []
233        
234        for idx, df in enumerate(data):
235            if df.empty:
236                continue
237                
238            windows = []
239            processed_columns = set()
240            
241            # Only use numeric columns (skip TIME and any non-numeric)
242            sensor_columns = [col for col in df.columns if col not in 
243                             ['subject_id', 'activity_id', 'trial_id', 'activity_label', 'TIME']
244                             and pd.api.types.is_numeric_dtype(df[col])]
245            
246
247            # Process each sensor column
248            for col in sensor_columns:
249                if col not in processed_columns:
250                    
251                    window_data = sliding_window(df[col], window_size, step_size)
252                    windows.append({"name": col, "data": window_data})
253                    processed_columns.add(col)
254            
255            # Include activity ID for each window
256            activity_windows = sliding_window(df["activity_id"], window_size, step_size)
257            windows.append({"name": "activity_id", "data": activity_windows})
258            
259            # For each window, take the most common activity ID as the label
260            labels = []
261            for window in activity_windows:
262                # Get most common activity in this window
263                unique_vals, counts = np.unique(window, return_counts=True)
264                most_common_idx = np.argmax(counts)
265                labels.append(unique_vals[most_common_idx])
266            
267            windows.append({"name": "labels", "data": np.array(labels)})
268            
269            windows_data.append({"name": names[idx], "windows": windows})
270        
271        return windows_data

Create sliding windows from the HAR-UP dataset.

Args: data: List of DataFrames containing HAR-UP data names: List of names corresponding to the data window_size: Size of the sliding window (default: 100 = 1 second at 100Hz) step_size: Step size for the sliding window (default: 50 = 0.5 seconds at 100Hz)

Returns: List of dictionaries containing sliding windows for each DataFrame

def extract_features( self, windows_data: List[Dict], time_domain_features: bool = True, freq_domain_features: bool = True) -> List[Dict]:
273    def extract_features(self, windows_data: List[Dict], time_domain_features: bool = True,
274                       freq_domain_features: bool = True) -> List[Dict]:
275        """
276        Extract features from sliding windows using HAR-UP feature extraction methods.
277        Args:
278            windows_data: List of dictionaries containing sliding windows
279            time_domain_features: Whether to extract time domain features
280            freq_domain_features: Whether to extract frequency domain features
281        Returns:
282            List of dictionaries containing extracted features
283        """
284        # Mapping from original sensor names to actual CSV column names
285        sensor_map = {
286            'BeltAccelerometer: x-axis (g)': 'BELT_ACC_X',
287            'BeltAccelerometer: y-axis (g)': 'BELT_ACC_Y',
288            'BeltAccelerometer: z-axis (g)': 'BELT_ACC_Z',
289            'BeltAngularVelocity: x-axis (deg/s)': 'BELT_ANG_X',
290            'BeltAngularVelocity: y-axis (deg/s)': 'BELT_ANG_Y',
291            'BeltAngularVelocity: z-axis (deg/s)': 'BELT_ANG_Z',
292            'BeltLuminosity: illuminance (lx)': 'BELT_LUMINOSITY',
293            'NeckAccelerometer: x-axis (g)': 'NECK_ACC_X',
294            'NeckAccelerometer: y-axis (g)': 'NECK_ACC_Y',
295            'NeckAccelerometer: z-axis (g)': 'NECK_ACC_Z',
296            'NeckAngularVelocity: x-axis (deg/s)': 'NECK_ANG_X',
297            'NeckAngularVelocity: y-axis (deg/s)': 'NECK_ANG_Y',
298            'NeckAngularVelocity: z-axis (deg/s)': 'NECK_ANG_Z',
299            'NeckLuminosity: illuminance (lx)': 'NECK_LUMINOSITY',
300            'PocketAccelerometer: x-axis (g)': 'PCKT_ACC_X',
301            'PocketAccelerometer: y-axis (g)': 'PCKT_ACC_Y',
302            'PocketAccelerometer: z-axis (g)': 'PCKT_ACC_Z',
303            'PocketAngularVelocity: x-axis (deg/s)': 'PCKT_ANG_X',
304            'PocketAngularVelocity: y-axis (deg/s)': 'PCKT_ANG_Y',
305            'PocketAngularVelocity: z-axis (deg/s)': 'PCKT_ANG_Z',
306            'PocketLuminosity: illuminance (lx)': 'PCKT_LUMINOSITY',
307            'WristAccelerometer: x-axis (g)': 'WRST_ACC_X',
308            'WristAccelerometer: y-axis (g)': 'WRST_ACC_Y',
309            'WristAccelerometer: z-axis (g)': 'WRST_ACC_Z',
310            'WristAngularVelocity: x-axis (deg/s)': 'WRST_ANG_X',
311            'WristAngularVelocity: y-axis (deg/s)': 'WRST_ANG_Y',
312            'WristAngularVelocity: z-axis (deg/s)': 'WRST_ANG_Z',
313            'WristLuminosity: illuminance (lx)': 'WRST_LUMINOSITY',
314            'BrainSensor': 'HELMET_RAW',
315            'Infrared1': 'IR_1',
316            'Infrared2': 'IR_2',
317            'Infrared3': 'IR_3',
318            'Infrared4': 'IR_4',
319        }
320        extractor = HARUPFeatureExtractor(verbose=True)
321        extractor.config['time_domain'] = time_domain_features
322        extractor.config['frequency_domain'] = freq_domain_features
323        all_features = []
324        for window_dict in windows_data:
325            name = window_dict["name"]
326            windows = window_dict["windows"]
327            labels = None
328            for window in windows:
329                if window["name"] == "labels":
330                    labels = window["data"]
331                    break
332            if labels is None:
333                print(f"No labels found for {name}, skipping feature extraction")
334                continue
335            filtered_windows = []
336            missing = []
337            for orig_sensor, csv_col in sensor_map.items():
338                found = False
339                for window in windows:
340                    if window["name"] == csv_col:
341                        filtered_windows.append(window)
342                        found = True
343                        break
344                if not found:
345                    missing.append((orig_sensor, csv_col))
346            if missing:
347                print(f"[HARUP] Missing columns for {name}: {[m[1] for m in missing]}")
348            for window in windows:
349                if window["name"] == "activity_id" or window["name"] == "labels":
350                    filtered_windows.append(window)
351            features = extractor.extract_features(filtered_windows, fs=self.metadata['sampling_frequency'])
352            for i, feature in enumerate(features):
353                window_idx = i // (len(filtered_windows) - 2)  # Subtract 2 for labels and activity_id
354                if window_idx < len(labels):
355                    feature["label"] = labels[window_idx]
356            all_features.append({"name": name, "features": features})
357        return all_features

Extract features from sliding windows using HAR-UP feature extraction methods.

Args:
- windows_data: List of dictionaries containing sliding windows
- time_domain_features: Whether to extract time domain features
- freq_domain_features: Whether to extract frequency domain features

Returns: List of dictionaries containing extracted features

def get_supported_formats(self) -> List[str]:
359    def get_supported_formats(self) -> List[str]:
360        """
361        Get list of supported file formats for HAR-UP dataset.
362        
363        Returns:
364            List of supported file extensions
365        """
366        return ['.csv']

Get list of supported file formats for HAR-UP dataset.

Returns: List of supported file extensions

def get_sensor_info(self) -> Dict[str, List[str]]:
368    def get_sensor_info(self) -> Dict[str, List[str]]:
369        """
370        Get information about sensors in the dataset.
371        
372        Returns:
373            Dictionary containing sensor information
374        """
375        return {
376            'sensors': self.metadata['sensors'],
377            'components': self.metadata['components'],
378            'sampling_frequency': self.metadata['sampling_frequency']
379        }

Get information about sensors in the dataset.

Returns: Dictionary containing sensor information

def get_activity_info(self) -> Dict[int, str]:
381    def get_activity_info(self) -> Dict[int, str]:
382        """
383        Get information about activities in the dataset.
384        
385        Returns:
386            Dictionary mapping activity IDs to descriptions
387        """
388        return self.metadata['activities']

Get information about activities in the dataset.

Returns: Dictionary mapping activity IDs to descriptions

def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
def load_harup_data(data_dir: str, subjects=None, activities=None, trials=None):
    """
    Legacy entry point: load HAR-UP data through a fresh HARUPLoader.

    Args:
        data_dir: Directory containing the dataset
        subjects: List of subject IDs to load (default: all subjects)
        activities: List of activity IDs to load (default: all activities)
        trials: List of trial IDs to load (default: all trials)

    Returns:
        Tuple of (data_list, names_list) as returned by HARUPLoader.load_data
    """
    return HARUPLoader().load_data(data_dir, subjects, activities, trials)

Legacy function for loading HAR-UP data.

Args: data_dir: Directory containing the dataset subjects: List of subject IDs to load (default: all subjects) activities: List of activity IDs to load (default: all activities) trials: List of trial IDs to load (default: all trials)

Returns: Tuple of (data_list, names_list)

def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
def create_harup_windows(harup_data, harup_names, window_size=100, step_size=50):
    """
    Legacy entry point: build sliding windows through a fresh HARUPLoader.

    Args:
        harup_data: List of dataframes containing HAR-UP data
        harup_names: List of names of the HAR-UP dataframes
        window_size: Size of the sliding window
        step_size: Step size for the sliding window

    Returns:
        List of dictionaries containing sliding windows for each DataFrame,
        as returned by HARUPLoader.create_sliding_windows
    """
    return HARUPLoader().create_sliding_windows(harup_data, harup_names, window_size, step_size)

Legacy function for creating sliding windows from HAR-UP data.

Args: harup_data: List of dataframes containing HAR-UP data harup_names: List of names of the HAR-UP dataframes window_size: Size of the sliding window step_size: Step size for the sliding window

Returns: List of dictionaries containing sliding windows for each DataFrame

def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
def extract_harup_features(windows_data, time_domain=True, freq_domain=True):
    """
    Legacy entry point: extract HAR-UP features through a fresh HARUPLoader.

    Args:
        windows_data: List of dictionaries containing sliding windows
        time_domain: Whether to extract time domain features
        freq_domain: Whether to extract frequency domain features

    Returns:
        List of dictionaries containing extracted features, as returned by
        HARUPLoader.extract_features
    """
    return HARUPLoader().extract_features(windows_data, time_domain, freq_domain)

Legacy function for extracting features from HAR-UP windows.

Args: windows_data: List of dictionaries containing sliding windows time_domain: Whether to extract time domain features freq_domain: Whether to extract frequency domain features

Returns: List of dictionaries containing extracted features