Given a complex dataframe df (expand for full code):
from skrub.datasets import fetch_employee_salaries
# Fetch the employee salaries example dataset from skrub.datasets.
dataset = fetch_employee_salaries()
# X is the table handed to the model further down; y is the matching target.
df, y = dataset.X, dataset.y
# Bare expression: displays the dataframe when run in a notebook or REPL.
df
gender department department_name division assignment_category employee_position_title date_first_hired year_first_hired
1 M FRS Fire and Rescue Services Third Battalion - Administration Fulltime-Regular Fire/Rescue Lieutenant 06/07/2004 2004
2 M HHS Department of Health and Human Services Environmental Health and Regulatory Services Fulltime-Regular Environmental Health Specialist III 02/20/2007 2007
... ... ... ... ... ... ... ... ...
9226 M DGS Department of General Services Facilities Maintenance Fulltime-Regular Master Plumber 03/26/2001 2001
9227 F HHS Department of Health and Human Services Infants and Toddlers Fulltime-Regular Program Specialist II 03/25/2013 2013
from sklearn.model_selection import cross_val_score
from skrub import tabular_pipeline
# Cross-validate skrub's ready-made regression pipeline directly on the raw
# dataframe; the call returns an array of scores (one per fold).
cross_val_score(
    estimator=tabular_pipeline('regressor'),
    X=df,
    y=y,
)
array([0.89370447, 0.89279068, 0.92282557, 0.92319094, 0.92162666])