Metadata-Version: 2.1
Name: homebase_calendar_sync
Version: 0.1.0
Summary: A simple web scraper that reads gethomebase.com's schedule and updates Google Calendar.
Home-page: https://github.com/dmidlo/homebase_calendar_sync
Author: David Midlo
Author-email: dmidlo@gmail.com
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.6
Description-Content-Type: text/markdown
Requires-Dist: anyio==4.4.0
Requires-Dist: attrs==23.2.0
Requires-Dist: beautifulsoup4==4.12.3
Requires-Dist: black==24.4.2
Requires-Dist: cachetools==5.3.3
Requires-Dist: certifi==2024.2.2
Requires-Dist: cffi==1.16.0
Requires-Dist: charset-normalizer==3.3.2
Requires-Dist: click==8.1.7
Requires-Dist: cryptography==42.0.7
Requires-Dist: google-api-core==2.19.0
Requires-Dist: google-api-python-client==2.131.0
Requires-Dist: google-auth==2.29.0
Requires-Dist: google-auth-httplib2==0.2.0
Requires-Dist: google-auth-oauthlib==1.2.0
Requires-Dist: googleapis-common-protos==1.63.0
Requires-Dist: h11==0.14.0
Requires-Dist: httpcore==1.0.5
Requires-Dist: httplib2==0.22.0
Requires-Dist: httpx==0.27.0
Requires-Dist: idna==3.7
Requires-Dist: markdown-it-py==3.0.0
Requires-Dist: mdurl==0.1.2
Requires-Dist: mypy-extensions==1.0.0
Requires-Dist: oauthlib==3.2.2
Requires-Dist: outcome==1.3.0.post0
Requires-Dist: packaging==24.0
Requires-Dist: pathspec==0.12.1
Requires-Dist: pendulum==3.0.0
Requires-Dist: platformdirs==4.2.2
Requires-Dist: proto-plus==1.23.0
Requires-Dist: protobuf==4.25.3
Requires-Dist: pyasn1==0.6.0
Requires-Dist: pyasn1_modules==0.4.0
Requires-Dist: pycparser==2.22
Requires-Dist: Pygments==2.18.0
Requires-Dist: pyparsing==3.1.2
Requires-Dist: PySocks==1.7.1
Requires-Dist: python-dateutil==2.9.0.post0
Requires-Dist: python-dotenv==1.0.1
Requires-Dist: PyYAML==6.0.1
Requires-Dist: requests==2.32.3
Requires-Dist: requests-oauthlib==2.0.0
Requires-Dist: rich==13.7.1
Requires-Dist: rsa==4.9
Requires-Dist: ruff==0.4.7
Requires-Dist: selenium==4.21.0
Requires-Dist: six==1.16.0
Requires-Dist: sniffio==1.3.1
Requires-Dist: sortedcontainers==2.4.0
Requires-Dist: soupsieve==2.5
Requires-Dist: time-machine==2.14.1
Requires-Dist: trio==0.25.1
Requires-Dist: trio-websocket==0.11.1
Requires-Dist: typing_extensions==4.12.0
Requires-Dist: tzdata==2024.1
Requires-Dist: uritemplate==4.1.1
Requires-Dist: urllib3==2.2.1
Requires-Dist: wsproto==1.2.0

```
.
├── README.md
├── __pycache__
│   └── config.cpython-312.pyc
├── events.db
├── requirements.txt
├── setup.py
└── src
    └── homebase_calendar_sync
        ├── __init__.py
        ├── __main__.py
        ├── __pycache__
        │   ├── config.cpython-312.pyc
        │   └── homebase_calendar_sync.cpython-312.pyc
        ├── config.py
        ├── db
        │   ├── __pycache__
        │   │   └── models.cpython-312.pyc
        │   └── models.py
        ├── google_client
        │   ├── __pycache__
        │   │   ├── auth.cpython-312.pyc
        │   │   ├── drive_types.cpython-312.pyc
        │   │   └── google_client.cpython-312.pyc
        │   ├── auth.py
        │   ├── drive_types.py
        │   └── google_client.py
        └── homebase_calendar_sync.py
```

```setup.py
"""Packaging script for homebase_calendar_sync."""
from pathlib import Path

from setuptools import find_packages, setup

# Resolve data files relative to this script rather than the current directory.
HERE = Path(__file__).parent

# Read with an explicit encoding and let read_text() close the file handle.
long_description = (HERE / "README.md").read_text(encoding="utf-8")

# One requirement per line; blank lines and comment lines are not requirements.
install_requires = [
    line.strip()
    for line in (HERE / "requirements.txt").read_text(encoding="utf-8").splitlines()
    if line.strip() and not line.strip().startswith("#")
]

setup(
    name="homebase_calendar_sync",
    version="0.1.0",
    author="David Midlo",
    author_email="dmidlo@gmail.com",
    # Double quotes: the text contains an apostrophe ("gethomebase.com's").
    description="A simple web scraper that reads gethomebase.com's schedule and updates google calendar.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/dmidlo/homebase_calendar_sync",  # Update this to your project's URL
    # The package lives under src/, so discovery has to start there.
    # NOTE(review): find_packages() only reports directories that contain an
    # __init__.py - confirm the sub-directories under the package have one.
    package_dir={"": "src"},
    packages=find_packages(where="src"),
    install_requires=install_requires,
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # NOTE(review): the pinned dependencies may need a newer interpreter than
    # 3.6 - confirm the real minimum before publishing.
    python_requires=">=3.6",
)
```

```
import os
import json
import httpx
from bs4 import BeautifulSoup
import pendulum
from pathlib import Path
from dotenv import load_dotenv
from rich import print
import hashlib

import config
from db.models import setup_database, connect_database
from google_client.auth import Metadata
from google_client.google_client import GoogleClient

# The .env file is looked up three ``.parent`` hops above this file.
DOTENV_BASE_DIR = Path(__file__).parent.parent.parent
load_dotenv(DOTENV_BASE_DIR / ".env")

# Every setting below is mandatory: a missing variable raises KeyError on import.
HOMEBASE_USERNAME, HOMEBASE_PASSWORD = (
    os.environ["CC_HOMEBASE_USERNAME"],
    os.environ["CC_HOMEBASE_PASSWORD"],
)
EMPLOYEE_FIRSTNAME, EMPLOYEE_LASTNAME = (
    os.environ["CC_HOMEBASE_EMPLOYEE_FIRSTNAME"],
    os.environ["CC_HOMEBASE_EMPLOYEE_LASTNAME"],
)
START_DATE, END_DATE = (
    os.environ["CC_HOMEBASE_START_DATE"],
    os.environ["CC_HOMEBASE_END_DATE"],
)
# Boolean flag: any of these spellings (case-insensitive) switches look-ahead on.
LOOKAHEAD = os.environ["CC_HOMEBASE_LOOKAHEAD"].lower() in (
    "true",
    "1",
    "t",
    "y",
    "yes",
)
# Number of days added to the end date when look-ahead is enabled.
LOOKAHEAD_DAYS = int(os.environ["CC_HOMEBASE_DAYS_LOOKAHEAD"])


class HomebaseScheduleScraper:
    """Log in to Homebase and collect one employee's shifts for a date range.

    Constructing an instance performs the whole scrape: it reads the sign-in
    form token, posts the credentials, downloads the schedule JSON and derives
    the employee id, job ids and shifts from it. The HTTP client is closed
    before ``__init__`` returns, so afterwards the instance only holds results.
    """

    def __init__(
        self, username, password, first_name, last_name, start_date, end_date
    ) -> None:
        """Run the scrape and store its results on the instance.

        Args:
            username: Homebase account login.
            password: Homebase account password.
            first_name: Employee first name, matched case-insensitively.
            last_name: Employee last name, matched case-insensitively.
            start_date: ``"today"`` or a string accepted by ``pendulum.parse``.
            end_date: ``"today"`` or a string accepted by ``pendulum.parse``.
        """
        self.username = username
        self.password = password
        self.start_date, self.end_date = self.initialize_date_range(
            start_date, end_date
        )
        self.login_url = "https://app.joinhomebase.com/accounts/sign-in"
        self.base_schedule_url = (
            "https://app.joinhomebase.com/api/fe/schedule_builder/schedule?"
        )
        self.client = httpx.Client()
        try:
            self.login_payload = {
                "authenticity_token": self.get_authenticity_token(),
                "account[login]": username,
                "account[password]": password,
                "account[remember_me]": 0,
            }
            self.login()
            self.calendar_json = json.loads(self.get_calendar_json())
            self.employee_first_name = first_name
            self.employee_last_name = last_name
            self.employee_id = self.get_employee_id()
            self.employee_jobs = self.get_employee_jobs()
            self.employee_shifts = self.get_employee_shifts()
            self.employee_shifts_in_range = self.filter_shifts_by_date()
        finally:
            # Close the client even when one of the steps above raises.
            self.close()

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def get_login_form(self):
        """Fetch the sign-in page and return its POST ``<form>`` element.

        Returns None (after printing the status code) when the page request
        does not answer with HTTP 200.
        """
        response = self.client.get(self.login_url)

        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status Code: {response.status_code}")
            return None

        html_content = BeautifulSoup(response.text, "html.parser")
        return html_content.find("form", method="post")

    def get_authenticity_token(self):
        """Return the hidden ``authenticity_token`` value from the login form.

        Returns None (after printing a message) when either the form or the
        hidden input cannot be found.
        """
        login_form = self.get_login_form()
        token_input = None
        if login_form is not None:
            token_input = login_form.find(
                "input", attrs={"name": "authenticity_token", "type": "hidden"}
            )

        # A form without the hidden input is handled like a missing form
        # instead of failing on ``None.get``.
        if token_input is None:
            print("No input element with `name='authenticity_token'` found.")
            return None
        return token_input.get("value")

    def login(self):
        """Post the login payload and print whether the response was HTTP 200."""
        response = self.client.post(self.login_url, data=self.login_payload)

        if response.status_code == 200:
            print(f"Homebase Login Successful. Status Code: {response.status_code}")
        else:
            print(f"Homebase Login failed. Status Code: {response.status_code}")

    def get_schedule_route(self):
        """Build, print and return the schedule URL for the configured range."""
        route = (
            f"{self.base_schedule_url}"
            f"end_date={self.end_date.to_date_string()}"
            f"&start_date={self.start_date.to_date_string()}"
        )
        print(route)
        return route

    def get_calendar_json(self):
        """Download the schedule and return the response body as text.

        Returns None (after printing the status code) on a non-200 response.
        """
        response = self.client.get(self.get_schedule_route())

        if response.status_code != 200:
            print(f"Failed to retrieve the page. Status Code: {response.status_code}")
            return None
        return response.text

    def get_employee_id(self):
        """Return the id of the ``user`` record matching the employee's name.

        The comparison ignores case. Returns None when no record matches.
        """
        wanted_first = self.employee_first_name.lower()
        wanted_last = self.employee_last_name.lower()

        for record in self.calendar_json["included"]:
            if record["type"] != "user":
                continue
            attributes = record["attributes"]
            if (
                str(attributes["firstName"]).lower() == wanted_first
                and str(attributes["lastName"]).lower() == wanted_last
            ):
                return record["id"]
        return None

    def get_employee_jobs(self):
        """Return the ids of all ``job`` records that belong to the employee."""
        return [
            record["id"]
            for record in self.calendar_json["included"]
            if record["type"] == "job"
            and record["relationships"]["user"]["data"]["id"] == self.employee_id
        ]

    def get_employee_shifts(self):
        """Return the ``shift`` records owned by one of the employee's jobs.

        A list is returned (not a generator) so the result can be iterated
        more than once.
        """
        return [
            record
            for record in self.calendar_json["included"]
            if record["type"] == "shift"
            and record["relationships"]["owner"]["data"]["id"] in self.employee_jobs
        ]

    def initialize_date_range(self, start_date, end_date):
        """Turn the configured start/end values into day-aligned datetimes.

        ``"today"`` means the current day; anything else goes through
        ``pendulum.parse``. The start is moved to the start of its day and the
        end to the end of its day. When the module-level ``LOOKAHEAD`` flag is
        set, the start is further moved to the start of its week, and the end
        is pushed ``LOOKAHEAD_DAYS`` days forward and then to the end of that
        week.

        Returns:
            A ``(start, end)`` tuple.
        """
        if start_date == "today":
            start = pendulum.now().start_of("day")
        else:
            start = pendulum.parse(start_date).start_of("day")

        if end_date == "today":
            end = pendulum.now().end_of("day")
        else:
            end = pendulum.parse(end_date).end_of("day")

        if LOOKAHEAD:
            start = start.start_of("week")
            end = end.add(days=LOOKAHEAD_DAYS).end_of("week")

        return start, end

    def filter_shifts_by_date(self):
        """Return the employee's shifts whose ``startAt`` lies within the range.

        Both bounds are inclusive. A list is returned so that
        ``get_employee_shifts_json`` gives the same answer on every call.
        """
        return [
            shift
            for shift in self.employee_shifts
            if self.start_date
            <= pendulum.parse(shift["attributes"]["startAt"])
            <= self.end_date
        ]

    def get_employee_shifts_json(self):
        """Serialise the in-range shifts to a JSON array string.

        Each element carries ``shiftId``, ``firstName``, ``lastName``,
        ``jobRole``, ``shiftDate``, ``startTime`` and ``endTime``. The date is
        taken from the shift's ``startAt`` value.
        """
        shifts = []

        for record in self.employee_shifts_in_range:
            attributes = record["attributes"]
            # Parse the start once; it feeds both the date and the time field.
            starts_at = pendulum.parse(attributes["startAt"])
            ends_at = pendulum.parse(attributes["endAt"])
            shifts.append(
                {
                    "shiftId": record["id"],
                    "firstName": self.employee_first_name,
                    "lastName": self.employee_last_name,
                    "jobRole": attributes["roleName"],
                    "shiftDate": starts_at.to_date_string(),
                    "startTime": starts_at.to_time_string(),
                    "endTime": ends_at.to_time_string(),
                }
            )

        return json.dumps(shifts)


class HomebaseCalendarSync:
    """Mirror scraped Homebase shifts into the primary Google calendar.

    Construction sets up the Google client and the local database, scrapes
    Homebase and loads the primary calendar's events. Calling the instance
    records the calendar events in the ``events`` table, creates calendar
    events for shifts that have none, and keeps the ``shifts`` table in step
    with the scraped shifts.
    """

    def __init__(self) -> None:
        """Initialise collaborators and load remote state from both services."""
        config.META = Metadata.metadata_singleton_factory()
        config.META.check_for_client_secret_and_import()
        config.GOOGLE = GoogleClient()
        setup_database()
        self.scraper = HomebaseScheduleScraper(
            HOMEBASE_USERNAME,
            HOMEBASE_PASSWORD,
            EMPLOYEE_FIRSTNAME,
            EMPLOYEE_LASTNAME,
            START_DATE,
            END_DATE,
        )
        self.primary_calendar = config.GOOGLE.get_primary_calendar()
        self.primary_calendar_events = config.GOOGLE.get_calendar_events(
            self.primary_calendar["id"]
        )
        self.remote_homebase_shifts = json.loads(
            self.scraper.get_employee_shifts_json()
        )

    def __call__(self):
        """Run one full sync pass and close the database handle."""
        self.update_events_db_from_remote()
        self.add_homebase_shifts()
        config.DB.close()

    def get_event_hash(self, event: dict) -> str:
        """Return a SHA-512 hex digest of ``event`` serialised with sorted keys.

        Sorting the keys makes the digest independent of dict insertion order.
        """
        event_str = json.dumps(event, sort_keys=True)
        return hashlib.sha512(event_str.encode("utf-8")).hexdigest()

    @staticmethod
    def _homebase_shift_id(event: dict):
        """Return the shift id encoded in the event's source title, or None.

        Events created by this tool carry ``source.title`` of the form
        ``homebaseShiftId-<id>``; any other event yields None.
        """
        source = event.get("source")
        if not source:
            return None
        parts = (source.get("title") or "").split("-")
        if len(parts) > 1 and parts[0] == "homebaseShiftId":
            return parts[1]
        return None

    @staticmethod
    def _build_event(shift: dict, tz_name: str) -> dict:
        """Build the Google Calendar event body for one scraped shift."""
        start = pendulum.parse(
            f"{shift['shiftDate']} {shift['startTime']}", tz=tz_name
        )
        end = pendulum.parse(f"{shift['shiftDate']} {shift['endTime']}", tz=tz_name)
        if end < start:
            # The shift dict only carries the start date, so a shift running
            # past midnight would otherwise end before it begins.
            end = end.add(days=1)

        return {
            "summary": f"Homebase - {shift['jobRole']}",
            "description": f"{shift['firstName']} {shift['lastName']}",
            "start": {
                "dateTime": start.to_iso8601_string(),
                "timeZone": tz_name,
            },
            "end": {
                "dateTime": end.to_iso8601_string(),
                "timeZone": tz_name,
            },
            "source": {
                "title": f"homebaseShiftId-{shift['shiftId']}",
                "url": "https://app.joinhomebase.com/",
            },
        }

    def update_events_db_from_remote(self):
        """Make the ``events`` table match the loaded calendar events.

        New events are inserted, events whose hash changed get the new hash,
        and rows whose event id is absent from the loaded events are deleted.
        """
        connect_database()
        remote_events = set()

        for event in self.primary_calendar_events:
            event_id = event["id"]
            event_hash = self.get_event_hash(event)
            remote_events.add(event_id)

            homebase_shift_id = self._homebase_shift_id(event)
            from_homebase = 0 if homebase_shift_id is None else 1  # 0/1 - False/True

            config.DB_CURSOR.execute(
                "SELECT hash FROM events WHERE event_id = ?", (event_id,)
            )
            row = config.DB_CURSOR.fetchone()

            if row is None:
                config.DB_CURSOR.execute(
                    "INSERT INTO events (event_id, hash, from_homebase, homebase_shift_id) VALUES (?, ?, ?, ?)",
                    (event_id, event_hash, from_homebase, homebase_shift_id),
                )
            elif row[0] != event_hash:
                config.DB_CURSOR.execute(
                    "UPDATE events SET hash = ? WHERE event_id = ?",
                    (event_hash, event_id),
                )
                print(f"Event updated: {event_id}")
            config.DB.commit()

        # Prune local events to match remote.
        config.DB_CURSOR.execute("SELECT event_id FROM events")
        local_events = {row[0] for row in config.DB_CURSOR.fetchall()}
        for event_id in local_events - remote_events:
            config.DB_CURSOR.execute(
                "DELETE FROM events WHERE event_id = ?", (event_id,)
            )
            print(f"Event deleted: {event_id}")

        config.DB.commit()

    def get_homebase_events(self) -> set:
        """Return the shift ids of all loaded calendar events tagged as Homebase."""
        homebase_events = set()

        for event in self.primary_calendar_events:
            shift_id = self._homebase_shift_id(event)
            if shift_id is not None:
                homebase_events.add(shift_id)
        return homebase_events

    def add_homebase_shifts(self):
        """Record scraped shifts locally and create missing calendar events.

        For each scraped shift: a shift unknown to the ``shifts`` table is
        inserted and gets a calendar event unless one is already known; a
        shift whose hash changed only has its stored hash updated; an
        unchanged shift gets a calendar event if none of the loaded events
        carries its id. Afterwards, rows for shifts that are no longer
        scraped are deleted and the ``events`` table is refreshed.
        """
        connect_database()
        remote_shifts = {shift["shiftId"] for shift in self.remote_homebase_shifts}
        homebase_events = self.get_homebase_events()
        calendar_id = self.primary_calendar["id"]
        # Only the zone name is needed, so read it once rather than per shift.
        tz_name = pendulum.now().timezone_name

        for shift in self.remote_homebase_shifts:
            shift_id = shift["shiftId"]
            shift_hash = self.get_event_hash(shift)
            config.DB_CURSOR.execute(
                "SELECT hash FROM shifts WHERE homebase_shift_id = ?",
                (shift_id,),
            )
            row = config.DB_CURSOR.fetchone()

            event = self._build_event(shift, tz_name)

            if row is None:
                config.DB_CURSOR.execute(
                    "INSERT INTO shifts (homebase_shift_id, hash) VALUES (?, ?)",
                    (shift_id, shift_hash),
                )
                print(f"New shift added: {shift_hash}")
                config.DB.commit()

                config.DB_CURSOR.execute(
                    "SELECT hash FROM events WHERE homebase_shift_id = ?",
                    (shift_id,),
                )
                event_row = config.DB_CURSOR.fetchone()

                if event_row is None and shift_id not in homebase_events:
                    config.GOOGLE.create_new_event(calendar_id, event)

            elif row[0] != shift_hash:
                config.DB_CURSOR.execute(
                    "UPDATE shifts SET hash = ? WHERE homebase_shift_id = ?",
                    (shift_hash, shift_id),
                )
                print(f"Shift updated: {shift_hash}")
                # TODO: for CRUD operations, this is where integration code for UPDATES to
                # TODO  homebase's shift times would be processed.
                config.DB.commit()
            elif shift_id not in homebase_events:
                config.GOOGLE.create_new_event(calendar_id, event)

        # Prune local shifts to match remote. This runs once after the loop so
        # stale rows are also removed when no shifts were scraped at all.
        config.DB_CURSOR.execute("SELECT homebase_shift_id FROM shifts")
        local_shifts = {row[0] for row in config.DB_CURSOR.fetchall()}
        for stale_shift_id in local_shifts - remote_shifts:
            config.DB_CURSOR.execute(
                "DELETE FROM shifts WHERE homebase_shift_id = ?", (stale_shift_id,)
            )
            print(f"Shift deleted: {stale_shift_id}")

        config.DB.commit()
        self.update_events_db_from_remote()


def main():
    """Build a HomebaseCalendarSync and run a single sync pass."""
    HomebaseCalendarSync()()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
```

====================

Complete my setup.py file with a console entry point that calls `main`, and use twine and keyring for deployment.
