[[driver, attachable], [overlay], [app2-network], [weaviate-data, traefiklog], [external], [traefik-dynamic-configuration.yml], [external], [external], [public-genaiele.crt, private-genaiele.key], [app2-network], [--api.insecure=true, --providers.docker, --accesslog=true, --log.level=DEBUG, --accesslog.filePath=/logs/access.log, --entrypoints.web.address=:80, --entrypoints.websecure.address=:443, --entrypoints.websecure.asDefault=true, --providers.docker.exposedByDefault=false, --global.sendAnonymousUsage=false], [80:80, 443:443, 18080:8080], [/var/run/docker.sock:/var/run/docker.sock:ro, traefiklog:/logs/], [source, target], [traefik-dynamic-configuration.yml], [public-genaiele.crt, private-genaiele.key], [image, networks, command, ports, volumes, configs, secrets], [traefik:v3.3], [app2-network], [weaviate], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}, CICD_PORTKEY=${CICD_PORTKEY}, CICD_WEAVIATE_APIKEY=${CICD_WEAVIATE_APIKEY}], [delay], [240s], [replicas, restart_policy], [image, tty, networks, depends_on, environment, deploy], [app2-network], [weaviate], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}, CICD_PORTKEY=${CICD_PORTKEY}, CICD_WEAVIATE_APIKEY=${CICD_WEAVIATE_APIKEY}], [delay], [240s], [replicas, restart_policy], [image, tty, networks, depends_on, environment, deploy], [app2-network], [weaviate], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}, CICD_PORTKEY=${CICD_PORTKEY}, CICD_WEAVIATE_APIKEY=${CICD_WEAVIATE_APIKEY}], [image, tty, networks, depends_on, environment], [app2-network], [weaviate], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}, CICD_PORTKEY=${CICD_PORTKEY}, CICD_WEAVIATE_APIKEY=${CICD_WEAVIATE_APIKEY}], [image, tty, networks, depends_on, environment], [app2-network], [weaviate], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}, CICD_PORTKEY=${CICD_PORTKEY}, CICD_WEAVIATE_APIKEY=${CICD_WEAVIATE_APIKEY}], [traefik.enable=true, traefik.http.routers.querying_service.tls=true], [image, tty, networks, depends_on, environment, labels], [app2-network], [weaviate], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}, CICD_PORTKEY=${CICD_PORTKEY}, CICD_WEAVIATE_APIKEY=${CICD_WEAVIATE_APIKEY}], [traefik.enable=true, traefik.http.routers.auth_service.tls=true], [image, tty, networks, depends_on, environment, labels], [app2-network], [weaviate], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}, CICD_VPH_USER=${CICD_VPH_USER}, CICD_VPH_PW=${CICD_VPH_PW}, CICD_PORTKEY=${CICD_PORTKEY}, CICD_WEAVIATE_APIKEY=${CICD_WEAVIATE_APIKEY}], [traefik.enable=true, traefik.http.routers.documents_service.tls=true], [image, tty, networks, depends_on, environment, labels], [app2-network], [CICD_ENV=${CICD_ENV}, CICD_PORTKEY=${CICD_PORTKEY}, CICD_WEAVIATE_APIKEY=${CICD_WEAVIATE_APIKEY}], [image, networks, environment], [app2-network], [traefik.enable=true, traefik.http.routers.frontend.rule=PathPrefix(`/`), traefik.http.routers.frontend.tls=true], [image, tty, networks, labels], [app2-network], [traefik.enable=true, traefik.http.routers.whoami.entrypoints=websecure, traefik.http.routers.whoami.tls=true], [image, networks, labels], [traefik/whoami], [8080:8080, 50051:50051], [app2-network], [type, source, target], [bind, /home/ubuntu/elevidys-rag/backups, /tmp/backups], [weaviate-data:/wv_data], [QUERY_DEFAULTS_LIMIT, LIMIT_RESOURCES, PERSISTENCE_DATA_PATH, DEFAULT_VECTORIZER_MODULE, ENABLE_MODULES, BACKUP_S3_BUCKET, BACKUP_S3_PATH, BACKUP_FILESYSTEM_PATH, TRANSFORMERS_INFERENCE_API, CLUSTER_HOSTNAME, DISABLE_TELEMETRY, AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED, AUTHENTICATION_APIKEY_ENABLED, AUTHENTICATION_APIKEY_ALLOWED_KEYS, AUTHENTICATION_APIKEY_USERS], [true, /wv_data, text2vec-transformers, text2vec-transformers,backup-filesystem,backup-s3, weaviate-backup-dev-890742574233, wv_backups, /tmp/backups, http://t2v-transformers:8080, node1, true, false, true, ${CICD_WEAVIATE_APIKEY}, elevydis], [image, restart, ports, networks, volumes, environment], [semitechnologies/weaviate:1.25.32, on-failure:0], [app2-network], [8081:8080], [ENABLE_CUDA], [image, networks, ports, environment], [app2-network], [7777:7777], [WEAVIATE_URL, WEAVIATE_API_KEYS], [http://weaviate:8080, ${CICD_WEAVIATE_APIKEY}], [weaviate], [traefik.enable=false, traefik.http.routers.weaviateui.tls=true], [container_name, image, networks, ports, environment, depends_on, labels], [weaviate_ui, naaive/weaviate-ui:v1.0.3], [38888:80], [app2-network], [traefik.enable=false, traefik.http.routers.projector.tls=true], [image, ports, networks, labels], [traefik, pdf2task_service, task2json_service, tjson2text_service, embedding_service, querying_service, auth_service, documents_service, agentic, frontend, whoami, weaviate, t2v-transformers, weaviate-ui, projector-ui], [networks, volumes, configs, secrets, services], [driver], [bridge], [app1-network], [SERVICE_NAME], [pdf2task], [context, dockerfile, args], [./src, ./services/pdf2task/Dockerfile], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}], [app1-network], [weaviate], [build, container_name, tty, environment, networks, depends_on], [pdf2task_service], [SERVICE_NAME], [task2json], [context, dockerfile, args], [./src, ./services/task2json/Dockerfile], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}], [app1-network], [weaviate], [build, container_name, tty, environment, networks, depends_on], [task2json_service], [SERVICE_NAME], [tjson2text], [args, context, dockerfile], [./src, ./services/tjson2text/Dockerfile], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}], [app1-network], [weaviate], [build, container_name, tty, environment, networks, depends_on], [tjson2text_service], [SERVICE_NAME], [embedding], [context, args, dockerfile], [./src, ./services/embedding/Dockerfile], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}], [app1-network], [weaviate], [build, container_name, tty, environment, networks, depends_on], [embedding_service], [SERVICE_NAME], [querying], [context, dockerfile, args], [./src, ./services/querying/Dockerfile], [3002:3002], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}], [type, source, target], [bind, ./data, /data], [app1-network], [weaviate], [build, container_name, tty, ports, environment, volumes, networks, depends_on], [querying_service], [SERVICE_NAME], [auth], [context, args, dockerfile], [./src, ./services/auth/Dockerfile], [3003:3003], [CICD_ENV=${CICD_ENV}, CICD_GIT_BRANCH=${CICD_GIT_BRANCH}, CICD_AZURE_API_KEY=${CICD_AZURE_API_KEY}, CICD_CLIENT_SECRET=${CICD_CLIENT_SECRET}], [app1-network], [build, container_name, tty, ports, environment, networks], [auth_service], [8080:8080, 50051:50051], [app1-network], [type, source, target], [bind, /mnt/home/ubuntu/elevidys-rag/wv_data, /wv_data], [type, source, target], [bind, ./backups, /tmp/backups], [QUERY_DEFAULTS_LIMIT, AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED, PERSISTENCE_DATA_PATH, DEFAULT_VECTORIZER_MODULE, ENABLE_MODULES, BACKUP_S3_BUCKET, BACKUP_S3_PATH, BACKUP_FILESYSTEM_PATH, TRANSFORMERS_INFERENCE_API, CLUSTER_HOSTNAME], [true, /wv_data, text2vec-transformers, text2vec-transformers,backup-filesystem,backup-s3, weaviate-backup-dev-890742574233, wv_backups, /tmp/backups, http://t2v-transformers:8080, node1], [container_name, image, restart, ports, networks, volumes, environment], [weaviate, semitechnologies/weaviate, on-failure:0], [app1-network], [8081:8080], [ENABLE_CUDA], [container_name, image, networks, ports, environment], [t2v-transformers], [app1-network], [7777:7777], [WEAVIATE_URL], [http://weaviate:8080], [weaviate], [container_name, image, networks, ports, environment, depends_on], [weaviate_ui, naaive/weaviate-ui:latest], [pdf2task_service, task2json_service, tjson2text_service, embedding_service, querying_service, auth_service, weaviate, t2v-transformers, weaviate-ui], [networks, services], [unit_tests, build, deploy, end2end, jobs], [tags], [, ele-box-dev-890742574233, elevydis-mapping-890742574233, elevydis-qa-parsed, landing-dev-890742574233, single-processes-temp-elevydis, text-documents-dev-890742574233, text-documents-temp-123, text-documents-test-890742574233, textract-json-dev-890742574233, textract-json-test-890742574233, qanda-890742574233], [value, description, options], [, Source ingestion bucket], [, pdfs_for_textract, texttract_task_ids, texttract_json_to_parse, text_to_embed], [value, description, options], [, Target queue], [value, description], [, Target collection], [no, yes], [value, description, options], [no, Delete given collection if exists], [no, yes], [value, description, options], [yes, This will skip the tests], [SOURCE_INGESTION_BUCKET, TARGET_QUEUE, TARGET_COLLECTION, DELETE_COLLECTION_IF_EXISTS, SKIP_TESTS], [export CICD_AZURE_API_KEY="$AZURE_API_KEY", export CICD_CLIENT_SECRET="$CLIENT_SECRET", export CICD_GIT_BRANCH="$CI_COMMIT_BRANCH", export CICD_ENV="$CICD_ENV", export CICD_PORTKEY="$CICD_PORTKEY", export B_VERSION="$B_VERSION", export CICD_VPH_USER="$VPH_USER", export CICD_VPH_PW="$VPH_PW", export CICD_WEAVIATE_APIKEY="$WEAVIATE_APIKEY", export F_VERSION="$CI_COMMIT_BRANCH", export STORAGE_PREFIX=""], [run_unit_tests], [echo "Deploying to GitLab Pages"], [if, when], [$SOURCE_INGESTION_BUCKET != "", never], [if, when], [$SKIP_TESTS == "yes", never], [src/**/*], [changes], [when], [on_success], [public], [paths], [stage, needs, script, rules, artifacts], [unit_tests], [own-runner-machine-small], [if, when], [$SOURCE_INGESTION_BUCKET != "", never], [if, when], [$SKIP_TESTS == "yes", never], [src/**/*], [changes], [when], [on_success], [public/coverage_reports], [paths, when, expire_in], [always, 1 week], [pip install poetry, pip install pdm], [set -x], [stage, tags, image, rules, artifacts, before_script, script], [unit_tests, public.ecr.aws/docker/library/python:3.10-bookworm], [own-runner-machine-small], [pip install poetry, pip install pdm], [set -x], [stage, tags, image, when, before_script, script], [unit_tests, public.ecr.aws/docker/library/python:3.10-bookworm, manual], [gen10], [INSTANCE_ID="i-024d3683ba0f946f0"], [stage, environment, when, image, tags, script], [build, $CI_COMMIT_BRANCH, manual], [rm -rf /kaniko/cache, rm -rf /kaniko/.cache/*, rm -rf /cache/*, rm -rf /kaniko/fs/*, du -ha /kaniko | sort -rh | head -20, apt-get clean && rm -rf /var/lib/apt/lists/*, rm -rf /kaniko/.docker, mkdir -p /kaniko/.docker], [own-runner-machine-small], [if, when], [$SKIP_BUILD == "yes", never], [src/**/*, stack.yml], [changes], [if], [$SOURCE_INGESTION_BUCKET == ""], [NAME], [auth], [NAME], [embedding], [NAME], [querying], [NAME], [task2json], [NAME], [tjson2text], [NAME], [pdf2task], [NAME], [documents], [NAME], [agentic], [matrix], [], [name, entrypoint], [gcr.io/kaniko-project/executor:debug], [rm -rf /kaniko/cache, rm -rf /kaniko/.cache/*, rm -rf /cache/*, rm -rf /kaniko/fs/*, du -ha /kaniko | sort -rh | head -20, apt-get clean && rm -rf /var/lib/apt/lists/*, rm -rf /kaniko/.docker, mkdir -p /kaniko/.docker], [stage, environment, tags, rules, parallel, image, script], [build, $CI_COMMIT_BRANCH], [start-own-runner-machine], [own-runner-machine], [], [name, entrypoint], [gcr.io/kaniko-project/executor:debug], [rm -rf /kaniko/cache, rm -rf /kaniko/.cache/*, rm -rf /cache/*, rm -rf /kaniko/fs/*, du -ha /kaniko | sort -rh | head -20, apt-get clean && rm -rf /var/lib/apt/lists/*, rm -rf /kaniko/.docker, mkdir -p /kaniko/.docker], [export PYTHONUNBUFFERED=1, export NAME=projector-ui], [stage, needs, environment, tags, when, image, script], [build, $CI_COMMIT_BRANCH, manual], [apt-get update && apt-get install -y python3, echo $filename], [stage, when, image, script], [deploy, manual], [own-runner-machine-small], [test], [B_VERSION], [$CI_COMMIT_SHA], [image, tags, stage, environment, only, when, variables, script], [public.ecr.aws/docker/library/python:3.10-bookworm, deploy, $CI_COMMIT_BRANCH, manual], [own-runner-machine-small], [B_VERSION], [$CI_COMMIT_SHA], [image, tags, stage, environment, when, variables, script], [public.ecr.aws/docker/library/python:3.10-bookworm, deploy, dev, manual], [own-runner-machine-small], [cd src/tests, pip install pdm, pdm install, CICD_ENV=dev pdm run pytest -m "integration" -s], [stage, image, environment, tags, when, script], [end2end, public.ecr.aws/docker/library/python:3.10-bookworm, $CI_COMMIT_BRANCH, manual], [own-runner-machine-small], [image, stage, tags, when, script], [public.ecr.aws/docker/library/python:3.10-bookworm, jobs, manual], [own-runner-machine-small], [image, stage, tags, when, script], [public.ecr.aws/docker/library/python:3.10-bookworm, jobs, manual], [own-runner-machine-small], [image, stage, tags, when, script], [public.ecr.aws/docker/library/python:3.10-bookworm, jobs, manual], [own-runner-machine-small], [image, stage, tags, when, script], [public.ecr.aws/docker/library/python:3.10-bookworm, jobs, manual], [stages, default, variables, before_script, pages, run_unit_tests, run_integration_tests, start-own-runner-machine, .build_script, build-job, projector-build-job, .deployment, deploy_vectors-dev-to-test-curl, deploy-job-test, deploy-job-dev, end2end, run_ingestion, run_synchronization_execution, run_synchronization, run_questions], [mapping, bin, .claude, iac, src], [docker-compose, CHANGELOG, stack, README, build_and_run_local_wsl, .gitignore, docker-compose, .gitlab-ci], [section, authors, source, cmc_update, status, comments], [S.1, Simone, publication, No, ready for review, ], [section, authors, source, cmc_update, status, comments], [GMP, Marvin, NA, , ready for review, ], [section, authors, source, cmc_update, status, comments], [eAF, Simone, NA, , ready for review, ], [section, authors, source, cmc_update, status, comments], [S.2.2, Mark , Source Doc: REC-00527, Yes, ready for review, ], [section, authors, source, cmc_update, status, comments], [S.2.2, Jakob, Leverage SwissMedic IR-2 Q3, No, ready for review, Different option should be discussed], [section, authors, source, cmc_update, status, comments], [S.2.2, Mark, SwissMedic IR-1 Q12 & Q31 and IR-2 Q7, Yes, ready for review, ], [section, authors, source, cmc_update, status, comments], [S.2.2, Mark, Source Doc: 55-634-PQ-0005.IR, , clarification needed], [section, authors, source, cmc_update, status, comments], [S.2.3_plasmid, Andrea/Jelena, source documents – RPT-02188, RPT-01974, RPT-02181, , , ], [section, authors, source, cmc_update, status, comments], [S.2.3_plasmid, Andrea/Lisa, , , ], [section, authors, source, cmc_update, status, comments], [S.2.3_plasmid, Andrea/Lisa, , , , ], [section, authors, source, cmc_update, status, comments], [S.2.3_plasmid, Andrea/Lisa, , , , ], [section, authors, source, cmc_update, status, comments], [S.2.3_plasmid, Andrea/Lisa, source doc – SPC_00590_Version 11, , , ], [section, authors, source, cmc_update, status, comments], [S.2.3_plasmid, Andrea/Lisa?, , yes, , ], [section, authors, source, cmc_update, status, comments], [S.2.3 , Andrea/Sandra, Source Doc: RPT-01941, , , ], [section, authors, source, cmc_update, status, comments], [S.2.3 , Andrea/Sandra, S.2.3 from AR2024 and Saudi Q1b, , , ], [section, authors, source, cmc_update, status, comments], [S.2.3, Jakob, SwissMedic IR-1 Q25 & Q27, No, clarification needed], [section, authors, source, cmc_update, status, comments], [S.2.3, Andrea/Sandra, Saudi Q1a, , , ], [section, authors, source, cmc_update, status, comments], [S.2.3, Andrea/Sandra, RPT-01943, , , ], [section, authors, source, cmc_update, status, comments], [S.2.3, Andrea/Ulli, RPT-01915, , , ], [section, authors, source, cmc_update, status, comments], [S.2.3, Sarepta new, , , , ], [section, authors, source, cmc_update, status, comments], [S.2.3, Jakob, , No?, ready for review + clarification needed], [section, authors, source, cmc_update, status, comments], [S.2.4, Mark, SwissMedic IR-1 Q12 & Q31 and IR-2 Q7, , clarification needed], [section, authors, source, cmc_update, status, comments], [S.2.5, Sarepta/Mark/Jakob, No, ready for review, ], [section, authors, source, cmc_update, status, comments], [S.2.5, Mark/Jakob, yes, ready for review], [section, authors, source, cmc_update, status, comments], [S.2.5, Mark, Risk Assessment L/E, , ready for review, ], [section, authors, source, cmc_update, status, comments], [S.2.5, Simone, Editorial update, yes, ready for review, ], [section, authors, source, cmc_update, status, comments], [S.2.5, Mark, , ready for review, ], [section, authors, source, cmc_update, status, comments], [S.2.5, Mark/Jakob, , clarification needed], [section, authors, source, cmc_update, status, comments], [S.2.5, Christine/NC, , clarification needed], [section, authors, source, cmc_update, status, comments], [S.2.6, Jakob, 634-DE-TSR_9001_21-088, yes?, ready for review], [section, authors, source, cmc_update, status, comments], [S.2.6, non-clinic/clinic, , , ], [section, authors, source, cmc_update, status, comments], [S.2.6, Lisa/Andrea?, , , ], [section, authors, source, cmc_update, status, comments], [S.2.6, Lisa/Andrea?, , , ], [section, authors, source, cmc_update, status, comments], [S.3.1, Marco/Josef, literature, , Ready for review, ], [section, authors, source, cmc_update, status, comments], [S.3.1, Marco, , Ready for review], [section, authors, source, cmc_update, status, comments], [S.3.1, Marco/non-clinic, , Ready for review], [section, authors, source, cmc_update, status, comments], [S.3.1, non-clinic, , , , ], [section, authors, source, cmc_update, status, comments], [S.3.1, Marco/non-clinic, Source documents: SR-21-025, , Was done by Sarepta], [section, authors, source, cmc_update, status, comments], [S.3.1, Non-clinic: iv/v/vi, , , , ], [section, authors, source, cmc_update, status, comments], [S.3.2, Marco, SwissMedic IR-1 Q55, , 1:1 use of IR-1 Q55, ready for review, SwissMedic IR-1 Q55 can be used], [section, authors, source, cmc_update, status, comments], [S.3.2, Mark, Leverage DS process E&L risk assessment Rpt-02079, , , ], [section, authors, source, cmc_update, status, comments], [S.3.2, Sarepta new, , , , ], [section, authors, source, cmc_update, status, comments], [S.4.1, Marco, ], [section, authors, source, cmc_update, status, comments], [S.4.1, Marco, FDA response, , FDA response used. Ready for review.], [section, authors, source, cmc_update, status, comments], [S.4.5, Sarepta/Marco/Simone, , yes, Sarepta to provide answer?], [section, authors, source, cmc_update, status, comments], [S.4.1/S.4.2/S.4.3/P.5.1/P.5.2/P.5.3, Sarepta/Marco/Simone, , yes, Ready for review, Sarepta will provide SOP numbers], [section, authors, source, cmc_update, status, comments], [S.4.2, Marco, Swissmedic responses round 2 (Q21), no, ], [section, authors, source, cmc_update, status, comments], [S.4.2, Marco, US CBE submission package, , Ready for review], [section, authors, source, cmc_update, status, comments], [S.4.2, Marco, Israel IR-1 Q10 and IR-2 Q3, , waiting for documents], [section, authors, source, cmc_update, status, comments], [S.4.2, Marco, , yes, Ready for review], [section, authors, source, cmc_update, status, comments], [S.4.2, Marco, , ], [section, authors, source, cmc_update, status, comments], [S.4.3, Marco, , , ], [section, authors, source, cmc_update, status, comments], [S.4.3, Marco, Israel IR-1 Q10 & IR-2 Q2, yes, ], [section, authors, source, cmc_update, status, comments], [S.4.4 , Marco, , yes, ], [section, authors, source, cmc_update, status, comments], [S.4.5, Marco, SwissMedic Rd1 Q65, , ], [section, authors, source, cmc_update, status, comments], [S.4.5, Marco/Yingmei, , , ], [section, authors, source, cmc_update, status, comments], [S.6, Mark, , , clarification needed, Details needed for size and wrap], [section, authors, source, cmc_update, status, comments], [S.7, Marco/Beate?, , yes, , Report will be ready first week of November], [section, authors, source, cmc_update, status, comments], [S.7, Marco/Beate?, to Saudi Q8 & Israel Q20, , ], [section, authors, source, cmc_update, status, comments], [P.2, Philip/Marco, RPT-01975, ], [section, authors, source, cmc_update, status, comments], [P.2, Philip/Marco, BUS-00112, P.2.3], [section, authors, source, cmc_update, status, comments], [P.2, Philip/Marco, P.2.3, See 116, ,117, What is the current status of US PMC#3? ], [section, authors, source, cmc_update, status, comments], [P.2, Philip/Marco, , ], [section, authors, source, cmc_update, status, comments], [P.2, Philip/Marco, , , Source doc does not cover full response needs], [section, authors, source, cmc_update, status, comments], [P.2, Philip/Marco, , , See 116, 113; Add data (Philip) , ], [section, authors, source, cmc_update, status, comments], [P.2.6 SmPC, Philip/Marco, , no], [section, authors, source, cmc_update, status, comments], [P.3.1, Marvin, , M1, , ], [section, authors, source, cmc_update, status, comments], [P.3.1, Simone, , P.3.1, , ], [section, authors, source, cmc_update, status, comments], [P.3.3, Philip/Marco, , P3.3, ], [section, authors, source, cmc_update, status, comments], [P.3.3/P.3.4, Philip/Marco, , P.3.3/P.3.4, Strategy? Push back based on what experience?], [section, authors, source, cmc_update, status, comments], [P.3.4, Philip/Marco, SwissMedic responses round 1, P.3.4, Please provide missing method info], [section, authors, source, cmc_update, status, comments], [P.3.5, Philip/Marco, , , ], [section, authors, source, cmc_update, status, comments], [P.3.5, Philip/Marco, , P.3.5, Statement missing], [section, authors, source, cmc_update, status, comments], [P.3.5, Philip/Marco, VAL-RPT-01762, no, Review of draft response. ], [section, authors, source, cmc_update, status, comments], [P.3.5, Philip/Marco, VAL-RPT-01767; -02132; -01777; -01770, P.3.5, Source documents screened, information missing. ], [section, authors, source, cmc_update, status, comments], [P.3.5, Philip/Marco, US PMC#3 report RPT-7334, ?], [section, authors, source, cmc_update, status, comments], [P.3.5, Marco/Richard/Max?, VAL-PLN-00419, , , ], [section, authors, source, cmc_update, status, comments], [P.3.5, Marco/Richard/Max?, , , , ], [section, authors, source, cmc_update, status, comments], [P.3.5, Philip/Marco, SwissMedic IR-1 Q74, ], [section, authors, source, cmc_update, status, comments], [P.3.5, Philip/Marco, Source Doc: SRP-9001.PQ.00.FR, , ], [section, authors, source, cmc_update, status, comments], [P.4, Marvin/Simone, , M1, , ], [section, authors, source, cmc_update, status, comments], [P.5.1, Simone/Marvin/Beate?, , P.5.1, , ], [section, authors, source, cmc_update, status, comments], [P.5.1, Marco/non-clinic, Saudi Q8); EMA Scientific Advise, no, ], [section, authors, source, cmc_update, status, comments], [P.5.1, Marco, Israel IR-1 Q11, , ], [section, authors, source, cmc_update, status, comments], [P.5.2, Sarepta/Marco (cmc update), , P.5.2, Sarepta takes this question, ], [section, authors, source, cmc_update, status, comments], [P.5.2, Sarepta/Marco (cmc update), , P.5.2, Sarepta takes this question, ], [section, authors, source, cmc_update, status, comments], [P.5.3, Marco, , ], [section, authors, source, cmc_update, status, comments], [P.5.3, Marco, , , , No documents on Box to address Q151a.], [section, authors, source, cmc_update, status, comments], [P.5.3, Marco, , , ], [section, authors, source, cmc_update, status, comments], [P.5.3, Marco, , , ], [section, authors, source, cmc_update, status, comments], [P.5.3, Marco, , , ], [section, authors, source, cmc_update, status, comments], [P.5.3, Marco, , , Answer already provided by Sarepta?, ], [section, authors, source, cmc_update, status, comments], [P.5.4, Marco, , P.5.4, ], [section, authors, source, cmc_update, status, comments], [P.5.4, Philip/Marco M?, RPT-01975, No, Response drafted and ready for review., Review of response. ], [section, authors, source, cmc_update, status, comments], [P.5.4, Marco, , , , 3 CoAs on Box. Fine.], [section, authors, source, cmc_update, status, comments], [P.5.4/P.8.1/P.8.3, Marvin/Simone, SwissMedic IR-2 Q15, P.5.4/P.8.1/P.8.3, , ], [section, authors, source, cmc_update, status, comments], [P.5.6, Marco, SwissMedic IR-1 Q65, , ], [section, authors, source, cmc_update, status, comments], [P.6, Marco/Beate?, , P.6, Sarepta takes this question, P.6 Update by Roche, ], [section, authors, source, cmc_update, status, comments], [P.6, Marco, , ], [section, authors, source, cmc_update, status, comments], [P.7, Marco M., , , , ], [section, authors, source, cmc_update, status, comments], [P7/SmPC Section 6.5, Marco M/Marvin, , , , ], [section, authors, source, cmc_update, status, comments], [P.8.3, Marco/Beate?, Saudi Q21, , ], [section, authors, source, cmc_update, status, comments], [P.8.1/P.8.3, Marco/Beate?, , P.8.1/P.8.3, ], [section, authors, source, cmc_update, status, comments], [A.2, Jakob/Simone, , , clarification needed, ], [section, authors, source, cmc_update, status, comments], [A.2, Marco --> Elisabeth, SwissMedic IR-1 Q7 & Q86 and IR-2 Q21, , ], [section, authors, source, cmc_update, status, comments], [A.2, Marco, , , ], [section, authors, source, cmc_update, status, comments], [A.2, Josef/Iris?, Source Doc: 634_DS-TSR_ 9001_21_088, , , ], [section, authors, source, cmc_update, status, comments], [R, Simone, , , , ], [3, 4, 5, 8, 9, 10, 11, 15, 16, 17, 18, 20, 21, 24, 25, 26, 28, 29, 31, 32, 33, 34, 36, 37, 38, 39, 40, 42, 43, 46, 47, 48, 49, 50, 51, 55, 56, 57, 58, 59, 63, 65, 68, 69, 70, 71, 76, 78, 79, 80, 82, 84, 85, 87, 98, 99, 102, 105, 108, 111, 112, 113, 114, 116, 117, 118, 119, 120, 121, 125, 127, 128, 129, 133, 134, 136, 137, 138, 139, 140, 141, 142, 143, 145, 148, 149, 150, 151, 152, 153, 154, 155, 156, 159, 161, 162, 169, 171, 175, 178, 179, 181, 184, 188, 189, 190, 192, 194], [csv_json, ema, ema], [local_ingest, lambdas, rebuild, run_vph_sync, deploy], [Bash(find:*)], [allow, deny], [permissions], [settings.local], [**], [README.md, cdk*.json, requirements*.txt, source.bat, **/__init__.py, **/__pycache__, tests], [include, exclude], [account, region, vpc], [890742574233, eu-central-1, vpc-09c7d2916f48e8b3a], [account, region, vpc], [NO_FILLED_YET, eu-central-1, NO_FILLED_YET], [dev, prod], [aws, aws-cn], [environments, @aws-cdk/aws-lambda:recognizeLayerVersion, @aws-cdk/core:checkSecretUsage, @aws-cdk/core:target-partitions, @aws-cdk/aws-ec2:uniqueImdsv2TemplateName, @aws-cdk/aws-ecs:arnFormatIncludesClusterName, @aws-cdk/aws-iam:minimizePolicies, @aws-cdk/core:validateSnapshotRemovalPolicy, @aws-cdk/aws-s3:createDefaultLoggingPolicy, @aws-cdk/aws-apigateway:disableCloudWatchRole, @aws-cdk/core:enablePartitionLiterals, @aws-cdk/aws-events:eventsTargetQueueSameAccount, @aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy, @aws-cdk/aws-route53-patters:useCertificate, @aws-cdk/aws-rds:databaseProxyUniqueResourceName, @aws-cdk/aws-ec2:launchTemplateDefaultUserData, @aws-cdk/aws-redshift:columnId, @aws-cdk/aws-ec2:restrictDefaultSecurityGroup, @aws-cdk/aws-apigateway:requestValidatorUniqueId, @aws-cdk/aws-kms:aliasNameRef, @aws-cdk/core:includePrefixInUniqueNameGeneration, @aws-cdk/aws-efs:denyAnonymousAccess, @aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion, @aws-cdk/aws-codepipeline:defaultPipelineTypeToV2, @aws-cdk/aws-eks:nodegroupNameAttribute, @aws-cdk/aws-ec2:ebsDefaultGp3Volume, @aws-cdk/aws-ecs:removeDefaultDeploymentAlarm], [app, watch, context], [python3 app.py], [tests, iac, cloudformation], [requirements, read.38474.1, cdk, source, README, requirements-dev, .gitignore, app], [unit], [__init__], [__init__, test_iac_stack], [iac_stack, __init__, fargate_stack], [Protocol, Endpoint], [email, roman.krivtsov@contractors.roche.com], [TopicName, Subscription], [document-actions-topic], [Type, Properties], [AWS::SNS::Topic], [DocumentActionsTopic], [AWSTemplateFormatVersion, Resources], [2010-09-09], [Type, Default], [String, i-012f443724ebdabf8], [InstanceId], [Service], [lambda.amazonaws.com], [Effect, Principal, Action], [Allow, sts:AssumeRole], [Version, Statement], [2012-10-17], [ec2:StartInstances, ec2:StopInstances], [Effect, Action, Resource], [Allow, *], [Version, Statement], [2012-10-17], [PolicyName, PolicyDocument], [StartStopEC2], [logs:CreateLogGroup, logs:CreateLogStream, logs:PutLogEvents], [Effect, Action, Resource], [Allow, *], [Version, Statement], [2012-10-17], [PolicyName, PolicyDocument], [BasicLambdaLogs], [AssumeRolePolicyDocument, Policies], [Type, Properties], [AWS::IAM::Role], [ZipFile], [Runtime, Timeout, Handler, Role, Code], [python3.9, index.handler, unknownyamltag], [Type, Properties], [AWS::Lambda::Function], [Arn, Id, Input], [unknownyamltag, StopEC2At19Target, unknownyamltag], [ScheduleExpression, State, Targets], [cron(0 19 ? * MON-FRI *), ENABLED], [Type, Properties], [AWS::Events::Rule], [FunctionName, Action, Principal, SourceArn], [unknownyamltag, lambda:InvokeFunction, events.amazonaws.com, unknownyamltag], [Type, Properties], [AWS::Lambda::Permission], [Arn, Id, Input], [unknownyamltag, StartEC2At07Target, unknownyamltag], [ScheduleExpression, State, Targets], [cron(0 7 ? * MON-FRI *), ENABLED], [Type, Properties], [AWS::Events::Rule], [FunctionName, Action, Principal, SourceArn], [unknownyamltag, lambda:InvokeFunction, events.amazonaws.com, unknownyamltag], [Type, Properties], [AWS::Lambda::Permission], [Arn, Id, Input], [unknownyamltag, StopEC2Weekend07Target, unknownyamltag], [ScheduleExpression, State, Targets], [cron(0 7 ? * SAT,SUN *), ENABLED], [Type, Properties], [AWS::Events::Rule], [FunctionName, Action, Principal, SourceArn], [unknownyamltag, lambda:InvokeFunction, events.amazonaws.com, unknownyamltag], [Type, Properties], [AWS::Lambda::Permission], [StartStopLambdaRole, StartStopLambda, StopEC2At19Rule, LambdaPermissionForStopEC2At19, StartEC2At07Rule, LambdaPermissionForStartEC2At07, StopEC2Weekend07Rule, LambdaPermissionForStopEC2Weekend07], [Description, Value], [Lambda ARN for starting and stopping EC2 instances, unknownyamltag], [LambdaArn], [AWSTemplateFormatVersion, Description, Parameters, Resources, Outputs], [2010-09-09], [vph_sync, ec2_rules], [tests, libs, scripts, services], [end2end], [pdm, .pdm-python, pyproject], [mocks], [test_services], [end2end_mock], [tests, libs], [.pdm-python, pyproject, README], [__init__, test_lib], [config], [txtract, __init__, llm, common, wv], [type, description], [string], [type, description], [string, Required file key.], [type, description], [string, Collection name, required.], [type, description], [string, S3 bucket name, required.], [type, default, description], [integer], [type, default, description], [boolean], [type, default, description], [boolean], [type, default, description], [string, , Additional metadata included to ingestion process], [job_id, file_key, collection, bucket, chunk_size, include_meta_data, dont_proceed, ingestion_metadata], [file_key, collection, bucket], [$schema, type, properties, required, additionalProperties], [http://json-schema.org/draft-07/schema#, object], [sqs_message_schema, config], [ingestion, bin, selenium_vph, agentic_chunking, regressions, jupyter, qa_pipeline], [common_publisher, pdm, pdf_publisher, single_recognition, pyproject, split_publisher, create_and_ingest_metadata_collection, pdf_consumer, README, metadata_extractor, box_sync, backup_export, add_pdf_hash_to_collection, metadata_transfer, json_to_txt_consumer, json_to_txt, backup_import, Ingestion, windows_duplicates], [index], [s3_copy, libreoffice_converter], [vph_sync, pyproject, selenium_vph], [requirements, find_by_md5, populate-agentic-processing-queue], [show_questions_results, links, qs, build_results], [ingestion, general], [main], [search-textracted-text, get-md5sums-of-s3, bring-your-own-vectors-weaviate, vectorize], [populate_qa, parser], [metadata_extraction_job, auth, embedding, pdf2task, agentic, task2json, tjson2text, wa_manager, projector-ui, documents, querying], [docs], [pdm, server, Dockerfile, pyproject, README, utils, metadata_extraction], [images], [sqs_logs, log_case_2, log_case_3, log_case_1], [pdm, server, .pdm-python, index, Dockerfile, pyproject, VERSION], [tests], [elemb, pdm, embedding, Dockerfile, pyproject, limit_feed, consumer, stat_tests], [mocks], [test_token_limit, test_splitting], [large_table, sr_19_050_mini, 32s23_02_967_v01, sr_19_050, SRP-9001 Drug Product (9001003-0003) Manufacturing at Catale(1)], [pdm, .pdm-python, Dockerfile, pyproject, consumer], [agentic_chunking_controller, Dockerfile, agentic_chunker, pyproject, README], [pdm, .pdm-python, Dockerfile, pyproject, consumer], [pdm, .pdm-python, Dockerfile, pyproject, consumer], [pdm, .pdm-python, Dockerfile, pyproject, collections_counts, app], [embedding-projector-standalone], [requirements, Dockerfile, pyproject, projector-data-download-and-converter], [oss_data], [index, CNAME], [embeddings, modelCheckpointPath], [Demo datasets], [oss_demo_projector_config], [tests], [pdm, vph_sync, server, .pdm-python, Dockerfile, sync_exec, pyproject, metadata_extractor, run_tests, weaviate_helper, metadata_handler], [mocks], [test_metadata_extractor, test_get_metadata], [RPT-02079], [pdm, server, .pdm-python, index, Dockerfile, pyproject, README, VERSION, poetry]]
['embedding'] ['Deployment cloud']
['embedding'] ['VPH Sync', 'Statistical tests']
['embedding'] ['PDF Parsing', 'Splitting', 'Infrastructure', 'Ingestion']
['embedding'] ['1. `unit_tests`', '2. `build`', '3. `deploy`', '4. `end2end`', '4. `end2end`', '5. `run_synchronization`']
['embedding'] ['docker compose', 'Single services as python scripts', 'CI/CD Pipeline Overview and pipeline Stages', 'CI/CD Variables', 'Weaviate manager']
['embedding'] ['Tests', 'Notes']
['embedding'] ['IaC', 'Local usage', 'Statistical tests', 'Services']
['embedding'] ['curl -X POST http://localhost:3000/ -F "filename=/data/pdf/123.pdf"']
['embedding'] ['curl -X POST http://localhost:3001/ -F "filename=/data/pdf/123.txt"']
['embedding'] ['curl -X POST http://localhost:3002/ -H "Content-Type: application/json" -d \'{"query": "Who killed Kenny?"}\'']
['embedding'] ['http://localhost:3000 - PDF parsing.', 'http://localhost:3001 - Splitting', 'http://localhost:3002 - Backend']
['embedding'] ['2024-11-26 08:58:25 aws-emr-studio-890742574233-eu-central-1', '2024-11-16 15:35:01 cdk-hnb659fds-assets-890742574233-eu-central-1', '2024-11-20 21:05:57 cf-templates-1geagnhmafn7y-eu-central-1', '2024-11-20 21:50:40 dev.genaiele.roche.com - Bucket for s3 hosting (not used yet in favour of nginx on ec2)', '2024-11-20 22:02:04 ele-box-dev-890742574233 - Separated bucket for Box files. But actual consumption happens over common files bucket (__box folder)', '2024-11-20 22:02:12 elevydis-mapping-890742574233 - not used', '2024-11-20 22:02:12 elevydis-qa-parsed - text based Q&A storage', '2024-11-16 20:59:28 landing-dev-890742574233 - main source of raw files', '2024-11-25 15:41:40 sagemaker-eu-central-1-890742574233', '2024-11-25 15:41:40 sagemaker-studio-flyqyshykkb', '2024-11-20 23:33:36 single-processes-temp-elevydis - test bucket for isolated consumtion', '2024-11-20 23:52:17 text-documents-dev-890742574233 - parsed PDFs, text/md based', '2024-11-20 23:52:17 text-documents-temp-123 - test bucket', '2024-11-20 23:52:29 textract-json-dev-890742574233 - Textract parsed JSON data', '2024-11-20 23:52:29 textract-json-test-890742574233', '2024-11-21 00:04:03 weaviate-backup-dev-890742574233 - weaviate backups']
['embedding'] ['"https://sqs.eu-central-1.amazonaws.com/890742574233/text_to_embed_dev", - markdown to split into chunks', '"https://sqs.eu-central-1.amazonaws.com/890742574233/text_to_embed_test",', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_json_to_parse_dev", - textract jsons to parse', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_json_to_parse_test",', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_task_ids_dev", - textract task ids to to consume jsons', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_task_ids_test"']
['embedding'] ['batches processing (e.g over scripts/common_publisher.py)', 'VPH sync. Which is endpoint in the documents service, which can be triggered from CI/CD']
['embedding'] ['By total count. New total count should not be more or less than 10% from the previous state']
['embedding'] ['Runs tests in `src/services/*/tests` using `pytest`.', 'Triggered on source changes.', 'Skipped if `SKIP_TESTS` or `SOURCE_INGESTION_BUCKET` is set.']
['embedding'] ['Job: `run_unit_tests`', 'Purpose: Run unit tests (excluding integration tests) for each service.', 'Details:']
['embedding'] ['`start-own-runner-machine` – starts the powerful m5.8xlarge EC2 runner.', '`build-job` – builds all service containers (matrix build).', '`project-build-job` – manually builds `projector-ui` (m5.8xlarge runner should be used)']
['embedding'] ['Uses `kaniko` for container builds.', 'Tags images with branch or commit tag.', 'Images pushed to `890742574233.dkr.ecr.eu-central-1.amazonaws.com`.']
['embedding'] ['Jobs:', 'Purpose: Build Docker images and push to AWS ECR.', 'Details:']
['embedding'] ['`deploy-job-dev` – deploy to dev (manual).', '`deploy-job-test` – deploy to test (manual).', '`deploy_vectors-dev-to-test-curl` – backup/migrate Weaviate data.']
['embedding'] ['SSH to target host, pull branch, authenticate with AWS ECR.', 'Deploy with `docker stack deploy` using `stack.yml`.', 'Optional: Deploy `portainer`, clean Docker system.']
['embedding'] ['Jobs:', 'Purpose: Deploy services using Docker Swarm.', 'Details:']
['embedding'] ['Runs `pytest` with marker `integration` in the folder src/tests/end2end']
['embedding'] ['Job: `end2end` (manual)', 'Purpose: Run end-to-end integration tests.', 'Details:']
['embedding'] ['Runs `pytest` with marker `integration` in the folder src/tests/end2end']
['embedding'] ['Job: `end2end` (manual)', 'Purpose: Run end-to-end integration tests.', 'Details:']
['embedding'] ['TARGET_COLLECTION must be provided']
['embedding'] ['Job: `run_synchronization` (manual)', 'Purpose: Invokes the documents/vph_sync endpoint', 'Details:']
['embedding'] ['Unit tests: `pdm run pytest -s -m "not integration"` use only local system and mocked services', 'Integration tests `pdm run pytest -s -m "not integration"` can use external APIs like Galileo or Weaviate EC2', 'End2end ingestion tests `cd src/tests && pdm run pytest -m "integration" -s ` Emulates the ingestions process', 'End2end Selenium tests are automatically triggered in https://code.roche.com/gene-therapy/tools/genaiele/rag-for-elevidys-regression-tests/-/pipelines']
['embedding'] ['Default runner tag: `gen10`', 'Docker builds use Kaniko.', 'Docker images stored in AWS ECR under account `890742574233`.', 'Deployment via Docker Swarm using `docker stack deploy`.', 'Manual steps defined for deployment and ingestion for controlled execution.']
['embedding'] ['pdf2task', 'task2json', 'tjson2text', 'embedding']
['embedding'] ['ordered_list']
['embedding'] ['Deployment cloud']
['embedding'] ['VPH Sync', 'Statistical tests']
['embedding'] ['PDF Parsing', 'Splitting', 'Infrastructure', 'Ingestion']
['embedding'] ['1. `unit_tests`', '2. `build`', '3. `deploy`', '4. `end2end`', '4. `end2end`', '5. `run_synchronization`']
['embedding'] ['docker compose', 'Single services as python scripts', 'CI/CD Pipeline Overview and pipeline Stages', 'CI/CD Variables', 'Weaviate manager']
['embedding'] ['Tests', 'Notes']
['embedding'] ['IaC', 'Local usage', 'Statistical tests', 'Services']
['embedding'] ['curl -X POST http://localhost:3000/ -F "filename=/data/pdf/123.pdf"']
['embedding'] ['curl -X POST http://localhost:3001/ -F "filename=/data/pdf/123.txt"']
['embedding'] ['curl -X POST http://localhost:3002/ -H "Content-Type: application/json" -d \'{"query": "Who killed Kenny?"}\'']
['embedding'] ['http://localhost:3000 - PDF parsing.', 'http://localhost:3001 - Splitting', 'http://localhost:3002 - Backend']
['embedding'] ['2024-11-26 08:58:25 aws-emr-studio-890742574233-eu-central-1', '2024-11-16 15:35:01 cdk-hnb659fds-assets-890742574233-eu-central-1', '2024-11-20 21:05:57 cf-templates-1geagnhmafn7y-eu-central-1', '2024-11-20 21:50:40 dev.genaiele.roche.com - Bucket for s3 hosting (not used yet in favour of nginx on ec2)', '2024-11-20 22:02:04 ele-box-dev-890742574233 - Separated bucket for Box files. But actual consumption happens over common files bucket (__box folder)', '2024-11-20 22:02:12 elevydis-mapping-890742574233 - not used', '2024-11-20 22:02:12 elevydis-qa-parsed - text based Q&A storage', '2024-11-16 20:59:28 landing-dev-890742574233 - main source of raw files', '2024-11-25 15:41:40 sagemaker-eu-central-1-890742574233', '2024-11-25 15:41:40 sagemaker-studio-flyqyshykkb', '2024-11-20 23:33:36 single-processes-temp-elevydis - test bucket for isolated consumtion', '2024-11-20 23:52:17 text-documents-dev-890742574233 - parsed PDFs, text/md based', '2024-11-20 23:52:17 text-documents-temp-123 - test bucket', '2024-11-20 23:52:29 textract-json-dev-890742574233 - Textract parsed JSON data', '2024-11-20 23:52:29 textract-json-test-890742574233', '2024-11-21 00:04:03 weaviate-backup-dev-890742574233 - weaviate backups']
['embedding'] ['"https://sqs.eu-central-1.amazonaws.com/890742574233/text_to_embed_dev", - markdown to split into chunks', '"https://sqs.eu-central-1.amazonaws.com/890742574233/text_to_embed_test",', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_json_to_parse_dev", - textract jsons to parse', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_json_to_parse_test",', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_task_ids_dev", - textract task ids to to consume jsons', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_task_ids_test"']
['embedding'] ['batches processing (e.g over scripts/common_publisher.py)', 'VPH sync. Which is endpoint in the documents service, which can be triggered from CI/CD']
['embedding'] ['By total count. New total count should not be more or less than 10% from the previous state']
['embedding'] ['Runs tests in `src/services/*/tests` using `pytest`.', 'Triggered on source changes.', 'Skipped if `SKIP_TESTS` or `SOURCE_INGESTION_BUCKET` is set.']
['embedding'] ['Job: `run_unit_tests`', 'Purpose: Run unit tests (excluding integration tests) for each service.', 'Details:']
['embedding'] ['`start-own-runner-machine` – starts the powerful m5.8xlarge EC2 runner.', '`build-job` – builds all service containers (matrix build).', '`project-build-job` – manually builds `projector-ui` (m5.8xlarge runner should be used)']
['embedding'] ['Uses `kaniko` for container builds.', 'Tags images with branch or commit tag.', 'Images pushed to `890742574233.dkr.ecr.eu-central-1.amazonaws.com`.']
['embedding'] ['Jobs:', 'Purpose: Build Docker images and push to AWS ECR.', 'Details:']
['embedding'] ['`deploy-job-dev` – deploy to dev (manual).', '`deploy-job-test` – deploy to test (manual).', '`deploy_vectors-dev-to-test-curl` – backup/migrate Weaviate data.']
['embedding'] ['SSH to target host, pull branch, authenticate with AWS ECR.', 'Deploy with `docker stack deploy` using `stack.yml`.', 'Optional: Deploy `portainer`, clean Docker system.']
['embedding'] ['Jobs:', 'Purpose: Deploy services using Docker Swarm.', 'Details:']
['embedding'] ['Runs `pytest` with marker `integration` in the folder src/tests/end2end']
['embedding'] ['Job: `end2end` (manual)', 'Purpose: Run end-to-end integration tests.', 'Details:']
['embedding'] ['Runs `pytest` with marker `integration` in the folder src/tests/end2end']
['embedding'] ['Job: `end2end` (manual)', 'Purpose: Run end-to-end integration tests.', 'Details:']
['embedding'] ['TARGET_COLLECTION must be provided']
['embedding'] ['Job: `run_synchronization` (manual)', 'Purpose: Invokes the documents/vph_sync endpoint', 'Details:']
['embedding'] ['Unit tests: `pdm run pytest -s -m "not integration"` use only local system and mocked services', 'Integration tests `pdm run pytest -s -m "not integration"` can use external APIs like Galileo or Weaviate EC2', 'End2end ingestion tests `cd src/tests && pdm run pytest -m "integration" -s ` Emulates the ingestions process', 'End2end Selenium tests are automatically triggered in https://code.roche.com/gene-therapy/tools/genaiele/rag-for-elevidys-regression-tests/-/pipelines']
['embedding'] ['Default runner tag: `gen10`', 'Docker builds use Kaniko.', 'Docker images stored in AWS ECR under account `890742574233`.', 'Deployment via Docker Swarm using `docker stack deploy`.', 'Manual steps defined for deployment and ingestion for controlled execution.']
['embedding'] ['pdf2task', 'task2json', 'tjson2text', 'embedding']
['embedding'] ['ordered_list']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Deployment cloud']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['VPH Sync', 'Statistical tests']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['PDF Parsing', 'Splitting', 'Infrastructure', 'Ingestion']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['1. `unit_tests`', '2. `build`', '3. `deploy`', '4. `end2end`', '4. `end2end`', '5. `run_synchronization`']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['docker compose', 'Single services as python scripts', 'CI/CD Pipeline Overview and pipeline Stages', 'CI/CD Variables', 'Weaviate manager']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Tests', 'Notes']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['IaC', 'Local usage', 'Statistical tests', 'Services']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['curl -X POST http://localhost:3000/ -F "filename=/data/pdf/123.pdf"']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['curl -X POST http://localhost:3001/ -F "filename=/data/pdf/123.txt"']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['curl -X POST http://localhost:3002/ -H "Content-Type: application/json" -d \'{"query": "Who killed Kenny?"}\'']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['http://localhost:3000 - PDF parsing.', 'http://localhost:3001 - Splitting', 'http://localhost:3002 - Backend']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['2024-11-26 08:58:25 aws-emr-studio-890742574233-eu-central-1', '2024-11-16 15:35:01 cdk-hnb659fds-assets-890742574233-eu-central-1', '2024-11-20 21:05:57 cf-templates-1geagnhmafn7y-eu-central-1', '2024-11-20 21:50:40 dev.genaiele.roche.com - Bucket for s3 hosting (not used yet in favour of nginx on ec2)', '2024-11-20 22:02:04 ele-box-dev-890742574233 - Separated bucket for Box files. But actual consumption happens over common files bucket (__box folder)', '2024-11-20 22:02:12 elevydis-mapping-890742574233 - not used', '2024-11-20 22:02:12 elevydis-qa-parsed - text based Q&A storage', '2024-11-16 20:59:28 landing-dev-890742574233 - main source of raw files', '2024-11-25 15:41:40 sagemaker-eu-central-1-890742574233', '2024-11-25 15:41:40 sagemaker-studio-flyqyshykkb', '2024-11-20 23:33:36 single-processes-temp-elevydis - test bucket for isolated consumtion', '2024-11-20 23:52:17 text-documents-dev-890742574233 - parsed PDFs, text/md based', '2024-11-20 23:52:17 text-documents-temp-123 - test bucket', '2024-11-20 23:52:29 textract-json-dev-890742574233 - Textract parsed JSON data', '2024-11-20 23:52:29 textract-json-test-890742574233', '2024-11-21 00:04:03 weaviate-backup-dev-890742574233 - weaviate backups']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['"https://sqs.eu-central-1.amazonaws.com/890742574233/text_to_embed_dev", - markdown to split into chunks', '"https://sqs.eu-central-1.amazonaws.com/890742574233/text_to_embed_test",', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_json_to_parse_dev", - textract jsons to parse', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_json_to_parse_test",', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_task_ids_dev", - textract task ids to to consume jsons', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_task_ids_test"']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['batches processing (e.g over scripts/common_publisher.py)', 'VPH sync. Which is endpoint in the documents service, which can be triggered from CI/CD']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['By total count. New total count should not be more or less than 10% from the previous state']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Runs tests in `src/services/*/tests` using `pytest`.', 'Triggered on source changes.', 'Skipped if `SKIP_TESTS` or `SOURCE_INGESTION_BUCKET` is set.']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Job: `run_unit_tests`', 'Purpose: Run unit tests (excluding integration tests) for each service.', 'Details:']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['`start-own-runner-machine` – starts the powerful m5.8xlarge EC2 runner.', '`build-job` – builds all service containers (matrix build).', '`project-build-job` – manually builds `projector-ui` (m5.8xlarge runner should be used)']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Uses `kaniko` for container builds.', 'Tags images with branch or commit tag.', 'Images pushed to `890742574233.dkr.ecr.eu-central-1.amazonaws.com`.']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Jobs:', 'Purpose: Build Docker images and push to AWS ECR.', 'Details:']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['`deploy-job-dev` – deploy to dev (manual).', '`deploy-job-test` – deploy to test (manual).', '`deploy_vectors-dev-to-test-curl` – backup/migrate Weaviate data.']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['SSH to target host, pull branch, authenticate with AWS ECR.', 'Deploy with `docker stack deploy` using `stack.yml`.', 'Optional: Deploy `portainer`, clean Docker system.']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Jobs:', 'Purpose: Deploy services using Docker Swarm.', 'Details:']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Runs `pytest` with marker `integration` in the folder src/tests/end2end']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Job: `end2end` (manual)', 'Purpose: Run end-to-end integration tests.', 'Details:']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Runs `pytest` with marker `integration` in the folder src/tests/end2end']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Job: `end2end` (manual)', 'Purpose: Run end-to-end integration tests.', 'Details:']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['TARGET_COLLECTION must be provided']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Job: `run_synchronization` (manual)', 'Purpose: Invokes the documents/vph_sync endpoint', 'Details:']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Unit tests: `pdm run pytest -s -m "not integration"` use only local system and mocked services', 'Integration tests `pdm run pytest -s -m "not integration"` can use external APIs like Galileo or Weaviate EC2', 'End2end ingestion tests `cd src/tests && pdm run pytest -m "integration" -s ` Emulates the ingestions process', 'End2end Selenium tests are automatically triggered in https://code.roche.com/gene-therapy/tools/genaiele/rag-for-elevidys-regression-tests/-/pipelines']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['Default runner tag: `gen10`', 'Docker builds use Kaniko.', 'Docker images stored in AWS ECR under account `890742574233`.', 'Deployment via Docker Swarm using `docker stack deploy`.', 'Manual steps defined for deployment and ingestion for controlled execution.']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['pdf2task', 'task2json', 'tjson2text', 'embedding']
['metadata_extraction_job', 'auth', 'embedding', 'pdf2task', 'agentic', 'task2json', 'tjson2text', 'wa_manager', 'projector-ui', 'documents', 'querying'] ['ordered_list']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Deployment cloud']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['VPH Sync', 'Statistical tests']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['PDF Parsing', 'Splitting', 'Infrastructure', 'Ingestion']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['1. `unit_tests`', '2. `build`', '3. `deploy`', '4. `end2end`', '4. `end2end`', '5. `run_synchronization`']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['docker compose', 'Single services as python scripts', 'CI/CD Pipeline Overview and pipeline Stages', 'CI/CD Variables', 'Weaviate manager']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Tests', 'Notes']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['IaC', 'Local usage', 'Statistical tests', 'Services']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['curl -X POST http://localhost:3000/ -F "filename=/data/pdf/123.pdf"']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['curl -X POST http://localhost:3001/ -F "filename=/data/pdf/123.txt"']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['curl -X POST http://localhost:3002/ -H "Content-Type: application/json" -d \'{"query": "Who killed Kenny?"}\'']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['http://localhost:3000 - PDF parsing.', 'http://localhost:3001 - Splitting', 'http://localhost:3002 - Backend']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['2024-11-26 08:58:25 aws-emr-studio-890742574233-eu-central-1', '2024-11-16 15:35:01 cdk-hnb659fds-assets-890742574233-eu-central-1', '2024-11-20 21:05:57 cf-templates-1geagnhmafn7y-eu-central-1', '2024-11-20 21:50:40 dev.genaiele.roche.com - Bucket for s3 hosting (not used yet in favour of nginx on ec2)', '2024-11-20 22:02:04 ele-box-dev-890742574233 - Separated bucket for Box files. But actual consumption happens over common files bucket (__box folder)', '2024-11-20 22:02:12 elevydis-mapping-890742574233 - not used', '2024-11-20 22:02:12 elevydis-qa-parsed - text based Q&A storage', '2024-11-16 20:59:28 landing-dev-890742574233 - main source of raw files', '2024-11-25 15:41:40 sagemaker-eu-central-1-890742574233', '2024-11-25 15:41:40 sagemaker-studio-flyqyshykkb', '2024-11-20 23:33:36 single-processes-temp-elevydis - test bucket for isolated consumtion', '2024-11-20 23:52:17 text-documents-dev-890742574233 - parsed PDFs, text/md based', '2024-11-20 23:52:17 text-documents-temp-123 - test bucket', '2024-11-20 23:52:29 textract-json-dev-890742574233 - Textract parsed JSON data', '2024-11-20 23:52:29 textract-json-test-890742574233', '2024-11-21 00:04:03 weaviate-backup-dev-890742574233 - weaviate backups']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['"https://sqs.eu-central-1.amazonaws.com/890742574233/text_to_embed_dev", - markdown to split into chunks', '"https://sqs.eu-central-1.amazonaws.com/890742574233/text_to_embed_test",', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_json_to_parse_dev", - textract jsons to parse', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_json_to_parse_test",', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_task_ids_dev", - textract task ids to to consume jsons', '"https://sqs.eu-central-1.amazonaws.com/890742574233/texttract_task_ids_test"']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['batches processing (e.g over scripts/common_publisher.py)', 'VPH sync. Which is endpoint in the documents service, which can be triggered from CI/CD']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['By total count. New total count should not be more or less than 10% from the previous state']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Runs tests in `src/services/*/tests` using `pytest`.', 'Triggered on source changes.', 'Skipped if `SKIP_TESTS` or `SOURCE_INGESTION_BUCKET` is set.']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Job: `run_unit_tests`', 'Purpose: Run unit tests (excluding integration tests) for each service.', 'Details:']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['`start-own-runner-machine` – starts the powerful m5.8xlarge EC2 runner.', '`build-job` – builds all service containers (matrix build).', '`project-build-job` – manually builds `projector-ui` (m5.8xlarge runner should be used)']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Uses `kaniko` for container builds.', 'Tags images with branch or commit tag.', 'Images pushed to `890742574233.dkr.ecr.eu-central-1.amazonaws.com`.']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Jobs:', 'Purpose: Build Docker images and push to AWS ECR.', 'Details:']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['`deploy-job-dev` – deploy to dev (manual).', '`deploy-job-test` – deploy to test (manual).', '`deploy_vectors-dev-to-test-curl` – backup/migrate Weaviate data.']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['SSH to target host, pull branch, authenticate with AWS ECR.', 'Deploy with `docker stack deploy` using `stack.yml`.', 'Optional: Deploy `portainer`, clean Docker system.']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Jobs:', 'Purpose: Deploy services using Docker Swarm.', 'Details:']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Runs `pytest` with marker `integration` in the folder src/tests/end2end']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Job: `end2end` (manual)', 'Purpose: Run end-to-end integration tests.', 'Details:']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Runs `pytest` with marker `integration` in the folder src/tests/end2end']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Job: `end2end` (manual)', 'Purpose: Run end-to-end integration tests.', 'Details:']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['TARGET_COLLECTION must be provided']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Job: `run_synchronization` (manual)', 'Purpose: Invokes the documents/vph_sync endpoint', 'Details:']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Unit tests: `pdm run pytest -s -m "not integration"` use only local system and mocked services', 'Integration tests `pdm run pytest -s -m "not integration"` can use external APIs like Galileo or Weaviate EC2', 'End2end ingestion tests `cd src/tests && pdm run pytest -m "integration" -s ` Emulates the ingestions process', 'End2end Selenium tests are automatically triggered in https://code.roche.com/gene-therapy/tools/genaiele/rag-for-elevidys-regression-tests/-/pipelines']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['Default runner tag: `gen10`', 'Docker builds use Kaniko.', 'Docker images stored in AWS ECR under account `890742574233`.', 'Deployment via Docker Swarm using `docker stack deploy`.', 'Manual steps defined for deployment and ingestion for controlled execution.']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['pdf2task', 'task2json', 'tjson2text', 'embedding']
['elemb', 'pdm', 'embedding', 'Dockerfile', 'pyproject', 'limit_feed', 'consumer', 'stat_tests'] ['ordered_list']

