Skip to content

Api reference

components.compliance.public.commands

alert_unreviewed_gdpr_deletion_batches

alert_unreviewed_gdpr_deletion_batches

alert_unreviewed_gdpr_deletion_batches()

Send Slack alerts for unreviewed GDPR deletion batches per bucket type.

Source code in components/compliance/public/commands/alert_unreviewed_gdpr_deletion_batches.py
@compliance_commands.command()
def alert_unreviewed_gdpr_deletion_batches() -> None:
    """Send Slack alerts for unreviewed GDPR deletion batches per bucket type.

    For every compliance data bucket, posts a summary of pending-review
    deletion batches to the bucket's configured ops channel, but only on
    the bucket's configured alert days of the month.
    """
    day_of_month = date.today().day

    for bucket_type in ComplianceDataBucketType:
        notification_config = get_notification_config_for_bucket(bucket_type)

        # A bucket without an ops channel has nowhere to send alerts.
        if notification_config.ops_channel is None:
            current_logger.info(
                "skipping bucket: no ops_channel configured",
                bucket_type=bucket_type.value,
            )
            continue

        # Alerts are only sent on the bucket's configured days of the month.
        if day_of_month not in notification_config.ops_alert_days_of_month:
            current_logger.info(
                "skipping bucket: today not in alert days",
                bucket_type=bucket_type.value,
                today=day_of_month,
                alert_days=notification_config.ops_alert_days_of_month,
            )
            continue

        pending_batches, pending_count = get_gdpr_deletion_batches(
            reviewed_status=GdprDeletionBatchStatus.pending,
            bucket_type=bucket_type,
            limit=10000,
            offset=0,
        )

        if pending_count == 0:
            current_logger.info(
                "skipping bucket: no pending batches",
                bucket_type=bucket_type.value,
            )
            continue

        pending_records = sum(
            batch.records_count_for_list for batch in pending_batches
        )

        # Outside production, alerts are redirected to the test channel.
        if is_production_mode():
            channel = notification_config.ops_channel
        else:
            channel = SlackChannel.test.value

        message_lines = [
            f"📋 *GDPR Compliance — {bucket_type.value}*",
            f"{pending_count} batch(es) pending review with {pending_records} record(s) total.",
            f"<{current_config['GLOBAL_MARMOT_BASE_URL']}/fr/gdpr-compliance|Review in Marmot>",
        ]
        if notification_config.ops_handle is not None:
            message_lines.append(f"cc {notification_config.ops_handle}")
        text = "\n".join(message_lines)

        current_logger.info(
            "posting unreviewed batches alert",
            bucket_type=bucket_type.value,
            pending_batches=pending_count,
            pending_records=pending_records,
            channel=channel,
        )

        try:
            current_app.slack_web_client.chat_postMessage(  # type: ignore[attr-defined]
                channel=channel,
                text=text,
                blocks=[SectionBlock(text=text)],
            )
        except SlackApiError:
            # A Slack failure for one bucket must not abort the others.
            current_logger.exception(
                "failed to post unreviewed batches alert to Slack",
                bucket_type=bucket_type.value,
                channel=channel,
            )

create_gdpr_deletion_batch_for_data_bucket

DEFAULT_GDPR_BATCH_SIZE module-attribute

DEFAULT_GDPR_BATCH_SIZE = 200

create_gdpr_deletion_batch_for_data_bucket

create_gdpr_deletion_batch_for_data_bucket(
    bucket_type, max_batch_size, dry_run=False
)

Create GDPR deletion batches for a given data bucket, chunked by max-batch-size.

Source code in components/compliance/public/commands/create_gdpr_deletion_batch_for_data_bucket.py
@compliance_commands.command()
@command_with_dry_run
@click.option(
    "--bucket-type",
    type=click.Choice(ComplianceDataBucketType.get_values()),
    required=True,
    help="Type of data bucket for GDPR deletion batch",
)
@click.option(
    "--max-batch-size",
    type=int,
    default=DEFAULT_GDPR_BATCH_SIZE,
    show_default=True,
    help="Maximum number of records per batch",
)
def create_gdpr_deletion_batch_for_data_bucket(
    bucket_type: str, max_batch_size: int, dry_run: bool = False
) -> None:
    """Create GDPR deletion batches for a given data bucket, chunked by max-batch-size."""
    bucket = ComplianceDataBucketType(bucket_type)

    record_ids = get_gdpr_deletion_record_ids_to_batch_review(bucket_type=bucket)
    if not record_ids:
        current_logger.info(
            "no unbatched deletion records found",
            bucket_type=bucket.value,
        )
        return

    current_logger.info(
        "unbatched deletion records found",
        bucket_type=bucket.value,
        count=len(record_ids),
    )

    # Split the record ids into consecutive slices of at most max_batch_size.
    id_chunks = [
        record_ids[start : start + max_batch_size]
        for start in range(0, len(record_ids), max_batch_size)
    ]
    total_batches = len(id_chunks)

    for position, ids_chunk in enumerate(id_chunks, start=1):
        # In dry-run mode nothing is committed to the database.
        batch = create_gdpr_deletion_batch(
            bucket_type=bucket,
            commit=not dry_run,
        )

        assigned = bulk_assign_deletion_records_to_batch(
            record_ids=ids_chunk,
            deletion_batch_id=batch.id,
            commit=not dry_run,
        )

        current_logger.info(
            "created gdpr deletion batch",
            batch_id=str(batch.id),
            batch_number=position,
            total_batches=total_batches,
            records_assigned=assigned,
            bucket_type=bucket.value,
        )

create_gdpr_deletion_records_for_data_bucket

DEFAULT_CHUNK_SIZE module-attribute

DEFAULT_CHUNK_SIZE = 5000

create_gdpr_deletion_records_for_data_bucket

create_gdpr_deletion_records_for_data_bucket(
    bucket_type, chunk_size, dry_run=False
)

Create GDPR deletion records for a given bucket type using bulk operations.

Source code in components/compliance/public/commands/create_gdpr_deletion_records_for_data_bucket.py
@compliance_commands.command()
@command_with_dry_run
@click.option(
    "--bucket-type",
    type=click.Choice(ComplianceDataBucketType.get_values()),
    required=True,
    help="Type of data bucket for GDPR deletion records",
)
@click.option(
    "--chunk-size",
    type=int,
    default=DEFAULT_CHUNK_SIZE,
    show_default=True,
    help="Number of profiles to process per chunk",
)
def create_gdpr_deletion_records_for_data_bucket(
    bucket_type: str, chunk_size: int, dry_run: bool = False
) -> None:
    """Create GDPR deletion records for a given bucket type using bulk operations."""
    bucket = ComplianceDataBucketType(bucket_type)

    deletion_rules = get_callable_rules_to_get_profiles_to_record_deletion(
        bucket_type=bucket
    )

    per_rule_profiles = [rule() for rule in deletion_rules]

    # Only profiles flagged by every rule are targeted for deletion.
    targeted_profiles: list[uuid.UUID] = []
    if per_rule_profiles:
        common = set(per_rule_profiles[0])
        for rule_profiles in per_rule_profiles[1:]:
            common &= set(rule_profiles)
        targeted_profiles = list(common)

    total = len(targeted_profiles)
    current_logger.info(
        "profiles targeted for deletion",
        count=total,
        bucket_type=bucket.value,
    )

    if not targeted_profiles:
        click.echo("No profiles targeted for deletion.")
        return

    started_at = time.monotonic()

    created, skipped = bulk_create_gdpr_deletion_records(
        global_profile_ids=targeted_profiles,
        bucket_type=bucket,
        chunk_size=chunk_size,
        commit=not dry_run,
    )

    elapsed = time.monotonic() - started_at

    current_logger.info(
        "gdpr deletion records creation completed",
        bucket_type=bucket.value,
        total=total,
        created=created,
        skipped=skipped,
        elapsed_seconds=round(elapsed, 2),
    )

    if dry_run:
        click.secho(
            f"\n[DRY RUN] Would create {created} records, skip {skipped} duplicates "
            f"from {total} profiles.",
            fg="yellow",
            bold=True,
        )
    else:
        click.secho(
            f"\nCreated {created} records, skipped {skipped} duplicates "
            f"from {total} profiles. Took {elapsed:.1f}s.",
            fg="green",
            bold=True,
        )

purge_gdpr_compliance_data

DEFAULT_CHUNK_SIZE module-attribute

DEFAULT_CHUNK_SIZE = 5000

purge_gdpr_compliance_data_command

purge_gdpr_compliance_data_command(
    bucket_type, chunk_size, dry_run=False
)

Purge all GDPR deletion records, batches, and orphan profiles for a bucket type.

Source code in components/compliance/public/commands/purge_gdpr_compliance_data.py
@compliance_commands.command()
@command_with_dry_run
@click.option(
    "--bucket-type",
    type=click.Choice(ComplianceDataBucketType.get_values()),
    required=True,
    help="Bucket type to purge all compliance data for",
)
@click.option(
    "--chunk-size",
    type=int,
    default=DEFAULT_CHUNK_SIZE,
    show_default=True,
    help="Number of rows to delete per chunk",
)
def purge_gdpr_compliance_data_command(
    bucket_type: str, chunk_size: int, dry_run: bool = False
) -> None:
    """Purge all GDPR deletion records, batches, and orphan profiles for a bucket type."""
    bucket = ComplianceDataBucketType(bucket_type)
    started_at = time.monotonic()

    # The purge helper itself honours dry_run, so it is passed through as-is.
    result = purge_gdpr_compliance_data(
        bucket_type=bucket,
        chunk_size=chunk_size,
        dry_run=dry_run,
    )

    elapsed = time.monotonic() - started_at

    if dry_run:
        click.secho(
            f"\n[DRY RUN] Would purge for bucket_type={bucket_type}:",
            fg="yellow",
            bold=True,
        )
        click.echo(f"  Records:  {result['records']}")
        click.echo(f"  Batches:  {result['batches']}")
        click.echo(f"  Profiles: {result['profiles']} (orphans only)")
        click.echo("  Re-run with --execute to perform the purge.\n")
    else:
        click.secho(
            f"\nPurged for bucket_type={bucket_type}:",
            fg="green",
            bold=True,
        )
        click.echo(f"  Records:  {result['records']}")
        click.echo(f"  Batches:  {result['batches']}")
        click.echo(f"  Profiles: {result['profiles']} (orphans)")
        click.echo(f"  Took {elapsed:.1f}s\n")

    current_logger.info(
        "purge complete",
        bucket_type=bucket_type,
        records=result["records"],
        batches=result["batches"],
        profiles=result["profiles"],
        elapsed_seconds=round(elapsed, 2),
        dry_run=dry_run,
    )

components.compliance.public.entities

ComplianceProfile dataclass

ComplianceProfile(
    id,
    global_profile_id,
    created_at,
    updated_at,
    global_user_id=None,
)

Bases: DataClassJsonMixin

Entity representing a compliance profile model

created_at instance-attribute

created_at

global_profile_id instance-attribute

global_profile_id

global_user_id class-attribute instance-attribute

global_user_id = None

id instance-attribute

id

updated_at instance-attribute

updated_at

GdprDeletionBatch dataclass

GdprDeletionBatch(
    id,
    bucket_type,
    deletion_records,
    reviewed_status,
    reviewed_at,
    reviewed_by,
    reviewed_reason,
    created_at,
    updated_at,
    records_count_for_list=0,
    failed_records_count=0,
    manually_reviewed_count=0,
    rejected_records_count=0,
    total_records_count=0,
)

Bases: DataClassJsonMixin

Entity representing a GDPR deletion batch model

bucket_type instance-attribute

bucket_type

created_at instance-attribute

created_at

deletion_records instance-attribute

deletion_records

failed_records_count class-attribute instance-attribute

failed_records_count = 0

id instance-attribute

id

manually_reviewed_count class-attribute instance-attribute

manually_reviewed_count = 0

records_count_for_list class-attribute instance-attribute

records_count_for_list = 0

rejected_records_count class-attribute instance-attribute

rejected_records_count = 0

reviewed_at instance-attribute

reviewed_at

reviewed_by instance-attribute

reviewed_by

reviewed_reason instance-attribute

reviewed_reason

reviewed_status instance-attribute

reviewed_status

total_records_count class-attribute instance-attribute

total_records_count = 0

updated_at instance-attribute

updated_at

GdprDeletionBatchProgress dataclass

GdprDeletionBatchProgress(
    total_accepted, completed, failed, pending
)

Bases: DataClassJsonMixin

Progress stats for a GDPR deletion batch.

completed instance-attribute

completed

failed instance-attribute

failed

pending instance-attribute

pending

total_accepted instance-attribute

total_accepted

GdprDeletionRecord dataclass

GdprDeletionRecord(
    id,
    compliance_profile_id,
    compliance_profile,
    bucket_type,
    deletion_batch_id,
    deletion_scheduled_at,
    deletion_applied_at,
    created_at,
    updated_at,
    accepted_for_deletion=None,
    is_manually_reviewed=None,
    deletion_error=None,
)

Bases: DataClassJsonMixin

Entity representing a GDPR deletion record model

accepted_for_deletion class-attribute instance-attribute

accepted_for_deletion = None

bucket_type instance-attribute

bucket_type

compliance_profile instance-attribute

compliance_profile

compliance_profile_id instance-attribute

compliance_profile_id

created_at instance-attribute

created_at

deletion_applied_at instance-attribute

deletion_applied_at

deletion_batch_id instance-attribute

deletion_batch_id

deletion_error class-attribute instance-attribute

deletion_error = None

deletion_scheduled_at instance-attribute

deletion_scheduled_at

id instance-attribute

id

is_manually_reviewed class-attribute instance-attribute

is_manually_reviewed = None

updated_at instance-attribute

updated_at

components.compliance.public.enums

ComplianceDataBucketType

Bases: AlanBaseEnum

Type of data bucket to be considered for GDPR compliance deletion process

health_claims class-attribute instance-attribute

health_claims = 'health_claims'

health_services class-attribute instance-attribute

health_services = 'health_services'

medical_data class-attribute instance-attribute

medical_data = 'medical_data'

prevoyance_claims class-attribute instance-attribute

prevoyance_claims = 'prevoyance_claims'

GdprDeletionBatchStatus

Bases: AlanBaseEnum

Status of a GDPR compliance batch

accepted class-attribute instance-attribute

accepted = 'accepted'

pending class-attribute instance-attribute

pending = 'pending'

rejected class-attribute instance-attribute

rejected = 'rejected'

components.compliance.public.gdpr_access

Helpers for running Metabase saved questions and uploading files for GDPR access requests.

run_metabase_questions_for_user

run_metabase_questions_for_user(
    user_id,
    questions,
    output_dir,
    file_prefix,
    timestamp=None,
)

Run a sequence of Metabase questions parameterized with user_id, saving each as CSV.

Output filenames follow the pattern:

{file_prefix}_{short_name}_{user_id}_{iso8601_timestamp}.csv

Pass an explicit timestamp to share it across multiple export functions in one batch. If omitted, a timestamp is generated at call time.

Returns:

Type Description
list[str]

List of written file paths.

Source code in components/compliance/public/gdpr_access.py
def run_metabase_questions_for_user(
    user_id: int,
    questions: list[MetabaseQuestion],
    output_dir: str,
    file_prefix: str,
    timestamp: str | None = None,
) -> list[str]:
    """Run a sequence of Metabase questions parameterized with user_id, saving each as CSV.

    Output filenames follow the pattern:
        {file_prefix}_{short_name}_{user_id}_{iso8601_timestamp}.csv

    Pass an explicit timestamp to share it across multiple export functions in one batch.
    If omitted, a timestamp is generated at call time.

    Returns:
        List of written file paths.
    """
    # One shared timestamp per call keeps all exports of a batch grouped.
    timestamp = timestamp or datetime.now(tz=UTC).strftime("%Y%m%dT%H%M%SZ")
    auth = MetabaseAuth(api_type=MetabaseApiType.INTERNAL)
    saved_paths: list[str] = []

    for question in questions:
        params = {question.param_name: [str(user_id)]}
        url, body = build_api_request_url_and_body(
            question.question_id,
            params,
            output_format="csv",
        )
        resp = requests.post(url=url, data=body, auth=auth, timeout=80)
        resp.raise_for_status()

        filename = f"{file_prefix}_{question.short_name}_{user_id}_{timestamp}.csv"
        target_path = os.path.join(output_dir, filename)
        with open(target_path, "wb") as csv_file:
            csv_file.write(resp.content)
        current_logger.info(
            f"Wrote {question.short_name}",
            user_id=user_id,
            question=question.short_name,
            path=target_path,
        )
        saved_paths.append(target_path)

    return saved_paths

upload_files_to_gdpr_folder

upload_files_to_gdpr_folder(
    user_id, app_name, file_paths, client
)

Upload GDPR access files to a new Dataroom folder.

Creates folder 'GDPR access - {app_name} - {user_id}' inside the GDPR root folder (ID 2041) and uploads all files to it.

Returns:

Type Description
DataroomFileData

DataroomFileData for the created folder.

Source code in components/compliance/public/gdpr_access.py
def upload_files_to_gdpr_folder(
    user_id: int,
    app_name: str,
    file_paths: list[str],
    client: DataroomFileClient,
) -> DataroomFileData:
    """Upload GDPR access files to a new Dataroom folder.

    Creates folder 'GDPR access - {app_name} - {user_id}' inside the GDPR
    root folder and uploads all files to it, each under its own basename.

    Raises:
        RuntimeError: If the GDPR root folder cannot be found in Dataroom.

    Returns:
        DataroomFileData for the created folder.
    """
    root_folder = client.find_by_id(_GDPR_ROOT_FOLDER_ID)
    if root_folder is None:
        raise RuntimeError(
            f"GDPR root folder (ID {_GDPR_ROOT_FOLDER_ID}) not found in Dataroom"
        )

    folder_name = f"GDPR access - {app_name} - {user_id}"
    folder_path = f"{root_folder.path}/{folder_name}"
    folder = client.create_folder(folder_path)

    for file_path in file_paths:
        filename = os.path.basename(file_path)
        with open(file_path, "rb") as f:
            # Bug fix: upload each file under its own basename. The previous
            # code used a hard-coded placeholder path, so every file was
            # uploaded to the same destination, overwriting the others.
            client.upload_file(path=f"{folder_path}/{filename}", content=f)

    return folder