diff --git a/.github/workflows/deploy-account-wide-infra.yml b/.github/workflows/deploy-account-wide-infra.yml
index 564a8e1d1..d81fe0d80 100644
--- a/.github/workflows/deploy-account-wide-infra.yml
+++ b/.github/workflows/deploy-account-wide-infra.yml
@@ -51,6 +51,14 @@ jobs:
           echo "${HOME}/.asdf/bin" >> $GITHUB_PATH
           poetry install --no-root

+      - name: Build Lambda Layers
+        run: |
+          make build-layers
+          make build-dependency-layer
+
+      - name: Build Seed Sandbox Lambda
+        run: make build-seed-sandbox-lambda
+
       - name: Configure Management Credentials
         uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1
         with:
@@ -58,6 +66,12 @@
           role-to-assume: ${{ secrets.MGMT_ROLE_ARN }}
           role-session-name: github-actions-ci-${{ inputs.environment }}-${{ github.run_id }}

+      - name: Add S3 Permissions to Lambda Layer
+        env:
+          ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }}
+        run: |
+          make get-s3-perms ENV=${ACCOUNT_NAME}
+
       - name: Retrieve Server Certificates
         env:
           ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }}
@@ -109,6 +123,14 @@
           echo "${HOME}/.asdf/bin" >> $GITHUB_PATH
           poetry install --no-root

+      - name: Build Lambda Layers
+        run: |
+          make build-layers
+          make build-dependency-layer
+
+      - name: Build Seed Sandbox Lambda
+        run: make build-seed-sandbox-lambda
+
       - name: Configure Management Credentials
         uses: aws-actions/configure-aws-credentials@7474bc4690e29a8392af63c5b98e7449536d5c3a #v4.3.1
         with:
@@ -116,6 +138,12 @@
           role-to-assume: ${{ secrets.MGMT_ROLE_ARN }}
           role-session-name: github-actions-ci-${{ inputs.environment }}-${{ github.run_id }}

+      - name: Add S3 Permissions to Lambda Layer
+        env:
+          ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }}
+        run: |
+          make get-s3-perms ENV=${ACCOUNT_NAME}
+
       - name: Download Terraform Plan Artifacts
         env:
           ACCOUNT_NAME: ${{ vars.ACCOUNT_NAME }}
diff --git a/Makefile b/Makefile
index 394150e3a..34da8fb02 100644
--- a/Makefile
+++ b/Makefile
@@ -58,7 +58,11 @@ check-deploy: ## check the deploy environment is setup correctly
 check-deploy-warn:
 	@SHOULD_WARN_ONLY=true ./scripts/check-deploy-environment.sh

-build: check-warn build-api-packages build-layers build-dependency-layer ## Build the project
+build: check-warn build-api-packages build-layers build-dependency-layer build-seed-sandbox-lambda ## Build the project
+
+build-seed-sandbox-lambda:
+	@echo "Building seed_sandbox Lambda"
+	@cd lambdas/seed_sandbox && make build

 build-dependency-layer:
 	@echo "Building Lambda dependency layer"
diff --git a/README.md b/README.md
index 9dd572102..b4bf5c83d 100644
--- a/README.md
+++ b/README.md
@@ -375,8 +375,9 @@ In order to deploy to a sandbox environment (`dev-sandbox`, `qa-sandbox`, `int-s

 ### Sandbox database clear and reseed

-Any workspace suffixed with `-sandbox` has a small amount of additional infrastructure deployed to clear and reseed the DynamoDB tables (auth and document pointers) using a Lambda running
-on a cron schedule that can be found in the `cron/seed_sandbox` directory in the root of this project. The data used to seed the DynamoDB tables can found in the `cron/seed_sandbox/data` directory.
+The sandbox DynamoDB pointers tables are cleared and reseeded on a schedule by a Lambda found in the `lambdas/seed_sandbox` directory in the root of this project. The Lambda is deployed as
+account-wide infrastructure (see `terraform/account-wide-infrastructure/modules/seed_sandbox_lambda`) and seeds each configured table from the sample templates in `tests/data/samples`.

 ### Sandbox authorisation
diff --git a/lambdas/seed_sandbox/Makefile b/lambdas/seed_sandbox/Makefile
new file mode 100644
index 000000000..a9badf0d4
--- /dev/null
+++ b/lambdas/seed_sandbox/Makefile
@@ -0,0 +1,28 @@
+.PHONY: build clean
+
+build: clean
+	@echo "Building Lambda deployment package..."
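+	# Stage the handler, helper scripts, and sample pointer templates under
+	# dist/ so the zip matches the layout index.py imports from at runtime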
+	mkdir -p dist
+
+	# Copy the handler
+	cp index.py dist/
+
+	# Copy the required scripts
+	mkdir -p dist/scripts
+	cp ../../scripts/delete_all_table_items.py dist/scripts/
+	cp ../../scripts/seed_sandbox_table.py dist/scripts/
+	cp ../../scripts/seed_utils.py dist/scripts/
+
+	# Copy the pointer template data
+	mkdir -p dist/tests/data/samples
+	cp -r ../../tests/data/samples/*.json dist/tests/data/samples/
+
+	# Create the zip file
+	cd dist && zip -r seed_sandbox.zip . -x "*.pyc" -x "__pycache__/*" -x ".DS_Store"
+
+	@echo "✓ Lambda package created: dist/seed_sandbox.zip"
+
+clean:
+	@echo "Cleaning build artifacts..."
+	rm -rf dist
+	@echo "✓ Clean complete"
diff --git a/lambdas/seed_sandbox/index.py b/lambdas/seed_sandbox/index.py
new file mode 100644
index 000000000..02946da06
--- /dev/null
+++ b/lambdas/seed_sandbox/index.py
@@ -0,0 +1,122 @@
+"""
+Lambda handler for resetting specified DynamoDB tables with seed test data.
+
+This Lambda function runs on a schedule to clear and reseed the specified
+pointers tables with fresh test data.
+"""
+
+# flake8: noqa: T201
+
+import json
+import os
+
+from scripts.delete_all_table_items import delete_all_table_items
+from scripts.seed_sandbox_table import seed_sandbox_table
+
+
+def handler(event, context):
+    """
+    Lambda handler that orchestrates the reset of the specified tables.
+
+    The tables to be reset are specified by the TABLE_NAMES environment
+    variable as a comma-separated list.
+
+    Args:
+        event: Lambda event (from EventBridge schedule)
+        context: Lambda context
+
+    Returns:
+        dict: Response with status and details for each table
+    """
+    table_names_str = os.environ.get("TABLE_NAMES", "")
+    pointers_per_type = int(os.environ.get("POINTERS_PER_TYPE", "2"))
+
+    if not table_names_str:
+        error_msg = "TABLE_NAMES environment variable is required"
+        print(f"ERROR: {error_msg}")
+        return {"statusCode": 500, "body": json.dumps({"error": error_msg})}
+
+    table_names = [name.strip() for name in table_names_str.split(",") if name.strip()]
+
+    if not table_names:
+        error_msg = "No valid table names provided in TABLE_NAMES"
+        print(f"ERROR: {error_msg}")
+        return {"statusCode": 500, "body": json.dumps({"error": error_msg})}
+
+    print(
+        f"Starting table reset for {len(table_names)} table(s): {', '.join(table_names)}"
+    )
+    print(f"Pointers per type: {pointers_per_type}")
+
+    results = []
+    failed_tables = []
+
+    for table_name in table_names:
+        print(f"\n{'='*60}")
+        print(f"Processing table: {table_name}")
+        print(f"{'='*60}")
+
+        try:
+            print("Step 1: Deleting all items from table...")
+            pointers_deleted_count = delete_all_table_items(table_name=table_name)
+            print(f"✓ Deleted {pointers_deleted_count} items")
+
+            print("Step 2: Seeding table with fresh data...")
+            seed_result = seed_sandbox_table(
+                table_name=table_name,
+                pointers_per_type=pointers_per_type,
+                force=True,
+                write_csv=False,
+            )
+            print(f"✓ Created {seed_result['successful']} pointers")
+
+            results.append(
+                {
+                    "table_name": table_name,
+                    "status": "success",
+                    "pointers_deleted": pointers_deleted_count,
+                    "pointers_created": seed_result["successful"],
+                    "pointers_attempted": seed_result["attempted"],
+                    "pointers_failed": seed_result["failed"],
+                }
+            )
+
+        except Exception as e:
+            error_msg = f"Failed to reset table {table_name}: {str(e)}"
+            print(f"ERROR: {error_msg}")
+            failed_tables.append(table_name)
+            results.append(
+                {
+                    "table_name": table_name,
+                    "status": "failed",
+                    "error": str(e),
+                }
+            )
+
+    if failed_tables:
+        status_code = 500 if len(failed_tables) == len(table_names) else 207
+        message = (
+            f"Failed to reset {len(failed_tables)} table(s): {', '.join(failed_tables)}"
+        )
+    else:
+        status_code = 200
+        message = f"Successfully reset {len(table_names)} table(s)"
+
+    result = {
+        "statusCode": status_code,
+        "body": json.dumps(
+            {
+                "message": message,
+                "tables_processed": len(table_names),
+                "tables_succeeded": len(table_names) - len(failed_tables),
+                "tables_failed": len(failed_tables),
+                "results": results,
+                "pointers_per_type": pointers_per_type,
+            }
+        ),
+    }
+
+    print(f"\n{'='*60}")
+    print(f"RESULT: {message}")
+    print(f"{'='*60}")
+    return result
diff --git a/layer/nrlf/core/constants.py b/layer/nrlf/core/constants.py
index 793c2458c..ec4485ddd 100644
--- a/layer/nrlf/core/constants.py
+++ b/layer/nrlf/core/constants.py
@@ -5,6 +5,7 @@ class Source(Enum):
     NRLF = "NRLF"
     LEGACY = "NRL"  # not actually used
     PERFTEST = "NFT-SEED"
+    SANDBOX = "SANDBOX-SEED"


 VALID_SOURCES = frozenset(item.value for item in Source.__members__.values())
diff --git a/scripts/clone_dynamodb_table.py b/scripts/clone_dynamodb_table.py
new file mode 100755
index 000000000..326d25876
--- /dev/null
+++ b/scripts/clone_dynamodb_table.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+import boto3
+import fire
+
+
+def clone_table(source_table_name, target_table_name, copy_items=True, max_items=None):
+    """
+    Create a copy of a DynamoDB table for testing.
+
+    Args:
+        source_table_name: Name of table to clone
+        target_table_name: Name for the new table
+        copy_items: Whether to copy data (default: True)
+        max_items: Max items to copy (None = all)
+    """
+    dynamodb = boto3.client("dynamodb")
+    resource = boto3.resource("dynamodb")
+
+    # Get source table schema
+    source = dynamodb.describe_table(TableName=source_table_name)["Table"]
+
+    # Create new table with same schema
+    create_params = {
+        "TableName": target_table_name,
+        "KeySchema": source["KeySchema"],
+        "AttributeDefinitions": source["AttributeDefinitions"],
+    }
+
+    # Copy billing mode from source table
+    if "BillingModeSummary" in source:
+        create_params["BillingMode"] = source["BillingModeSummary"]["BillingMode"]
+        # If provisioned, copy the capacity settings
+        if source["BillingModeSummary"]["BillingMode"] == "PROVISIONED":
+            create_params["ProvisionedThroughput"] = {
+                "ReadCapacityUnits": source["ProvisionedThroughput"][
+                    "ReadCapacityUnits"
+                ],
+                "WriteCapacityUnits": source["ProvisionedThroughput"][
+                    "WriteCapacityUnits"
+                ],
+            }
+    else:
+        # Older tables without BillingModeSummary default to provisioned
+        create_params["BillingMode"] = "PROVISIONED"
+        create_params["ProvisionedThroughput"] = {
+            "ReadCapacityUnits": source["ProvisionedThroughput"]["ReadCapacityUnits"],
+            "WriteCapacityUnits": source["ProvisionedThroughput"]["WriteCapacityUnits"],
+        }
+
+    # Copy GSIs if they exist. Note that for provisioned tables, create_table
+    # requires ProvisionedThroughput on each GSI as well.
+    if "GlobalSecondaryIndexes" in source:
+        gsi_specs = []
+        for gsi in source["GlobalSecondaryIndexes"]:
+            gsi_spec = {
+                "IndexName": gsi["IndexName"],
+                "KeySchema": gsi["KeySchema"],
+                "Projection": gsi["Projection"],
+            }
+            if create_params["BillingMode"] == "PROVISIONED":
+                gsi_spec["ProvisionedThroughput"] = {
+                    "ReadCapacityUnits": gsi["ProvisionedThroughput"][
+                        "ReadCapacityUnits"
+                    ],
+                    "WriteCapacityUnits": gsi["ProvisionedThroughput"][
+                        "WriteCapacityUnits"
+                    ],
+                }
+            gsi_specs.append(gsi_spec)
+        create_params["GlobalSecondaryIndexes"] = gsi_specs
+
+    print(f"Creating table {target_table_name}...")
+    dynamodb.create_table(**create_params)
+
+    # Wait for table to be active
+    waiter = dynamodb.get_waiter("table_exists")
+    waiter.wait(TableName=target_table_name)
+    print("Table created and active")
+
+    # Copy items if requested
+    if copy_items:
+        source_table = resource.Table(source_table_name)
+        target_table = resource.Table(target_table_name)
+
+        count = 0
+        with target_table.batch_writer() as batch:
+            response = source_table.scan()
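+            # NOTE: a single scan() call returns at most 1 MB of data, so the
+            # pagination loop below is needed to copy larger tables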
+
+            for item in response["Items"]:
+                batch.put_item(Item=item)
+                count += 1
+                if max_items and count >= max_items:
+                    break
+
+            # Paginate if needed
+            while "LastEvaluatedKey" in response and (
+                not max_items or count < max_items
+            ):
+                response = source_table.scan(
+                    ExclusiveStartKey=response["LastEvaluatedKey"]
+                )
+                for item in response["Items"]:
+                    batch.put_item(Item=item)
+                    count += 1
+                    if max_items and count >= max_items:
+                        break
+
+        print(f"Copied {count} items")
+
+    return target_table_name
+
+
+if __name__ == "__main__":
+    fire.Fire(clone_table)
diff --git a/scripts/delete_all_table_items.py b/scripts/delete_all_table_items.py
new file mode 100755
index 000000000..3247c09c6
--- /dev/null
+++ b/scripts/delete_all_table_items.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+import sys
+
+import boto3
+from botocore.exceptions import ClientError
+
+# fire is only needed for CLI use and is not packaged into the Lambda,
+# so fall back gracefully when it is missing
+try:
+    import fire
+except ImportError:
+    fire = None
+
+
+def delete_all_table_items(table_name):
+    """Delete all items from a DynamoDB table."""
+    dynamodb = boto3.resource("dynamodb")
+    table = dynamodb.Table(table_name)
+
+    try:
+        # Verify the table exists
+        key_names = [key["AttributeName"] for key in table.key_schema]
+    except ClientError as e:
+        error_code = e.response["Error"]["Code"]
+        if error_code == "ResourceNotFoundException":
+            print(f"Error: Table '{table_name}' does not exist")
+            sys.exit(1)
+        elif error_code == "AccessDeniedException":
+            print(f"Error: No permission to access table '{table_name}'")
+            sys.exit(1)
+        else:
+            print(f"Error accessing table: {e}")
+            sys.exit(1)
+
+    # Scan and delete items in batches
+    deleted_count = 0
+    try:
+        with table.batch_writer() as batch:
+            scan_kwargs = {
+                "ProjectionExpression": ",".join(key_names),
+            }
+
+            while True:
+                try:
+                    response = table.scan(**scan_kwargs)
+
+                    for item in response["Items"]:
+                        batch.delete_item(Key=item)
+                        deleted_count += 1
+
+                    if "LastEvaluatedKey" not in response:
+                        break
+                    scan_kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"]
+
+                    if deleted_count % 100 == 0:
+                        print(f"Deleted {deleted_count} items...", end="\r")
+
+                except ClientError as e:
+                    error_code = e.response["Error"]["Code"]
+                    if error_code == "ProvisionedThroughputExceededException":
+                        print(
+                            f"\nWarning: Throttled at {deleted_count} items. Retrying..."
+                        )
+                        continue
+                    else:
+                        raise
+
+    except Exception as e:
+        print(f"\nError during deletion: {e}")
+        print(f"Successfully deleted {deleted_count} items before error")
+        sys.exit(1)
+
+    print(f"\n✓ Cleared {deleted_count} items from {table_name}")
+    return deleted_count
+
+
+if __name__ == "__main__":
+    if fire is None:
+        print("Error: fire module not available")
+        sys.exit(1)
+    fire.Fire(delete_all_table_items)
diff --git a/scripts/reset_sandbox_table.py b/scripts/reset_sandbox_table.py
new file mode 100755
index 000000000..f9a7b69de
--- /dev/null
+++ b/scripts/reset_sandbox_table.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+"""
+Resets a sandbox table by clearing all items and reseeding with fresh data.
+
+This script is intended for manual CLI use.
+
+A separate Lambda function (../lambdas/seed_sandbox) performs this same reset
+operation on a schedule, but this script allows for on-demand resets without
+needing to wait for the scheduled job.
+"""
+import sys
+
+import fire
+from delete_all_table_items import delete_all_table_items
+from seed_sandbox_table import seed_sandbox_table
+
+
+def reset_sandbox_table(table_name: str, pointers_per_type: int = 2):
+    """
+    Reset a sandbox table by clearing all items and reseeding with fresh data.
+
+    Args:
+        table_name: Name of the DynamoDB table to reset
+        pointers_per_type: Number of pointers per type per custodian (default: 2)
+    """
+    print(f"=== Resetting Sandbox Table: {table_name} ===\n")
+
+    print("Step 1: Deleting all existing items...")
+    try:
+        delete_all_table_items(table_name)
+        print()
+    except SystemExit as e:
+        print("✗ Failed to delete items. Aborting reset.")
+        sys.exit(e.code)
+    except Exception as e:
+        print(f"✗ Unexpected error during deletion: {e}")
+        sys.exit(1)
+
+    print("Step 2: Seeding with fresh pointer data...")
+    try:
+        result = seed_sandbox_table(table_name, pointers_per_type, force=True)
+        print("\n=== ✓ Reset Complete ===")
+        print(
+            f"Table '{table_name}' has been reset with {result['successful']} fresh pointers"
+        )
+        if result["failed"] > 0:
+            print(f"⚠️ {result['failed']} pointer(s) failed to create")
+    except SystemExit as e:
+        print("✗ Failed to seed table after deletion.")
+        sys.exit(e.code)
+    except Exception as e:
+        print(f"✗ Unexpected error during seeding: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    fire.Fire(reset_sandbox_table)
diff --git a/scripts/seed_nft_tables.py b/scripts/seed_nft_tables.py
index 5c5d118bd..a39d13194 100644
--- a/scripts/seed_nft_tables.py
+++ b/scripts/seed_nft_tables.py
@@ -9,6 +9,7 @@ import boto3
 import fire
 import numpy as np

+from seed_utils import CHECKSUM_WEIGHTS, TestNhsNumbersIterator
 from nrlf.core.boto import get_s3_client
 from nrlf.core.constants import (
@@ -22,7 +23,6 @@
 from nrlf.tests.data import load_document_reference
 from tests.performance.perftest_environment import create_extract_metadata_file
 from tests.performance.seed_data_constants import (  # DEFAULT_COUNT_DISTRIBUTIONS,
-    CHECKSUM_WEIGHTS,
     CUSTODIAN_DISTRIBUTION_PROFILES,
     TYPE_DISTRIBUTION_PROFILES,
 )
@@ -38,30 +38,6 @@
 DOC_REF_TEMPLATE = load_document_reference("NFT-template")


-class TestNhsNumbersIterator:
-    def __iter__(self):
-        self.first9 = 900000000
-        return self
-
-    def __next__(self):
-        if self.first9 > 999999999:
-            raise StopIteration
-        checksum = 10
-        while checksum == 10:
-            self.first9 += 1
-            nhs_no_digits = list(map(int, str(self.first9)))
-            checksum = (
-                sum(
-                    weight * digit
-                    for weight, digit in zip(CHECKSUM_WEIGHTS, nhs_no_digits)
-                )
-                * -1
-                % 11
-            )
-        nhs_no = str(self.first9) + str(checksum)
-        return nhs_no
-
-
 def _make_seed_pointer(
     type_code: str, custodian: str, nhs_number: str, counter: int
 ) -> DocumentPointer:
diff --git a/scripts/seed_sandbox_table.py b/scripts/seed_sandbox_table.py
new file mode 100755
index 000000000..688628200
--- /dev/null
+++ b/scripts/seed_sandbox_table.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python
+"""
+Seeds a sandbox table with realistic pointer data using sample templates.
+
+By default, creates 2 pointers of each type for 2 different custodians, one of
+which is the custodian that all sandbox users are represented by.
+"""
+import copy
+import csv
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+import boto3
+from botocore.exceptions import ClientError
+
+# Needed for when the script is run in Lambda, where modules are in the
+# scripts subdirectory
+try:
+    from seed_utils import TestNhsNumbersIterator
+except ImportError:
+    # In Lambda, modules are in the scripts subdirectory
+    from scripts.seed_utils import TestNhsNumbersIterator
+
+# fire is only needed for CLI use and is not packaged into the Lambda
+try:
+    import fire
+except ImportError:
+    fire = None
+
+try:
+    from nrlf.core.dynamodb.model import DocumentPointer
+    from nrlf.core.logger import logger
+    from nrlf.producer.fhir.r4.model import DocumentReference
+
+    logger.setLevel("ERROR")
+except ImportError as e:
+    print(f"Warning: Failed to import NRLF modules: {e}")
+    raise
+
+resource = boto3.resource("dynamodb")
+
+SAMPLE_TEMPLATES = {
+    "1382601000000107": "QUY_RESPECT_FORM_Feb25.json",
+    "16521000000101": "G3H9E_LLOYD_GEORGE_RECORD_FOLDER_Aug25.json",
+    "2181441000000107": "11X_PERSONALISED_CARE_AND_SUPPORT_PLAN_Feb25.json",
+    "735324008": "11X_TREATMENT_ESCALATION_PLAN_Feb25.json",
+    "736253002": "RAT_MENTAL_HEALTH_PLAN_Feb25.json",
+    "736366004": "11X_ADVANCE_CARE_PLAN_Feb25.json",
+    "861421000000109": "VM8W7_EOL_COORDINATION_SUMMARY_Feb25.json",
+    "887701000000100": "B3H2B_EMERGENCY_HEALTHCARE_PLAN_Feb25.json",
+}
+
+# Y05868 is the test custodian required for int-sandbox, since it's the
+# custodian that all sandbox users are represented by
+CUSTODIANS = ["Y05868", "Y12345"]
+AUTHOR = "X54321"
+
+
+def _load_sample_template(filename: str) -> dict:
+    samples_dir = Path(__file__).parent.parent / "tests" / "data" / "samples"
+    filepath = samples_dir / filename
+
+    with open(filepath, "r") as f:
+        return json.load(f)
+
+
+def _make_realistic_pointer(
+    template: dict,
+    custodian: str,
+    nhs_number: str,
+    counter: int,
+) -> DocumentPointer:
+    doc_ref_dict = copy.deepcopy(template)
+
+    doc_ref_dict["id"] = f"{custodian}-SANDBOX-{str(counter).zfill(6)}"
+    doc_ref_dict["subject"]["identifier"]["value"] = nhs_number
+    doc_ref_dict["custodian"]["identifier"]["value"] = custodian
+    doc_ref_dict["author"][0]["identifier"]["value"] = AUTHOR
+
+    if "masterIdentifier" in doc_ref_dict:
+        doc_ref_dict["masterIdentifier"]["value"] = f"sandbox-{custodian}-{counter}"
+
+    doc_ref = DocumentReference(**doc_ref_dict)
+
+    pointer = DocumentPointer.from_document_reference(doc_ref, source="SANDBOX-SEED")
+    return pointer
+
+
+def _validate_table_access(table_name: str):
+    """Validate that the table exists and can be accessed"""
+    try:
+        table = resource.Table(table_name)
+        table.load()
+        return table
+    except ClientError as e:
+        error_code = e.response["Error"]["Code"]
+        if error_code == "ResourceNotFoundException":
+            print(f"Error: Table '{table_name}' does not exist")
+            sys.exit(1)
+        elif error_code == "AccessDeniedException":
+            print(f"Error: No permission to access table '{table_name}'")
+            sys.exit(1)
+        else:
+            print(f"Error accessing table: {e}")
+            sys.exit(1)
+
+
+def _check_for_existing_sandbox_pointers(table, force: bool):
+    if force:
+        print("⚠️ Force mode enabled - will overwrite existing sandbox pointers")
+        return
+
+    try:
+        # NB: no Limit here - DynamoDB applies Limit before FilterExpression,
+        # so Limit=1 would only ever examine the first item in the table
+        response = table.scan(
+            FilterExpression="begins_with(#src, :sandbox)",
+            ExpressionAttributeNames={"#src": "source"},
+            ExpressionAttributeValues={":sandbox": "SANDBOX"},
+            ProjectionExpression="id",
+        )
+
+        if response.get("Items"):
+            print("\n⚠️ Warning: Sandbox pointers already exist in this table.")
+            print(
+                "Running this script will OVERWRITE any existing sandbox pointers that have the same IDs."
+            )
+            print("\nOptions:")
+            print("  1. Use --force flag to overwrite existing pointers")
+            print("  2. Use reset_sandbox_table.py to clear all items first")
+            print("  3. Use delete_all_table_items.py to manually clear the table\n")
+            sys.exit(1)
+    except ClientError as e:
+        print(f"Warning: Could not check for existing pointers: {e}")
+
+
+def _load_pointer_templates() -> dict[str, dict]:
+    templates = {}
+    for pointer_type, filename in SAMPLE_TEMPLATES.items():
+        try:
+            templates[pointer_type] = _load_sample_template(filename)
+            print(f"✓ Loaded template for type {pointer_type}")
+        except FileNotFoundError:
+            print(f"✗ Template file not found: {filename}")
+            continue
+        except json.JSONDecodeError as e:
+            print(f"✗ Invalid JSON in template {filename}: {e}")
+            continue
+        except Exception as e:
+            print(f"✗ Failed to load template {filename}: {e}")
+            continue
+
+    if not templates:
+        print("Error: No templates could be loaded. Exiting.")
+        sys.exit(1)
+
+    return templates
+
+
+def _generate_and_write_pointers(
+    table_name: str, templates: dict[str, dict], pointers_per_type: int, testnum_iter
+) -> tuple[list[list[str]], int]:
+    """Generate pointers and write them to DynamoDB in batches."""
+    counter = 0
+    pointer_data: list[list[str]] = []
+    batch_upsert_items: list[dict[str, Any]] = []
+
+    for pointer_type, template in templates.items():
+        for custodian in CUSTODIANS:
+            for i in range(pointers_per_type):
+                counter += 1
+
+                try:
+                    nhs_number = next(testnum_iter)
+                except StopIteration:
+                    print(f"\n✗ Error: Ran out of NHS numbers at pointer {counter}")
+                    break
+
+                try:
+                    pointer = _make_realistic_pointer(
+                        template, custodian, nhs_number, counter
+                    )
+
+                    put_req = {"PutRequest": {"Item": pointer.model_dump()}}
+                    batch_upsert_items.append(put_req)
+
+                    pointer_data.append(
+                        [
+                            pointer.id,
+                            pointer_type,
+                            pointer.custodian,
+                            pointer.nhs_number,
+                        ]
+                    )
+
+                    # batch_write_item accepts at most 25 items per request
+                    if len(batch_upsert_items) >= 25:
+                        try:
+                            response = resource.batch_write_item(
+                                RequestItems={table_name: batch_upsert_items}
+                            )
+
+                            if response.get("UnprocessedItems"):
+                                unprocessed = len(
+                                    response["UnprocessedItems"].get(table_name, [])
+                                )
+                                print(f"\nWarning: {unprocessed} unprocessed items")
+
+                            batch_upsert_items = []
+                            print(".", end="", flush=True)
+
+                        except ClientError as e:
+                            error_code = e.response["Error"]["Code"]
+                            if error_code == "ProvisionedThroughputExceededException":
+                                print(
+                                    f"\n✗ Throttled at pointer {counter}. Retrying batch..."
+                                )
+                            else:
+                                print(f"\n✗ Error writing batch, batch cancelled: {e}")
+                                batch_upsert_items = []
+
+                except ValueError as e:
+                    print(f"\n✗ Validation error for pointer {counter}: {e}")
+                    continue
+                except Exception as e:
+                    print(f"\n✗ Error creating pointer {counter}: {e}")
+                    continue
+
+    if batch_upsert_items:
+        try:
+            response = resource.batch_write_item(
+                RequestItems={table_name: batch_upsert_items}
+            )
+            if response.get("UnprocessedItems"):
+                unprocessed = len(response["UnprocessedItems"].get(table_name, []))
+                print(f"\nWarning: {unprocessed} unprocessed items in final batch")
+        except ClientError as e:
+            print(f"\n✗ Error writing final batch, batch cancelled: {e}")
+
+    return pointer_data, counter
+
+
+def seed_sandbox_table(
+    table_name: str,
+    pointers_per_type: int = 2,
+    force: bool = False,
+    write_csv: bool = True,
+):
+    """
+    Seed a sandbox table with realistic pointer data.
+
+    Args:
+        table_name: Name of the DynamoDB table to seed
+        pointers_per_type: Number of pointers per type per custodian (default: 2)
+        force: If True, overwrite existing sandbox pointers without prompting (default: False)
+        write_csv: If True, write pointer data to CSV file (default: True)
+    """
+    print(
+        f"Seeding table {table_name} with {pointers_per_type} pointers per type per custodian"
+    )
+    print(f"Total pointer types: {len(SAMPLE_TEMPLATES)}")
+    print(f"Total custodians: {len(CUSTODIANS)}")
+    print(
+        f"Total pointers to create: {len(SAMPLE_TEMPLATES) * len(CUSTODIANS) * pointers_per_type}"
+    )
+
+    table = _validate_table_access(table_name)
+    _check_for_existing_sandbox_pointers(table, force)
+
+    testnum_cls = TestNhsNumbersIterator()
+    testnum_iter = iter(testnum_cls)
+
+    start_time = datetime.now(tz=timezone.utc)
+
+    templates = _load_pointer_templates()
+    pointer_data, total_attempts = _generate_and_write_pointers(
+        table_name, templates, pointers_per_type, testnum_iter
+    )
+
+    print("\n✓ Done!")
+
+    end_time = datetime.now(tz=timezone.utc)
+    duration = (end_time - start_time).total_seconds()
+
+    total_pointers_created = len(pointer_data)
+    print(
+        f"\nAttempted {total_attempts} pointers, successfully created {total_pointers_created}"
+    )
+
+    if total_attempts > total_pointers_created:
+        failed = total_attempts - total_pointers_created
+        print(f"⚠️ {failed} pointer(s) failed to create")
+
+    print(f"Completed in {duration:.2f} seconds")
+    if duration > 0:
+        print(f"Average: {total_pointers_created/duration:.2f} pointers/second")
+
+    if write_csv:
+        try:
+            _write_pointer_extract(table_name, pointer_data)
+        except Exception as e:
+            print(f"Warning: Failed to write CSV extract: {e}")
+
+    return {
+        "successful": total_pointers_created,
+        "attempted": total_attempts,
+        "failed": total_attempts - total_pointers_created,
+    }
+
+
+def _write_pointer_extract(table_name: str, pointer_data: list[list[str]]):
+    """Write pointer data to CSV file for reference."""
+    try:
+        output_dir = Path(__file__).parent.parent / "dist" / "sandbox"
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        csv_file = (
+            output_dir
+            / f"sandbox-pointers-{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
+        )
+
+        with open(csv_file, "w") as f:
+            writer = csv.writer(f)
+            writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"])
+            writer.writerows(pointer_data)
+
+        print(f"Pointer data saved to {csv_file}")
+    except PermissionError:
+        print(f"Error: Permission denied writing to {output_dir}")
+        raise
+    except Exception as e:
+        print(f"Error writing CSV file: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    if fire is None:
+        print("Error: fire module not available")
+        sys.exit(1)
+    fire.Fire(seed_sandbox_table)
diff --git a/scripts/seed_utils.py b/scripts/seed_utils.py
new file mode 100644
index 000000000..50f38e8e8
--- /dev/null
+++ b/scripts/seed_utils.py
@@ -0,0 +1,32 @@
+"""
+Shared utilities for seeding DynamoDB tables with pointer data.
+"""
+
+# NHS number checksum weights (10, 9, 8, 7, 6, 5, 4, 3, 2)
+CHECKSUM_WEIGHTS = list(range(10, 1, -1))
+
+
+class TestNhsNumbersIterator:
+    """Iterator that generates valid NHS numbers with proper checksums."""
+
+    def __iter__(self):
+        self.first9 = 900000000
+        return self
+
+    def __next__(self):
+        if self.first9 > 999999999:
+            raise StopIteration
+        checksum = 10
+        while checksum == 10:
+            self.first9 += 1
+            nhs_no_digits = list(map(int, str(self.first9)))
+            checksum = (
+                sum(
+                    weight * digit
+                    for weight, digit in zip(CHECKSUM_WEIGHTS, nhs_no_digits)
+                )
+                * -1
+                % 11
+            )
+        nhs_no = str(self.first9) + str(checksum)
+        return nhs_no
diff --git a/scripts/tests/test_delete_all_table_items.py b/scripts/tests/test_delete_all_table_items.py
new file mode 100644
index 000000000..ea4261574
--- /dev/null
+++ b/scripts/tests/test_delete_all_table_items.py
@@ -0,0 +1,267 @@
+from unittest.mock import MagicMock, PropertyMock, patch
+
+import pytest
+from botocore.exceptions import ClientError
+from delete_all_table_items import delete_all_table_items
+
+
+class TestDeleteAllTableItems:
+
+    @patch("delete_all_table_items.boto3")
+    def test_successful_deletion_single_page(self, mock_boto3):
+
+        mock_table = MagicMock()
+        mock_table.key_schema = [
+            {"AttributeName": "id", "KeyType": "HASH"},
+        ]
+        mock_table.scan.return_value = {
+            "Items": [{"id": "item1"}, {"id": "item2"}, {"id": "item3"}],
+        }
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        result = delete_all_table_items("test-table")
+
+        assert result == 3
+        mock_resource.Table.assert_called_once_with("test-table")
+        mock_table.scan.assert_called_once()
+
+    @patch("delete_all_table_items.boto3")
+    def test_successful_deletion_multiple_pages(self, mock_boto3):
+
+        mock_table = MagicMock()
+        mock_table.key_schema = [
+            {"AttributeName": "id", "KeyType": "HASH"},
+        ]
+
+        mock_table.scan.side_effect = [
+            {
+                "Items": [{"id": f"item{i}"} for i in range(100)],
+                "LastEvaluatedKey": {"id": "item99"},
+            },
+            {
+                "Items": [{"id": f"item{i}"} for i in range(100, 150)],
+            },
+        ]
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        result = delete_all_table_items("test-table")
+
+        assert result == 150
+        assert mock_table.scan.call_count == 2
+
+    @patch("delete_all_table_items.boto3")
+    def test_successful_deletion_composite_key(self, mock_boto3):
+        """Test deletion with composite key (hash + range)."""
+
+        mock_table = MagicMock()
+        mock_table.key_schema = [
+            {"AttributeName": "pk", "KeyType": "HASH"},
+            {"AttributeName": "sk", "KeyType": "RANGE"},
+        ]
+        mock_table.scan.return_value = {
+            "Items": [
+                {"pk": "cust1", "sk": "ptr1"},
+                {"pk": "cust1", "sk": "ptr2"},
+            ],
+        }
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        result = delete_all_table_items("test-table")
+
+        assert result == 2
+        call_kwargs = mock_table.scan.call_args[1]
+        assert "pk,sk" in call_kwargs["ProjectionExpression"]
+
+    @patch("delete_all_table_items.boto3")
+    def test_empty_table(self, mock_boto3):
+
+        mock_table = MagicMock()
+        mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}]
+        mock_table.scan.return_value = {"Items": []}
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        result = delete_all_table_items("test-table")
+
+        assert result == 0
+
+    @patch("delete_all_table_items.boto3")
+    @patch("builtins.print")
+    @patch("sys.exit")
+    def test_table_not_found(self, mock_exit, mock_print, mock_boto3):
+
+        mock_exit.side_effect = SystemExit(1)
+
+        mock_table = MagicMock()
+        type(mock_table).key_schema = PropertyMock(
+            side_effect=ClientError(
+                {
+                    "Error": {
+                        "Code": "ResourceNotFoundException",
+                        "Message": "Table not found",
+                    }
+                },
+                "DescribeTable",
+            )
+        )
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        with pytest.raises(SystemExit):
+            delete_all_table_items("nonexistent-table")
+
+        mock_exit.assert_called_once_with(1)
+        mock_print.assert_called_with("Error: Table 'nonexistent-table' does not exist")
+
+    @patch("delete_all_table_items.boto3")
+    @patch("builtins.print")
+    @patch("sys.exit")
+    def test_access_denied(self, mock_exit, mock_print, mock_boto3):
+
+        mock_exit.side_effect = SystemExit(1)
+
+        mock_table = MagicMock()
+        type(mock_table).key_schema = PropertyMock(
+            side_effect=ClientError(
+                {
+                    "Error": {
+                        "Code": "AccessDeniedException",
+                        "Message": "Access denied",
+                    }
+                },
+                "DescribeTable",
+            )
+        )
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        with pytest.raises(SystemExit):
+            delete_all_table_items("protected-table")
+
+        mock_exit.assert_called_once_with(1)
+        mock_print.assert_called_with(
+            "Error: No permission to access table 'protected-table'"
+        )
+
+    @patch("delete_all_table_items.boto3")
+    @patch("builtins.print")
+    def test_throttling_warning(self, mock_print, mock_boto3):
+
+        mock_table = MagicMock()
+        mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}]
+
+        throttle_count = [0]
+
+        def scan_side_effect(**kwargs):
+            if throttle_count[0] == 0:
+                throttle_count[0] += 1
+                raise ClientError(
+                    {"Error": {"Code": "ProvisionedThroughputExceededException"}},
+                    "Scan",
+                )
+            return {"Items": [{"id": "item1"}]}
+
+        mock_table.scan.side_effect = scan_side_effect
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        result = delete_all_table_items("test-table")
+
+        assert result == 1
+        warning_calls = [
+            call for call in mock_print.call_args_list if "Throttled" in str(call)
+        ]
+        assert len(warning_calls) > 0
+
+    @patch("delete_all_table_items.boto3")
+    @patch("sys.exit")
+    def test_unexpected_error(self, mock_exit, mock_boto3):
+
+        mock_exit.side_effect = SystemExit(1)
+
+        mock_table = MagicMock()
+        mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}]
+        mock_table.scan.side_effect = RuntimeError("Unexpected error")
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        with pytest.raises(SystemExit):
+            delete_all_table_items("test-table")
+
+        mock_exit.assert_called_once_with(1)
+
+    @patch("delete_all_table_items.boto3")
+    @patch("builtins.print")
+    def test_progress_indicator(self, mock_print, mock_boto3):
+
+        mock_table = MagicMock()
+        mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}]
+
+        mock_table.scan.side_effect = [
+            {
+                "Items": [{"id": f"item{i}"} for i in range(100)],
+                "LastEvaluatedKey": {"id": "item99"},
+            },
+            {
+                "Items": [{"id": f"item{i}"} for i in range(100, 200)],
+                "LastEvaluatedKey": {"id": "item199"},
+            },
+            {
+                "Items": [{"id": f"item{i}"} for i in range(200, 250)],
+            },
+        ]
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        result = delete_all_table_items("test-table")
+
+        assert result == 250
+
+        progress_calls = [
+            call
+            for call in mock_print.call_args_list
+            if "Deleted" in str(call) and "items..." in str(call)
+        ]
+        assert len(progress_calls) == 2
+
+    @patch("delete_all_table_items.boto3")
+    def test_batch_writer_context_manager(self, mock_boto3):
+
+        mock_batch_writer = MagicMock()
+        mock_table = MagicMock()
+        mock_table.key_schema = [{"AttributeName": "id", "KeyType": "HASH"}]
+        mock_table.scan.return_value = {
+            "Items": [{"id": "item1"}],
+        }
+        mock_table.batch_writer.return_value.__enter__.return_value = mock_batch_writer
+
+        mock_resource = MagicMock()
+        mock_resource.Table.return_value = mock_table
+        mock_boto3.resource.return_value = mock_resource
+
+        result = delete_all_table_items("test-table")
+
+        mock_table.batch_writer.assert_called_once()
+        mock_batch_writer.delete_item.assert_called_once_with(Key={"id": "item1"})
diff --git a/scripts/tests/test_seed_sandbox_table.py b/scripts/tests/test_seed_sandbox_table.py
new file mode 100644
index 000000000..c7096d68d
--- /dev/null
+++ b/scripts/tests/test_seed_sandbox_table.py
@@ -0,0 +1,363 @@
+import json
+from unittest.mock import MagicMock, PropertyMock, mock_open, patch
+
+import pytest
+from botocore.exceptions import ClientError
+from seed_sandbox_table import (
+    _check_for_existing_sandbox_pointers,
+    _generate_and_write_pointers,
+    _load_pointer_templates,
+    _load_sample_template,
+    _make_realistic_pointer,
+    _validate_table_access,
+    _write_pointer_extract,
+    seed_sandbox_table,
+)
+
+
+class TestLoadSampleTemplate:
+
+    @patch("builtins.open", new_callable=mock_open, read_data='{"id": "test"}')
+    def test_load_valid_template(self, mock_file):
+        result = _load_sample_template("test.json")
+
+        assert result == {"id": "test"}
+        mock_file.assert_called_once()
+
+    @patch("builtins.open", side_effect=FileNotFoundError)
+    def test_load_missing_template(self, mock_file):
+        with pytest.raises(FileNotFoundError):
+            _load_sample_template("missing.json")
+
+    @patch("builtins.open", new_callable=mock_open, read_data="invalid json")
+    def test_load_invalid_json(self, mock_file):
+        with pytest.raises(json.JSONDecodeError):
+            _load_sample_template("invalid.json")
+
+
+class TestValidateTableAccess:
+
+    @patch("seed_sandbox_table.resource")
+    def test_successful_table_access(self, mock_resource):
+        mock_table = MagicMock()
+        mock_resource.Table.return_value = mock_table
+
+        result = _validate_table_access("test-table")
+
+        assert result == mock_table
+        mock_resource.Table.assert_called_once_with("test-table")
+        mock_table.load.assert_called_once()
+
+    @patch("seed_sandbox_table.resource")
+    @patch("builtins.print")
+    @patch("sys.exit")
+    def test_table_not_found(self, mock_exit, mock_print, mock_resource):
+        mock_exit.side_effect = SystemExit(1)
+
+        mock_table = MagicMock()
+        mock_table.load.side_effect = ClientError(
+            {
+                "Error": {
+                    "Code": "ResourceNotFoundException",
+                    "Message": "Table not found",
+                }
+            },
+            "DescribeTable",
+        )
+        mock_resource.Table.return_value = mock_table
+
+        with pytest.raises(SystemExit):
+            _validate_table_access("nonexistent-table")
+
+        mock_exit.assert_called_once_with(1)
+        mock_print.assert_called_with("Error: Table 'nonexistent-table' does not exist")
+
+    @patch("seed_sandbox_table.resource")
+    @patch("builtins.print")
+    @patch("sys.exit")
+    def test_access_denied(self, mock_exit, mock_print, mock_resource):
+        mock_exit.side_effect = SystemExit(1)
+
+        mock_table = MagicMock()
+        mock_table.load.side_effect = ClientError(
+            {"Error": {"Code": "AccessDeniedException", "Message": "Access denied"}},
+            "DescribeTable",
+        )
+        mock_resource.Table.return_value = mock_table
+
+        with pytest.raises(SystemExit):
+            _validate_table_access("protected-table")
+
+        mock_exit.assert_called_once_with(1)
+        mock_print.assert_called_with(
+            "Error: No permission to access table 'protected-table'"
+        )
+
+
+class TestCheckForExistingSandboxPointers:
+
+    @patch("builtins.print")
+    def test_force_mode_enabled(self, mock_print):
+        mock_table = MagicMock()
+
+        _check_for_existing_sandbox_pointers(mock_table, force=True)
+
+        mock_table.scan.assert_not_called()
+        mock_print.assert_called_with(
+            "⚠️ Force mode enabled - will overwrite existing sandbox pointers"
+        )
+
+    def test_no_existing_pointers(self):
+        mock_table = MagicMock()
+        mock_table.scan.return_value = {"Items": []}
+
+        _check_for_existing_sandbox_pointers(mock_table, force=False)
+
+        mock_table.scan.assert_called_once()
+
+    @patch("builtins.print")
+    @patch("sys.exit")
+    def test_existing_pointers_found(self, mock_exit, mock_print):
+        mock_exit.side_effect = SystemExit(1)
+
+        mock_table = MagicMock()
+        mock_table.scan.return_value = {"Items": [{"id": "existing"}]}
+
+        with pytest.raises(SystemExit):
+            _check_for_existing_sandbox_pointers(mock_table, force=False)
+
+        mock_exit.assert_called_once_with(1)
+
+        print_calls = [str(call) for call in mock_print.call_args_list]
+        assert any(
+            "Warning: Sandbox pointers already exist" in call for call in print_calls
+        )
+
+
+class TestLoadPointerTemplates:
+
+    @patch("seed_sandbox_table._load_sample_template")
+    @patch(
+        "seed_sandbox_table.SAMPLE_TEMPLATES",
+        {"type1": "file1.json", "type2": "file2.json"},
+    )
+    def test_load_all_templates_success(self, mock_load):
+        mock_load.side_effect = [{"template": "1"}, {"template": "2"}]
+
+        result = _load_pointer_templates()
+
+        assert len(result) == 2
+        assert result["type1"] == {"template": "1"}
+        assert result["type2"] == {"template": "2"}
+
+    @patch("seed_sandbox_table._load_sample_template")
+    @patch(
+        "seed_sandbox_table.SAMPLE_TEMPLATES",
+        {"type1": "file1.json", "type2": "file2.json"},
+    )
+    @patch("builtins.print")
+    def test_load_templates_with_failures(self, mock_print, mock_load):
+        mock_load.side_effect = [{"template": "1"}, FileNotFoundError()]
+
+        result = _load_pointer_templates()
+
+        assert len(result) == 1
+        assert result["type1"] == {"template": "1"}
+        mock_print.assert_any_call("✗ Template file not found: file2.json")
+
+    @patch("seed_sandbox_table._load_sample_template")
+    @patch("seed_sandbox_table.SAMPLE_TEMPLATES", {"type1": "file1.json"})
+    @patch("builtins.print")
+    @patch("sys.exit")
+    def test_load_templates_all_fail(self, mock_exit, mock_print, mock_load):
+        mock_exit.side_effect = SystemExit(1)
+        mock_load.side_effect = FileNotFoundError()
+
+        with pytest.raises(SystemExit):
+            _load_pointer_templates()
+
+        mock_exit.assert_called_once_with(1)
+        mock_print.assert_any_call("Error: No templates could be loaded. Exiting.")
+
+
+class TestMakeRealisticPointer:
+
+    @patch("seed_sandbox_table.DocumentReference")
+    @patch("seed_sandbox_table.DocumentPointer")
+    def test_create_pointer_success(self, mock_pointer_class, mock_doc_ref_class):
+        template = {
+            "id": "original",
+            "subject": {"identifier": {"value": "0000000000"}},
+            "custodian": {"identifier": {"value": "OLD"}},
+            "author": [{"identifier": {"value": "OLD_AUTHOR"}}],
+            "masterIdentifier": {"value": "old-master"},
+        }
+
+        mock_doc_ref = MagicMock()
+        mock_doc_ref_class.return_value = mock_doc_ref
+
+        mock_pointer = MagicMock()
+        mock_pointer_class.from_document_reference.return_value = mock_pointer
+
+        result = _make_realistic_pointer(template, "Y12345", "9000000001", 1)
+
+        assert result == mock_pointer
+        mock_doc_ref_class.assert_called_once()
+        mock_pointer_class.from_document_reference.assert_called_once_with(
+            mock_doc_ref, source="SANDBOX-SEED"
+        )
+
+
+class TestGenerateAndWritePointers:
+
+    @patch("seed_sandbox_table.resource")
+    @patch("seed_sandbox_table._make_realistic_pointer")
+    @patch("seed_sandbox_table.CUSTODIANS", ["CUST1"])
+    def test_generate_pointers_success(self, mock_make_pointer, mock_resource):
+        templates = {"type1": {"template": "data"}}
+
+        mock_pointer = MagicMock()
+        mock_pointer.id = "TEST-001"
+        mock_pointer.custodian = "CUST1"
+        mock_pointer.nhs_number = "9000000001"
+        mock_pointer.model_dump.return_value = {"id": "TEST-001"}
+        mock_make_pointer.return_value = mock_pointer
+
+        mock_resource.batch_write_item.return_value = {}
+
+        nhs_iter = iter(["9000000001", "9000000002"])
+
+        pointer_data, total_attempts = _generate_and_write_pointers(
+            "test-table", templates, 2, nhs_iter
+        )
+
+        assert total_attempts == 2
+        assert len(pointer_data) == 2
+        assert pointer_data[0][0] == "TEST-001"
+
+    @patch("seed_sandbox_table.resource")
+    @patch("seed_sandbox_table._make_realistic_pointer")
+    @patch("seed_sandbox_table.CUSTODIANS", ["CUST1"])
+    def test_generate_pointers_with_validation_error(
+        self, mock_make_pointer, mock_resource
+    ):
+        templates = {"type1": {"template": "data"}}
+
+        mock_make_pointer.side_effect = [ValueError("Invalid data"), MagicMock()]
+
+        nhs_iter = iter(["9000000001", "9000000002"])
+
+        pointer_data, total_attempts = _generate_and_write_pointers(
+            "test-table", templates, 2, nhs_iter
+        )
+
+        # First attempt failed, second succeeded
+        assert total_attempts == 2
+        assert len(pointer_data) == 1
+
+
+class TestSeedSandboxTable:
+
+    @patch("seed_sandbox_table._validate_table_access")
+    @patch("seed_sandbox_table._check_for_existing_sandbox_pointers")
+    @patch("seed_sandbox_table._load_pointer_templates")
+    @patch("seed_sandbox_table._generate_and_write_pointers")
+    @patch("seed_sandbox_table._write_pointer_extract")
+    @patch("seed_sandbox_table.TestNhsNumbersIterator")
+    def test_seed_table_success(
+        self,
+        mock_nhs_iter_class,
+        mock_write_extract,
+        mock_generate,
+        mock_load_templates,
+        mock_check_pointers,
+        mock_validate,
+    ):
+        mock_table = MagicMock()
+        mock_validate.return_value = mock_table
+
+        mock_templates = {"type1": {"template": "data"}}
+        mock_load_templates.return_value = mock_templates
+
+        pointer_data = [["PTR-001", "type1", "CUST1", "9000000001"]]
+        mock_generate.return_value = (pointer_data, 1)
+
+        result = seed_sandbox_table("test-table", pointers_per_type=1, force=False)
+
+        assert result == {"successful": 1, "attempted": 1, "failed": 0}
+        mock_validate.assert_called_once_with("test-table")
+        mock_check_pointers.assert_called_once_with(mock_table, False)
+        mock_generate.assert_called_once()
+        mock_write_extract.assert_called_once()
+
+    @patch("seed_sandbox_table._validate_table_access")
+    @patch("seed_sandbox_table._check_for_existing_sandbox_pointers")
+    @patch("seed_sandbox_table._load_pointer_templates")
+    @patch("seed_sandbox_table._generate_and_write_pointers")
+    @patch("seed_sandbox_table._write_pointer_extract")
+    @patch("seed_sandbox_table.TestNhsNumbersIterator")
+    @patch("builtins.print")
+    def test_seed_table_with_failures(
+        self,
+        mock_print,
+        mock_nhs_iter_class,
+        mock_write_extract,
+        mock_generate,
+        mock_load_templates,
+        mock_check_pointers,
+        mock_validate,
+    ):
+        mock_table = MagicMock()
+        mock_validate.return_value = mock_table
+
+        mock_templates = {"type1": {"template": "data"}}
+        mock_load_templates.return_value = mock_templates
+
+        # 5 attempts, 3 successful
+        pointer_data = [
+            ["PTR-001", "type1", "CUST1", "9000000001"],
+            ["PTR-002", "type1", "CUST1", "9000000002"],
+            ["PTR-003", "type1", "CUST1", "9000000003"],
+        ]
+        mock_generate.return_value = (pointer_data, 5)
+
+        result = seed_sandbox_table("test-table", pointers_per_type=1, force=False)
+
+        # Should return dict with counts
+        assert result == {"successful": 3, "attempted": 5, "failed": 2}
+
+        # Verify error message about failures
+        mock_print.assert_any_call("⚠️ 2 pointer(s) failed to create")
+
+    @patch("seed_sandbox_table._validate_table_access")
+    @patch("seed_sandbox_table._check_for_existing_sandbox_pointers")
+    @patch("seed_sandbox_table._load_pointer_templates")
+    @patch("seed_sandbox_table._generate_and_write_pointers")
+    @patch("seed_sandbox_table._write_pointer_extract")
+    @patch("seed_sandbox_table.TestNhsNumbersIterator")
+    def test_seed_table_skip_csv_writing(
+        self,
+        mock_nhs_iter_class,
+        mock_write_extract,
+        mock_generate,
+        mock_load_templates,
+        mock_check_pointers,
+        mock_validate,
+    ):
+        mock_table = MagicMock()
+        mock_validate.return_value = mock_table
+
+        mock_templates = {"type1": {"template": "data"}}
+        mock_load_templates.return_value = mock_templates
+
+        pointer_data = [["PTR-001", "type1", "CUST1", "9000000001"]]
+        mock_generate.return_value = (pointer_data, 1)
+
+        result = seed_sandbox_table(
+            "test-table", pointers_per_type=1, force=False, write_csv=False
+        )
+
+        assert result == {"successful": 1, "attempted": 1, "failed": 0}
+        mock_write_extract.assert_not_called()
diff --git a/terraform/account-wide-infrastructure/dev/lambda-layers.tf b/terraform/account-wide-infrastructure/dev/lambda-layers.tf
new file mode 100644
index 000000000..697e28618
--- /dev/null
+++ b/terraform/account-wide-infrastructure/dev/lambda-layers.tf
@@ -0,0 +1,6 @@
+# Account-wide Lambda layers for shared infrastructure
+# Built once per account, used by account-wide Lambdas like the seed Lambda
+module "shared_lambda_layers" {
+  source      = "../modules/lambda-layers"
+  name_prefix = local.prefix
+}
diff --git a/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf
new file mode 100644
index 000000000..652591c6b
--- /dev/null
+++ b/terraform/account-wide-infrastructure/dev/lambda__seed-sandbox.tf
@@ -0,0 +1,27 @@
+# Lambda to reset specified DynamoDB tables with seed test data on a schedule
+# Uses account-wide Lambda layers
+# Only deploys if tables are specified
+
+locals {
+  seed_table_names = ["nhsd-nrlf--dev-sandy-dev-pointers-table"]
+}
+
+module "seed_sandbox_lambda" {
+  count  = length(local.seed_table_names) > 0 ? 1 : 0
+  source = "../modules/seed_sandbox_lambda"
+  region = local.region
+  prefix = local.prefix
+  layers = [
+    module.shared_lambda_layers.nrlf_layer_arn,
+    module.shared_lambda_layers.third_party_layer_arn,
+    module.shared_lambda_layers.nrlf_permissions_layer_arn
+  ]
+
+  table_names = local.seed_table_names
+
+  environment_variables = {
+    PREFIX            = "${local.prefix}--"
+    ENVIRONMENT       = local.environment
+    POINTERS_PER_TYPE = "2"
+  }
+}
diff --git a/terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf b/terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf
new file mode 100644
index 000000000..cb2348749
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf
@@ -0,0 +1,41 @@
+# Account-wide Lambda layers for shared infrastructure
+# These reference the same pre-built zips as workspace layers
+# but are deployed once per account rather than per workspace
+
+locals {
+  dist_dir = "${path.module}/../../../../dist"
+
+  # IMPORTANT: These filenames must match:
+  #   1. The build outputs from Makefile (see: make build-lambda-layer)
+  #   2. The workspace layer module at terraform/infrastructure/modules/layer/lambda_layer.tf
+  # If you change the build process, update both locations.
+  layer_zips = {
+    nrlf             = "nrlf.zip"
+    third_party      = "dependency_layer.zip"
+    nrlf_permissions = "nrlf_permissions.zip"
+  }
+}
+
+resource "aws_lambda_layer_version" "nrlf" {
+  layer_name          = "${var.name_prefix}--nrlf-layer"
+  filename            = "${local.dist_dir}/${local.layer_zips.nrlf}"
+  source_code_hash    = filebase64sha256("${local.dist_dir}/${local.layer_zips.nrlf}")
+  compatible_runtimes = ["python3.12"]
+  description         = "NRLF core library layer (account-wide)"
+}
+
+resource "aws_lambda_layer_version" "third_party" {
+  layer_name          = "${var.name_prefix}--dependency-layer"
+  filename            = "${local.dist_dir}/${local.layer_zips.third_party}"
+  source_code_hash    = filebase64sha256("${local.dist_dir}/${local.layer_zips.third_party}")
+  compatible_runtimes = ["python3.12"]
+  description         = "Third party dependencies layer (account-wide)"
+}
+
+resource "aws_lambda_layer_version" "nrlf_permissions" {
+  layer_name          = "${var.name_prefix}--nrlf-permissions-layer"
+  filename            = "${local.dist_dir}/${local.layer_zips.nrlf_permissions}"
+  source_code_hash    = filebase64sha256("${local.dist_dir}/${local.layer_zips.nrlf_permissions}")
+  compatible_runtimes = ["python3.12"]
+  description         = "NRLF permissions library layer (account-wide)"
+}
diff --git a/terraform/account-wide-infrastructure/modules/lambda-layers/outputs.tf b/terraform/account-wide-infrastructure/modules/lambda-layers/outputs.tf
new file mode 100644
index 000000000..e86ad3467
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/lambda-layers/outputs.tf
@@ -0,0 +1,14 @@
+output "nrlf_layer_arn" {
+  description = "ARN of the NRLF Lambda layer"
+  value       = aws_lambda_layer_version.nrlf.arn
+}
+
+output "third_party_layer_arn" {
+  description = "ARN of the third party dependencies Lambda layer"
+  value       = aws_lambda_layer_version.third_party.arn
+}
+
+output "nrlf_permissions_layer_arn" {
+  description = "ARN of the NRLF permissions Lambda layer"
+  value       = aws_lambda_layer_version.nrlf_permissions.arn
+}
diff --git a/terraform/account-wide-infrastructure/modules/lambda-layers/variables.tf b/terraform/account-wide-infrastructure/modules/lambda-layers/variables.tf
new file mode 100644
index 000000000..8c9de5637
--- /dev/null
+++ b/terraform/account-wide-infrastructure/modules/lambda-layers/variables.tf
@@ -0,0 +1,4 @@
+variable "name_prefix" { + description = "Resource name prefix (e.g., nhsd-nrlf--dev)" + type = string +} diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf new file mode 100644 index 000000000..a0393e77c --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/eventbridge.tf @@ -0,0 +1,22 @@ +resource "aws_cloudwatch_event_rule" "event_rule" { + name = "${var.prefix}--event_rule" + description = "Rule to clear and reseed sandbox data" + # Set this to weekly once development and testing is complete + # schedule_expression = "cron(0 2 ? * SUN *)" # 2am UTC, every Sunday + schedule_expression = "cron(0 9-17 ? * MON-FRI *)" # Hourly, 9am-5pm UTC, Monday-Friday +} + +resource "aws_cloudwatch_event_target" "event_target" { + target_id = "${var.prefix}--event_target" + rule = aws_cloudwatch_event_rule.event_rule.name + arn = aws_lambda_function.lambda_function.arn +} + + +resource "aws_lambda_permission" "allow_execution_from_eventbridge" { + statement_id = "AllowExecutionFromEventBridge" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.lambda_function.arn + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.event_rule.arn +} diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf new file mode 100644 index 000000000..ce269bcf8 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/iam.tf @@ -0,0 +1,48 @@ +resource "aws_iam_role" "lambda_role" { + name = "${var.prefix}--sandbox-seeder" + assume_role_policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Principal = { + Service = "lambda.amazonaws.com" + }, + Action = "sts:AssumeRole" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "lambda_policy_attachment" { + role = aws_iam_role.lambda_role.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + + depends_on = [ + aws_iam_role.lambda_role + ] +} + +resource "aws_iam_role_policy" "seed_sandbox_additional_permissions" { + name = "${var.prefix}--sandbox-seeder-additional" + role = aws_iam_role.lambda_role.id + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "dynamodb:DescribeTable", + "dynamodb:Scan", + "dynamodb:Query", + "dynamodb:GetItem", + "dynamodb:PutItem", + "dynamodb:DeleteItem", + "dynamodb:BatchWriteItem" + ] + Resource = [for table_name in var.table_names : "arn:aws:dynamodb:${var.region}:*:table/${table_name}"] + } + ] + }) +} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf similarity index 54% rename from terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf index 8854454bf..88bfc9e4e 100644 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/lambda.tf +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/lambda.tf @@ -1,15 +1,17 @@ resource "aws_lambda_function" "lambda_function" { function_name = "${var.prefix}--sandbox-seeder" - runtime = "python3.9" - handler = "cron.seed_sandbox.index.handler" + runtime = "python3.12" + handler = "index.handler" role = aws_iam_role.lambda_role.arn - 
filename = "${path.module}/../../../../cron/seed_sandbox/dist/seed_sandbox.zip" - source_code_hash = filebase64sha256("${path.module}/../../../../cron/seed_sandbox/dist/seed_sandbox.zip") + filename = "${path.module}/../../../../lambdas/seed_sandbox/dist/seed_sandbox.zip" + source_code_hash = filebase64sha256("${path.module}/../../../../lambdas/seed_sandbox/dist/seed_sandbox.zip") timeout = local.lambda_timeout memory_size = 128 environment { - variables = var.environment_variables + variables = merge(var.environment_variables, { + TABLE_NAMES = join(",", var.table_names) + }) } layers = var.layers diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/locals.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/locals.tf similarity index 100% rename from terraform/infrastructure/modules/seed_sandbox_lambda/locals.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/locals.tf diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf new file mode 100644 index 000000000..a9d01f3c7 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/logs.tf @@ -0,0 +1,4 @@ +resource "aws_cloudwatch_log_group" "lambda_cloudwatch_log_group" { + name = "/aws/lambda/${aws_lambda_function.lambda_function.function_name}" + retention_in_days = local.lambda_log_retention_in_days +} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/output.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/output.tf similarity index 100% rename from terraform/infrastructure/modules/seed_sandbox_lambda/output.tf rename to terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/output.tf diff --git a/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf new file mode 100644 index 000000000..4b3962ad2 --- /dev/null +++ b/terraform/account-wide-infrastructure/modules/seed_sandbox_lambda/vars.tf @@ -0,0 +1,12 @@ +variable "prefix" {} + +variable "region" {} + +variable "layers" {} + +variable "environment_variables" {} + +variable "table_names" { + description = "List of DynamoDB table names to reset" + type = list(string) +} diff --git a/terraform/account-wide-infrastructure/test/lambda-layers.tf b/terraform/account-wide-infrastructure/test/lambda-layers.tf new file mode 100644 index 000000000..697e28618 --- /dev/null +++ b/terraform/account-wide-infrastructure/test/lambda-layers.tf @@ -0,0 +1,6 @@ +# Account-wide Lambda layers for shared infrastructure +# Built once per account, used by account-wide Lambdas like the seed Lambda +module "shared_lambda_layers" { + source = "../modules/lambda-layers" + name_prefix = local.prefix +} diff --git a/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf b/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf new file mode 100644 index 000000000..76d9261dd --- /dev/null +++ b/terraform/account-wide-infrastructure/test/lambda__seed-sandbox.tf @@ -0,0 +1,28 @@ +# Lambda to reset specified DynamoDB tables with seed test data on a schedule +# Deployed at account level to avoid duplication across workspaces +# Uses account-wide Lambda layers +# Only deploys if tables are specified + +locals { + seed_table_names = ["nhsd-nrlf--int-sandboxcopy-pointers-table"] # Change to int-sandbox-pointers-table when ready +} + +module "seed_sandbox_lambda" { + 
count = length(local.seed_table_names) > 0 ? 1 : 0 + source = "../modules/seed_sandbox_lambda" + region = local.region + prefix = local.prefix + layers = [ + module.shared_lambda_layers.nrlf_layer_arn, + module.shared_lambda_layers.third_party_layer_arn, + module.shared_lambda_layers.nrlf_permissions_layer_arn + ] + + table_names = local.seed_table_names + + environment_variables = { + PREFIX = "${local.prefix}--" + ENVIRONMENT = local.environment + POINTERS_PER_TYPE = "2" + } +} diff --git a/terraform/infrastructure/modules/layer/lambda_layer.tf b/terraform/infrastructure/modules/layer/lambda_layer.tf index f945794a1..f95856fa0 100644 --- a/terraform/infrastructure/modules/layer/lambda_layer.tf +++ b/terraform/infrastructure/modules/layer/lambda_layer.tf @@ -1,3 +1,12 @@ +# Workspace-level Lambda layer module +# Creates layers per workspace for API Lambdas +# +# IMPORTANT: The layer zip filenames (${var.name}.zip) must match: +# 1. The build outputs from Makefile (see: make build-lambda-layer) +# 2. The account-wide layer module at terraform/account-wide-infrastructure/modules/lambda-layers/layers.tf +# Expected filenames: nrlf.zip, dependency_layer.zip, nrlf_permissions.zip +# If you change the build process, update both locations. + resource "aws_lambda_layer_version" "lambda_layer" { layer_name = "${var.prefix}--${replace(var.name, "_", "-")}" filename = "${path.module}/../../../../dist/${var.name}.zip" diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/cloudwatch.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/cloudwatch.tf deleted file mode 100644 index 8a2e36b96..000000000 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/cloudwatch.tf +++ /dev/null @@ -1,27 +0,0 @@ -resource "aws_cloudwatch_log_group" "lambda_cloudwatch_log_group" { - name = "/aws/lambda/${aws_lambda_function.lambda_function.function_name}" - retention_in_days = local.lambda_log_retention_in_days - kms_key_id = var.kms_key_id -} - - -resource "aws_cloudwatch_event_rule" "event_rule" { - name = "${var.prefix}--event_rule" - description = "Rule to fire to clear and reseed sandbox data" - schedule_expression = "cron(0 3 ? 
* * *)" # 3am, every day -} - -resource "aws_cloudwatch_event_target" "event_target" { - target_id = "${var.prefix}--event_target" - rule = aws_cloudwatch_event_rule.event_rule.name - arn = aws_lambda_function.lambda_function.arn -} - - -resource "aws_lambda_permission" "allow_execution_from_cloudwatch" { - statement_id = "AllowExecutionFromCloudWatch" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.lambda_function.arn - principal = "events.amazonaws.com" - source_arn = aws_cloudwatch_event_rule.event_rule.arn -} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf deleted file mode 100644 index 0e4a0c3fa..000000000 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/iam.tf +++ /dev/null @@ -1,30 +0,0 @@ -resource "aws_iam_role" "lambda_role" { - name = "${var.prefix}--sandbox-seeder" - assume_role_policy = jsonencode({ - Version = "2012-10-17", - Statement = [ - { - Effect = "Allow", - Principal = { - Service = "lambda.amazonaws.com" - }, - Action = "sts:AssumeRole" - } - ] - }) -} - -resource "aws_iam_role_policy_attachment" "lambda_policy_attachment" { - role = aws_iam_role.lambda_role.name - policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" - - depends_on = [ - aws_iam_role.lambda_role - ] -} - -resource "aws_iam_role_policy_attachment" "additional_policies" { - role = aws_iam_role.lambda_role.name - count = length(var.additional_policies) - policy_arn = var.additional_policies[count.index] -} diff --git a/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf b/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf deleted file mode 100644 index 65473ab61..000000000 --- a/terraform/infrastructure/modules/seed_sandbox_lambda/vars.tf +++ /dev/null @@ -1,13 +0,0 @@ -variable "prefix" {} - -variable "region" {} - -variable "layers" {} - -variable "kms_key_id" {} - -variable "environment_variables" {} - -variable "additional_policies" { - default = [] -} diff --git a/terraform/infrastructure/seed_sandbox_lambda.tf b/terraform/infrastructure/seed_sandbox_lambda.tf deleted file mode 100644 index 3b1ec0f71..000000000 --- a/terraform/infrastructure/seed_sandbox_lambda.tf +++ /dev/null @@ -1,18 +0,0 @@ - -# module "seed_sandbox_lambda" { -# count = endswith(local.environment, "-sandbox") ? 1 : 0 -# source = "./modules/seed_sandbox_lambda" -# region = local.region -# prefix = local.prefix -# layers = [module.lambda-utils.layer_arn, module.nrlf.layer_arn, module.third_party.layer_arn] -# kms_key_id = module.kms__cloudwatch.kms_arn -# environment_variables = { -# PREFIX = "${local.prefix}--" -# ENVIRONMENT = local.environment -# } -# additional_policies = [ -# aws_iam_policy.document-pointer__dynamodb-write.arn, -# aws_iam_policy.document-pointer__dynamodb-read.arn, -# aws_iam_policy.document-pointer__kms-read-write.arn -# ] -# }