⚠ This page is served via a proxy. Original site: https://github.com
This service does not collect credentials or authentication data.
Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ check-deploy: ## check the deploy environment is setup correctly
check-deploy-warn:
@SHOULD_WARN_ONLY=true ./scripts/check-deploy-environment.sh

build: check-warn build-api-packages build-layers build-dependency-layer ## Build the project
build: check-warn build-api-packages build-layers build-dependency-layer build-seed-sandbox-lambda ## Build the project

# Build the seed_sandbox Lambda deployment package by delegating to the
# Makefile in lambdas/seed_sandbox. $(MAKE) is used instead of a literal
# `make` so that flags such as -j / -n and command-line variables are passed
# on to the sub-make.
build-seed-sandbox-lambda:
	@echo "Building seed_sandbox Lambda"
	@cd lambdas/seed_sandbox && $(MAKE) build

build-dependency-layer:
@echo "Building Lambda dependency layer"
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,9 @@ In order to deploy to a sandbox environment (`dev-sandbox`, `qa-sandbox`, `int-s

### Sandbox database clear and reseed

Any workspace suffixed with `-sandbox` has a small amount of additional infrastructure deployed to clear and reseed the DynamoDB tables (auth and document pointers) using a Lambda running
on a cron schedule that can be found in the `cron/seed_sandbox` directory in the root of this project. The data used to seed the DynamoDB tables can be found in the `cron/seed_sandbox/data` directory.
<!-- TODO Update this -->
<!-- Any workspace suffixed with `-sandbox` has a small amount of additional infrastructure deployed to clear and reseed the DynamoDB tables (auth and document pointers) using a Lambda running
on a cron schedule that can be found in the `cron/seed_sandbox` directory in the root of this project. The data used to seed the DynamoDB tables can be found in the `cron/seed_sandbox/data` directory. -->

### Sandbox authorisation

Expand Down
28 changes: 28 additions & 0 deletions lambdas/seed_sandbox/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Packages the seed_sandbox Lambda: the handler, the scripts it imports and the
# sample pointer JSON files are staged under dist/ and zipped into
# dist/seed_sandbox.zip.
.PHONY: build clean

# build: always starts from an empty dist/ (depends on clean), stages the
# files, then creates the zip archive.
build: clean
	@echo "Building Lambda deployment package..."
	mkdir -p dist

	# Copy the handler
	cp index.py dist/

	# Copy the required scripts
	# (staged under dist/scripts so the handler's `scripts.*` imports resolve)
	mkdir -p dist/scripts
	cp ../../scripts/delete_all_table_items.py dist/scripts/
	cp ../../scripts/seed_sandbox_table.py dist/scripts/
	cp ../../scripts/seed_utils.py dist/scripts/

	# Copy the pointer template data
	# (same tests/data/samples layout as the repository; presumably so the seed
	# scripts can use the same relative path inside the Lambda - TODO confirm)
	mkdir -p dist/tests/data/samples
	cp -r ../../tests/data/samples/*.json dist/tests/data/samples/

	# Create the zip file
	# (the -x patterns keep bytecode caches and macOS metadata out of the archive)
	cd dist && zip -r seed_sandbox.zip . -x "*.pyc" -x "__pycache__/*" -x ".DS_Store"

	@echo "✓ Lambda package created: dist/seed_sandbox.zip"

# clean: remove the dist/ staging directory and anything built into it.
clean:
	@echo "Cleaning build artifacts..."
	rm -rf dist
	@echo "✓ Clean complete"
79 changes: 79 additions & 0 deletions lambdas/seed_sandbox/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""
Lambda handler for resetting the sandbox DynamoDB table.

This Lambda function runs on a weekly schedule to clear and reseed the
sandbox pointers table with fresh test data.
"""

# flake8: noqa: T201

import json
import os

from scripts.delete_all_table_items import delete_all_table_items
from scripts.seed_sandbox_table import seed_sandbox_table


def handler(event, context):
    """
    Lambda handler that orchestrates the reset of the sandbox table.

    The sandbox table to be reset is determined by the TABLE_NAME environment
    variable. The optional POINTERS_PER_TYPE environment variable (default "2")
    controls how many pointers are requested from the seeding step.

    Every failure is reported as a 500 response rather than being allowed to
    escape the handler. This includes configuration errors and SystemExit,
    which the shared CLI scripts use to signal failure.

    Args:
        event: Lambda event (from EventBridge schedule)
        context: Lambda context

    Returns:
        dict: Response with "statusCode" (200 on success, 500 on failure) and
            a JSON-encoded "body" containing the details
    """
    table_name = os.environ.get("TABLE_NAME")
    if not table_name:
        error_msg = "TABLE_NAME environment variable is required"
        print(f"ERROR: {error_msg}")
        return {"statusCode": 500, "body": json.dumps({"error": error_msg})}

    # Validate the optional setting explicitly so a typo in the environment
    # produces a readable error response instead of an unhandled ValueError.
    raw_pointers_per_type = os.environ.get("POINTERS_PER_TYPE", "2")
    try:
        pointers_per_type = int(raw_pointers_per_type)
    except ValueError:
        error_msg = (
            "POINTERS_PER_TYPE environment variable must be an integer, "
            f"got {raw_pointers_per_type!r}"
        )
        print(f"ERROR: {error_msg}")
        return {
            "statusCode": 500,
            "body": json.dumps({"error": error_msg, "table_name": table_name}),
        }

    print(f"Starting sandbox table reset for: {table_name}")
    print(f"Pointers per type: {pointers_per_type}")

    try:
        print("Step 1: Deleting all items from table...")
        pointers_deleted_count = delete_all_table_items(table_name=table_name)
        print("✓ All items deleted successfully")

        print("Step 2: Seeding table with fresh data...")
        seed_result = seed_sandbox_table(
            table_name=table_name,
            pointers_per_type=pointers_per_type,
            force=True,
            write_csv=False,
        )
        print("✓ Table seeded successfully")

        result = {
            "statusCode": 200,
            "body": json.dumps(
                {
                    "message": "Sandbox table reset completed successfully",
                    "table_name": table_name,
                    "pointers_deleted": pointers_deleted_count,
                    "pointers_created": seed_result["successful"],
                    "pointers_attempted": seed_result["attempted"],
                    "pointers_failed": seed_result["failed"],
                    "pointers_per_type": pointers_per_type,
                }
            ),
        }

        print(f"SUCCESS: {result}")
        return result

    except SystemExit as e:
        # delete_all_table_items reports failures with sys.exit(). SystemExit
        # does not derive from Exception, so without this clause it would
        # escape the handler instead of producing an error response.
        error_msg = (
            f"Failed to reset sandbox table: script exited with code {e.code}"
        )
        print(f"ERROR: {error_msg}")
        return {
            "statusCode": 500,
            "body": json.dumps({"error": error_msg, "table_name": table_name}),
        }
    except Exception as e:
        error_msg = f"Failed to reset sandbox table: {str(e)}"
        print(f"ERROR: {error_msg}")
        return {
            "statusCode": 500,
            "body": json.dumps({"error": error_msg, "table_name": table_name}),
        }
1 change: 1 addition & 0 deletions layer/nrlf/core/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ class Source(Enum):
NRLF = "NRLF"
LEGACY = "NRL" # not actually used
PERFTEST = "NFT-SEED"
SANDBOX = "SANDBOX-SEED"


VALID_SOURCES = frozenset(item.value for item in Source.__members__.values())
Expand Down
103 changes: 103 additions & 0 deletions scripts/clone_dynamodb_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env python
import boto3
import fire


def _capacity_units(throughput):
    """Return only the read/write capacity fields that create_table accepts."""
    return {
        "ReadCapacityUnits": throughput["ReadCapacityUnits"],
        "WriteCapacityUnits": throughput["WriteCapacityUnits"],
    }


def clone_table(source_table_name, target_table_name, copy_items=True, max_items=None):
    """
    Create a copy of a DynamoDB table for testing.

    The key schema, attribute definitions, billing mode, global secondary
    indexes and local secondary indexes are copied from the source table's
    description. Items are optionally copied afterwards with a paginated scan.

    Args:
        source_table_name: Name of table to clone
        target_table_name: Name for the new table
        copy_items: Whether to copy data (default: True)
        max_items: Max items to copy (None or 0 = all)

    Returns:
        The name of the newly created table (target_table_name)
    """
    dynamodb = boto3.client("dynamodb")
    resource = boto3.resource("dynamodb")

    # Get source table schema
    source = dynamodb.describe_table(TableName=source_table_name)["Table"]

    # Create new table with same schema
    create_params = {
        "TableName": target_table_name,
        "KeySchema": source["KeySchema"],
        "AttributeDefinitions": source["AttributeDefinitions"],
    }

    # Copy billing mode from source table. Older tables without a
    # BillingModeSummary default to provisioned.
    billing_mode = source.get("BillingModeSummary", {}).get(
        "BillingMode", "PROVISIONED"
    )
    is_provisioned = billing_mode == "PROVISIONED"
    create_params["BillingMode"] = billing_mode
    if is_provisioned:
        create_params["ProvisionedThroughput"] = _capacity_units(
            source["ProvisionedThroughput"]
        )

    # Copy GSIs if they exist. On a provisioned table every GSI must carry its
    # own ProvisionedThroughput or create_table rejects the request.
    if source.get("GlobalSecondaryIndexes"):
        global_indexes = []
        for gsi in source["GlobalSecondaryIndexes"]:
            index_definition = {
                "IndexName": gsi["IndexName"],
                "KeySchema": gsi["KeySchema"],
                "Projection": gsi["Projection"],
            }
            if is_provisioned:
                index_definition["ProvisionedThroughput"] = _capacity_units(
                    gsi["ProvisionedThroughput"]
                )
            global_indexes.append(index_definition)
        create_params["GlobalSecondaryIndexes"] = global_indexes

    # Copy LSIs if they exist. Their key attributes are already listed in
    # AttributeDefinitions, and create_table rejects attribute definitions
    # that are not used by the table or one of its indexes.
    if source.get("LocalSecondaryIndexes"):
        create_params["LocalSecondaryIndexes"] = [
            {
                "IndexName": lsi["IndexName"],
                "KeySchema": lsi["KeySchema"],
                "Projection": lsi["Projection"],
            }
            for lsi in source["LocalSecondaryIndexes"]
        ]

    print(f"Creating table {target_table_name}...")
    dynamodb.create_table(**create_params)

    # Wait for table to be active
    waiter = dynamodb.get_waiter("table_exists")
    waiter.wait(TableName=target_table_name)
    print("Table created and active")

    # Copy items if requested
    if copy_items:
        source_table = resource.Table(source_table_name)
        target_table = resource.Table(target_table_name)

        count = 0
        scan_kwargs = {}
        with target_table.batch_writer() as batch:
            while True:
                response = source_table.scan(**scan_kwargs)

                for item in response["Items"]:
                    batch.put_item(Item=item)
                    count += 1
                    if max_items and count >= max_items:
                        break

                limit_reached = bool(max_items) and count >= max_items
                # Stop once the cap is hit or the scan has no further pages
                if limit_reached or "LastEvaluatedKey" not in response:
                    break
                scan_kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"]

        print(f"Copied {count} items")

    return target_table_name


if __name__ == "__main__":
fire.Fire(clone_table)
80 changes: 80 additions & 0 deletions scripts/delete_all_table_items.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
import sys

import boto3
from botocore.exceptions import ClientError

# Needed for when the script is run in Lambda where modules are in scripts subdirectory
try:
import fire
except ImportError:
fire = None


def delete_all_table_items(table_name):
    """
    Delete all items from a DynamoDB table.

    The table is scanned page by page, projecting only the key attributes,
    and every returned key is queued for deletion through a batch writer.

    Args:
        table_name: Name of the DynamoDB table to clear

    Returns:
        int: Number of items queued for deletion

    Raises:
        SystemExit: With code 1 if the table cannot be accessed or an error
            occurs while scanning/deleting
    """
    import time  # local import: only needed for the throttle backoff below

    dynamodb = boto3.resource("dynamodb")
    table = dynamodb.Table(table_name)

    try:
        # Verify the table exists (reading key_schema makes boto3 fetch the
        # table description, which is where a missing table surfaces)
        key_names = [key["AttributeName"] for key in table.key_schema]
    except ClientError as e:
        error_code = e.response["Error"]["Code"]
        if error_code == "ResourceNotFoundException":
            print(f"Error: Table '{table_name}' does not exist")
        elif error_code == "AccessDeniedException":
            print(f"Error: No permission to access table '{table_name}'")
        else:
            print(f"Error accessing table: {e}")
        sys.exit(1)

    # Refer to the key attributes through placeholders so the projection also
    # works when a key name is a DynamoDB reserved word.
    name_placeholders = {f"#k{i}": name for i, name in enumerate(key_names)}

    # Scan and delete items in batches
    deleted_count = 0
    throttle_retries = 0
    try:
        with table.batch_writer() as batch:
            scan_kwargs = {
                "ProjectionExpression": ",".join(name_placeholders),
                "ExpressionAttributeNames": name_placeholders,
            }

            while True:
                try:
                    response = table.scan(**scan_kwargs)

                    for item in response["Items"]:
                        # Only key attributes were projected, so each item is
                        # exactly the Key that delete_item needs
                        batch.delete_item(Key=item)
                        deleted_count += 1

                    throttle_retries = 0

                    if "LastEvaluatedKey" not in response:
                        break
                    scan_kwargs["ExclusiveStartKey"] = response["LastEvaluatedKey"]

                    if deleted_count % 100 == 0:
                        print(f"Deleted {deleted_count} items...", end="\r")

                except ClientError as e:
                    error_code = e.response["Error"]["Code"]
                    if error_code == "ProvisionedThroughputExceededException":
                        print(
                            f"\nWarning: Throttled at {deleted_count} items. Retrying..."
                        )
                        # Back off (capped at 6.4s) instead of immediately
                        # retrying against an already throttled table
                        time.sleep(0.1 * 2 ** min(throttle_retries, 6))
                        throttle_retries += 1
                        continue
                    else:
                        raise

    except Exception as e:
        print(f"\nError during deletion: {e}")
        print(f"Successfully deleted {deleted_count} items before error")
        sys.exit(1)

    print(f"\n✓ Cleared {deleted_count} items from {table_name}")
    return deleted_count


if __name__ == "__main__":
if fire is None:
print("Error: fire module not available")
sys.exit(1)
fire.Fire(delete_all_table_items)
55 changes: 55 additions & 0 deletions scripts/reset_sandbox_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/usr/bin/env python
"""
Resets a sandbox table by clearing all items and reseeding with fresh data

This script is for manual cli use to reset a sandbox table

There is a separate lambda function in place (../lambdas/seed_sandbox) which performs this same reset operation on a weekly schedule, but this script allows for on-demand resets without needing to wait for the scheduled job
"""
import sys

import fire
from delete_all_table_items import delete_all_table_items
from seed_sandbox_table import seed_sandbox_table


def reset_sandbox_table(table_name: str, pointers_per_type: int = 2):
    """
    Wipe a sandbox table and repopulate it with freshly seeded pointers.

    Runs in two stages: every existing item is deleted, then the table is
    seeded again. If a stage fails, a message is printed and the process
    exits, using the stage's own exit code when it exited itself or 1 for
    any other error.

    Args:
        table_name: Name of the DynamoDB table to reset
        pointers_per_type: Number of pointers per type per custodian (default: 2)
    """

    def _abort(message, exit_code):
        # Report the failure, then terminate with the given exit code.
        print(message)
        sys.exit(exit_code)

    print(f"=== Resetting Sandbox Table: {table_name} ===\n")

    # Stage 1: clear the table
    print("Step 1: Deleting all existing items...")
    try:
        delete_all_table_items(table_name)
        print()
    except SystemExit as exit_request:
        _abort("✗ Failed to delete items. Aborting reset.", exit_request.code)
    except Exception as error:
        _abort(f"✗ Unexpected error during deletion: {error}", 1)

    # Stage 2: reseed the table and summarise the outcome
    print("Step 2: Seeding with fresh pointer data...")
    try:
        outcome = seed_sandbox_table(table_name, pointers_per_type, force=True)
        print("\n=== ✓ Reset Complete ===")
        created = outcome["successful"]
        print(f"Table '{table_name}' has been reset with {created} fresh pointers")
        failures = outcome["failed"]
        if failures > 0:
            print(f"⚠️ {failures} pointer(s) failed to create")
    except SystemExit as exit_request:
        _abort("✗ Failed to seed table after deletion.", exit_request.code)
    except Exception as error:
        _abort(f"✗ Unexpected error during seeding: {error}", 1)


if __name__ == "__main__":
fire.Fire(reset_sandbox_table)
Loading
Loading