Skip to content

Commit 8885d92

Browse files
author
Bob Strahan
committed
Fix #58 Occasional UI error 'Failed to get document details - please try again later'
1 parent ad98577 commit 8885d92

File tree

4 files changed

+634
-109
lines changed

4 files changed

+634
-109
lines changed

src/lambda/create_document_resolver/index.py

Lines changed: 24 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import json
77
import logging
88
from decimal import Decimal
9-
import datetime
9+
from robust_list_deletion import delete_list_entries_robust, calculate_shard
1010

1111
# Configure logging
1212
logger = logging.getLogger()
@@ -28,8 +28,18 @@ def handler(event, context):
2828
try:
2929
# Extract input data from full AppSync context
3030
input_data = event['arguments']['input']
31-
object_key = input_data['ObjectKey']
32-
queued_time = input_data['QueuedTime']
31+
32+
# Validate required input fields
33+
if not input_data:
34+
raise ValueError("Input data is required")
35+
36+
object_key = input_data.get('ObjectKey')
37+
queued_time = input_data.get('QueuedTime')
38+
39+
if not object_key or not isinstance(object_key, str):
40+
raise ValueError("ObjectKey must be a non-empty string")
41+
if not queued_time or not isinstance(queued_time, str):
42+
raise ValueError("QueuedTime must be a non-empty string")
3343

3444
logger.info(f"Processing document: {object_key}, QueuedTime: {queued_time}")
3545

@@ -57,52 +67,22 @@ def handler(event, context):
5767
logger.error(f"Error checking for existing document: {str(e)}")
5868
# Continue with creation process even if this check fails
5969

60-
# If existing document found, delete its list entry
61-
if existing_doc and 'QueuedTime' in existing_doc:
70+
# If existing document found, delete its list entry using robust deletion
71+
if existing_doc:
6272
try:
63-
old_time = existing_doc['QueuedTime']
64-
logger.info(f"Deleting list entry for existing document with QueuedTime: {old_time}")
65-
66-
# Calculate shard ID for the old list entry
67-
old_date = old_time.split('T')[0] # e.g., 2023-01-01
68-
old_hour = int(old_time.split('T')[1].split(':')[0]) # e.g., 12
69-
70-
# Calculate shard (6 shards per day = 4 hours each)
71-
shards_in_day = 6
72-
hours_in_shard = 24 / shards_in_day
73-
old_shard = int(old_hour / hours_in_shard)
74-
old_shard_str = f"{old_shard:02d}" # Format with leading zero
73+
logger.info(f"Attempting robust deletion of list entries for existing document: {object_key}")
74+
deletion_success = delete_list_entries_robust(tracking_table, object_key, existing_doc)
7575

76-
old_list_pk = f"list#{old_date}#s#{old_shard_str}"
77-
old_list_sk = f"ts#{old_time}#id#{object_key}"
78-
79-
logger.info(f"Deleting list entry with PK={old_list_pk}, SK={old_list_sk}")
80-
result = tracking_table.delete_item(
81-
Key={
82-
'PK': old_list_pk,
83-
'SK': old_list_sk
84-
},
85-
ReturnValues='ALL_OLD'
86-
)
87-
88-
if 'Attributes' in result:
89-
logger.info(f"Successfully deleted list entry: {json.dumps(result['Attributes'], cls=DecimalEncoder)}")
76+
if deletion_success:
77+
logger.info(f"Successfully deleted existing list entries for {object_key}")
9078
else:
91-
logger.warning(f"No list entry found to delete with PK={old_list_pk}, SK={old_list_sk}")
79+
logger.warning(f"No existing list entries found/deleted for {object_key}")
9280
except Exception as e:
93-
logger.error(f"Error deleting list entry: {str(e)}")
81+
logger.error(f"Error in robust list entry deletion: {str(e)}")
9482
# Continue with creation process even if deletion fails
9583

96-
# Calculate shard ID for new list entry
97-
date_part = queued_time.split('T')[0] # e.g., 2023-01-01
98-
hour_part = int(queued_time.split('T')[1].split(':')[0]) # e.g., 12
99-
100-
# Calculate shard (6 shards per day = 4 hours each)
101-
shards_in_day = 6
102-
hours_in_shard = 24 / shards_in_day
103-
shard = int(hour_part / hours_in_shard)
104-
shard_str = f"{shard:02d}" # Format with leading zero
105-
84+
# Calculate shard ID for new list entry using shared utility
85+
date_part, shard_str = calculate_shard(queued_time)
10686
list_pk = f"list#{date_part}#s#{shard_str}"
10787
list_sk = f"ts#{queued_time}#id#{object_key}"
10888

@@ -140,4 +120,4 @@ def handler(event, context):
140120
return {"ObjectKey": object_key}
141121
except Exception as e:
142122
logger.error(f"Error in create_document resolver: {str(e)}", exc_info=True)
143-
raise e
123+
raise e
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: MIT-0
3+
4+
"""
5+
Robust list deletion utilities for DynamoDB document tracking.
6+
7+
This module provides robust deletion functions that handle timestamp mismatches
8+
between document records and list entries, preventing orphaned list entries.
9+
"""
10+
11+
import logging
12+
from typing import Dict, Any, Optional, List
13+
from boto3.dynamodb.conditions import Key
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
def calculate_shard(timestamp: str) -> tuple[str, str]:
19+
"""
20+
Calculate shard information from timestamp.
21+
22+
Args:
23+
timestamp: ISO timestamp string (e.g., "2025-09-10T12:03:27.256164+00:00")
24+
25+
Returns:
26+
tuple: (date_part, shard_str) where shard_str is 2-digit padded
27+
28+
Raises:
29+
ValueError: If timestamp format is invalid
30+
"""
31+
if not timestamp or not isinstance(timestamp, str):
32+
raise ValueError(f"Invalid timestamp: must be a non-empty string, got {type(timestamp)}")
33+
34+
if 'T' not in timestamp:
35+
raise ValueError(f"Invalid timestamp format: missing 'T' separator, got {timestamp}")
36+
37+
try:
38+
date_part = timestamp.split('T')[0] # e.g., 2025-09-10
39+
time_part = timestamp.split('T')[1]
40+
41+
if ':' not in time_part:
42+
raise ValueError(f"Invalid time format: missing ':' separator, got {time_part}")
43+
44+
hour_part = int(time_part.split(':')[0]) # e.g., 12
45+
46+
# Validate hour range
47+
if not 0 <= hour_part <= 23:
48+
raise ValueError(f"Invalid hour: must be 0-23, got {hour_part}")
49+
50+
# Calculate shard (6 shards per day = 4 hours each)
51+
hours_in_shard = 24 / 6
52+
shard = int(hour_part / hours_in_shard)
53+
shard_str = f"{shard:02d}" # Format with leading zero
54+
55+
return date_part, shard_str
56+
57+
except (ValueError, IndexError) as e:
58+
if "Invalid" in str(e):
59+
raise # Re-raise our custom validation errors
60+
raise ValueError(f"Invalid timestamp format: {timestamp}, error: {str(e)}")
61+
62+
63+
def try_exact_list_deletion(tracking_table, list_pk: str, list_sk: str, object_key: str) -> bool:
64+
"""
65+
Attempt to delete list entry with exact timestamp match.
66+
67+
Args:
68+
tracking_table: DynamoDB table resource
69+
list_pk: Primary key of list entry
70+
list_sk: Sort key of list entry
71+
object_key: Document object key for logging
72+
73+
Returns:
74+
bool: True if successful, False if not found
75+
"""
76+
try:
77+
logger.info(f"Trying exact deletion - PK={list_pk}, SK={list_sk}")
78+
result = tracking_table.delete_item(
79+
Key={
80+
'PK': list_pk,
81+
'SK': list_sk
82+
},
83+
ReturnValues='ALL_OLD'
84+
)
85+
86+
if 'Attributes' in result:
87+
logger.info(f"Successfully deleted list entry with exact match: {result['Attributes']}")
88+
return True
89+
else:
90+
logger.warning(f"No list entry found with exact match: PK={list_pk}, SK={list_sk}")
91+
return False
92+
except Exception as e:
93+
logger.error(f"Error in exact list deletion for {object_key}: {str(e)}")
94+
return False
95+
96+
97+
def query_shard_for_object_key(tracking_table, list_pk: str, object_key: str) -> List[Dict[str, Any]]:
98+
"""
99+
Query a shard for any list entries containing the specified object key.
100+
Uses DynamoDB filter expressions for efficiency.
101+
102+
Args:
103+
tracking_table: DynamoDB table resource
104+
list_pk: Primary key of the shard
105+
object_key: Document object key to search for
106+
107+
Returns:
108+
List[Dict]: List of matching DynamoDB items
109+
"""
110+
try:
111+
logger.info(f"Querying shard {list_pk} for entries containing ObjectKey: {object_key}")
112+
113+
# Use filter expression to efficiently query only matching items
114+
# This prevents loading all shard items and reduces pagination issues
115+
response = tracking_table.query(
116+
KeyConditionExpression=Key('PK').eq(list_pk),
117+
FilterExpression="ObjectKey = :obj_key OR contains(SK, :obj_id)",
118+
ExpressionAttributeValues={
119+
':obj_key': object_key,
120+
':obj_id': f"#id#{object_key}"
121+
}
122+
)
123+
124+
matching_items = []
125+
if 'Items' in response:
126+
for item in response['Items']:
127+
matching_items.append(item)
128+
logger.info(f"Found matching list entry: PK={item.get('PK')}, SK={item.get('SK')}")
129+
130+
# Handle pagination if needed (though filter should make this rare)
131+
while 'LastEvaluatedKey' in response:
132+
logger.info(f"Handling pagination for shard query, continuing from: {response['LastEvaluatedKey']}")
133+
response = tracking_table.query(
134+
KeyConditionExpression=Key('PK').eq(list_pk),
135+
FilterExpression="ObjectKey = :obj_key OR contains(SK, :obj_id)",
136+
ExpressionAttributeValues={
137+
':obj_key': object_key,
138+
':obj_id': f"#id#{object_key}"
139+
},
140+
ExclusiveStartKey=response['LastEvaluatedKey']
141+
)
142+
143+
if 'Items' in response:
144+
for item in response['Items']:
145+
matching_items.append(item)
146+
logger.info(f"Found matching list entry (paginated): PK={item.get('PK')}, SK={item.get('SK')}")
147+
148+
logger.info(f"Found {len(matching_items)} matching entries in shard {list_pk}")
149+
return matching_items
150+
151+
except Exception as e:
152+
logger.error(f"Error querying shard for object key {object_key}: {str(e)}")
153+
return []
154+
155+
156+
def get_adjacent_shards(date_part: str, shard_str: str) -> List[str]:
157+
"""
158+
Get adjacent shard identifiers for edge case handling.
159+
160+
Args:
161+
date_part: Date string (e.g., "2025-09-10")
162+
shard_str: Shard string (e.g., "03")
163+
164+
Returns:
165+
List[str]: List of adjacent shard PKs
166+
"""
167+
try:
168+
current_shard = int(shard_str)
169+
adjacent_shards = []
170+
171+
# Previous shard
172+
if current_shard > 0:
173+
prev_shard = f"{current_shard - 1:02d}"
174+
adjacent_shards.append(f"list#{date_part}#s#{prev_shard}")
175+
176+
# Next shard
177+
if current_shard < 5: # Max shard is 05
178+
next_shard = f"{current_shard + 1:02d}"
179+
adjacent_shards.append(f"list#{date_part}#s#{next_shard}")
180+
181+
logger.info(f"Adjacent shards for {date_part}#s#{shard_str}: {adjacent_shards}")
182+
return adjacent_shards
183+
184+
except Exception as e:
185+
logger.error(f"Error calculating adjacent shards: {str(e)}")
186+
return []
187+
188+
189+
def delete_list_entries_robust(tracking_table, object_key: str, document_metadata: Optional[Dict[str, Any]]) -> bool:
190+
"""
191+
Robustly delete list entries for the given object key.
192+
Uses multiple strategies: exact match, shard query, adjacent shard search.
193+
194+
Args:
195+
tracking_table: DynamoDB table resource
196+
object_key: Document object key
197+
document_metadata: Optional document metadata containing timestamp info
198+
199+
Returns:
200+
bool: True if any entries were deleted
201+
"""
202+
deleted_any = False
203+
204+
# Strategy 1: Try exact timestamp match if we have document metadata
205+
if document_metadata:
206+
event_time = None
207+
if 'QueuedTime' in document_metadata and document_metadata['QueuedTime']:
208+
event_time = document_metadata['QueuedTime']
209+
logger.info(f"Using QueuedTime for exact match: {event_time}")
210+
elif 'InitialEventTime' in document_metadata and document_metadata['InitialEventTime']:
211+
event_time = document_metadata['InitialEventTime']
212+
logger.info(f"Using InitialEventTime for exact match: {event_time}")
213+
214+
if event_time:
215+
try:
216+
date_part, shard_str = calculate_shard(event_time)
217+
list_pk = f"list#{date_part}#s#{shard_str}"
218+
list_sk = f"ts#{event_time}#id#{object_key}"
219+
220+
if try_exact_list_deletion(tracking_table, list_pk, list_sk, object_key):
221+
deleted_any = True
222+
logger.info("Successfully deleted list entry with exact timestamp match")
223+
return deleted_any # Success, no need for fallback
224+
else:
225+
logger.warning("Exact timestamp match failed, proceeding with fallback strategies")
226+
except Exception as e:
227+
logger.error(f"Error in exact timestamp deletion: {str(e)}")
228+
229+
# Strategy 2: Query calculated shard for any entries with matching ObjectKey
230+
if document_metadata:
231+
try:
232+
# Use the same timestamp for shard calculation
233+
event_time = document_metadata.get('QueuedTime') or document_metadata.get('InitialEventTime')
234+
if event_time:
235+
date_part, shard_str = calculate_shard(event_time)
236+
list_pk = f"list#{date_part}#s#{shard_str}"
237+
238+
matching_entries = query_shard_for_object_key(tracking_table, list_pk, object_key)
239+
for entry in matching_entries:
240+
try:
241+
result = tracking_table.delete_item(
242+
Key={
243+
'PK': entry['PK'],
244+
'SK': entry['SK']
245+
},
246+
ReturnValues='ALL_OLD'
247+
)
248+
if 'Attributes' in result:
249+
logger.info(f"Successfully deleted list entry via shard query: {entry['SK']}")
250+
deleted_any = True
251+
except Exception as e:
252+
logger.error(f"Error deleting found list entry: {str(e)}")
253+
254+
# Strategy 3: Check adjacent shards for edge cases
255+
if not deleted_any:
256+
logger.info("No entries found in calculated shard, checking adjacent shards")
257+
adjacent_shards = get_adjacent_shards(date_part, shard_str)
258+
259+
for adj_pk in adjacent_shards:
260+
matching_entries = query_shard_for_object_key(tracking_table, adj_pk, object_key)
261+
for entry in matching_entries:
262+
try:
263+
result = tracking_table.delete_item(
264+
Key={
265+
'PK': entry['PK'],
266+
'SK': entry['SK']
267+
},
268+
ReturnValues='ALL_OLD'
269+
)
270+
if 'Attributes' in result:
271+
logger.info(f"Successfully deleted list entry via adjacent shard: {entry['SK']}")
272+
deleted_any = True
273+
except Exception as e:
274+
logger.error(f"Error deleting adjacent shard entry: {str(e)}")
275+
except Exception as e:
276+
logger.error(f"Error in shard query strategies: {str(e)}")
277+
278+
# Strategy 4: Last resort - search recent dates if no metadata available
279+
if not deleted_any and not document_metadata:
280+
logger.warning(f"No document metadata available for {object_key}, attempting recent date search")
281+
# This could be expanded to search recent dates/shards as a last resort
282+
# For now, just log the limitation
283+
logger.warning("Cannot delete list entries without any timestamp information")
284+
285+
if deleted_any:
286+
logger.info(f"Successfully deleted list entries for {object_key}")
287+
else:
288+
logger.warning(f"No list entries found or deleted for {object_key}")
289+
290+
return deleted_any

0 commit comments

Comments
 (0)