Skip to content

Commit 02bb15c

Browse files
author
John Major
committed
X
1 parent db5f376 commit 02bb15c

File tree

1 file changed

+149
-6
lines changed

1 file changed

+149
-6
lines changed

daylib/workset_api.py

Lines changed: 149 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,46 @@ class WorkYamlGenerateRequest(BaseModel):
222222
estimated_coverage: float = 30.0
223223

224224

225+
# ========== Portal File Registration Models ==========
226+
227+
228+
class PortalFileAutoRegisterRequest(BaseModel):
    """Payload sent by the portal to auto-register files discovered in a bucket.

    The customer is deliberately not part of this payload: the server takes
    it from the authenticated portal session so that one customer cannot
    register files on behalf of another.

    The target bucket is named by ``bucket_id`` (preferred) or, as a
    fallback, by ``bucket_name``. Both fields are optional at the schema
    level, so the "at least one of them" rule has to be checked by the
    consumer of this model.
    """

    # --- Target bucket -------------------------------------------------
    bucket_id: Optional[str] = Field(
        default=None,
        description="Linked bucket ID",
    )
    bucket_name: Optional[str] = Field(
        default=None,
        description="S3 bucket name (fallback if bucket_id not provided)",
    )

    # --- Discovery scope -----------------------------------------------
    prefix: str = Field(
        default="",
        description="Prefix to scan",
    )
    file_formats: Optional[List[str]] = Field(
        default=None,
        description="Filter by formats (e.g. fastq,bam,vcf)",
    )
    selected_keys: Optional[List[str]] = Field(
        default=None,
        description="Optional list of S3 object keys to register (subset of discovered files)",
    )
    # Bounded to 1..10000 by the ge/le constraints below.
    max_files: int = Field(
        default=1000,
        ge=1,
        le=10000,
        description="Maximum files to scan in the bucket",
    )

    # --- Metadata stamped on every registered file ---------------------
    # Both IDs are required and must be non-empty strings.
    biosample_id: str = Field(
        ...,
        min_length=1,
        description="Biosample ID to apply to all registered files",
    )
    subject_id: str = Field(
        ...,
        min_length=1,
        description="Subject ID to apply to all registered files",
    )
    # Free-form string here; the description only *recommends* enum values.
    sequencing_platform: str = Field(
        default="NOVASEQX",
        description="Sequencing platform (prefer SequencingPlatform enum values like NOVASEQX, NOVASEQ6000)",
    )
254+
255+
256+
class PortalFileAutoRegisterResponse(BaseModel):
    """Summary returned to the portal after an auto-registration attempt."""

    # Counts reported by the registration step (both 0 when nothing matched).
    registered_count: int
    skipped_count: int

    # Human-readable problems encountered; also carries the
    # "nothing matched" message when no file qualified for registration.
    errors: List[str]

    # Requested keys that were not among the discovered files.
    # Left as None when the request did not carry an explicit key selection.
    missing_selected_keys: Optional[List[str]] = None
263+
264+
225265
def create_app(
226266
state_db: WorksetStateDB,
227267
scheduler: Optional[WorksetScheduler] = None,
@@ -264,8 +304,11 @@ def create_app(
264304
region = os.getenv("AWS_DEFAULT_REGION", "us-west-2")
265305
profile = os.getenv("AWS_PROFILE", None)
266306

307+
267308
# Initialize LinkedBucketManager early so portal routes can use it
268309
linked_bucket_manager = None
310+
# BucketFileDiscovery is optional; keep a stable binding for portal routes
311+
bucket_file_discovery = None
269312
if FILE_MANAGEMENT_AVAILABLE and LinkedBucketManager:
270313
try:
271314
linked_bucket_manager = LinkedBucketManager(
@@ -2635,21 +2678,17 @@ async def portal_files_buckets(request: Request):
26352678
),
26362679
)
26372680

2681+
26382682
@app.get("/portal/files/register", response_class=HTMLResponse, tags=["portal"])
26392683
async def portal_files_register(request: Request):
26402684
"""File registration page."""
26412685
auth_redirect = require_portal_auth(request)
26422686
if auth_redirect:
26432687
return auth_redirect
26442688

2645-
customer = None
2689+
customer, _customer_config = get_customer_for_session(request)
26462690
buckets = []
26472691

2648-
if customer_manager:
2649-
customers = customer_manager.list_customers()
2650-
if customers:
2651-
customer = _convert_customer_for_template(customers[0])
2652-
26532692
if FILE_MANAGEMENT_AVAILABLE and linked_bucket_manager:
26542693
try:
26552694
customer_id = customer.customer_id if customer else None
@@ -2664,7 +2703,9 @@ async def portal_files_register(request: Request):
26642703
"is_validated": b.is_validated,
26652704
"can_read": b.can_read,
26662705
"can_write": b.can_write,
2706+
"can_list": b.can_list,
26672707
"read_only": b.read_only,
2708+
"prefix_restriction": b.prefix_restriction,
26682709
}
26692710
for b in linked_buckets
26702711
]
@@ -2682,6 +2723,108 @@ async def portal_files_register(request: Request):
26822723
),
26832724
)
26842725

2726+
@app.post(
    "/portal/files/register",
    response_model=PortalFileAutoRegisterResponse,
    tags=["portal"],
)
async def portal_files_register_submit(request: Request, payload: PortalFileAutoRegisterRequest):
    """Register files discovered in one of the session customer's linked buckets.

    This is the JSON endpoint behind the portal's auto-discover flow. The
    customer is resolved from the session's ``user_email``; the payload never
    carries a customer ID.

    Steps:
        1. Authenticate the session and check that the required services exist.
        2. Resolve the bucket (by ID, else by name) and confirm it belongs to
           the session customer.
        3. Apply the bucket's prefix restriction to the requested prefix.
        4. Discover files, optionally narrow them to ``selected_keys``.
        5. Flag already-registered files, then register the rest.

    Returns:
        PortalFileAutoRegisterResponse with the registered/skipped counts,
        any error messages, and the selected keys that were not discovered
        (``None`` when no selection was supplied).

    Raises:
        HTTPException: 401 when the session has no ``user_email``; 501 when
            file management, the linked-bucket manager, or the customer
            manager is unavailable; 403 when no customer matches the session
            or the bucket belongs to another customer; 404 when the bucket
            cannot be found; 422 when neither ``bucket_id`` nor
            ``bucket_name`` is given; 400 when the requested prefix does not
            start with the bucket's prefix restriction.
    """
    # JSON clients get an explicit 401 instead of the HTML login redirect.
    session_email = request.session.get("user_email")
    if not session_email:
        raise HTTPException(status_code=401, detail="Not authenticated")

    # --- Service availability -------------------------------------------
    file_management_ready = FILE_MANAGEMENT_AVAILABLE and file_registry and BucketFileDiscovery
    if not file_management_ready:
        raise HTTPException(status_code=501, detail="File management is not configured")
    if not linked_bucket_manager:
        raise HTTPException(status_code=501, detail="LinkedBucketManager is not configured")
    if not customer_manager:
        raise HTTPException(status_code=501, detail="Customer manager is not configured")

    # --- Session -> customer --------------------------------------------
    customer_config = customer_manager.get_customer_by_email(session_email)
    if not customer_config:
        raise HTTPException(status_code=403, detail="Customer not found for current session")
    customer_id = customer_config.customer_id

    # --- Bucket resolution, scoped to the session customer ---------------
    if not payload.bucket_id and not payload.bucket_name:
        raise HTTPException(status_code=422, detail="Either bucket_id or bucket_name is required")

    if payload.bucket_id:
        bucket = linked_bucket_manager.get_bucket(payload.bucket_id)
        if not bucket:
            raise HTTPException(status_code=404, detail="Linked bucket not found")
        if bucket.customer_id != customer_id:
            raise HTTPException(status_code=403, detail="Bucket does not belong to current customer")
    else:
        # Name fallback: only the customer's own linked buckets are searched,
        # so a match is implicitly owned by this customer.
        bucket = next(
            (
                candidate
                for candidate in linked_bucket_manager.list_customer_buckets(customer_id)
                if candidate.bucket_name == payload.bucket_name
            ),
            None,
        )
        if not bucket:
            raise HTTPException(status_code=404, detail="Bucket name is not linked to current customer")

    # --- Prefix restriction ----------------------------------------------
    restriction = bucket.prefix_restriction
    requested_prefix = payload.prefix or ""
    # NOTE(review): this is a raw string-prefix comparison; a restriction
    # without a trailing "/" would also admit sibling prefixes that merely
    # share its leading characters — confirm that is intended.
    if restriction and requested_prefix and not requested_prefix.startswith(restriction):
        raise HTTPException(
            status_code=400,
            detail="Prefix is outside of this bucket's allowed prefix restriction",
        )
    # An empty request prefix falls back to the restriction (if any).
    scan_prefix = requested_prefix or restriction or ""

    # --- Discovery -------------------------------------------------------
    # NOTE(review): the discovery/registration calls below are invoked
    # synchronously from an async handler — confirm they cannot stall the
    # event loop for large scans.
    discovery = BucketFileDiscovery(region=region, profile=profile)
    candidates = discovery.discover_files(
        bucket_name=bucket.bucket_name,
        prefix=scan_prefix,
        file_formats=payload.file_formats,
        max_files=payload.max_files,
    )

    # Narrow to the caller's explicit selection and report what was not found.
    missing_selected = None
    if payload.selected_keys is not None:
        wanted_keys = set(payload.selected_keys)
        missing_selected = sorted(wanted_keys.difference({item.key for item in candidates}))
        candidates = [item for item in candidates if item.key in wanted_keys]

    if not candidates:
        return PortalFileAutoRegisterResponse(
            registered_count=0,
            skipped_count=0,
            errors=["No matching files found to register"],
            missing_selected_keys=missing_selected,
        )

    # --- Registration ----------------------------------------------------
    # Flag already-registered files first so that re-submitting is idempotent.
    candidates = discovery.check_registration_status(candidates, file_registry, customer_id)
    outcome = discovery.auto_register_files(
        candidates,
        file_registry,
        customer_id,
        biosample_id=payload.biosample_id,
        subject_id=payload.subject_id,
        sequencing_platform=payload.sequencing_platform,
    )
    registered_count, skipped_count, errors = outcome

    return PortalFileAutoRegisterResponse(
        registered_count=registered_count,
        skipped_count=skipped_count,
        errors=errors,
        missing_selected_keys=missing_selected,
    )
2827+
26852828
@app.get("/portal/files/upload", response_class=HTMLResponse, tags=["portal"])
26862829
async def portal_files_upload(request: Request):
26872830
"""File upload page."""

0 commit comments

Comments
 (0)