@@ -222,6 +222,46 @@ class WorkYamlGenerateRequest(BaseModel):
222222 estimated_coverage : float = 30.0
223223
224224
225+ # ========== Portal File Registration Models ==========
226+
227+
class PortalFileAutoRegisterRequest(BaseModel):
    """Payload for the portal's auto-register (bucket discovery) flow.

    The payload deliberately has no ``customer_id``: the server resolves the
    customer from the authenticated portal session so that a caller cannot
    register files on behalf of another customer.

    The target bucket is identified by ``bucket_id`` (preferred) or, failing
    that, by ``bucket_name``. At least one of the two must be supplied; both
    fields are optional here, so the field definitions alone do not enforce
    that requirement.
    """

    # --- Which bucket to scan ---
    bucket_id: Optional[str] = Field(
        default=None,
        description="Linked bucket ID",
    )
    bucket_name: Optional[str] = Field(
        default=None,
        description="S3 bucket name (fallback if bucket_id not provided)",
    )

    # --- How to scan it ---
    prefix: str = Field(
        default="",
        description="Prefix to scan",
    )
    file_formats: Optional[List[str]] = Field(
        default=None,
        description="Filter by formats (e.g. fastq,bam,vcf)",
    )
    selected_keys: Optional[List[str]] = Field(
        default=None,
        description="Optional list of S3 object keys to register (subset of discovered files)",
    )
    max_files: int = Field(
        default=1000,
        ge=1,
        le=10000,
        description="Maximum files to scan in the bucket",
    )

    # --- Metadata stamped on every registered file (both IDs are required) ---
    biosample_id: str = Field(
        ...,
        min_length=1,
        description="Biosample ID to apply to all registered files",
    )
    subject_id: str = Field(
        ...,
        min_length=1,
        description="Subject ID to apply to all registered files",
    )
    sequencing_platform: str = Field(
        default="NOVASEQX",
        description="Sequencing platform (prefer SequencingPlatform enum values like NOVASEQX, NOVASEQ6000)",
    )
254+
255+
class PortalFileAutoRegisterResponse(BaseModel):
    """Outcome summary returned by the portal auto-registration endpoint."""

    # Number of files registered by this request.
    registered_count: int
    # Number of files the registration step reported as skipped.
    skipped_count: int
    # Human-readable error messages; always present, possibly empty.
    errors: List[str]
    # Requested keys that the bucket scan did not return; stays None when the
    # request did not specify `selected_keys`.
    missing_selected_keys: Optional[List[str]] = None
263+
264+
225265def create_app (
226266 state_db : WorksetStateDB ,
227267 scheduler : Optional [WorksetScheduler ] = None ,
@@ -264,8 +304,11 @@ def create_app(
264304 region = os .getenv ("AWS_DEFAULT_REGION" , "us-west-2" )
265305 profile = os .getenv ("AWS_PROFILE" , None )
266306
307+
267308 # Initialize LinkedBucketManager early so portal routes can use it
268309 linked_bucket_manager = None
310+ # BucketFileDiscovery is optional; keep a stable binding for portal routes
311+ bucket_file_discovery = None
269312 if FILE_MANAGEMENT_AVAILABLE and LinkedBucketManager :
270313 try :
271314 linked_bucket_manager = LinkedBucketManager (
@@ -2635,21 +2678,17 @@ async def portal_files_buckets(request: Request):
26352678 ),
26362679 )
26372680
2681+
26382682 @app .get ("/portal/files/register" , response_class = HTMLResponse , tags = ["portal" ])
26392683 async def portal_files_register (request : Request ):
26402684 """File registration page."""
26412685 auth_redirect = require_portal_auth (request )
26422686 if auth_redirect :
26432687 return auth_redirect
26442688
2645- customer = None
2689+ customer , _customer_config = get_customer_for_session ( request )
26462690 buckets = []
26472691
2648- if customer_manager :
2649- customers = customer_manager .list_customers ()
2650- if customers :
2651- customer = _convert_customer_for_template (customers [0 ])
2652-
26532692 if FILE_MANAGEMENT_AVAILABLE and linked_bucket_manager :
26542693 try :
26552694 customer_id = customer .customer_id if customer else None
@@ -2664,7 +2703,9 @@ async def portal_files_register(request: Request):
26642703 "is_validated" : b .is_validated ,
26652704 "can_read" : b .can_read ,
26662705 "can_write" : b .can_write ,
2706+ "can_list" : b .can_list ,
26672707 "read_only" : b .read_only ,
2708+ "prefix_restriction" : b .prefix_restriction ,
26682709 }
26692710 for b in linked_buckets
26702711 ]
@@ -2682,6 +2723,108 @@ async def portal_files_register(request: Request):
26822723 ),
26832724 )
26842725
@app.post(
    "/portal/files/register",
    response_model=PortalFileAutoRegisterResponse,
    tags=["portal"],
)
async def portal_files_register_submit(request: Request, payload: PortalFileAutoRegisterRequest):
    """Register discovered files from one of the session customer's linked buckets.

    Backs the portal UI's auto-discover flow. The customer is always resolved
    from the session's ``user_email``; the payload carries no customer ID.

    Args:
        request: Incoming request; its session must contain ``user_email``.
        payload: Bucket selector, scan options, and the metadata applied to
            every registered file.

    Returns:
        A ``PortalFileAutoRegisterResponse`` with the registered/skipped
        counts, any error messages, and (when ``selected_keys`` was given)
        the selected keys that the scan did not return.

    Raises:
        HTTPException: 401 when the session has no ``user_email``; 501 when
            file management, the linked-bucket manager, or the customer
            manager is unavailable; 403 when no customer matches the session
            or the bucket belongs to another customer; 404 when the bucket
            cannot be found among the customer's linked buckets; 422 when
            neither ``bucket_id`` nor ``bucket_name`` is given; 400 when the
            requested prefix falls outside the bucket's prefix restriction.
    """
    # JSON endpoint: answer with an explicit 401 rather than a login redirect.
    session_email = request.session.get("user_email")
    if not session_email:
        raise HTTPException(status_code=401, detail="Not authenticated")

    # Every collaborator must be present before any work starts.
    file_management_ready = FILE_MANAGEMENT_AVAILABLE and file_registry and BucketFileDiscovery
    if not file_management_ready:
        raise HTTPException(status_code=501, detail="File management is not configured")
    if not linked_bucket_manager:
        raise HTTPException(status_code=501, detail="LinkedBucketManager is not configured")
    if not customer_manager:
        raise HTTPException(status_code=501, detail="Customer manager is not configured")

    session_customer = customer_manager.get_customer_by_email(session_email)
    if not session_customer:
        raise HTTPException(status_code=403, detail="Customer not found for current session")
    owner_id = session_customer.customer_id

    # Resolve the target bucket and make sure it belongs to the session customer.
    if not payload.bucket_id and not payload.bucket_name:
        raise HTTPException(status_code=422, detail="Either bucket_id or bucket_name is required")

    if payload.bucket_id:
        target_bucket = linked_bucket_manager.get_bucket(payload.bucket_id)
        if not target_bucket:
            raise HTTPException(status_code=404, detail="Linked bucket not found")
        if target_bucket.customer_id != owner_id:
            raise HTTPException(status_code=403, detail="Bucket does not belong to current customer")
    else:
        # Fallback: only buckets already linked to this customer can match by name.
        target_bucket = next(
            (
                candidate
                for candidate in linked_bucket_manager.list_customer_buckets(owner_id)
                if candidate.bucket_name == payload.bucket_name
            ),
            None,
        )
        if not target_bucket:
            raise HTTPException(status_code=404, detail="Bucket name is not linked to current customer")

    # Apply the bucket's prefix restriction: default to it when no prefix was
    # requested, otherwise require the requested prefix to start with it.
    # NOTE(review): this is a plain string-prefix test, so a restriction of
    # "data" also admits "database/". Confirm restrictions are stored with a
    # trailing delimiter if directory-style scoping is intended.
    scan_prefix = payload.prefix or ""
    restriction = target_bucket.prefix_restriction
    if restriction and not scan_prefix:
        scan_prefix = restriction
    elif restriction and not scan_prefix.startswith(restriction):
        raise HTTPException(
            status_code=400,
            detail="Prefix is outside of this bucket's allowed prefix restriction",
        )

    # Scan the bucket.
    # NOTE(review): these discovery calls are synchronous inside an async
    # handler. Confirm whether they should be moved off the event loop.
    discovery = BucketFileDiscovery(region=region, profile=profile)
    candidates = discovery.discover_files(
        bucket_name=target_bucket.bucket_name,
        prefix=scan_prefix,
        file_formats=payload.file_formats,
        max_files=payload.max_files,
    )

    # Narrow to the caller's selection, remembering which requested keys the
    # scan did not return.
    # NOTE(review): presumably a key beyond the `max_files` scan limit is also
    # reported as missing. Verify that this is acceptable.
    unmatched_keys = None
    if payload.selected_keys is not None:
        wanted = set(payload.selected_keys)
        unmatched_keys = sorted(wanted.difference(item.key for item in candidates))
        candidates = [item for item in candidates if item.key in wanted]

    if not candidates:
        return PortalFileAutoRegisterResponse(
            registered_count=0,
            skipped_count=0,
            errors=["No matching files found to register"],
            missing_selected_keys=unmatched_keys,
        )

    # Mark files that are already registered so that repeating the request
    # stays idempotent, then register the rest.
    candidates = discovery.check_registration_status(candidates, file_registry, owner_id)
    outcome = discovery.auto_register_files(
        candidates,
        file_registry,
        owner_id,
        biosample_id=payload.biosample_id,
        subject_id=payload.subject_id,
        sequencing_platform=payload.sequencing_platform,
    )
    registered_total, skipped_total, problems = outcome

    return PortalFileAutoRegisterResponse(
        registered_count=registered_total,
        skipped_count=skipped_total,
        errors=problems,
        missing_selected_keys=unmatched_keys,
    )
2827+
26852828 @app .get ("/portal/files/upload" , response_class = HTMLResponse , tags = ["portal" ])
26862829 async def portal_files_upload (request : Request ):
26872830 """File upload page."""
0 commit comments