diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f8fb3f1..3c5697d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Functionality to generate coverage information using Sambamba depth [#752](https://github.com/nf-core/raredisease/pull/752) - Parameter to pass a file containing new sample ids to use with multiqc [#764](https://github.com/nf-core/raredisease/pull/764) - A helper function channelFromPath to create channels in a readable fashion in main.nf [#766](https://github.com/nf-core/raredisease/pull/766) +- A helper function channelFromPathWithMeta to create channels in a readable fashion in main.nf [#767](https://github.com/nf-core/raredisease/pull/767) +- A helper function channelFromSamplesheet to create channels in a readable fashion in main.nf [#767](https://github.com/nf-core/raredisease/pull/767) ### `Changed` diff --git a/main.nf b/main.nf index c33b74ea..f50f626f 100644 --- a/main.nf +++ b/main.nf @@ -17,6 +17,9 @@ include { samplesheetToList } from 'plugin/nf-schema' include { CREATE_HGNCIDS_FILE } from './modules/local/create_hgncids_file' include { CREATE_PEDIGREE_FILE } from './modules/local/create_pedigree_file' +include { channelFromPath } from './subworkflows/local/utils_nfcore_raredisease_pipeline' +include { channelFromPathWithMeta } from './subworkflows/local/utils_nfcore_raredisease_pipeline' +include { channelFromSamplesheet } from './subworkflows/local/utils_nfcore_raredisease_pipeline' include { parseSkipList } from './subworkflows/local/utils_nfcore_raredisease_pipeline' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_raredisease_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_raredisease_pipeline' @@ -46,37 +49,70 @@ workflow NFCORE_RAREDISEASE { val_bwamem2 val_bwameme val_cadd_resources + val_call_interval + val_concatenate_snv_calls + val_extract_alignments val_fai val_fasta + 
val_gcnvcaller_model + val_genome val_gens_gnomad_pos val_gens_interval_list + val_gens_pon_female + val_gens_pon_male val_gnomad_af val_gnomad_af_idx val_intervals_wgs val_intervals_y val_known_dbsnp val_known_dbsnp_tbi + val_mbuffer_mem + val_ml_model + val_mobile_element_references val_mobile_element_svdb_annotations val_mt_aligner val_mt_fasta + val_mt_subsample_approach + val_mt_subsample_rd + val_mt_subsample_seed val_multiqc_samples + val_ngsbits_samplegender_method + val_par_bed + val_platform + val_ploidy_model val_readcount_intervals val_reduced_penetrance val_rtg_truthvcfs val_run_mt_for_wes val_run_rtgvcfeval + val_sambamba_regions + val_sample_id_map + val_samtools_sort_threads + val_save_mapped_as_cram val_score_config_mt val_score_config_snv val_score_config_sv val_sdf + val_sentieon_dnascope_pcr_indel_model val_sequence_dictionary + val_skip_tools + val_skip_subworkflows val_svdb_query_bedpedbs val_svdb_query_dbs val_target_bed + val_variant_caller + val_variant_catalog + val_variant_consequences_snv + val_variant_consequences_sv val_vcf2cytosure_blacklist val_vcfanno_extra_resources val_vcfanno_lua + val_vcfanno_resources val_vcfanno_toml + val_vep_cache_version + val_vep_filters + val_vep_filters_scout_fmt + val_vep_plugin_files val_verifybamid_svd_bed val_verifybamid_svd_mu val_verifybamid_svd_ud @@ -168,58 +204,50 @@ workflow NFCORE_RAREDISEASE { ch_svd_mu = channelFromPath(val_verifybamid_svd_mu) ch_svd_ud = channelFromPath(val_verifybamid_svd_ud) + // Using channelFromPathWithMeta helper (with simpleName). 
If filepath is null, returns [[:],[]] + ch_call_interval = channelFromPathWithMeta(val_call_interval, true) + ch_ml_model = channelFromPathWithMeta(val_ml_model, true) + ch_variant_catalog = channelFromPathWithMeta(val_variant_catalog, true) + ch_variant_consequences_snv = channelFromPathWithMeta(val_variant_consequences_snv, true) + ch_variant_consequences_sv = channelFromPathWithMeta(val_variant_consequences_sv, true) + + // Using channelFromPathWithMeta helper (with simpleName). If filepath is null, returns an empty channel + ch_gens_pon_female = channelFromPathWithMeta(val_gens_pon_female) + ch_gens_pon_male = channelFromPathWithMeta(val_gens_pon_male) + ch_ploidy_model = channelFromPathWithMeta(val_ploidy_model) + + // Using channelFromPathWithMeta helper. Returns either an empty channel or [[:],[]] or a channel with custom ID. + ch_par_bed = channelFromPathWithMeta(val_par_bed, true, "par_bed") + ch_sambamba_bed = channelFromPathWithMeta(val_sambamba_regions, false, 'sambamba') + ch_vep_filters_std_fmt = channelFromPathWithMeta(val_vep_filters, false, 'standard') + ch_vep_filters_scout_fmt = channelFromPathWithMeta(val_vep_filters_scout_fmt, false, 'scout') + + // Using channelFromSamplesheet helper. Returns either an empty channel or a validated channel. 
+ ch_me_references = channelFromSamplesheet(val_mobile_element_references, "${projectDir}/assets/mobile_element_references_schema.json", false) + ch_me_svdb_resources = channelFromSamplesheet(val_mobile_element_svdb_annotations, "${projectDir}/assets/svdb_query_vcf_schema.json") + ch_sample_id_map = channelFromSamplesheet(val_sample_id_map, "${projectDir}/assets/sample_id_map.json") + ch_svdb_bedpedbs = channelFromSamplesheet(val_svdb_query_bedpedbs, "${projectDir}/assets/svdb_query_bedpe_schema.json") + ch_svdb_dbs = channelFromSamplesheet(val_svdb_query_dbs, "${projectDir}/assets/svdb_query_vcf_schema.json") + ch_cadd_header = channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() - ch_call_interval = params.call_interval ? channel.fromPath(params.call_interval).map {it -> [[id:it.simpleName], it]}.collect() - : channel.value([[:],[]]) ch_foundin_header = channel.fromPath("$projectDir/assets/foundin.hdr", checkIfExists: true).collect() - ch_gcnvcaller_model = params.gcnvcaller_model ? channel.fromPath(params.gcnvcaller_model).splitCsv ( header:true ) - .map { row -> - return [[id:file(row.models).simpleName], row.models] - } - : channel.empty() - ch_gens_pon_female = params.gens_pon_female ? channel.fromPath(params.gens_pon_female).map { it -> [ [id:it.simpleName], it ] }.collect() - : channel.empty() - ch_gens_pon_male = params.gens_pon_male ? channel.fromPath(params.gens_pon_male).map { it -> [ [id:it.simpleName], it ] }.collect() - : channel.empty() - ch_me_references = params.mobile_element_references ? channel.fromList(samplesheetToList(params.mobile_element_references, "${projectDir}/assets/mobile_element_references_schema.json")) - : channel.empty() - ch_me_svdb_resources = val_mobile_element_svdb_annotations ? channel.fromList(samplesheetToList(val_mobile_element_svdb_annotations, "assets/svdb_query_vcf_schema.json")).collect() - : channel.empty() - ch_ml_model = params.variant_caller.equals("sentieon") ? 
channel.fromPath(params.ml_model).map {it -> [[id:it.simpleName], it]}.collect() - : channel.value([[:],[]]) - ch_ngsbits_method = channel.value(params.ngsbits_samplegender_method) - ch_par_bed = params.par_bed ? channel.fromPath(params.par_bed).map{ it -> [[id:'par_bed'], it] }.collect() - : channel.value([[],[]]) - ch_sentieon_pcr_indel_model = channel.value(params.sentieon_dnascope_pcr_indel_model) - ch_ploidy_model = params.ploidy_model ? channel.fromPath(params.ploidy_model).map{ it -> [[id:it.simpleName], it] }.collect() - : channel.empty() - ch_sambamba_bed = params.sambamba_regions ? channel.fromPath(params.sambamba_regions).map{ it -> [[id:'sambamba'], it] }.collect() - : channel.empty() - ch_sample_id_map = params.sample_id_map ? channel.fromList(samplesheetToList(params.sample_id_map, "${projectDir}/assets/sample_id_map.json")) - : channel.empty() - ch_svdb_bedpedbs = params.svdb_query_bedpedbs ? channel.fromList(samplesheetToList(params.svdb_query_bedpedbs, "assets/svdb_query_bedpe_schema.json")).collect() - : channel.empty() - ch_svdb_dbs = params.svdb_query_dbs ? channel.fromList(samplesheetToList(params.svdb_query_dbs, "assets/svdb_query_vcf_schema.json")).collect() - : channel.empty() - ch_variant_catalog = params.variant_catalog ? channel.fromPath(params.variant_catalog).map { it -> [[id:it.simpleName],it]}.collect() - : channel.value([[],[]]) - ch_variant_consequences_snv = params.variant_consequences_snv ? channel.fromPath(params.variant_consequences_snv).map { it -> [[id:it.simpleName],it]}.collect() - : channel.value([[],[]]) - ch_variant_consequences_sv = params.variant_consequences_sv ? channel.fromPath(params.variant_consequences_sv).map { it -> [[id:it.simpleName],it]}.collect() - : channel.value([[],[]]) - ch_vcfanno_resources = params.vcfanno_resources ? channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect() - : channel.value([]) - ch_vep_filters_std_fmt = params.vep_filters ? 
channel.fromPath(params.vep_filters).map { it -> [[id:'standard'],it]}.collect() - : channel.empty() - ch_vep_filters_scout_fmt = params.vep_filters_scout_fmt ? channel.fromPath(params.vep_filters_scout_fmt).map { it -> [[id:'scout'],it]}.collect() - : channel.empty() + ch_ngsbits_method = channel.value(val_ngsbits_samplegender_method) + ch_sentieon_pcr_indel_model = channel.value(val_sentieon_dnascope_pcr_indel_model) + ch_vcfanno_resources = val_vcfanno_resources ? channel.fromPath(val_vcfanno_resources).splitText().map{it -> it.trim()}.collect() + : channel.value([]) + ch_gcnvcaller_model = val_gcnvcaller_model ? channel.fromPath(val_gcnvcaller_model) + .splitCsv ( header:true ) + .map { row -> + return [[id:file(row.models).simpleName], row.models] + } + : channel.empty() // // Read and store paths in the vep_plugin_files file // ch_vep_extra_files = channel.empty() - if (params.vep_plugin_files) { - channel.fromPath(params.vep_plugin_files) + if (val_vep_plugin_files) { + channel.fromPath(val_vep_plugin_files) .collect() .splitCsv ( header:true ) .map { row -> @@ -252,42 +280,42 @@ workflow NFCORE_RAREDISEASE { ch_versions = ch_versions.mix(CREATE_PEDIGREE_FILE.out.versions) // Tools - skip_eklipse = parseSkipList(params.skip_tools, 'eklipse') - skip_fastp = parseSkipList(params.skip_tools, 'fastp') - skip_fastqc = parseSkipList(params.skip_tools, 'fastqc') - skip_gens = parseSkipList(params.skip_tools, 'gens') - skip_germlinecnvcaller = parseSkipList(params.skip_tools, 'germlinecnvcaller') - skip_haplogrep3 = parseSkipList(params.skip_tools, 'haplogrep3') - skip_ngsbits = parseSkipList(params.skip_tools, 'ngsbits') - skip_peddy = parseSkipList(params.skip_tools, 'peddy') - skip_qualimap = parseSkipList(params.skip_tools, 'qualimap') - skip_smncopynumbercaller = parseSkipList(params.skip_tools, 'smncopynumbercaller') - skip_vcf2cytosure = parseSkipList(params.skip_tools, 'vcf2cytosure') + skip_eklipse = parseSkipList(val_skip_tools, 'eklipse') + skip_fastp = 
parseSkipList(val_skip_tools, 'fastp') + skip_fastqc = parseSkipList(val_skip_tools, 'fastqc') + skip_gens = parseSkipList(val_skip_tools, 'gens') + skip_germlinecnvcaller = parseSkipList(val_skip_tools, 'germlinecnvcaller') + skip_haplogrep3 = parseSkipList(val_skip_tools, 'haplogrep3') + skip_ngsbits = parseSkipList(val_skip_tools, 'ngsbits') + skip_peddy = parseSkipList(val_skip_tools, 'peddy') + skip_qualimap = parseSkipList(val_skip_tools, 'qualimap') + skip_smncopynumbercaller = parseSkipList(val_skip_tools, 'smncopynumbercaller') + skip_vcf2cytosure = parseSkipList(val_skip_tools, 'vcf2cytosure') // Subworkflows - skip_me_annotation = parseSkipList(params.skip_subworkflows, 'me_annotation') - skip_me_calling = parseSkipList(params.skip_subworkflows, 'me_calling') - skip_mt_annotation = parseSkipList(params.skip_subworkflows, 'mt_annotation') - skip_mt_subsample = parseSkipList(params.skip_subworkflows, 'mt_subsample') - skip_repeat_annotation = parseSkipList(params.skip_subworkflows, 'repeat_annotation') - skip_repeat_calling = parseSkipList(params.skip_subworkflows, 'repeat_calling') - skip_snv_annotation = parseSkipList(params.skip_subworkflows, 'snv_annotation') - skip_snv_calling = parseSkipList(params.skip_subworkflows, 'snv_calling') - skip_sv_annotation = parseSkipList(params.skip_subworkflows, 'sv_annotation') - skip_sv_calling = parseSkipList(params.skip_subworkflows, 'sv_calling') - skip_generate_clinical_set = parseSkipList(params.skip_subworkflows, 'generate_clinical_set') + skip_me_annotation = parseSkipList(val_skip_subworkflows, 'me_annotation') + skip_me_calling = parseSkipList(val_skip_subworkflows, 'me_calling') + skip_mt_annotation = parseSkipList(val_skip_subworkflows, 'mt_annotation') + skip_mt_subsample = parseSkipList(val_skip_subworkflows, 'mt_subsample') + skip_repeat_annotation = parseSkipList(val_skip_subworkflows, 'repeat_annotation') + skip_repeat_calling = parseSkipList(val_skip_subworkflows, 'repeat_calling') + 
skip_snv_annotation = parseSkipList(val_skip_subworkflows, 'snv_annotation') + skip_snv_calling = parseSkipList(val_skip_subworkflows, 'snv_calling') + skip_sv_annotation = parseSkipList(val_skip_subworkflows, 'sv_annotation') + skip_sv_calling = parseSkipList(val_skip_subworkflows, 'sv_calling') + skip_generate_clinical_set = parseSkipList(val_skip_subworkflows, 'generate_clinical_set') // // SV caller priority // if (skip_germlinecnvcaller) { - if (params.analysis_type.equals("wgs")) { + if (val_analysis_type.equals("wgs")) { ch_svcaller_priority = channel.value(["tiddit", "manta", "cnvnator"]) } else { ch_svcaller_priority = channel.value([]) } } else { - if (params.analysis_type.equals("wgs")) { + if (val_analysis_type.equals("wgs")) { ch_svcaller_priority = channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) } else { ch_svcaller_priority = channel.value(["manta", "gcnvcaller"]) @@ -408,28 +436,28 @@ workflow NFCORE_RAREDISEASE { skip_qualimap, skip_smncopynumbercaller, skip_vcf2cytosure, - params.aligner, - params.analysis_type, - params.cadd_resources, - params.concatenate_snv_calls, - params.extract_alignments, - params.genome, - params.mbuffer_mem, - params.mt_aligner, - params.mt_subsample_approach, - params.mt_subsample_rd, - params.mt_subsample_seed, - params.platform, - params.run_mt_for_wes, - params.run_rtgvcfeval, - params.sample_id_map, - params.samtools_sort_threads, - params.save_mapped_as_cram, - params.svdb_query_bedpedbs, - params.svdb_query_dbs, - params.target_bed, - params.variant_caller, - params.vep_cache_version + val_aligner, + val_analysis_type, + val_cadd_resources, + val_concatenate_snv_calls, + val_extract_alignments, + val_genome, + val_mbuffer_mem, + val_mt_aligner, + val_mt_subsample_approach, + val_mt_subsample_rd, + val_mt_subsample_seed, + val_platform, + val_run_mt_for_wes, + val_run_rtgvcfeval, + val_sample_id_map, + val_samtools_sort_threads, + val_save_mapped_as_cram, + val_svdb_query_bedpedbs, + 
val_svdb_query_dbs, + val_target_bed, + val_variant_caller, + val_vep_cache_version ) emit: multiqc_report = RAREDISEASE.out.multiqc_report // channel: /path/to/multiqc_report.html @@ -470,37 +498,70 @@ workflow { params.bwamem2, params.bwameme, params.cadd_resources, + params.call_interval, + params.concatenate_snv_calls, + params.extract_alignments, params.fai, params.fasta, + params.gcnvcaller_model, + params.genome, params.gens_gnomad_pos, params.gens_interval_list, + params.gens_pon_female, + params.gens_pon_male, params.gnomad_af, params.gnomad_af_idx, params.intervals_wgs, params.intervals_y, params.known_dbsnp, params.known_dbsnp_tbi, + params.mbuffer_mem, + params.ml_model, + params.mobile_element_references, params.mobile_element_svdb_annotations, params.mt_aligner, params.mt_fasta, + params.mt_subsample_approach, + params.mt_subsample_rd, + params.mt_subsample_seed, params.multiqc_samples, + params.ngsbits_samplegender_method, + params.par_bed, + params.platform, + params.ploidy_model, params.readcount_intervals, params.reduced_penetrance, params.rtg_truthvcfs, params.run_mt_for_wes, params.run_rtgvcfeval, + params.sambamba_regions, + params.sample_id_map, + params.samtools_sort_threads, + params.save_mapped_as_cram, params.score_config_mt, params.score_config_snv, params.score_config_sv, params.sdf, + params.sentieon_dnascope_pcr_indel_model, params.sequence_dictionary, + params.skip_tools, + params.skip_subworkflows, params.svdb_query_bedpedbs, params.svdb_query_dbs, params.target_bed, + params.variant_caller, + params.variant_catalog, + params.variant_consequences_snv, + params.variant_consequences_sv, params.vcf2cytosure_blacklist, params.vcfanno_extra_resources, params.vcfanno_lua, + params.vcfanno_resources, params.vcfanno_toml, + params.vep_cache_version, + params.vep_filters, + params.vep_filters_scout_fmt, + params.vep_plugin_files, params.verifybamid_svd_bed, params.verifybamid_svd_mu, params.verifybamid_svd_ud, @@ -520,25 +581,6 @@ workflow { 
) } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - HELPER FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -/** - * Creates a channel from a file path if provided, otherwise returns a fallback channel - * @param filePath The path to the file (can be null) - * @param valueFallback If true, returns channel.value([]) when filePath is null; otherwise returns channel.empty() (default: false) - * @return Channel with collected file path or fallback channel - */ -def channelFromPath(filePath, valueFallback = false) { - if (!filePath) { - return valueFallback ? channel.value([]) : channel.empty() - } - return channel.fromPath(filePath).collect() -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf index e711d5c5..a933ee92 100644 --- a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -217,6 +217,52 @@ workflow PIPELINE_COMPLETION { FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +/** + * Creates a channel from a file path if provided, otherwise returns a fallback channel + * @param filePath The path to the file (can be null) + * @param valueFallback If true, returns channel.value([]) when filePath is null; otherwise returns channel.empty() (default: false) + * @return Channel with collected file path or fallback channel + */ +def channelFromPath(filePath, valueFallback = false) { + if (!filePath) { + return valueFallback ? 
channel.value([]) : channel.empty() + } + return channel.fromPath(filePath).collect() +} + +/** + * Creates a channel from a file path, maps it to [id, file] format, and collects + * @param filePath The path to the file (can be null) + * @param doubleEmpty If true, returns channel.value([[:], []]) when filePath is null; otherwise returns channel.empty() (default: false) + * @param customId The custom ID to be used in meta.id (default: null) + * @return Channel with [[id:name], file] format and collected, or fallback channel + */ +def channelFromPathWithMeta(filePath, doubleEmpty = false, customId = null) { + if (!filePath) { + return doubleEmpty ? channel.value([[:], []]) : channel.empty() + } + return channel.fromPath(filePath).map { file -> + def meta_id = customId ?: file.simpleName + return [[id: meta_id], file] + }.collect() +} + +/** + * Creates a channel from a samplesheet file using samplesheetToList, or returns a fallback channel + * @param samplesheetPath The path to the samplesheet file (can be null) + * @param schemaPath The path to the JSON schema file for validation + * @param collect If true, calls .collect() on the channel (default: true) + * @return Channel from samplesheet list or channel.empty() + */ +def channelFromSamplesheet(samplesheetPath, schemaPath, collect = true) { + if (!samplesheetPath) { + return channel.empty() + } + def ch_out = channel.fromList(samplesheetToList(samplesheetPath, schemaPath)) + return collect ? ch_out.collect() : ch_out +} + def generateReadGroupLine(file, meta, params) { return "\'@RG\\tID:" + file.simpleName + "_" + meta.lane + "\\tPL:" + params.platform.toUpperCase() + "\\tSM:" + meta.id + "\'" }