diff --git a/bin/validate_metadata.py b/bin/validate_metadata.py index 079bfc27..d131011d 100755 --- a/bin/validate_metadata.py +++ b/bin/validate_metadata.py @@ -73,13 +73,10 @@ def metadata_validation_main(): sample_df = final_df.iloc[row].to_frame().transpose() sample_df = sample_df.set_index('sequence_name') sample_dfs[final_df.iloc[row]['sequence_name']] = sample_df - # now export the .xlsx file as a .tsv and csv + # now export the .xlsx file as a .tsv for sample in sample_dfs.keys(): tsv_file = f'{parameters["output_dir"]}/{parameters["file_name"]}/tsv_per_sample/{sample}.tsv' sample_dfs[sample].to_csv(tsv_file, sep="\t") - # *** Added this to export to csv as well *** # - csv_file = f'{parameters["output_dir"]}/{parameters["file_name"]}/tsv_per_sample/{sample}.csv' - sample_dfs[sample].to_csv(csv_file) print(f'\nMetadata Validation was Successful!!!\n') else: print(f'\nMetadata Validation Failed Please Consult : {parameters["output_dir"]}/{parameters["file_name"]}/errors/full_error.txt for a Detailed List\n') diff --git a/conf/test_params.config b/conf/test_params.config index 4808be52..67effabd 100644 --- a/conf/test_params.config +++ b/conf/test_params.config @@ -191,7 +191,7 @@ params { submission_prod_or_test = "test" // "prod" if submitting submission_wait_time = 'calc' send_submission_email = false - submission_config = "${projectDir}/bin/config_files/config_kk.yaml" + submission_config = "${projectDir}/bin/config_files/default_config.yaml" submission_database = "submit" // must be either: 'submit' (follows existing config), 'genbank', 'sra', 'gisaid', 'biosample', 'joint_sra_biosample', or 'all' // batch_name = "batch1" diff --git a/modules/local/general_util/merge_upload_log/main.nf b/modules/local/general_util/merge_upload_log/main.nf index db0c8cd7..ca65780e 100644 --- a/modules/local/general_util/merge_upload_log/main.nf +++ b/modules/local/general_util/merge_upload_log/main.nf @@ -5,7 +5,7 @@ */ process MERGE_UPLOAD_LOG { - publishDir "$params.output_dir/$params.submission_output_dir/$annotation_name", mode: 'copy', overwrite: params.overwrite_output + publishDir "$params.output_dir/$params.submission_output_dir/", mode: 'copy', overwrite: params.overwrite_output conda (params.enable_conda ? params.env_yml : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -14,7 +14,6 @@ process MERGE_UPLOAD_LOG { input: path submission_files path submission_log - val annotation_name script: """ diff --git a/modules/local/initial_submission/main_full.nf b/modules/local/initial_submission/main_full.nf index 931f7c76..95dda45a 100644 --- a/modules/local/initial_submission/main_full.nf +++ b/modules/local/initial_submission/main_full.nf @@ -16,7 +16,6 @@ process SUBMISSION_FULL { input: tuple val(meta), path(validated_meta_path), path(fasta_path), path(fastq_1), path(fastq_2), path(annotations_path) path submission_config - val annotation_name // define the command line arguments based on the value of params.submission_test_or_prod, params.send_submission_email def test_flag = params.submission_prod_or_test == 'test' ? '--test' : '' diff --git a/modules/local/initial_submission/main_genbank.nf b/modules/local/initial_submission/main_genbank.nf index 23a21be2..9b9f4bac 100644 --- a/modules/local/initial_submission/main_genbank.nf +++ b/modules/local/initial_submission/main_genbank.nf @@ -16,7 +16,6 @@ process SUBMISSION_GENBANK { input: tuple val(meta), path(validated_meta_path), path(fasta_path), path(fastq_1), path(fastq_2), path(annotations_path) path submission_config - val annotation_name // define the command line arguments based on the value of params.submission_test_or_prod, params.send_submission_email def test_flag = params.submission_prod_or_test == 'test' ? '--test' : '' diff --git a/modules/local/initial_submission/main_sra.nf b/modules/local/initial_submission/main_sra.nf index 7697771b..e2079929 100644 --- a/modules/local/initial_submission/main_sra.nf +++ b/modules/local/initial_submission/main_sra.nf @@ -16,7 +16,6 @@ process SUBMISSION_SRA { input: tuple val(meta), path(validated_meta_path), path(fasta_path), path(fastq_1), path(fastq_2) path submission_config - val annotation_name // define the command line arguments based on the value of params.submission_test_or_prod def test_flag = params.submission_prod_or_test == 'test' ? '--test' : '' diff --git a/modules/local/metadata_validation/main.nf b/modules/local/metadata_validation/main.nf index 4e25ec24..ced231d2 100644 --- a/modules/local/metadata_validation/main.nf +++ b/modules/local/metadata_validation/main.nf @@ -23,14 +23,13 @@ process METADATA_VALIDATION { """ validate_metadata.py \ --meta_path $meta_path \ - --output_dir $params.val_output_dir \ + --output_dir . \ --custom_fields_file $params.custom_fields_file \ --validate_custom_fields $params.validate_custom_fields """ output: - path "$params.val_output_dir/*/tsv_per_sample/*.tsv", emit: tsv_Files - path "$params.val_output_dir/*/tsv_per_sample/*.csv", emit: csv_Files - path "$params.val_output_dir/*/tsv_per_sample", emit: tsv_dir - path "$params.val_output_dir/*/errors", emit: errors + path "*/tsv_per_sample/*.tsv", emit: tsv_Files + // path "*/tsv_per_sample", emit: tsv_dir + path "*/errors", emit: errors } \ No newline at end of file diff --git a/modules/local/update_submission/main.nf b/modules/local/update_submission/main.nf index 8ffde5fa..0bb6606d 100644 --- a/modules/local/update_submission/main.nf +++ b/modules/local/update_submission/main.nf @@ -7,18 +7,17 @@ process UPDATE_SUBMISSION { // label 'main' - publishDir "$params.output_dir/$params.submission_output_dir/$annotation_name", mode: 'copy', overwrite: true + publishDir "$params.output_dir/$params.submission_output_dir/", mode: 'copy', overwrite: true conda (params.enable_conda ? params.env_yml : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'cdcgov/seqsender-dev' : 'cdcgov/seqsender-dev' }" input: - val wait + val wait_time path submission_config path submission_output path submission_log - val annotation_name def test_flag = params.submission_prod_or_test == 'test' ? '--test' : '' script: diff --git a/subworkflows/local/submission.nf b/subworkflows/local/submission.nf index 8d515a74..ebab8ee5 100644 --- a/subworkflows/local/submission.nf +++ b/subworkflows/local/submission.nf @@ -23,48 +23,39 @@ workflow INITIAL_SUBMISSION { // submit the files to database of choice (after fixing config and getting wait time) if ( params.genbank && params.sra ){ // genbank and sra // submit the files to database of choice (after fixing config and getting wait time) - SUBMISSION_FULL ( submission_ch, submission_config, '' ) + SUBMISSION_FULL ( submission_ch, submission_config ) // actual process to initiate wait WAIT ( SUBMISSION_FULL.out.submission_files.collect(), wait_time ) // process for updating the submitted samples - UPDATE_SUBMISSION ( WAIT.out, submission_config, SUBMISSION_FULL.out.submission_files, SUBMISSION_FULL.out.submission_log, '' ) - - // combine the different upload_log csv files together - // MERGE_UPLOAD_LOG ( UPDATE_SUBMISSION.out.submission_files.collect(), '' ) + UPDATE_SUBMISSION ( WAIT.out, submission_config, SUBMISSION_FULL.out.submission_files, SUBMISSION_FULL.out.submission_log ) } if ( !params.genbank && params.sra ){ //only sra - SUBMISSION_SRA ( submission_ch, submission_config, '' ) + SUBMISSION_SRA ( submission_ch, submission_config ) // actual process to initiate wait WAIT ( SUBMISSION_SRA.out.submission_files.collect(), wait_time ) // process for updating the submitted samples - UPDATE_SUBMISSION ( WAIT.out, submission_config, SUBMISSION_SRA.out.submission_files, SUBMISSION_SRA.out.submission_log, '' ) - - // combine the different upload_log csv files together - // MERGE_UPLOAD_LOG ( UPDATE_SUBMISSION.out.submission_files.collect(), '' ) + UPDATE_SUBMISSION ( WAIT.out, submission_config, SUBMISSION_SRA.out.submission_files, SUBMISSION_SRA.out.submission_log ) } - if ( params.genbank && !params.sra ){ //only genbank, fastq_ch can be empty + if ( params.genbank && !params.sra ){ //only genbank // submit the files to database of choice (after fixing config and getting wait time) - SUBMISSION_GENBANK ( submission_ch, submission_config, '' ) + SUBMISSION_GENBANK ( submission_ch, submission_config ) // actual process to initiate wait WAIT ( SUBMISSION_GENBANK.out.submission_files.collect(), wait_time ) // process for updating the submitted samples - UPDATE_SUBMISSION ( WAIT.out, submission_config, SUBMISSION_GENBANK.out.submission_files, SUBMISSION_GENBANK.out.submission_log, '' ) - - // combine the different upload_log csv files together - // MERGE_UPLOAD_LOG ( UPDATE_SUBMISSION.out.submission_files.collect(), '' ) + UPDATE_SUBMISSION ( WAIT.out, submission_config, SUBMISSION_GENBANK.out.submission_files, SUBMISSION_GENBANK.out.submission_log ) } emit: submission_files = UPDATE_SUBMISSION.out.submission_files submission_log = UPDATE_SUBMISSION.out.submission_log - //ToDo add GISAID module + //to do: add GISAID module } diff --git a/workflows/tostadas.nf b/workflows/tostadas.nf index b6c16316..c281bfed 100644 --- a/workflows/tostadas.nf +++ b/workflows/tostadas.nf @@ -66,14 +66,14 @@ workflow TOSTADAS { // Generate the fasta and fastq paths reads_ch = - METADATA_VALIDATION.out.csv_Files + METADATA_VALIDATION.out.tsv_Files .flatten() - .splitCsv(header: true) + .splitCsv(header: true, sep: "\t") .map { row -> - meta = [id:row.sequence_name] fasta_path = row.fasta_path ? file(row.fasta_path) : null fastq1 = row.fastq_path_1 ? file(row.fastq_path_1) : null fastq2 = row.fastq_path_2 ? file(row.fastq_path_2) : null + meta = [id:row.sequence_name] [meta, fasta_path, fastq1, fastq2] } @@ -179,10 +179,10 @@ workflow TOSTADAS { // todo test update submission if ( params.update_submission ) { UPDATE_SUBMISSION ( + '', params.submission_config, INITIAL_SUBMISSION.out.submission_files, INITIAL_SUBMISSION.out.submission_log, - '' ) } // combine the different upload_log csv files together @@ -190,13 +190,12 @@ workflow TOSTADAS { MERGE_UPLOAD_LOG ( INITIAL_SUBMISSION.out.submission_files.collect(), INITIAL_SUBMISSION.out.submission_log.collect(), - '' ) + ) } else { MERGE_UPLOAD_LOG ( UPDATE_SUBMISSION.out.submission_files.collect(), UPDATE_SUBMISSION.out.submission_log.collect(), - '' ) }