diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..91114bf
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.Rproj.user/39CB7C5D/cpp-definition-cache b/.Rproj.user/39CB7C5D/cpp-definition-cache
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/cpp-definition-cache
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/pcs/files-pane.pper b/.Rproj.user/39CB7C5D/pcs/files-pane.pper
new file mode 100644
index 0000000..4f335ba
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/pcs/files-pane.pper
@@ -0,0 +1,9 @@
+{
+ "sortOrder": [
+ {
+ "columnIndex": 2,
+ "ascending": true
+ }
+ ],
+ "path": "~/Github/seqsender/docs/articles"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/pcs/source-pane.pper b/.Rproj.user/39CB7C5D/pcs/source-pane.pper
new file mode 100644
index 0000000..6a747e8
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/pcs/source-pane.pper
@@ -0,0 +1,4 @@
+{
+ "activeTab": 1,
+ "activeTabSourceWindow0": 2
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/pcs/windowlayoutstate.pper b/.Rproj.user/39CB7C5D/pcs/windowlayoutstate.pper
new file mode 100644
index 0000000..bc98681
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/pcs/windowlayoutstate.pper
@@ -0,0 +1,14 @@
+{
+ "left": {
+ "splitterpos": 315,
+ "topwindowstate": "NORMAL",
+ "panelheight": 892,
+ "windowheight": 966
+ },
+ "right": {
+ "splitterpos": 580,
+ "topwindowstate": "NORMAL",
+ "panelheight": 892,
+ "windowheight": 966
+ }
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/pcs/workbench-pane.pper b/.Rproj.user/39CB7C5D/pcs/workbench-pane.pper
new file mode 100644
index 0000000..75e70e9
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/pcs/workbench-pane.pper
@@ -0,0 +1,5 @@
+{
+ "TabSet1": 0,
+ "TabSet2": 0,
+ "TabZoom": {}
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/persistent-state b/.Rproj.user/39CB7C5D/persistent-state
new file mode 100644
index 0000000..8dfdf4d
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/persistent-state
@@ -0,0 +1,11 @@
+activeClientUrl="http://localhost:8787/"
+build-last-errors="[]"
+build-last-errors-base-dir=""
+build-last-outputs="[]"
+compile_pdf_state="{\"tab_visible\":false,\"running\":false,\"target_file\":\"\",\"output\":\"\",\"errors\":[]}"
+displayName="snu3"
+files.monitored-path=""
+find-in-files-state="{\"handle\":\"\",\"input\":\"\",\"path\":\"\",\"regex\":false,\"ignoreCase\":false,\"results\":{\"file\":[],\"line\":[],\"lineValue\":[],\"matchOn\":[],\"matchOff\":[],\"replaceMatchOn\":[],\"replaceMatchOff\":[]},\"running\":false,\"replace\":false,\"preview\":false,\"gitFlag\":false,\"replacePattern\":\"\"}"
+imageDirtyState="1"
+portToken="a60fe80fe2a7"
+saveActionState="-1"
diff --git a/.Rproj.user/39CB7C5D/saved_source_markers b/.Rproj.user/39CB7C5D/saved_source_markers
new file mode 100644
index 0000000..2b1bef1
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/saved_source_markers
@@ -0,0 +1 @@
+{"active_set":"","sets":[]}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/0476E19C b/.Rproj.user/39CB7C5D/sources/prop/0476E19C
new file mode 100644
index 0000000..52d4fa5
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/0476E19C
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "12,0",
+ "scrollLine": "0"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/11FFA47D b/.Rproj.user/39CB7C5D/sources/prop/11FFA47D
new file mode 100644
index 0000000..68492b8
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/11FFA47D
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "18,0",
+ "scrollLine": "0"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/133055F3 b/.Rproj.user/39CB7C5D/sources/prop/133055F3
new file mode 100644
index 0000000..3d549f2
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/133055F3
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "18,3",
+ "scrollLine": "2"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/145F7FB1 b/.Rproj.user/39CB7C5D/sources/prop/145F7FB1
new file mode 100644
index 0000000..b8bf8c6
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/145F7FB1
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "18,3",
+ "scrollLine": "0"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/23CF97DB b/.Rproj.user/39CB7C5D/sources/prop/23CF97DB
new file mode 100644
index 0000000..b384e82
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/23CF97DB
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "11,0",
+ "scrollLine": "4"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/31DDB8C3 b/.Rproj.user/39CB7C5D/sources/prop/31DDB8C3
new file mode 100644
index 0000000..b8bf8c6
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/31DDB8C3
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "18,3",
+ "scrollLine": "0"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/852CBDEC b/.Rproj.user/39CB7C5D/sources/prop/852CBDEC
new file mode 100644
index 0000000..79c8b4d
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/852CBDEC
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "133,7",
+ "scrollLine": "128"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/8D5998D2 b/.Rproj.user/39CB7C5D/sources/prop/8D5998D2
new file mode 100644
index 0000000..f495ea4
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/8D5998D2
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "41,0",
+ "scrollLine": "11"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/8D7E8C0C b/.Rproj.user/39CB7C5D/sources/prop/8D7E8C0C
new file mode 100644
index 0000000..b8bf8c6
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/8D7E8C0C
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "18,3",
+ "scrollLine": "0"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/96F7E2CE b/.Rproj.user/39CB7C5D/sources/prop/96F7E2CE
new file mode 100644
index 0000000..5a77caf
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/96F7E2CE
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "11,1",
+ "scrollLine": "0"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/A381AFDE b/.Rproj.user/39CB7C5D/sources/prop/A381AFDE
new file mode 100644
index 0000000..fba54c1
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/A381AFDE
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "135,7",
+ "scrollLine": "118"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/CA2A269A b/.Rproj.user/39CB7C5D/sources/prop/CA2A269A
new file mode 100644
index 0000000..bb27690
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/CA2A269A
@@ -0,0 +1,4 @@
+{
+ "source_window_id": "",
+ "Source": "Source"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/CB36CFA0 b/.Rproj.user/39CB7C5D/sources/prop/CB36CFA0
new file mode 100644
index 0000000..c697bc1
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/CB36CFA0
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "130,7",
+ "scrollLine": "117"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/ED898A07 b/.Rproj.user/39CB7C5D/sources/prop/ED898A07
new file mode 100644
index 0000000..965320d
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/ED898A07
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "159,7",
+ "scrollLine": "148"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/FB37969B b/.Rproj.user/39CB7C5D/sources/prop/FB37969B
new file mode 100644
index 0000000..360375b
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/FB37969B
@@ -0,0 +1,6 @@
+{
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "4,0",
+ "scrollLine": "0"
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/prop/INDEX b/.Rproj.user/39CB7C5D/sources/prop/INDEX
new file mode 100644
index 0000000..cd1b937
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/prop/INDEX
@@ -0,0 +1,21 @@
+~%2FGithub%2Fseqsender%2F.gitignore="11FFA47D"
+~%2FGithub%2Fseqsender%2FDESCRIPTION="8D5998D2"
+~%2FGithub%2Fseqsender%2FLICENSE="CA2A269A"
+~%2FGithub%2Fseqsender%2FNEWS.md="FB37969B"
+~%2FGithub%2Fseqsender%2FREADME.Rmd="707D8EA7"
+~%2FGithub%2Fseqsender%2F_pkgdown.yml="0476E19C"
+~%2FGithub%2Fseqsender%2Fconfig%2Fmain_config.yaml="67478E68"
+~%2FGithub%2Fseqsender%2Finst%2FCITATION="96F7E2CE"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fbiosample_submission.Rmd="133055F3"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fcompose_installation.Rmd="852CBDEC"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fdocker_installation.Rmd="CB36CFA0"
+~%2FGithub%2Fseqsender%2Fvignettes%2Ffaqs.Rmd="DE820693"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fgenbank_submission.Rmd="8D7E8C0C"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fgisaid_cov_submission.Rmd="145F7FB1"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fgisaid_flu_submission.Rmd="23CF97DB"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fgisaid_options.Rmd="ABEE9B5E"
+~%2FGithub%2Fseqsender%2Fvignettes%2Flocal_installation.Rmd="ED898A07"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fprerequisites.Rmd="3EA705BD"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fsingularity_installation.Rmd="A381AFDE"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fsra_options.Rmd="D3D73A1D"
+~%2FGithub%2Fseqsender%2Fvignettes%2Fsra_submission.Rmd="31DDB8C3"
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/042D06D7 b/.Rproj.user/39CB7C5D/sources/session-644ed55e/042D06D7
new file mode 100644
index 0000000..b96f528
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/042D06D7
@@ -0,0 +1,26 @@
+{
+ "id": "042D06D7",
+ "path": "~/Github/seqsender/NEWS.md",
+ "project_path": "NEWS.md",
+ "type": "markdown",
+ "hash": "427892802",
+ "contents": "",
+ "dirty": false,
+ "created": 1707162056710.0,
+ "source_on_save": false,
+ "relative_order": 12,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "4,0",
+ "scrollLine": "0"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707499282,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707499282031,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/042D06D7-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/042D06D7-contents
new file mode 100644
index 0000000..b96c15c
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/042D06D7-contents
@@ -0,0 +1,4 @@
+
+# seqsender 1.1.0
+* Github Repo: https://github.com/CDCgov/seqsender
+* Documentation: https://cdcgov.github.io/seqsender
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/0CBC917E b/.Rproj.user/39CB7C5D/sources/session-644ed55e/0CBC917E
new file mode 100644
index 0000000..e69a3b4
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/0CBC917E
@@ -0,0 +1,26 @@
+{
+ "id": "0CBC917E",
+ "path": "~/Github/seqsender/vignettes/compose_installation.Rmd",
+ "project_path": "vignettes/compose_installation.Rmd",
+ "type": "r_markdown",
+ "hash": "1875304587",
+ "contents": "",
+ "dirty": false,
+ "created": 1707157109242.0,
+ "source_on_save": false,
+ "relative_order": 9,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "133,7",
+ "scrollLine": "128"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707512280,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707512280458,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/0CBC917E-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/0CBC917E-contents
new file mode 100644
index 0000000..5a0a7c7
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/0CBC917E-contents
@@ -0,0 +1,535 @@
+---
+output: rmarkdown::html_document
+title: "How to run seqsender with Compose"
+vignette: >
+ %\VignetteIndexEntry{How to run seqsender with Compose}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# Load the R packages this setup chunk depends on
+library(yaml) # provides read_yaml(), used below to parse DESCRIPTION
+
+# Parse the package DESCRIPTION file (one directory above this vignette)
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Package name; interpolated into headings and commands via inline `r program`
+program <- description$Package
+
+# Repository URL from the URL field; used in the `git clone` command below
+github_repo <- description$URL
+
+# Documentation site base URL from the GITHUB_PAGES field; used to build links
+github_pages_url <- description$GITHUB_PAGES
+```
+
+
+
+**SOFTWARE REQUIREMENTS:**
+
+- Linux (64-bit) or Mac OS X (64-bit)
+- Git version 2.25.1 or later
+- Docker version 20.10.14 or later
+- Docker Compose version 2.21 or later
+- Standard utilities: curl, tar, unzip
+
+**ADDITIONAL REQUIREMENTS:**
+
+See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) and [REQUIREMENT FILES](`r github_pages_url`/index.html#requirement-files) before proceeding to the next steps
+
+### (1) Clone ``r program`` repo to your $HOME directory
+
+``` bash
+cd $HOME
+git clone `r github_repo`.git
+```
+
+### (2) Navigate to the ``r program`` folder where `docker-compose.yaml` is stored and edit that file to link the data inputs to run ``r program``
+
+``` bash
+cd `r program`
+```
+
+Here is a quick look at the `docker-compose.yaml` file:
+
+```yaml
+version: "3.9"
+
+x-data-volumes:
+ &data-volume
+ type: bind
+ source: $HOME
+ target: /data
+
+services:
+ seqsender:
+ container_name: seqsender
+ image: cdcgov/seqsender-dev:latest
+ restart: always
+ volumes:
+ - *data-volume
+ command: tail -f /dev/null
+```
+
+_**NOTE:** `source` is the storage location of your local machine. This location will be mapped to `/data` directory inside the container. Here we are mounting the local `$HOME` directory to `/data` inside the container._
+
+### (3) Start up the ``r program`` container
+
+```bash
+docker-compose up -d
+```
+
+**`-d`**: run the container in detached mode
+
+For more information about the docker-compose syntax, see the [docker-compose up reference](https://docs.docker.com/reference/cli/docker/compose/up/).
+
+
+### (4) Check if the container is running
+
+``` bash
+docker container ps
+
+
+CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+b37b6b19c4e8 `r program`:latest "/bin/bash" 5 hours ago Up 5 hours `r program`
+```
+
+### (5) See a list of commands in ``r program`` container
+
+``` bash
+docker exec -it `r program` bash `r program`-kickoff --help
+```
+
+**`-t`**: allocate a pseudo-tty; **`-i`**: keep STDIN open even if not attached;
+**`-h`**, **`--help`**: show help messages and exit
+
+``` bash
+usage: `r program`.py [-h]
+ {prep,submit,check_submission_status,template,version} ...
+
+Automate the process of batch uploading consensus sequences and metadata to
+databases of your choices
+
+positional arguments:
+ {prep,submit,check_submission_status,template,version}
+
+optional arguments:
+ -h, --help show this help message and exit
+```
+
+Rather than hastily jumping in and submitting a `production` submission right away, we can utilize GISAID's and NCBI's **“TEST-SERVER”** to upload a `test` submission first. That way, submitters can familiarize themselves with the submission process prior to making a real submission.
+
+**Note:** Duplicate test submissions will result in an error. Please create new sequence names each time you plan to run test submissions to avoid this issue.
+
+### Submit a `test` submission with a pre-processed dataset
+
+
+
+Here we will go over the steps of preparing and batch uploading meta- and sequence-data to GISAID and NCBI databases using a pre-processed dataset provided with the software.
+
+The `template` command will allow you to output examples of metadata and config files that you can base your submission on prior to uploading a real submission. To get more help on the command, run:
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff template --help
+```
+
+```bash
+usage: seqsender.py template [-h] [--biosample] [--sra] [--genbank] [--gisaid]
+ --organism {FLU,COV} --submission_dir
+ SUBMISSION_DIR --submission_name SUBMISSION_NAME
+
+Return a set of files (e.g., config file, metadata file, fasta files, etc.)
+that are needed to make a submission
+
+optional arguments:
+ -h, --help show this help message and exit
+ --biosample, -b Submit to BioSample. (default: )
+ --sra, -s Submit to SRA. (default: )
+ --genbank, -n Submit to Genbank. (default: )
+ --gisaid, -g Submit to GISAID. (default: )
+ --organism {FLU,COV} Type of organism data (default: FLU)
+ --submission_dir SUBMISSION_DIR
+ Directory to where all required files (such as
+ metadata, fasta, etc.) are stored (default: None)
+ --submission_name SUBMISSION_NAME
+ Name of the submission (default: None)
+```
+
+
+
+#### 1. Download the pre-processed meta- and sequence-data
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff template \
+--organism FLU \
+-bsng \
+--submission_dir /data \
+--submission_name flu-test-submission
+```
+
+- **`--organism`** specifies the type of data to download. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options. Additional datasets for other organisms will be provided in future updates or upon request.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to generate unified meta- and sequence-data in one file so we can perform a batch upload to all databases simultaneously.
+- **`--submission_dir`** is the directory where you store all of the submission histories (e.g. `/data` -> our `$HOME` directory).
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory that contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+
+A quick look at the output files:
+
+![](images/submission_dir.png)
+
+Here is the standard output of the command.
+
+```bash
+Generating submission template
+Files are stored at: /data/flu-test-submission
+
+Total runtime (HRS:MIN:SECS): 0:00:00.115140
+```
+
+#### 2. Set up the config file -- `config.yaml`
+
+After the template is downloaded in `(1)`, you can find `config.yaml` in your local `$HOME/flu-test-submission` directory. The `config.yaml` file provides a brief description of the submission and contains user credentials that allow ``r program`` to authenticate with the database prior to uploading a submission.
+
+Open that file with a text editor of your choice and fill in the appropriate information about your submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Conversely, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order in which the databases are submitted to. For instance, if GISAID is set as `1`, ``r program`` will submit to GISAID first, then after all samples are assigned a GISAID accession number, ``r program`` will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases after submission.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be mistaken for an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/articles/index.html#prerequisites) for more details.
+:::
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **ADDITIONAL REQUIREMENTS:**
+
+- If **SRA** is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called `raw_reads` inside your submission directory of choice.
+- If **GISAID** is in your list of submitting databases, download the CLI package that is associated with your organism of interest (e.g., **Influenza A Virus** (FLU) or **SARS-COV-2** (COV)) from the GISAID platform and store it in a subfolder called `gisaid_cli` inside your submission directory of choice.
+
+A quick look at where to store the downloaded **GISAID CLI** package:
+
+![](images/gisaid_cli_dir.png)
+
+_**Important:** Make sure your binary CLI package is executable. To grant executable permissions, run_
+```bash
+chmod a+x <path-to-your-gisaid-cli-binary>
+```
+:::
+
+
+
+#### 3. Upload a test submission
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff submit \
+--organism FLU \
+-bsng \
+--submission_dir /data \
+--submission_name flu-test-submission \
+--config_file config.yaml \
+--metadata_file metadata.csv \
+--fasta_file sequence.fasta \
+--test
+```
+
+- **`--organism`** specifies the type of data to upload. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to prep and submit to each given database. See `docker exec -it seqsender bash seqsender-kickoff submit --help` for more details.
+- **`--submission_dir`** is the directory where you store all of the submission histories (e.g. `/data` -> our `$HOME` directory).
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory that contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--config_file`** is the config file inside the `--submission_name` directory.
+- **`--metadata_file`** is the metadata file inside the `--submission_name` directory.
+- **`--fasta_file`** is the fasta file inside the `--submission_name` directory.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”**. For a `production` submission, please remove this flag.
+
+A quick look at the standard output.
+
+```bash
+Creating submission files for BIOSAMPLE
+Files are stored at: /data/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /data/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /data/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /data/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /data/flu-test-submission/submission_report_status.csv
+Log file is stored at: /data/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+```
+
+#### 4. Check the status of a submission
+
+After a submission is submitted, you can routinely check the status of the submission.
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff check_submission_status \
+--organism FLU \
+--submission_dir /data \
+--submission_name flu-test-submission \
+--test
+```
+
+- **`--organism`** specifies the type of data. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory that contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--test`** is used to check the status of a submission made to the **“TEST-SERVER ONLY”**. For a `production` submission, please remove this flag.
+
+Here is a quick look at the standard output:
+
+```bash
+Checking submission status for:
+
+Submission name: flu-test-submission
+Submission organism: FLU
+Submission type: Test
+
+Submission database: GISAID
+Submission status: processed-ok
+
+Submission database: BIOSAMPLE
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: SRA
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: GENBANK
+Submission status: ---
+
+Total runtime (HRS:MIN:SECS): 0:00:08.213955
+```
+
+Here is a list of submission statuses and their meanings:
+
+> 1. If at least one action has **Processed-error**, submission status is **Processed-error**
+> 2. Otherwise if at least one action has **Processing** state, the whole submission is **Processing**
+> 3. Otherwise, if at least one action has **Queued** state, the whole submission is **Queued**
+> 4. Otherwise, if at least one action has **Deleted** state, the whole submission is **Deleted**
+> 5. If all actions have **Processed-ok**, submission status is **Processed-ok**
+> 6. Otherwise submission status is **Submitted**
+
+
+
+
+
+Before you can perform a `test` submission with your own dataset, make sure you have the required files (such as **config.yaml**, **metadata.csv**, **sequence.fasta**, **raw reads**, etc.) already prepared and stored in the submission directory of your choice.
+
+
+
+#### 1. Assemble your meta- and sequence-data
+
+(a) To prep for FLU submissions, select one of the databases below for more details
+
+> BioSample
+> SRA
+> Genbank
+> GISAID
+> Multiple databases
+
+(b) To prep for COV submissions, select one of the databases below for more details
+
+> BioSample
+> SRA
+> Genbank
+> GISAID
+> Multiple databases
+
+After you have finished prepping for your databases of choice in `(a)` or `(b)`, create a submission folder and store all your metadata and sequence files there.
+
+Here is a quick look at the folder structure
+
+![](images/submission_dir.png)
+
+Finally, make sure additional requirements below are met before you can proceed to the next steps.
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+- If **SRA** is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called `raw_reads` inside your submission directory of choice.
+- If **GISAID** is in your list of submitting databases, download the CLI package that is associated with your organism of interest (e.g., **Influenza A Virus** (FLU) or **SARS-COV-2** (COV)) from the GISAID platform and store it in a subfolder called `gisaid_cli` inside your submission directory of choice.
+
+Here is an example of where to place the **GISAID CLI** package.
+
+![](images/gisaid_cli_dir.png)
+
+_**Important:** Make sure your binary CLI package is executable. To grant executable permissions, run_
+```bash
+chmod a+x <path-to-your-gisaid-cli-binary>
+```
+:::
+
+
+
+#### 2. Upload a test submission
+
+After all files in `(1)` are prepared, we can go ahead and upload the submission.
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff submit \
+--organism FLU \
+-bsng \
+--submission_dir /data \
+--submission_name flu-test-submission \
+--config_file config.yaml \
+--metadata_file metadata.csv \
+--fasta_file sequence.fasta \
+--test
+```
+
+- **`--organism`** specifies the type of data to upload. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to prep and submit to each given database. See `docker exec -it seqsender bash seqsender-kickoff submit --help` for more details.
+- **`--submission_dir`** is the directory where you store all of the submission histories (e.g. `/data` -> our `$HOME` directory).
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory that contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--config_file`** is the config file inside the `--submission_name` directory.
+- **`--metadata_file`** is the metadata file inside the `--submission_name` directory.
+- **`--fasta_file`** is the fasta file inside the `--submission_name` directory.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”**. For a `production` submission, please remove this flag.
+
+A quick look at the standard output.
+
+```bash
+Creating submission files for BIOSAMPLE
+Files are stored at: /data/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /data/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /data/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /data/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /data/flu-test-submission/submission_report_status.csv
+Log file is stored at: /data/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+```
+
+#### 3. Check the status of a submission
+
+After a submission is submitted, you can routinely check the status of the submission.
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff check_submission_status \
+--organism FLU \
+--submission_dir /data \
+--submission_name flu-test-submission \
+--test
+```
+
+- **`--organism`** specifies the type of data. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`--submission_dir`** is the directory where you store all of the submission histories (e.g. `/data` -> our `$HOME` directory).
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory that contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--test`** is used to check the status of a submission made to the **“TEST-SERVER ONLY”**. For a `production` submission, please remove this flag.
+
+Here is a quick look at the standard output:
+
+```bash
+Checking submission status for:
+
+Submission name: flu-test-submission
+Submission organism: FLU
+Submission type: Test
+
+Submission database: GISAID
+Submission status: processed-ok
+
+Submission database: BIOSAMPLE
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: SRA
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: GENBANK
+Submission status: ---
+
+Total runtime (HRS:MIN:SECS): 0:00:08.213955
+```
+
+Here is a list of submission statuses and their meanings:
+
+> 1. If at least one action has **Processed-error**, submission status is **Processed-error**
+> 2. Otherwise if at least one action has **Processing** state, the whole submission is **Processing**
+> 3. Otherwise, if at least one action has **Queued** state, the whole submission is **Queued**
+> 4. Otherwise, if at least one action has **Deleted** state, the whole submission is **Deleted**
+> 5. If all actions have **Processed-ok**, submission status is **Processed-ok**
+> 6. Otherwise submission status is **Submitted**
+
+
+
+
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
+
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/11A85592-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/11A85592-contents
new file mode 100644
index 0000000..e5c780e
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/11A85592-contents
@@ -0,0 +1,163 @@
+---
+title: "NCBI - SRA"
+output: rmarkdown::html_document
+vignette: >
+ %\VignetteIndexEntry{NCBI - SRA}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+
+
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# R libraries
+library(knitr) # for html table
+library(yaml) # for yaml file
+library(tidyverse) # for pipe
+library(reshape2) # for data manipulation
+
+# Read in the DESCRIPTION file
+# NOTE(review): relies on DESCRIPTION being parseable by the YAML reader -- confirm
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Define variables
+program <- description$Package
+title <- "SRA"
+prefix <- "sra-"
+prefix_examples <- c("sra-loader", "sra-platform")
+# portals[i] is the submission portal that hosts databases[i]
+portals <- c("NCBI", "NCBI", "NCBI", "GISAID", "GISAID")
+databases <- c("BIOSAMPLE", "SRA", "GENBANK", "FLU", "COV")
+organism <- c("Influenza A Virus", "SARS-COV-2")
+organism_abbrev <- c("FLU", "COV")
+
+# Define github repo
+github_repo <- description$URL
+
+# Define github pages URL
+github_pages_url <- description$GITHUB_PAGES
+
+# Create main config data frame, keeping only the row(s) for this vignette's database.
+# stringsAsFactors = FALSE keeps portals/databases as character on R < 4.0, so the
+# `[[portal]]` / `[[database]]` lookups below index by name, not by factor code.
+main_config_df <- data.frame(
+  portals = portals,
+  databases = databases,
+  stringsAsFactors = FALSE
+) %>%
+  dplyr::filter(
+    databases %in% toupper(!!title)
+  )
+
+# Read in data files
+main_config_file <- yaml::read_yaml("../config/main_config.yaml")
+
+# Store all required fields (common to every portal).
+# The characters * & ? # are stripped from the field names.
+metadata_df <- reshape2::melt(main_config_file$SUBMISSION_PORTAL$COMMON_FIELDS) %>%
+  dplyr::transmute(
+    Column_name = gsub("[*&?#]", "", L1),
+    Description = value
+  )
+
+# Combine all fields in given databases and portals.
+# seq_len() makes the loop a no-op when the filter above leaves no rows
+# (1:nrow() would iterate over c(1, 0) instead).
+for(d in seq_len(nrow(main_config_df))){
+  database <- main_config_df$databases[d]
+  # Row d already pairs the database with its portal
+  portal <- main_config_df$portals[d]
+  portal_config <- main_config_file$SUBMISSION_PORTAL$PORTAL_NAMES[[portal]]
+
+  # Portal-level common fields are optional in the config
+  if("COMMON_FIELDS" %in% names(portal_config)){
+    portal_fields <- reshape2::melt(portal_config$COMMON_FIELDS) %>%
+      dplyr::transmute(
+        Column_name = gsub("[*&?#]", "", L1),
+        Description = value
+      )
+
+    metadata_df <- metadata_df %>%
+      dplyr::bind_rows(portal_fields) %>%
+      dplyr::distinct(.keep_all = TRUE)
+
+  }
+
+  # Database-specific fields
+  database_fields <- reshape2::melt(portal_config$DATABASE[[database]]) %>%
+    dplyr::transmute(
+      Column_name = gsub("[*&?#]", "", L1),
+      Description = value
+    )
+
+  metadata_df <- metadata_df %>%
+    dplyr::bind_rows(database_fields) %>%
+    dplyr::distinct(.keep_all = TRUE)
+
+}
+```
+
+## Overview
+
+**Sequence Read Archive (SRA)** data, available through multiple cloud providers and NCBI servers, is the largest publicly available repository of high throughput sequencing data. The archive accepts data from all branches of life as well as metagenomic and environmental surveys. **SRA** stores raw sequencing data and alignment information to enhance reproducibility and facilitate new discoveries through data analysis.
+
+Before one can upload sequence read archives to **`r title`** database using ``r program``, they must ensure the requirement files (such as `config.yaml`, `metadata.csv`, `sequence.fasta`, `raw reads`, etc.) are prepared in advance and stored in a submission directory of choice.
+
+## Requirement files
+
+- [Config file](#config-file) in a `yaml` format
+- [Sequence read archives](#sequence-read-archives) in a `bam/sff/hdf5/fastq` format
+- [Metadata file](#metadata-file) in a `csv` format
+
+
+## Config file
+
+The config file is a YAML file that provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with the database prior to uploading a submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order of databases in which we will submit to first. For instance, if GISAID is set as `Primary`, **_`r program`_** will submit to GISAID first, then after all samples are assigned with a GISAID accession number, **_`r program`_** will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be confused with an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) for more details.
+:::
+
+## Sequence read archives
+
+Currently, NCBI accepts binary files such as BAM, SFF, and HDF5 formats and text formats such as FASTQ. See [SRA Submit Formats](https://www.ncbi.nlm.nih.gov/sra/docs/submitformats/) for more details.
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- Sequence read archive for all samples must be stored in a subfolder called `raw_reads` inside a submission directory of choice
+:::
+
+
+
+## Metadata file
+
+Here is a short description about the fields in the metadata worksheet.
+
+```{r include=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
+knitr::kable(metadata_df, format = "html", row.names = FALSE, escape = FALSE)
+```
+
+
+
+**NOTE:** The prefix of **“`r prefix`”** is used to identify attributes for **`r title`** submissions.
+
+To include additional attributes to **`r title`** submissions, just append ``r prefix`` in front of the desired attributes, e.g. ``r paste0( prefix_examples, collapse=", ")``, etc. See [SRA metadata section](https://www.ncbi.nlm.nih.gov/sra/docs/submitmeta/) for more details.
+
+
+
+### [* You are now ready to install ``r program`` and batch upload your submission*](`r github_pages_url`/articles/local_installation.html)
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/11FC1D0D b/.Rproj.user/39CB7C5D/sources/session-644ed55e/11FC1D0D
new file mode 100644
index 0000000..23b73b1
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/11FC1D0D
@@ -0,0 +1,26 @@
+{
+ "id": "11FC1D0D",
+ "path": "~/Github/seqsender/vignettes/docker_installation.Rmd",
+ "project_path": "vignettes/docker_installation.Rmd",
+ "type": "r_markdown",
+ "hash": "1058644308",
+ "contents": "",
+ "dirty": false,
+ "created": 1707157092942.0,
+ "source_on_save": false,
+ "relative_order": 8,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "130,7",
+ "scrollLine": "117"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707512280,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707512280138,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/11FC1D0D-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/11FC1D0D-contents
new file mode 100644
index 0000000..680c19d
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/11FC1D0D-contents
@@ -0,0 +1,533 @@
+---
+output: rmarkdown::html_document
+title: "How to run seqsender with Docker"
+vignette: >
+ %\VignetteIndexEntry{How to run seqsender with Docker}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# R libraries
+library(yaml) # for yaml file
+
+# Read in the DESCRIPTION file (located one directory above the working directory)
+# NOTE(review): relies on DESCRIPTION being parseable by the YAML reader -- confirm
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Define variables
+# Package name; inserted into the text and commands below via inline `r program`
+program <- description$Package
+
+# Define github repo (used by the `git clone` command below)
+github_repo <- description$URL
+
+# Define github pages URL (base of the documentation links below)
+github_pages_url <- description$GITHUB_PAGES
+```
+
+
+
+**SOFTWARE REQUIREMENTS:**
+
+- Linux (64-bit) or Mac OS X (64-bit)
+- Git version 2.25.1 or later
+- Docker version 20.10.14 or later
+- Standard utilities: curl, tar, unzip
+
+**ADDITIONAL REQUIREMENTS:**
+
+See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) and [REQUIREMENT FILES](`r github_pages_url`/index.html#requirement-files) before proceeding to the next steps
+
+### (1) Clone ``r program`` repo to your $HOME directory
+
+``` bash
+cd $HOME
+git clone `r github_repo`.git
+```
+
+### (2) Navigate to ``r program`` folder where `Dockerfile` file is stored and build its docker image
+
+``` bash
+cd `r program`
+docker build -t `r program`:latest .
+```
+
+**-t**: add a tag to an image, e.g. *`r program`:1.0.0* or
+*`r program`:latest*
+
+### (3) After the build is completed, you can check if the image is built successfully
+
+``` bash
+docker images
+
+REPOSITORY TAG IMAGE ID CREATED SIZE
+`r program` latest d9e2578d2211 2 weeks ago 581GB
+```
+
+### (4) Run ``r program`` container
+
+``` bash
+docker run \
+-v $HOME:/data \
+-t -d `r program`:latest \
+--name `r program`
+```
+
+**`-t`**: allocate a pseudo-tty
+**`-d`**: run the container in detached mode
+**`-v`**: mount data files from a host directory to a container directory **[host_dir]:[container_dir]**. By exposing the host directory to the docker container, docker will be able to access data files within that mounted directory and use them to fire up the ``r program`` workflows. **NOTE:** Here we are mounting the local `$HOME` directory to the `/data` directory inside the container.
+**`--name`**: give an identity to the container
+
+For more information about the Docker syntax, see
+Docker
+run reference
+
+To check if the container is built successfully
+
+``` bash
+docker container ps
+
+
+CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+b37b6b19c4e8 `r program`:latest "/bin/bash" 5 hours ago Up 5 hours `r program`
+```
+
+### (5) See a list of commands in ``r program`` container
+
+``` bash
+docker exec -it `r program` bash `r program`-kickoff --help
+```
+
+**`-t`**: allocate a pseudo-tty
+**`-i`**: keep STDIN open even if not attached
+**`-h`**, **`--help`**: show help messages and exit
+
+``` bash
+usage: `r program`.py [-h]
+ {prep,submit,check_submission_status,template,version} ...
+
+Automate the process of batch uploading consensus sequences and metadata to
+databases of your choices
+
+positional arguments:
+ {prep,submit,check_submission_status,template,version}
+
+optional arguments:
+ -h, --help show this help message and exit
+```
+
+Rather than hastily jumping in and making a `production` submission right away, we can use GISAID's and NCBI's **“TEST-SERVER”** to upload a `test` submission first. That way, submitters can familiarize themselves with the submission process prior to making a real submission.
+
+**Note:** Duplicate test submissions will result in an error. Please create new sequence names each time you plan to run test submissions to avoid this issue.
+
+### Submit a `test` submission with a pre-processed dataset
+
+
+
+Here we will go over the steps of preparing and batch uploading meta- and sequence-data to GISAID and NCBI databases using a pre-processed dataset provided with the software.
+
+The `template` command outputs example metadata and config files that you can use as a basis for your own submission before uploading a real one. To get more help on the command, run
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff template --help
+```
+
+```bash
+usage: seqsender.py template [-h] [--biosample] [--sra] [--genbank] [--gisaid]
+ --organism {FLU,COV} --submission_dir
+ SUBMISSION_DIR --submission_name SUBMISSION_NAME
+
+Return a set of files (e.g., config file, metadata file, fasta files, etc.)
+that are needed to make a submission
+
+optional arguments:
+ -h, --help show this help message and exit
+ --biosample, -b Submit to BioSample. (default: )
+ --sra, -s Submit to SRA. (default: )
+ --genbank, -n Submit to Genbank. (default: )
+ --gisaid, -g Submit to GISAID. (default: )
+ --organism {FLU,COV} Type of organism data (default: FLU)
+ --submission_dir SUBMISSION_DIR
+ Directory to where all required files (such as
+ metadata, fasta, etc.) are stored (default: None)
+ --submission_name SUBMISSION_NAME
+ Name of the submission (default: None)
+```
+
+
+
+#### 1. Download the pre-processed meta- and sequence-data
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff template \
+--organism FLU \
+-bsng \
+--submission_dir /data \
+--submission_name flu-test-submission
+```
+
+- **`--organism`** specifies the type of data to download. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options. Additional datasets for other organisms will be provided in future updates or requests.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to unify the meta- and sequence-data into one file so we can perform a batch upload to all databases simultaneously.
+- **`--submission_dir`** is the directory where you store all of the submission histories (e.g. `/data` -> our `$HOME` directory).
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+
+A quick look at the output files:
+
+![](images/submission_dir.png)
+
+Here is the standard output of the command.
+
+```bash
+Generating submission template
+Files are stored at: /home/snu3/flu-test-submission
+
+Total runtime (HRS:MIN:SECS): 0:00:00.115140
+```
+
+#### 2. Set up the config file -- `config.yaml`
+
+After the template is downloaded in `(1)`, you can find `config.yaml` in your local `$HOME/flu-test-submission` directory. The `config.yaml` file provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with the database prior to uploading a submission.
+
+Open that file with a text editor of your choice and fill in the appropriate information about your submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order of the database in which we will submit first. For instance, if GISAID is set as `1`, ``r program`` will submit to GISAID first, then after all samples are assigned with a GISAID accession number, ``r program`` will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases after submission.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be confused with an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) for more details.
+:::
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **ADDITIONAL REQUIREMENTS:**
+
+- If **SRA** is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called `raw_reads` inside your submission directory of choice.
+- If **GISAID** is in your list of submitting databases, download the CLI package that is associated with your organism of interest (e.g., **Influenza A Virus** (FLU) or **SARS-COV-2** (COV)) from the GISAID platform and store it in a subfolder called `gisaid_cli` inside your submission directory of choice.
+
+A quick look of where to store the downloaded **GISAID CLI** package,
+
+![](images/gisaid_cli_dir.png)
+
+_**Important:** Make sure your binary CLI package is executable. To grant execute permissions, run_
+```bash
+chmod a+x
+```
+:::
+
+
+
+#### 3. Upload a test submission
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff submit \
+--organism FLU \
+-bsng \
+--submission_dir /data \
+--submission_name flu-test-submission \
+--config_file config.yaml \
+--metadata_file metadata.csv \
+--fasta_file sequence.fasta \
+--test
+```
+
+- **`--organism`** specifies the type of data to upload. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to prep and submit to each given database. See `docker exec -it seqsender bash seqsender-kickoff submit --help` for more details.
+- **`--submission_dir`** is the directory where you store all of the submission histories (e.g. `/data` -> our `$HOME` directory).
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--config_file`** is the config file inside the `--submission_name` directory.
+- **`--metadata_file`** is the metadata file inside the `--submission_name` directory.
+- **`--fasta_file`** is the fasta file inside the `--submission_name` directory.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”** . For `production` submission, please remove this flag.
+
+A quick look at the standard output.
+
+```bash
+Creating submission files for BIOSAMPLE
+Files are stored at: /home/snu3/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /home/snu3/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /home/snu3/flu-test-submission/submission_report_status.csv
+Log file is stored at: /home/snu3/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+```
+
+#### 4. Check the status of a submission
+
+After a submission is submitted, you can routinely check the status of the submission.
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff check_submission_status \
+--organism FLU \
+--submission_dir /data \
+--submission_name flu-test-submission \
+--test
+```
+
+- **`--organism`** specifies the type of data. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”** . For `production` submission, please remove this flag.
+
+Here is a quick look at the standard output:
+
+```bash
+Checking submission status for:
+
+Submission name: flu-test-submission
+Submission organism: FLU
+Submission type: Test
+
+Submission database: GISAID
+Submission status: processed-ok
+
+Submission database: BIOSAMPLE
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: SRA
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: GENBANK
+Submission status: ---
+
+Total runtime (HRS:MIN:SECS): 0:00:08.213955
+```
+
+Here is a list of submission statuses and their meanings:
+
+> 1. If at least one action has **Processed-error**, submission status is **Processed-error**
+> 2. Otherwise if at least one action has **Processing** state, the whole submission is **Processing**
+> 3. Otherwise, if at least one action has **Queued** state, the whole submission is **Queued**
+> 4. Otherwise, if at least one action has **Deleted** state, the whole submission is **Deleted**
+> 5. If all actions have **Processed-ok**, submission status is **Processed-ok**
+> 6. Otherwise submission status is **Submitted**
+
+
+
+
+
+Before you can perform a `test` submission with your own dataset, make sure you have the required files (such as **config.yaml**, **metadata.csv**, **sequence.fasta**, **raw reads**, etc.) already prepared and stored in the submission directory of your choice.
+
+
+
+#### 1. Assemble your meta- and sequence-data
+
+(a) To prep for FLU submissions, select one of the databases below for more details
+
+> BioSample
+> SRA
+> Genbank
+> GISAID
+> Multiple databases
+
+(b) To prep for COV submissions, select one of the databases below for more details
+
+> BioSample
+> SRA
+> Genbank
+> GISAID
+> Multiple databases
+
+After you have finished prepping for your database of choices in `(a)` or `(b)`, create a submission folder and store all your metadata and sequence files there.
+
+Here is a quick look at the folder structure
+
+![](images/submission_dir.png)
+
+Finally, make sure additional requirements below are met before you can proceed to the next steps.
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+- If **SRA** is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called `raw_reads` inside your submission directory of choice.
+- If **GISAID** is in your list of submitting databases, download the CLI package that is associated with your organism of interest (e.g., **Influenza A Virus** (FLU) or **SARS-COV-2** (COV)) from the GISAID platform and store it in a subfolder called `gisaid_cli` inside your submission directory of choice.
+
+Here is an example of where to place the **GISAID CLI** package.
+
+![](images/gisaid_cli_dir.png)
+
+_**Important:** Make sure your binary CLI package is executable. To grant execute permissions, run_
+```bash
+chmod a+x
+```
+:::
+
+
+
+#### 2. Upload a test submission
+
+After all files in `(1)` are prepared, we can go ahead and upload the submission
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff submit \
+--organism FLU \
+-bsng \
+--submission_dir /data \
+--submission_name flu-test-submission \
+--config_file config.yaml \
+--metadata_file metadata.csv \
+--fasta_file sequence.fasta \
+--test
+```
+
+- **`--organism`** specifies the type of data to upload. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to prep and submit to each given database. See `docker exec -it seqsender bash seqsender-kickoff submit --help` for more details.
+- **`--submission_dir`** is the directory where you store all of the submission histories (e.g. `/data` -> our `$HOME` directory).
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--config_file`** is the config file inside the `--submission_name` directory.
+- **`--metadata_file`** is the metadata file inside the `--submission_name` directory.
+- **`--fasta_file`** is the fasta file inside the `--submission_name` directory.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”** . For `production` submission, please remove this flag.
+
+A quick look at the standard output.
+
+```bash
+Creating submission files for BIOSAMPLE
+Files are stored at: /home/snu3/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /home/snu3/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /home/snu3/flu-test-submission/submission_report_status.csv
+Log file is stored at: /home/snu3/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+```
+
+#### 3. Check the status of a submission
+
+After a submission is submitted, you can routinely check the status of the submission.
+
+```bash
+docker exec -it seqsender bash seqsender-kickoff check_submission_status \
+--organism FLU \
+--submission_dir /data \
+--submission_name flu-test-submission \
+--test
+```
+
+- **`--organism`** specifies the type of data. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`--submission_dir`** is the directory where you store all of the submission histories (e.g. `/data` -> our `$HOME` directory).
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”** . For `production` submission, please remove this flag.
+
+Here is a quick look at the standard output:
+
+```bash
+Checking submission status for:
+
+Submission name: flu-test-submission
+Submission organism: FLU
+Submission type: Test
+
+Submission database: GISAID
+Submission status: processed-ok
+
+Submission database: BIOSAMPLE
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: SRA
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: GENBANK
+Submission status: ---
+
+Total runtime (HRS:MIN:SECS): 0:00:08.213955
+```
+
+Here is a list of submission statuses and their meanings:
+
+> 1. If at least one action has **Processed-error**, submission status is **Processed-error**
+> 2. Otherwise if at least one action has **Processing** state, the whole submission is **Processing**
+> 3. Otherwise, if at least one action has **Queued** state, the whole submission is **Queued**
+> 4. Otherwise, if at least one action has **Deleted** state, the whole submission is **Deleted**
+> 5. If all actions have **Processed-ok**, submission status is **Processed-ok**
+> 6. Otherwise submission status is **Submitted**
+
+
+
+
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
+
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/23D64F45 b/.Rproj.user/39CB7C5D/sources/session-644ed55e/23D64F45
new file mode 100644
index 0000000..d15748c
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/23D64F45
@@ -0,0 +1,26 @@
+{
+ "id": "23D64F45",
+ "path": "~/Github/seqsender/vignettes/biosample_submission.Rmd",
+ "project_path": "vignettes/biosample_submission.Rmd",
+ "type": "r_markdown",
+ "hash": "1471836763",
+ "contents": "",
+ "dirty": false,
+ "created": 1707160700849.0,
+ "source_on_save": false,
+ "relative_order": 3,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "18,3",
+ "scrollLine": "2"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707516426,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707516426689,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/23D64F45-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/23D64F45-contents
new file mode 100644
index 0000000..3d5eac6
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/23D64F45-contents
@@ -0,0 +1,155 @@
+---
+title: "NCBI - BioSample"
+output: rmarkdown::html_document
+vignette: >
+ %\VignetteIndexEntry{NCBI - BioSample}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# R libraries
+library(knitr) # for html table
+library(yaml) # for yaml file
+library(tidyverse) # for pipe
+library(reshape2) # for data manipulation
+
+# Read in the DESCRIPTION file
+# NOTE(review): relies on DESCRIPTION being parseable by the YAML reader -- confirm
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Define variables
+program <- description$Package
+title <- "BioSample"
+prefix <- "bs-"
+prefix_examples <- c("bs-host_age", "bs-host_sex")
+# portals[i] is the submission portal that hosts databases[i]
+portals <- c("NCBI", "NCBI", "NCBI", "GISAID", "GISAID")
+databases <- c("BIOSAMPLE", "SRA", "GENBANK", "FLU", "COV")
+organism <- c("Influenza A Virus", "SARS-COV-2")
+organism_abbrev <- c("FLU", "COV")
+
+# Define github repo
+github_repo <- description$URL
+
+# Define github pages URL
+github_pages_url <- description$GITHUB_PAGES
+
+# Create main config data frame, keeping only the row(s) for this vignette's database.
+# stringsAsFactors = FALSE keeps portals/databases as character on R < 4.0, so the
+# `[[portal]]` / `[[database]]` lookups below index by name, not by factor code.
+main_config_df <- data.frame(
+  portals = portals,
+  databases = databases,
+  stringsAsFactors = FALSE
+) %>%
+  dplyr::filter(
+    databases %in% toupper(!!title)
+  )
+
+# Read in data files
+main_config_file <- yaml::read_yaml("../config/main_config.yaml")
+
+# Store all required fields (common to every portal).
+# The characters * & ? # are stripped from the field names.
+metadata_df <- reshape2::melt(main_config_file$SUBMISSION_PORTAL$COMMON_FIELDS) %>%
+  dplyr::transmute(
+    Column_name = gsub("[*&?#]", "", L1),
+    Description = value
+  )
+
+# Combine all fields in given databases and portals.
+# seq_len() makes the loop a no-op when the filter above leaves no rows
+# (1:nrow() would iterate over c(1, 0) instead).
+for(d in seq_len(nrow(main_config_df))){
+  database <- main_config_df$databases[d]
+  # Row d already pairs the database with its portal
+  portal <- main_config_df$portals[d]
+  portal_config <- main_config_file$SUBMISSION_PORTAL$PORTAL_NAMES[[portal]]
+
+  # Portal-level common fields are optional in the config
+  if("COMMON_FIELDS" %in% names(portal_config)){
+    portal_fields <- reshape2::melt(portal_config$COMMON_FIELDS) %>%
+      dplyr::transmute(
+        Column_name = gsub("[*&?#]", "", L1),
+        Description = value
+      )
+
+    metadata_df <- metadata_df %>%
+      dplyr::bind_rows(portal_fields) %>%
+      dplyr::distinct(.keep_all = TRUE)
+
+  }
+
+  # Database-specific fields
+  database_fields <- reshape2::melt(portal_config$DATABASE[[database]]) %>%
+    dplyr::transmute(
+      Column_name = gsub("[*&?#]", "", L1),
+      Description = value
+    )
+
+  metadata_df <- metadata_df %>%
+    dplyr::bind_rows(database_fields) %>%
+    dplyr::distinct(.keep_all = TRUE)
+
+}
+
+```
+
+## Overview
+
+**`r title`** is a database containing aggregated information pertaining to reference samples and samples stored in the [European Bioinformatics Institute](https://www.ebi.ac.uk/) assay databases.
+
+Before submitters can upload their experimental samples to the **`r title`** database using ``r program``, they must ensure the required files (such as `config.yaml`, `metadata.csv`, `sequence.fasta`, `raw reads`, etc.) are prepared ahead of time and stored in a submission folder of choice (e.g., `submission_name`) within a parent submission directory (e.g., `submission_dir`). That way, ``r program`` will be able to pick up the necessary files in that folder, generate the submission files, and then batch upload them to the databases of choice.
+
+## Requirement files
+
+- [Config file](#config-file) in a `yaml` format
+- [Metadata file](#metadata-file) in a `csv` format
+
+A quick look of where to store all of the requirement files
+
+![](images/submission_dir.png)
+
+### Config file
+
+The config file is a YAML file that provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with the database prior to uploading a submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order of databases in which we will submit to first. For instance, if GISAID is set as `1`, **_`r program`_** will submit to GISAID first, then after all samples are assigned with a GISAID accession number, **_`r program`_** will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be confused with an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) for more details.
+:::
+
+### Metadata file
+
+The metadata worksheet is a comma-delimited (csv) file that contains required attributes that are useful for the rapid analysis and trace back of **`r paste0(organism, collapse=" or ")`** cases.
+
+Here is a short description about the fields in the metadata worksheet.
+
+```{r include=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
+knitr::kable(metadata_df, format = "html", row.names = FALSE, escape = FALSE)
+```
+
+
+
+**NOTE:** The prefix of **“`r prefix`”** is used to identify attributes for **`r title`** submissions.
+
+To include additional attributes to **`r title`** submissions, just append ``r prefix`` in front of the desired attributes, e.g. ``r paste0(prefix_examples, collapse=", ")``, etc. See [Pathogen.cl.1.0](https://www.ncbi.nlm.nih.gov/biosample/docs/packages/Pathogen.cl.1.0/) package for more attributes.
+
+
+
+
+### [* You are now ready to install ``r program`` and batch upload your submission*](`r github_pages_url`/articles/local_installation.html)
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/26EC7DA2-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/26EC7DA2-contents
new file mode 100644
index 0000000..22e66a1
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/26EC7DA2-contents
@@ -0,0 +1,12 @@
+# Citation metadata for seqsender; bibentry() replaces the legacy citHeader()/citEntry() helpers
+bibentry(
+  bibtype = "Manual",
+  header = "To cite seqsender in publications use:",
+  title = "seqsender: Public Database Submission Pipeline",
+  author = "Dakota Howard, Reina Chau, Peter Cook, Kristine Lacek, Amanda Sullivan, Vikram Setlur, Thomas Stark, Brian Lee, Benjamin Rambo-Martin",
+  institution = "Centers for Disease Control and Prevention",
+  address = "1600 Clifton Road NE, Building 21, 8th Floor, Atlanta, Georgia 30333",
+  year = "2023",
+  textVersion = "Howard, D. et al. seqsender: Public Database Submission Pipeline."
+)
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/44A17F56-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/44A17F56-contents
new file mode 100644
index 0000000..2369973
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/44A17F56-contents
@@ -0,0 +1,161 @@
+---
+title: "NCBI - GenBank"
+output: rmarkdown::html_document
+vignette: >
+ %\VignetteIndexEntry{NCBI - GenBank}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# R libraries
+library(knitr) # for html table
+library(yaml) # for yaml file
+library(tidyverse) # for pipe
+library(reshape2) # for data manipulation
+
+# Read in the DESCRIPTION file
+# NOTE(review): DESCRIPTION is a DCF file that is parsed as YAML here; read.dcf() may be more robust - confirm
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Define variables (portals[i] is the portal that hosts databases[i])
+program <- description$Package
+title <- "GenBank"
+prefix <- "gb-"
+portals <- c("NCBI", "NCBI", "NCBI", "GISAID", "GISAID")
+databases <- c("BIOSAMPLE", "SRA", "GENBANK", "FLU", "COV")
+organism <- c("Influenza A Virus", "SARS-COV-2")
+organism_abbrev <- c("FLU", "COV")
+
+# Define github repo
+github_repo <- description$URL
+
+# Define github pages URL
+github_pages_url <- description$GITHUB_PAGES
+
+# Create main config data frame, keeping only the database documented on this page
+main_config_df <- data.frame(
+  portals = portals,
+  databases = databases,
+  stringsAsFactors = FALSE
+) %>%
+  dplyr::filter(
+    databases %in% toupper(!!title)
+  )
+
+# Read in data files
+main_config_file <- yaml::read_yaml("../config/main_config.yaml")
+
+# Convert a named list of fields (name -> description) into a two-column table,
+# removing the marker characters * & ? # from the field names
+fields_to_df <- function(fields){
+  reshape2::melt(fields) %>%
+    dplyr::transmute(
+      Column_name = gsub("[*&?#]", "", L1),
+      Description = value
+    )
+}
+
+# Store all required fields
+metadata_df <- fields_to_df(main_config_file$SUBMISSION_PORTAL$COMMON_FIELDS)
+
+# Combine all fields in given databases and portals.
+# seq_len() skips the loop when no database matched; 1:nrow() would iterate over 1 and 0.
+for(d in seq_len(nrow(main_config_df))){
+  # Row d holds a matching database/portal pair, so both are indexed by d
+  database <- main_config_df$databases[d]
+  portal <- main_config_df$portals[d]
+  portal_config <- main_config_file$SUBMISSION_PORTAL$PORTAL_NAMES[[portal]]
+
+  # Portal-level common fields are only added when the config defines them
+  if("COMMON_FIELDS" %in% names(portal_config)){
+    portal_fields <- fields_to_df(portal_config$COMMON_FIELDS)
+    metadata_df <- metadata_df %>%
+      dplyr::bind_rows(portal_fields) %>%
+      dplyr::distinct(.keep_all = TRUE)
+  }
+
+  # Fields specific to this database
+  database_fields <- fields_to_df(portal_config$DATABASE[[database]])
+  metadata_df <- metadata_df %>%
+    dplyr::bind_rows(database_fields) %>%
+    dplyr::distinct(.keep_all = TRUE)
+}
+```
+
+## Overview
+
+The **GenBank** sequence database is an open access, annotated collection of all publicly available nucleotide sequences and their protein translations. It is produced and maintained by the **National Center for Biotechnology Information** (NCBI; a part of the **National Institutes of Health** in the United States) as part of the **International Nucleotide Sequence Database Collaboration (INSDC)**.
+
+Before submitters can batch upload meta- and sequence-data to the **`r title`** database using ``r program``, they must ensure the requirement files (such as `config.yaml`, `metadata.csv`, `sequence.fasta`, `raw reads`, etc.) are prepared ahead of time and stored in a submission folder of choice (e.g., `submission_name`) within a parent submission directory (e.g., `submission_dir`). That way ``r program`` will be able to scoop up the necessary files in that folder, generate submission files, and then batch upload them to the database(s) of choice.
+
+## Requirement files
+
+- [Config file](#config-file) in a `yaml` format
+- [Fasta file](#fasta-file) in a `fasta` format
+- [Metadata file](#metadata-file) in a `csv` format
+
+A quick look of where to store all of the requirement files
+
+![](images/submission_dir.png)
+
+### Config file
+
+Config file is a yaml file that provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with the database prior to uploading a submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order in which the databases are submitted to. For instance, if GISAID is set as `1`, **_`r program`_** will submit to GISAID first, then after all samples are assigned a GISAID accession number, **_`r program`_** will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be confused with an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) for more details.
+:::
+
+### Fasta file
+
+Fasta file contains nucleotide sequences for all samples. See [Genbank Fasta Format](https://www.ncbi.nlm.nih.gov/genbank/fastaformat/) for more details.
+
+### Metadata file
+
+The metadata worksheet is a comma-delimited (csv) file that contains required attributes that are useful for the rapid analysis and trace back of **`r paste0(organism, collapse=" or ")`** cases.
+
+Here is a short description about the fields in the metadata worksheet.
+
+```{r include=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
+knitr::kable(metadata_df, format = "html", row.names = FALSE, escape = FALSE) # HTML table of the metadata fields; escape = FALSE leaves cell contents unescaped
+```
+
+
+
+**NOTE:** The prefix of **“`r prefix`”** is used to identify attributes for **`r title`** submissions. The prefix of **“src-”** is used to identify attributes for the **Source Information Table**. Likewise, the prefix of **“cmt-”** is used to identify attributes for the **Structured Comment Table**.
+
+To include additional attributes in the **Source Information Table**, just prepend `src-` to the desired attribute names, e.g. `src-subtype`, `src-passage`, etc. See [Genbank Source Table Modifier](https://www.ncbi.nlm.nih.gov/WebSub/html/help/genbank-source-table.html#modifiers) for more details.
+
+To include additional attributes in the **Structured Comment Table**, just prepend `cmt-` to the desired attribute names and, most importantly, make sure the fields are sandwiched between `cmt-StructuredCommentPrefix` and `cmt-StructuredCommentSuffix`. For example: `cmt-StructuredCommentPrefix`, `cmt-Assembly Method`, `cmt-Coverage`, `...`, `cmt-Sequencing Technology`, `cmt-StructuredCommentSuffix`. See [Genbank Structured Comment](https://www.ncbi.nlm.nih.gov/genbank/structuredcomment/#GenBank) for more details.
+
+
+
+
[* You are now ready to install ``r program`` and batch upload your submission*](`r github_pages_url`/articles/local_installation.html)
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/492378CA-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/492378CA-contents
new file mode 100644
index 0000000..f8a33cd
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/492378CA-contents
@@ -0,0 +1,194 @@
+---
+output: rmarkdown::html_document
+title: "GISAID - EpiCoV"
+vignette: >
+ %\VignetteIndexEntry{GISAID - EpiCoV}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# R libraries
+library(knitr) # for html table
+library(yaml) # for yaml file
+library(tidyverse) # for pipe
+library(reshape2) # for data manipulation
+
+# Read in the DESCRIPTION file
+# NOTE(review): DESCRIPTION is a DCF file that is parsed as YAML here; read.dcf() may be more robust - confirm
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Define variables (portals[i] is the portal that hosts databases[i])
+program <- description$Package
+title <- "EpiCoV"
+prefix <- "gs-"
+cli <- "covCLI"
+cli_list <- c("EpiFlu", "EpiCoV", "EpiRSV", "EpiArbo")
+portals <- c("NCBI", "NCBI", "NCBI", "GISAID", "GISAID")
+databases <- c("BIOSAMPLE", "SRA", "GENBANK", "FLU", "COV")
+organism <- c("SARS-COV-2")
+organism_abbrev <- c("COV")
+
+# Define github repo
+github_repo <- description$URL
+
+# Define github pages URL
+github_pages_url <- description$GITHUB_PAGES
+
+# Create main config data frame, keeping only the database(s) for this page's organism
+main_config_df <- data.frame(
+  portals = portals,
+  databases = databases,
+  stringsAsFactors = FALSE
+) %>%
+  dplyr::filter(
+    databases %in% toupper(!!organism_abbrev)
+  )
+
+# Read in data files
+main_config_file <- yaml::read_yaml("../config/main_config.yaml")
+
+# Convert a named list of fields (name -> description) into a two-column table without the * & ? # markers
+fields_to_df <- function(fields){
+  reshape2::melt(fields) %>%
+    dplyr::transmute(
+      Column_name = gsub("[*&?#]", "", L1),
+      Description = value
+    )
+}
+
+# Store all required fields
+metadata_df <- fields_to_df(main_config_file$SUBMISSION_PORTAL$COMMON_FIELDS)
+
+# Combine all fields in given databases and portals.
+# seq_len() skips the loop when no database matched; 1:nrow() would iterate over 1 and 0.
+for(d in seq_len(nrow(main_config_df))){
+  # Row d holds a matching database/portal pair, so both are indexed by d
+  database <- main_config_df$databases[d]
+  portal <- main_config_df$portals[d]
+  portal_config <- main_config_file$SUBMISSION_PORTAL$PORTAL_NAMES[[portal]]
+
+  # Portal-level common fields are only added when the config defines them
+  if("COMMON_FIELDS" %in% names(portal_config)){
+    portal_fields <- fields_to_df(portal_config$COMMON_FIELDS)
+    metadata_df <- metadata_df %>%
+      dplyr::bind_rows(portal_fields) %>%
+      dplyr::distinct(.keep_all = TRUE)
+  }
+
+  # Fields specific to this database
+  database_fields <- fields_to_df(portal_config$DATABASE[[database]])
+  metadata_df <- metadata_df %>%
+    dplyr::bind_rows(database_fields) %>%
+    dplyr::distinct(.keep_all = TRUE)
+}
+
+# Read in the table of optional attributes shown under "Optional Attributes"
+optional_attributes_df <- read.csv("./data/cov_metadata_optional_fields.csv", header=TRUE)
+
+```
+
+## Overview
+
+**GISAID**, short for the **Global Initiative on Sharing All Influenza Data**, is an organization that manages a restricted-access database containing genomic sequence data of select viruses, primarily influenza viruses. The database has expanded to include the coronavirus responsible for the COVID-19 pandemic as well as other pathogens.
+
+## Prerequisites
+
+For all GISAID submissions, ``r program`` makes use of GISAID's Command Line Interface Tools (CLIs) to batch upload meta- and sequence-data to their databases. Prior to performing a batch upload to the **`r title` database**, submitters must:
+
+1. Download the **`r paste(title, "CLI")`** package from the **GISAID Platform** that is compatible with their machine (e.g., Linux, macOS, or Windows).
+
+![](images/`r cli`_download.png)
+![](images/`r cli`_download_2.png)
+
+
+
+
+2. Unzip the downloaded package and store it in a subfolder called **`gisaid_cli`** within a submission directory of choice (e.g., `submission_dir`).
+
+![](images/gisaid_cli_dir.png)
+
+
+
+## Requirement files
+
+After submitters have obtained the **GISAID CLI** for **`r title`**, they must also prepare the requirement files (such as `config.yaml`, `metadata.csv`, `sequence.fasta`, `raw reads`, etc.) and store them in a submission folder of choice (e.g., `submission_name`) within a parent submission directory (e.g., `submission_dir`). That way ``r program`` will be able to scoop up the necessary files in that folder, generate submission files, and then batch upload them to the database(s) of choice.
+
+Here is a list of the requirement files and where to store them:
+
+- [Config file](#config-file) in a `yaml` format
+- [Fasta file](#fasta-file) in a `fasta` format
+- [Metadata file](#metadata-file) in a `csv` format
+
+
+![](images/submission_dir.png)
+
+### Config file
+
+Config file is a yaml file that provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with the database prior to uploading a submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order in which the databases are submitted to. For instance, if GISAID is set as `1`, **_`r program`_** will submit to GISAID first, then after all samples are assigned a GISAID accession number, **_`r program`_** will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be confused with an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) for more details.
+:::
+
+### Fasta file
+
+Fasta file contains nucleotide sequences for all samples. See [Genbank Fasta Format](https://www.ncbi.nlm.nih.gov/genbank/fastaformat/) for more details.
+
+![](images/`r cli`_fasta.png)
+
+### Metadata file
+
+The metadata worksheet is a comma-delimited (csv) file that contains required attributes that are useful for the rapid analysis and trace back of **`r paste0(organism, collapse=" or ")`** cases.
+
+Here is a short description about the fields in the metadata worksheet.
+
+```{r include=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
+knitr::kable(metadata_df, format = "html", row.names = FALSE, escape = FALSE) # HTML table of the metadata fields; escape = FALSE leaves cell contents unescaped
+```
+
+
+
+**NOTE:** The prefix of **“`r prefix`”** is used to identify attributes for **GISAID** submissions.
+
+
+
+#### Optional Attributes
+
+To include additional attributes in **`r title`** submissions, just prepend ``r prefix`` to the desired attribute names. Here is a list of optional attributes:
+
+```{r include=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
+knitr::kable(optional_attributes_df, format = "html", row.names = FALSE, escape = FALSE) # HTML table of the optional attributes; escape = FALSE leaves cell contents unescaped
+```
+
+
+
+
+
+
[* You are now ready to install ``r program`` and batch upload your submission*](`r github_pages_url`/articles/local_installation.html)
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/49C60C80 b/.Rproj.user/39CB7C5D/sources/session-644ed55e/49C60C80
new file mode 100644
index 0000000..7e038a1
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/49C60C80
@@ -0,0 +1,26 @@
+{
+ "id": "49C60C80",
+ "path": "~/Github/seqsender/vignettes/gisaid_flu_submission.Rmd",
+ "project_path": "vignettes/gisaid_flu_submission.Rmd",
+ "type": "r_markdown",
+ "hash": "0",
+ "contents": "",
+ "dirty": false,
+ "created": 1707160572458.0,
+ "source_on_save": false,
+ "relative_order": 2,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "11,0",
+ "scrollLine": "4"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707516365,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707516365012,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/49C60C80-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/49C60C80-contents
new file mode 100644
index 0000000..4e7cd51
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/49C60C80-contents
@@ -0,0 +1,189 @@
+---
+output: rmarkdown::html_document
+title: "GISAID - EpiFlu"
+vignette: >
+ %\VignetteIndexEntry{GISAID - EpiFlu}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# R libraries
+library(knitr) # for html table
+library(yaml) # for yaml file
+library(tidyverse) # for pipe
+library(reshape2) # for data manipulation
+
+# Read in the DESCRIPTION file
+# NOTE(review): DESCRIPTION is a DCF file that is parsed as YAML here; read.dcf() may be more robust - confirm
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Define variables (portals[i] is the portal that hosts databases[i])
+program <- description$Package
+title <- "EpiFlu"
+prefix <- "gs-"
+cli <- "fluCLI"
+cli_list <- c("EpiFlu", "EpiCoV", "EpiRSV", "EpiArbo")
+portals <- c("NCBI", "NCBI", "NCBI", "GISAID", "GISAID")
+databases <- c("BIOSAMPLE", "SRA", "GENBANK", "FLU", "COV")
+organism <- c("Influenza A Virus")
+organism_abbrev <- c("FLU")
+
+# Define github repo
+github_repo <- description$URL
+
+# Define github pages URL
+github_pages_url <- description$GITHUB_PAGES
+
+# Create main config data frame, keeping only the database(s) for this page's organism
+main_config_df <- data.frame(
+  portals = portals,
+  databases = databases,
+  stringsAsFactors = FALSE
+) %>%
+  dplyr::filter(
+    databases %in% toupper(!!organism_abbrev)
+  )
+
+# Read in data files
+main_config_file <- yaml::read_yaml("../config/main_config.yaml")
+
+# Convert a named list of fields (name -> description) into a two-column table without the * & ? # markers
+fields_to_df <- function(fields){
+  reshape2::melt(fields) %>%
+    dplyr::transmute(
+      Column_name = gsub("[*&?#]", "", L1),
+      Description = value
+    )
+}
+
+# Store all required fields
+metadata_df <- fields_to_df(main_config_file$SUBMISSION_PORTAL$COMMON_FIELDS)
+
+# Combine all fields in given databases and portals.
+# seq_len() skips the loop when no database matched; 1:nrow() would iterate over 1 and 0.
+for(d in seq_len(nrow(main_config_df))){
+  # Row d holds a matching database/portal pair, so both are indexed by d
+  database <- main_config_df$databases[d]
+  portal <- main_config_df$portals[d]
+  portal_config <- main_config_file$SUBMISSION_PORTAL$PORTAL_NAMES[[portal]]
+
+  # Portal-level common fields are only added when the config defines them
+  if("COMMON_FIELDS" %in% names(portal_config)){
+    portal_fields <- fields_to_df(portal_config$COMMON_FIELDS)
+    metadata_df <- metadata_df %>%
+      dplyr::bind_rows(portal_fields) %>%
+      dplyr::distinct(.keep_all = TRUE)
+  }
+
+  # Fields specific to this database
+  database_fields <- fields_to_df(portal_config$DATABASE[[database]])
+  metadata_df <- metadata_df %>%
+    dplyr::bind_rows(database_fields) %>%
+    dplyr::distinct(.keep_all = TRUE)
+}
+
+# Read in the table of optional attributes shown under "Optional Attributes"
+optional_attributes_df <- read.csv("./data/flu_metadata_optional_fields.csv", header=TRUE)
+
+```
+
+## Overview
+
+**GISAID**, short for the **Global Initiative on Sharing All Influenza Data**, is an organization that manages a restricted-access database containing genomic sequence data of select viruses, primarily influenza viruses. The database has expanded to include the coronavirus responsible for the COVID-19 pandemic as well as other pathogens.
+
+## Prerequisites
+
+For all GISAID submissions, ``r program`` makes use of GISAID's Command Line Interface Tools (CLIs) to batch upload meta- and sequence-data to their databases. Prior to performing a batch upload to the **`r title` database**, submitters must:
+
+1. Download the **`r paste(title, "CLI")`** package from the **GISAID Platform** that is compatible with their machine (e.g., Linux, macOS, or Windows).
+
+
+![](images/`r cli`_download.png)
+
+
+
+2. Unzip the downloaded package and store it in a subfolder called **`gisaid_cli`** within a submission directory of choice (e.g., `submission_dir`).
+
+![](images/gisaid_cli_dir.png)
+
+
+
+## Requirement files
+
+After submitters have obtained the **GISAID CLI** for **`r title`**, they must also prepare the requirement files (such as `config.yaml`, `metadata.csv`, `sequence.fasta`, `raw reads`, etc.) and store them in a submission folder of choice (e.g., `submission_name`) within a parent submission directory (e.g., `submission_dir`). That way ``r program`` will be able to scoop up the necessary files in that folder, generate submission files, and then batch upload them to the database(s) of choice.
+
+Here is a list of the requirement files and where to store them:
+
+- [Config file](#config-file) in a `yaml` format
+- [Fasta file](#fasta-file) in a `fasta` format
+- [Metadata file](#metadata-file) in a `csv` format
+
+![](images/submission_dir.png)
+
+### Config file
+
+Config file is a yaml file that provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with the database prior to uploading a submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order in which the databases are submitted to. For instance, if GISAID is set as `1`, **_`r program`_** will submit to GISAID first, then after all samples are assigned a GISAID accession number, **_`r program`_** will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be confused with an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) for more details.
+:::
+
+### Fasta file
+
+Fasta file contains nucleotide sequences for all samples. See [Genbank Fasta Format](https://www.ncbi.nlm.nih.gov/genbank/fastaformat/) for more details.
+
+![](images/`r cli`_fasta.png)
+
+### Metadata file
+
+The metadata worksheet is a comma-delimited (csv) file that contains required attributes that are useful for the rapid analysis and trace back of **`r paste0(organism, collapse=" or ")`** cases.
+
+Here is a short description about the fields in the metadata worksheet.
+
+```{r include=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
+knitr::kable(metadata_df, format = "html", row.names = FALSE, escape = FALSE) # HTML table of the metadata fields; escape = FALSE leaves cell contents unescaped
+```
+
+
+
+**NOTE:** The prefix of **“`r prefix`”** is used to identify attributes for **GISAID** submissions.
+
+
+
+#### Optional Attributes
+
+To include additional attributes in **`r title`** submissions, just prepend ``r prefix`` to the desired attribute names. Here is a list of optional attributes:
+
+```{r include=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
+knitr::kable(optional_attributes_df, format = "html", row.names = FALSE, escape = FALSE) # HTML table of the optional attributes; escape = FALSE leaves cell contents unescaped
+```
+
+
+
+
[* You are now ready to install ``r program`` and batch upload your submission*](`r github_pages_url`/articles/local_installation.html)
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/7944D2DE b/.Rproj.user/39CB7C5D/sources/session-644ed55e/7944D2DE
new file mode 100644
index 0000000..0ca3ee3
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/7944D2DE
@@ -0,0 +1,26 @@
+{
+ "id": "7944D2DE",
+ "path": "~/Github/seqsender/_pkgdown.yml",
+ "project_path": "_pkgdown.yml",
+ "type": "yaml",
+ "hash": "1818887783",
+ "contents": "",
+ "dirty": false,
+ "created": 1707156312052.0,
+ "source_on_save": false,
+ "relative_order": 1,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "12,0",
+ "scrollLine": "0"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707498977,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707498977652,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/7944D2DE-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/7944D2DE-contents
new file mode 100644
index 0000000..759910d
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/7944D2DE-contents
@@ -0,0 +1,52 @@
+url: https://github.com/CDCgov/seqsender # NOTE(review): pkgdown's url is normally the published site address; this is the repository URL - confirm
+
+template:
+ params:
+ bootswatch: cosmo # Bootswatch theme name
+
+news:
+ one_page: false
+
+navbar:
+ structure:
+ right: [news, github] # entries placed on the right side of the navbar
+ components:
+ left:
+ - icon: fa-home
+ href: index.html
+ - text: Getting Started # first drop-down menu: prerequisites and installation guides
+ menu:
+ - text: Prerequisites
+ href: articles/prerequisites.html
+ - text: Installation
+ - text: How to run seqsender locally
+ href: articles/local_installation.html
+ - text: How to run seqsender with Docker
+ href: articles/docker_installation.html
+ - text: How to run seqsender with Compose
+ href: articles/compose_installation.html
+ - text: How to run seqsender with Singularity
+ href: articles/singularity_installation.html
+ - text: Databases # second drop-down menu: one article per supported database
+ menu:
+ - text: NCBI
+ - text: BioSample
+ href: articles/biosample_submission.html
+ - text: SRA
+ href: articles/sra_submission.html
+ - text: GenBank
+ href: articles/genbank_submission.html
+ - text: GISAID
+ - text: EpiFlu
+ href: articles/gisaid_flu_submission.html
+ - text: EpiCoV
+ href: articles/gisaid_cov_submission.html
+ - text: Support # third drop-down menu
+ menu:
+ - text: FAQs
+ href: articles/faqs.html
+
+footer:
+ structure:
+ left: developed_by
+ right: built_with
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/8DC9B3D9 b/.Rproj.user/39CB7C5D/sources/session-644ed55e/8DC9B3D9
new file mode 100644
index 0000000..41e9d95
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/8DC9B3D9
@@ -0,0 +1,26 @@
+{
+ "id": "8DC9B3D9",
+ "path": "~/Github/seqsender/inst/CITATION",
+ "project_path": "inst/CITATION",
+ "type": "text",
+ "hash": "0",
+ "contents": "",
+ "dirty": false,
+ "created": 1707162478476.0,
+ "source_on_save": false,
+ "relative_order": 14,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "11,1",
+ "scrollLine": "0"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707499229,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707499229446,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/8DC9B3D9-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/8DC9B3D9-contents
new file mode 100644
index 0000000..fb434fe
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/8DC9B3D9-contents
@@ -0,0 +1,12 @@
+# Citation metadata for seqsender; bibentry() replaces the legacy citHeader()/citEntry() helpers.
+# The year is taken from the system date each time this file is evaluated.
+bibentry(
+  bibtype = "Manual",
+  header = "To cite seqsender in publications use:",
+  title = "seqsender: Public Database Submission Pipeline",
+  author = "Dakota Howard, Reina Chau, Peter Cook, Kristine Lacek, Amanda Sullivan, Vikram Setlur, Thomas Stark, Brian Lee, Benjamin Rambo-Martin",
+  institution = "Centers for Disease Control and Prevention",
+  address = "1600 Clifton Road NE, Building 21, 8th Floor, Atlanta, Georgia 30333",
+  year = format(Sys.Date(), "%Y"),
+  textVersion = "Howard, D. et al. seqsender: Public Database Submission Pipeline."
+)
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/9233AB25-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/9233AB25-contents
new file mode 100644
index 0000000..8dada3e
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/9233AB25-contents
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/B0A067F1 b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B0A067F1
new file mode 100644
index 0000000..b873f02
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B0A067F1
@@ -0,0 +1,26 @@
+{
+ "id": "B0A067F1",
+ "path": "~/Github/seqsender/DESCRIPTION",
+ "project_path": "DESCRIPTION",
+ "type": "dcf",
+ "hash": "2980913655",
+ "contents": "",
+ "dirty": false,
+ "created": 1707156178289.0,
+ "source_on_save": false,
+ "relative_order": 3,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "41,0",
+ "scrollLine": "11"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707497400,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707497400958,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/B0A067F1-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B0A067F1-contents
new file mode 100644
index 0000000..1a7ddf7
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B0A067F1-contents
@@ -0,0 +1,43 @@
+Package: seqsender
+Type: Package
+Title: Public Database Submission Pipeline
+Version: 1.1.0
+Authors@R:
+ c(
+ person(given="Dakota", family="Howard", role=c("aut","cre"),
+ email="psv4@cdc.gov", comment=c(ORCID="0000-0002-7674-0385")),
+ person(given="Reina", family="Chau", role=c("aut"),
+ email="snu3@cdc.gov", comment=c(ORCID="0000-0003-3012-1404")),
+ person(given="Peter", family="Cook", role=c("aut"),
+ email="ooj4@cdc.gov"),
+ person(given="Kristine", family="Lacek", role=c("aut"),
+ email="qgx6@cdc.gov", comment=c(ORCID="0000-0002-6247-5082")),
+ person(given="Amanda", family="Sullivan", role=c("aut"),
+ email="xpa3@cdc.gov"),
+ person(given="Vikram", family="Setlur", role=c("aut"),
+ email="xoe7@cdc.gov"),
+ person(given="Thomas", family="Stark", role=c("aut"),
+ email="ynh4@cdc.gov"),
+ person(given="Brian", family="Lee", role=c("aut"),
+ email="fya1@cdc.gov"),
+ person(given="Benjamin", family="Rambo-Martin", role=c("aut"),
+ email="nbx0@cdc.gov", comment=c(ORCID="0000-0002-8591-3954"))
+ )
+Description: seqsender is a Python program that is designed to automate the process of generating
+ necessary submission files (e.g. submission.xml, submission.zip, etc.)
+ and then bulk uploading them via FTP to NCBI archives such as Genbank, BioSample, and SRA.
+    Additionally, the program can batch upload submissions of meta- and sequence-data to GISAID
+ using their Command Line Interface Tools (e.g., EpiFlu and EpiCoV CLI).
+ Currently, the pipeline is capable of uploading Influenza A Virus and SARS-COV-2 data.
+RoxygenNote: 7.2.3
+License: Apache License (== 2.0) + file LICENSE
+URL: https://github.com/CDCgov/seqsender
+GITHUB_PAGES: https://cdcgov.github.io/seqsender
+Docker: cdcgov/seqsender-dev:latest
+Encoding: UTF-8
+VignetteBuilder: knitr
+BugReports: https://github.com/CDCgov/seqsender/issues
+
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/B451F5CC b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B451F5CC
new file mode 100644
index 0000000..ebabc70
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B451F5CC
@@ -0,0 +1,26 @@
+{
+ "id": "B451F5CC",
+ "path": "~/Github/seqsender/vignettes/local_installation.Rmd",
+ "project_path": "vignettes/local_installation.Rmd",
+ "type": "r_markdown",
+ "hash": "3358203343",
+ "contents": "",
+ "dirty": false,
+ "created": 1707156979196.0,
+ "source_on_save": false,
+ "relative_order": 6,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "159,7",
+ "scrollLine": "148"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707512277,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707512277383,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/B451F5CC-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B451F5CC-contents
new file mode 100644
index 0000000..e7f9e29
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B451F5CC-contents
@@ -0,0 +1,544 @@
+---
+output: rmarkdown::html_document
+title: "How to run seqsender locally"
+vignette: >
+ %\VignetteIndexEntry{How to run seqsender locally}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# R libraries
+library(yaml) # for yaml file
+
+# Read in the DESCRIPTION file (DCF format, so parse it with read.dcf rather than a YAML parser)
+description <- as.list(read.dcf("../DESCRIPTION")[1, ])
+
+# Define variables
+program <- description$Package
+
+# Define github repo
+github_repo <- description$URL
+
+# Define github pages URL
+github_pages_url <- description$GITHUB_PAGES
+```
+
+
+
+**SOFTWARE REQUIREMENTS:**
+
+- Linux (64-bit) or Mac OS X (64-bit)
+- Git version 2.25.1 or later
+- Standard utilities: curl, tar, unzip
+
+**ADDITIONAL REQUIREMENTS:**
+
+See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) and [REQUIREMENT FILES](`r github_pages_url`/index.html#requirement-files) before proceeding to the next steps
+
+## Micromamba Installation
+
+Here we recommend using **micromamba** to set up a virtual environment to run ``r program``. **Micromamba** is a tiny, statically linked C++ reimplementation of mamba which is an alternative to conda. The tool works as a standalone package manager that supports a subset of all mamba or conda commands, but it also has its own separate command line interfaces. For more information, visit [micromamba documentation](https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html).
+
+To manually install, download and unzip the executable from the official **conda-forge** package to your `$HOME` directory using `tar`.
+
+```bash
+cd $HOME
+```
+
+- LINUX
+
+```bash
+# Linux Intel (x86_64):
+curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
+# Linux ARM64:
+curl -Ls https://micro.mamba.pm/api/micromamba/linux-aarch64/latest | tar -xvj bin/micromamba
+# Linux Power:
+curl -Ls https://micro.mamba.pm/api/micromamba/linux-ppc64le/latest | tar -xvj bin/micromamba
+```
+
+- macOS
+
+```bash
+# macOS Intel (x86_64):
+curl -Ls https://micro.mamba.pm/api/micromamba/osx-64/latest | tar -xvj bin/micromamba
+# macOS Silicon/M1 (ARM64):
+curl -Ls https://micro.mamba.pm/api/micromamba/osx-arm64/latest | tar -xvj bin/micromamba
+```
+
+After the extraction is completed, you can find the executable at `$HOME/bin/micromamba`
+
+- To quickly use `micromamba`, you can simply run
+
+```bash
+export MAMBA_ROOT_PREFIX="$HOME/micromamba"
+eval "$($HOME/bin/micromamba shell hook -s posix)"
+```
+
+- To persist using `micromamba`, you can append the following script to your `.bashrc` (or `.zshrc`)
+
+```bash
+# >>> mamba initialize >>>
+export MAMBA_EXE="$HOME/bin/micromamba";
+export MAMBA_ROOT_PREFIX="$HOME/micromamba";
+__mamba_setup="$("$MAMBA_EXE" shell hook --shell bash --root-prefix "$MAMBA_ROOT_PREFIX" 2> /dev/null)"
+if [ $? -eq 0 ]; then
+ eval "$__mamba_setup"
+else
+ alias micromamba="$MAMBA_EXE" # Fallback on help from mamba activate
+fi
+unset __mamba_setup
+# <<< mamba initialize <<<
+```
+
+- To check the current version of `micromamba`
+
+```bash
+micromamba --version
+1.5.6
+```
+
+## Set up a `micromamba` environment
+
+1. Clone this repository to your `$HOME` directory
+
+```bash
+cd $HOME
+git clone `r github_repo`.git
+```
+
+2. `cd` into the **seqsender** folder, where the `env.yaml` file is stored, and create a virtual environment named **mamba** that contains all dependencies needed to run ``r program`` from source.
+
+```bash
+cd seqsender
+micromamba create --name mamba --file env.yaml
+```
+
+![](images/micromamba-env.png)
+
+3. Activate the named environment -- **mamba**
+
+```bash
+micromamba activate mamba
+```
+
+## Run ``r program`` within the `mamba` environment
+
+First, let's look at the list of commands in ``r program``. Currently, there are five implemented commands in ``r program``: `prep`, `submit`, `check_submission_status`, `template`, `version`.
+
+```bash
+python seqsender.py --help
+```
+
+![](images/seqsender.png)
+
+
+To see the arguments required for each command, for example, the `submit` command, run
+
+```bash
+python seqsender.py submit --help
+```
+
+![](images/seqsender-submit.png)
+
+
+## Submit a `test` submission
+
+Rather than hastily jumping in and submitting a `production` submission right away, we can utilize GISAID's and NCBI's **“TEST-SERVER”** to upload a `test` submission first. That way, submitters can familiarize themselves with the submission process prior to making a real submission.
+
+**Note:** Duplicate test submissions will result in an error. Please create new sequence names each time you plan to run test submissions to avoid this issue.
+
+### Submit a `test` submission with a pre-processed dataset
+
+
+
+Here we will go over the steps of preparing and batch uploading meta- and sequence-data to GISAID and NCBI databases using a pre-processed dataset provided with the software.
+
+The `template` command outputs example metadata and config files that you can base your own submission on before uploading a real submission. To get more help on the command, run
+
+```bash
+python seqsender.py template --help
+```
+
+![](images/seqsender-template.png)
+
+
+#### 1. Download the pre-processed meta- and sequence-data
+
+```bash
+python seqsender.py template \
+--organism FLU \
+-bsng \
+--submission_dir $HOME \
+--submission_name flu-test-submission
+```
+
+- **`--organism`** specifies the type of data to download. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options. Additional datasets for other organisms will be provided in future updates or requests.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to generate unified meta- and sequence-data in one file so we can perform a batch upload to all databases simultaneously.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+
+A quick look at the output files:
+
+![](images/submission_dir.png)
+
+Here is the standard out of the command.
+
+```bash
+Generating submission template
+Files are stored at: /home/snu3/flu-test-submission
+
+Total runtime (HRS:MIN:SECS): 0:00:00.115140
+```
+
+
+#### 2. Set up the config file -- `config.yaml`
+
+After the template is downloaded in `(1)`, you can find `config.yaml` in your local `$HOME/flu-test-submission` directory. The `config.yaml` file provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with each database prior to uploading a submission.
+
+Open that file with a text editor of your choice and fill in the appropriate information about your submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order in which the databases are submitted to. For instance, if GISAID is set as `1`, ``r program`` will submit to GISAID first; then, after all samples are assigned a GISAID accession number, ``r program`` will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases after submission.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be mistaken for an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/articles/index.html#prerequisites) for more details.
+:::
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **ADDITIONAL REQUIREMENTS:**
+
+- If **SRA** is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called `raw_reads` inside your submission directory of choice.
+- If **GISAID** is in your list of submitting databases, download the CLI package that is associated with your organism of interest (e.g., **Influenza A Virus** (FLU) or **SARS-COV-2** (COV)) from the GISAID platform and store it in a subfolder called `gisaid_cli` inside your submission directory of choice.
+
+A quick look of where to store the downloaded **GISAID CLI** package,
+
+![](images/gisaid_cli_dir.png)
+
+
+_**Important:** Make sure your binary CLI package is executable. To grant executable permissions, run_
+```bash
+chmod a+x <path/to/gisaid_cli_binary>
+```
+:::
+
+
+
+#### 3. Upload a test submission
+
+```bash
+python seqsender.py submit \
+--organism FLU \
+-bsng \
+--submission_dir $HOME \
+--submission_name flu-test-submission \
+--config_file config.yaml \
+--metadata_file metadata.csv \
+--fasta_file sequence.fasta \
+--test
+```
+
+- **`--organism`** specifies the type of data to upload. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to prep and submit to each given database. See `python seqsender.py submit --help` for more details.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--config_file`** is the config file inside the `--submission_name` directory.
+- **`--metadata_file`** is the metadata file inside the `--submission_name` directory.
+- **`--fasta_file`** is the fasta file inside the `--submission_name` directory.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”** . For `production` submission, please remove this flag.
+
+A quick look at the standard output.
+
+```bash
+Creating submission files for BIOSAMPLE
+Files are stored at: /home/snu3/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /home/snu3/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /home/snu3/flu-test-submission/submission_report_status.csv
+Log file is stored at: /home/snu3/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+```
+
+#### 4. Check the status of a submission
+
+After a submission is submitted, you can routinely check the status of the submission.
+
+```bash
+python seqsender.py check_submission_status \
+--organism FLU \
+--submission_dir $HOME \
+--submission_name flu-test-submission \
+--test
+```
+
+- **`--organism`** specifies the type of data. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”** . For `production` submission, please remove this flag.
+
+Here is a quick look at the standard output:
+
+```bash
+Checking submission status for:
+
+Submission name: flu-test-submission
+Submission organism: FLU
+Submission type: Test
+
+Submission database: GISAID
+Submission status: processed-ok
+
+Submission database: BIOSAMPLE
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: SRA
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: GENBANK
+Submission status: ---
+
+Total runtime (HRS:MIN:SECS): 0:00:08.213955
+```
+
+Here is a list of submission statuses and their meanings:
+
+> 1. If at least one action has **Processed-error**, submission status is **Processed-error**
+> 2. Otherwise if at least one action has **Processing** state, the whole submission is **Processing**
+> 3. Otherwise, if at least one action has **Queued** state, the whole submission is **Queued**
+> 4. Otherwise, if at least one action has **Deleted** state, the whole submission is **Deleted**
+> 5. If all actions have **Processed-ok**, submission status is **Processed-ok**
+> 6. Otherwise submission status is **Submitted**
+
+
+
+
+
+Before you can perform a `test` submission with your own dataset, make sure you have the required files (such as **config.yaml**, **metadata.csv**, **sequence.fasta**, **raw reads**, etc.) already prepared and stored in the submission directory of your choice.
+
+
+
+#### 1. Assemble your meta- and sequence-data
+
+(a) To prep for FLU submissions, select one of the databases below for more details
+
+> BioSample
+> SRA
+> Genbank
+> GISAID
+> Multiple databases
+
+(b) To prep for COV submissions, select one of the databases below for more details
+
+> BioSample
+> SRA
+> Genbank
+> GISAID
+> Multiple databases
+
+After you have finished prepping for your databases of choice in `(a)` or `(b)`, create a submission folder and store all your metadata and sequence files there.
+
+Here is a quick look at the folder structure
+
+![](images/submission_dir.png)
+
+Finally, make sure additional requirements below are met before you can proceed to the next steps.
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+- If **SRA** is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called `raw_reads` inside your submission directory of choice.
+- If **GISAID** is in your list of submitting databases, download the CLI package that is associated with your organism of interest (e.g., **Influenza A Virus** (FLU) or **SARS-COV-2** (COV)) from the GISAID platform and store it in a subfolder called `gisaid_cli` inside your submission directory of choice.
+
+Here is an example of where to place the **GISAID CLI** package.
+
+![](images/gisaid_cli_dir.png)
+
+
+_**Important:** Make sure your binary CLI package is executable. To grant executable permissions, run_
+
+```bash
+chmod a+x <path/to/gisaid_cli_binary>
+```
+:::
+
+
+
+#### 2. Upload a test submission
+
+After all files in `(1)` are prepared, we can go ahead and upload the submission
+
+```bash
+python seqsender.py submit \
+--organism FLU \
+-bsng \
+--submission_dir $HOME \
+--submission_name flu-test-submission \
+--config_file config.yaml \
+--metadata_file metadata.csv \
+--fasta_file sequence.fasta \
+--test
+```
+
+- **`--organism`** specifies the type of data to upload. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to prep and submit to each given database. See `python seqsender.py submit --help` for more details.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--config_file`** is the config file inside the `--submission_name` directory.
+- **`--metadata_file`** is the metadata file inside the `--submission_name` directory.
+- **`--fasta_file`** is the fasta file inside the `--submission_name` directory.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”** . For `production` submission, please remove this flag.
+
+A quick look at the standard output.
+
+```bash
+Creating submission files for BIOSAMPLE
+Files are stored at: /home/snu3/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /home/snu3/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /home/snu3/flu-test-submission/submission_report_status.csv
+Log file is stored at: /home/snu3/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+```
+
+#### 3. Check the status of a submission
+
+After a submission is submitted, you can routinely check the status of the submission.
+
+```bash
+python seqsender.py check_submission_status \
+--organism FLU \
+--submission_dir $HOME \
+--submission_name flu-test-submission \
+--test
+```
+
+- **`--organism`** specifies the type of data. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”** . For `production` submission, please remove this flag.
+
+Here is a quick look at the standard output:
+
+```bash
+Checking submission status for:
+
+Submission name: flu-test-submission
+Submission organism: FLU
+Submission type: Test
+
+Submission database: GISAID
+Submission status: processed-ok
+
+Submission database: BIOSAMPLE
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: SRA
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: GENBANK
+Submission status: ---
+
+Total runtime (HRS:MIN:SECS): 0:00:08.213955
+```
+
+Here is a list of submission statuses and their meanings:
+
+> 1. If at least one action has **Processed-error**, submission status is **Processed-error**
+> 2. Otherwise if at least one action has **Processing** state, the whole submission is **Processing**
+> 3. Otherwise, if at least one action has **Queued** state, the whole submission is **Queued**
+> 4. Otherwise, if at least one action has **Deleted** state, the whole submission is **Deleted**
+> 5. If all actions have **Processed-ok**, submission status is **Processed-ok**
+> 6. Otherwise submission status is **Submitted**
+
+
+
+
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
+
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/B5A31C01 b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B5A31C01
new file mode 100644
index 0000000..d4dc436
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B5A31C01
@@ -0,0 +1,26 @@
+{
+ "id": "B5A31C01",
+ "path": "~/Github/seqsender/.gitignore",
+ "project_path": ".gitignore",
+ "type": "gitignore",
+ "hash": "0",
+ "contents": "",
+ "dirty": false,
+ "created": 1707162178817.0,
+ "source_on_save": false,
+ "relative_order": 13,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "18,0",
+ "scrollLine": "0"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707163152,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707163152318,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/B5A31C01-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B5A31C01-contents
new file mode 100644
index 0000000..ebbfb40
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/B5A31C01-contents
@@ -0,0 +1,18 @@
+config_files/test_config_*yaml
+~$*
+__pycache__
+__pycache__/biosample_sra_submission.cpython-36.pyc
+__pycache__/genbank_submission.cpython-36.pyc
+__pycache__/gisaid_submission.cpython-36.pyc
+__pycache__/submission_preparation.cpython-36.pyc
+test_input/~$st Submission Instructions.docx
+submit.ready
+*report.xml
+test_input/test_metadata.tsv
+upload_log.csv
+*.vscode
+*.Rproj
+.Rproj.user
+.Rhistory
+.Rbuildignore
+docker-compose-*.yaml
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/BF48D78B b/.Rproj.user/39CB7C5D/sources/session-644ed55e/BF48D78B
new file mode 100644
index 0000000..cf9d6c9
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/BF48D78B
@@ -0,0 +1,26 @@
+{
+ "id": "BF48D78B",
+ "path": "~/Github/seqsender/vignettes/singularity_installation.Rmd",
+ "project_path": "vignettes/singularity_installation.Rmd",
+ "type": "r_markdown",
+ "hash": "562811111",
+ "contents": "",
+ "dirty": false,
+ "created": 1707157073615.0,
+ "source_on_save": false,
+ "relative_order": 7,
+ "properties": {
+ "source_window_id": "",
+ "Source": "Source",
+ "cursorPosition": "135,7",
+ "scrollLine": "118"
+ },
+ "folds": "",
+ "lastKnownWriteTime": 1707512279,
+ "encoding": "UTF-8",
+ "collab_server": "",
+ "source_window": "",
+ "last_content_update": 1707512279812,
+ "read_only": false,
+ "read_only_alternatives": []
+}
\ No newline at end of file
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/BF48D78B-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/BF48D78B-contents
new file mode 100644
index 0000000..760653c
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/BF48D78B-contents
@@ -0,0 +1,540 @@
+---
+output: rmarkdown::html_document
+title: "How to run seqsender with Singularity"
+vignette: >
+ %\VignetteIndexEntry{How to run seqsender with Singularity}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# Packages needed by this setup chunk
+library(yaml) # yaml::read_yaml() is used below to parse the DESCRIPTION file
+
+# Parse the package DESCRIPTION file (one directory up) with the YAML reader
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Program name shown via inline R code in the vignette text (Package field)
+program <- description$Package
+
+# Docker image reference, taken from the Docker field of DESCRIPTION
+docker_image <- description$Docker
+
+# GitHub repository, taken from the URL field of DESCRIPTION
+github_repo <- description$URL
+
+# Base URL of the GitHub Pages site (GITHUB_PAGES field); used to build links
+github_pages_url <- description$GITHUB_PAGES
+```
+
+
+
+**SOFTWARE REQUIREMENTS:**
+
+- Linux (64-bit) or Mac OS X (64-bit)
+- Git version 2.25.1 or later
+- Singularity version 3.8.7 or later
+- Standard utilities: curl, tar, unzip
+
+**ADDITIONAL REQUIREMENTS:**
+
+See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) and [REQUIREMENT FILES](`r github_pages_url`/index.html#requirement-files) before proceeding to the next steps
+
+
+### (1) Convert `r program` Docker image into a Singularity image
+
+There is a ``r program`` Docker image already built and stored on our DockerHub registry: **`r docker_image`**. You can directly pull the Docker Image down from the registry, convert it into a Singularity image, and store it in a destination of your choice.
+
+``` bash
+mkdir -p ~/singularity && singularity build ~/singularity/seqsender.sif docker://cdcgov/seqsender-dev:latest
+```
+
+### (2) After the Singularity image is built successfully, we can go ahead and use it to run ``r program``.
+
+Here is the command that shows the help messages of ``r program``
+
+``` bash
+mkdir -p ~/singularity
+singularity exec ~/singularity/seqsender.sif seqsender-kickoff --help
+```
+
+Below is the standard output of the command.
+
+``` bash
+usage: `r program`.py [-h]
+ {prep,submit,check_submission_status,template,version} ...
+
+Automate the process of batch uploading consensus sequences and metadata to
+databases of your choices
+
+positional arguments:
+ {prep,submit,check_submission_status,template,version}
+
+optional arguments:
+ -h, --help show this help message and exit
+```
+
+To see the arguments required for each command, for example, the `submit` command, run
+
+```bash
+singularity exec ~/singularity/seqsender.sif seqsender-kickoff submit --help
+```
+
+```bash
+usage: seqsender.py submit [-h] [--biosample] [--sra] [--genbank] [--gisaid]
+ --organism {FLU,COV} --submission_name
+ SUBMISSION_NAME --submission_dir SUBMISSION_DIR
+ --config_file CONFIG_FILE --metadata_file
+ METADATA_FILE --fasta_file FASTA_FILE [--table2asn]
+ [--gff_file GFF_FILE] [--test]
+
+Create submission files and then batch uploading them to databases of choices.
+
+optional arguments:
+ -h, --help show this help message and exit
+ --biosample, -b Submit to BioSample database. (default: )
+ --sra, -s Submit to SRA database. (default: )
+ --genbank, -n Submit to Genbank database. (default: )
+ --gisaid, -g Submit to GISAID database. (default: )
+ --organism {FLU,COV} Type of organism data (default: FLU)
+ --submission_name SUBMISSION_NAME
+ Name of the submission (default: None)
+ --submission_dir SUBMISSION_DIR
+ Directory to where all required files (such as
+ metadata, fasta, etc.) are stored (default: None)
+ --config_file CONFIG_FILE
+ Config file stored in submission directory (default:
+ None)
+ --metadata_file METADATA_FILE
+ Metadata file stored in submission directory (default:
+ None)
+ --fasta_file FASTA_FILE
+ Fasta file stored in submission directory (default:
+ None)
+ --table2asn Whether to prepare a Table2asn submission. (default:
+ False)
+ --gff_file GFF_FILE An annotation file to add to a Table2asn submission
+ (default: None)
+ --test Whether to perform a test submission. (default: False)
+```
+
+### (3) Submit a `test` submission
+
+Rather than hastily jumping in and making a `production` submission right away, we can use GISAID's and NCBI's **“TEST-SERVER”** to upload a `test` submission first. That way, submitters can familiarize themselves with the submission process before making a real submission.
+
+**Note:** Duplicate test submissions will result in an error. Please create new sequence names each time you plan to run test submissions to avoid this issue.
+
+### Submit a `test` submission with a pre-processed dataset
+
+
+
+Here we will go over the steps of preparing and batch uploading meta- and sequence-data to GISAID and NCBI databases using a pre-processed dataset provided with the software.
+
+The `template` command outputs example metadata and config files that you can use as a basis for your own submission before uploading a real one. To get more help on the command, run
+
+```bash
+singularity exec ~/singularity/seqsender.sif seqsender-kickoff template --help
+```
+
+```bash
+usage: seqsender.py template [-h] [--biosample] [--sra] [--genbank] [--gisaid]
+ --organism {FLU,COV} --submission_dir
+ SUBMISSION_DIR --submission_name SUBMISSION_NAME
+
+Return a set of files (e.g., config file, metadata file, fasta files, etc.)
+that are needed to make a submission
+
+optional arguments:
+ -h, --help show this help message and exit
+ --biosample, -b Submit to BioSample. (default: )
+ --sra, -s Submit to SRA. (default: )
+ --genbank, -n Submit to Genbank. (default: )
+ --gisaid, -g Submit to GISAID. (default: )
+ --organism {FLU,COV} Type of organism data (default: FLU)
+ --submission_dir SUBMISSION_DIR
+ Directory to where all required files (such as
+ metadata, fasta, etc.) are stored (default: None)
+ --submission_name SUBMISSION_NAME
+ Name of the submission (default: None)
+```
+
+
+
+#### 1. Download the pre-processed meta- and sequence-data
+
+```bash
+singularity exec ~/singularity/seqsender.sif seqsender-kickoff template \
+--organism FLU \
+-bsng \
+--submission_dir $HOME \
+--submission_name flu-test-submission
+```
+
+- **`--organism`** specifies the type of data to download. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options. Additional datasets for other organisms will be provided in future updates or requests.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to combine the meta- and sequence-data into one unified file so we can perform a batch upload to all databases simultaneously.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+
+A quick look at the output files:
+
+![](images/submission_dir.png)
+
+Here is the standard output of the command.
+
+```bash
+Generating submission template
+Files are stored at: /home/snu3/flu-test-submission
+
+Total runtime (HRS:MIN:SECS): 0:00:00.115140
+```
+
+#### 2. Set up the config file -- `config.yaml`
+
+After the template is downloaded in `(1)`, you can find `config.yaml` in your local `$HOME/flu-test-submission` directory. The `config.yaml` file provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with the database before uploading a submission.
+
+Open that file with a text editor of your choice and fill in the appropriate information about your submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order in which the databases are submitted to. For instance, if GISAID is set as `1`, ``r program`` will submit to GISAID first; then, after all samples have been assigned a GISAID accession number, ``r program`` will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases after submission.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be confused with an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) for more details.
+:::
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **ADDITIONAL REQUIREMENTS:**
+
+- If **SRA** is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called `raw_reads` inside your submission directory of choice.
+- If **GISAID** is in your list of submitting databases, download the CLI package that is associated with your organism of interest (e.g., **Influenza A Virus** (FLU) or **SARS-COV-2** (COV)) from the GISAID platform and store it in a subfolder called `gisaid_cli` inside your submission directory of choice.
+
+A quick look of where to store the downloaded **GISAID CLI** package,
+
+![](images/gisaid_cli_dir.png)
+
+_**Important:** Make sure your binary CLI package is executable. To grant executable permissions, run_
+```bash
+chmod a+x gisaid_cli/<name-of-CLI-binary>
+```
+:::
+
+
+
+#### 3. Upload a test submission
+
+```bash
+singularity exec ~/singularity/seqsender.sif seqsender-kickoff submit \
+--organism FLU \
+-bsng \
+--submission_dir $HOME \
+--submission_name flu-test-submission \
+--config_file config.yaml \
+--metadata_file metadata.csv \
+--fasta_file sequence.fasta \
+--test
+```
+
+- **`--organism`** specifies the type of data to upload. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to prep and submit to each given database. See `singularity exec ~/singularity/seqsender.sif seqsender-kickoff submit --help` for more details.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--config_file`** is the config file inside the `--submission_name` directory.
+- **`--metadata_file`** is the metadata file inside the `--submission_name` directory.
+- **`--fasta_file`** is the fasta file inside the `--submission_name` directory.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”**. For a `production` submission, please remove this flag.
+
+A quick look at the standard output.
+
+```bash
+Creating submission files for BIOSAMPLE
+Files are stored at: /home/snu3/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /home/snu3/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /home/snu3/flu-test-submission/submission_report_status.csv
+Log file is stored at: /home/snu3/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+```
+
+#### 4. Check the status of a submission
+
+After a submission is submitted, you can routinely check the status of the submission.
+
+```bash
+singularity exec ~/singularity/seqsender.sif seqsender-kickoff check_submission_status \
+--organism FLU \
+--submission_dir $HOME \
+--submission_name flu-test-submission \
+--test
+```
+
+- **`--organism`** specifies the type of data. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--test`** indicates that the submission was made to the **“TEST-SERVER ONLY”**. For a `production` submission, please remove this flag.
+
+Here is a quick look at the standard output:
+
+```bash
+Checking submission status for:
+
+Submission name: flu-test-submission
+Submission organism: FLU
+Submission type: Test
+
+Submission database: GISAID
+Submission status: processed-ok
+
+Submission database: BIOSAMPLE
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: SRA
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: GENBANK
+Submission status: ---
+
+Total runtime (HRS:MIN:SECS): 0:00:08.213955
+```
+
+Here is how the overall submission status is derived from the states of its individual actions:
+
+> 1. If at least one action has **Processed-error**, submission status is **Processed-error**
+> 2. Otherwise if at least one action has **Processing** state, the whole submission is **Processing**
+> 3. Otherwise, if at least one action has **Queued** state, the whole submission is **Queued**
+> 4. Otherwise, if at least one action has **Deleted** state, the whole submission is **Deleted**
+> 5. If all actions have **Processed-ok**, submission status is **Processed-ok**
+> 6. Otherwise submission status is **Submitted**
+
+
+
+
+
+Before you can perform a `test` submission with your own dataset, make sure you have the required files (such as **config.yaml**, **metadata.csv**, **sequence.fasta**, **raw reads**, etc.) already prepared and stored in the submission directory of your choice.
+
+
+
+#### 1. Assemble your meta- and sequence-data
+
+(a) To prep for FLU submissions, select one of the databases below for more details
+
+> BioSample
+> SRA
+> Genbank
+> GISAID
+> Multiple databases
+
+(b) To prep for COV submissions, select one of the databases below for more details
+
+> BioSample
+> SRA
+> Genbank
+> GISAID
+> Multiple databases
+
+After you have finished prepping for your database of choices in `(a)` or `(b)`, create a submission folder and store all your metadata and sequence files there.
+
+Here is a quick look at the folder structure
+
+![](images/submission_dir.png)
+
+Finally, make sure additional requirements below are met before you can proceed to the next steps.
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+- If **SRA** is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called `raw_reads` inside your submission directory of choice.
+- If **GISAID** is in your list of submitting databases, download the CLI package that is associated with your organism of interest (e.g., **Influenza A Virus** (FLU) or **SARS-COV-2** (COV)) from the GISAID platform and store it in a subfolder called `gisaid_cli` inside your submission directory of choice.
+
+Here is an example of where to place the **GISAID CLI** package.
+
+![](images/gisaid_cli_dir.png)
+
+_**Important:** Make sure your binary CLI package is executable. To grant executable permissions, run_
+```bash
+chmod a+x gisaid_cli/<name-of-CLI-binary>
+```
+:::
+
+
+
+#### 2. Upload a test submission
+
+After all files in `(1)` are prepared, we can go ahead and upload the submission
+
+```bash
+singularity exec ~/singularity/seqsender.sif seqsender-kickoff submit \
+--organism FLU \
+-bsng \
+--submission_dir $HOME \
+--submission_name flu-test-submission \
+--config_file config.yaml \
+--metadata_file metadata.csv \
+--fasta_file sequence.fasta \
+--test
+```
+
+- **`--organism`** specifies the type of data to upload. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`-bsng`** is a combination flag of databases: **Biosample** *(`-b` or `--biosample`)*, **SRA** *(`-s` or `--sra`)*, **Genbank** *(`-n` or `--genbank`)*, and **GISAID** *(`-g` or `--gisaid`)*. This combination flag tells ``r program`` to prep and submit to each given database. See `singularity exec ~/singularity/seqsender.sif seqsender-kickoff submit --help` for more details.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--config_file`** is the config file inside the `--submission_name` directory.
+- **`--metadata_file`** is the metadata file inside the `--submission_name` directory.
+- **`--fasta_file`** is the fasta file inside the `--submission_name` directory.
+- **`--test`** is used to submit to **“TEST-SERVER ONLY”**. For a `production` submission, please remove this flag.
+
+A quick look at the standard output.
+
+```bash
+Creating submission files for BIOSAMPLE
+Files are stored at: /home/snu3/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /home/snu3/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /home/snu3/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting 'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /home/snu3/flu-test-submission/submission_report_status.csv
+Log file is stored at: /home/snu3/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+```
+
+
+#### 3. Check the status of a submission
+
+After a submission is submitted, you can routinely check the status of the submission.
+
+```bash
+singularity exec ~/singularity/seqsender.sif seqsender-kickoff check_submission_status \
+--organism FLU \
+--submission_dir $HOME \
+--submission_name flu-test-submission \
+--test
+```
+
+- **`--organism`** specifies the type of data. Currently, **Influenza A Virus** (FLU) and **SARS-COV-2** (COV) are the only two options.
+- **`--submission_dir`** is the directory where you store all of the submission histories.
+- **`--submission_name`** is the submission folder inside the `--submission_dir` directory where it contains all necessary files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) in order to make a submission.
+- **`--test`** indicates that the submission was made to the **“TEST-SERVER ONLY”**. For a `production` submission, please remove this flag.
+
+Here is a quick look at the standard output:
+
+```bash
+Checking submission status for:
+
+Submission name: flu-test-submission
+Submission organism: FLU
+Submission type: Test
+
+Submission database: GISAID
+Submission status: processed-ok
+
+Submission database: BIOSAMPLE
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: SRA
+Pulling down report.xml
+Submission status: submitted
+
+Submission database: GENBANK
+Submission status: ---
+
+Total runtime (HRS:MIN:SECS): 0:00:08.213955
+```
+
+Here is how the overall submission status is derived from the states of its individual actions:
+
+> 1. If at least one action has **Processed-error**, submission status is **Processed-error**
+> 2. Otherwise if at least one action has **Processing** state, the whole submission is **Processing**
+> 3. Otherwise, if at least one action has **Queued** state, the whole submission is **Queued**
+> 4. Otherwise, if at least one action has **Deleted** state, the whole submission is **Deleted**
+> 5. If all actions have **Processed-ok**, submission status is **Processed-ok**
+> 6. Otherwise submission status is **Submitted**
+
+
+
+
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
+
+
+
+
diff --git a/.Rproj.user/39CB7C5D/sources/session-644ed55e/C69D906D-contents b/.Rproj.user/39CB7C5D/sources/session-644ed55e/C69D906D-contents
new file mode 100644
index 0000000..3858421
--- /dev/null
+++ b/.Rproj.user/39CB7C5D/sources/session-644ed55e/C69D906D-contents
@@ -0,0 +1,168 @@
+---
+title: "NCBI - SRA"
+output: rmarkdown::html_document
+vignette: >
+ %\VignetteIndexEntry{NCBI - SRA}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
+
+
+
+```{r, include=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
+# Packages used by this vignette
+library(knitr) # knitr::kable() renders the metadata table as HTML
+library(yaml) # yaml::read_yaml() parses DESCRIPTION and main_config.yaml
+library(tidyverse) # provides the %>% pipe (dplyr verbs are called with dplyr::)
+library(reshape2) # reshape2::melt() flattens the nested config lists
+
+# Parse the package DESCRIPTION file (one directory up) with the YAML reader
+description <- yaml::read_yaml("../DESCRIPTION")
+
+# Vignette-specific constants; `title` selects which database this page documents
+program <- description$Package
+title <- "SRA"
+prefix <- "sra-"
+prefix_examples <- c("sra-loader", "sra-platform")
+portals <- c("NCBI", "NCBI", "NCBI", "GISAID", "GISAID") # paired by position with `databases`
+databases <- c("BIOSAMPLE", "SRA", "GENBANK", "FLU", "COV")
+organism <- c("Influenza A Virus", "SARS-COV-2")
+organism_abbrev <- c("FLU", "COV")
+
+# GitHub repository, taken from the URL field of DESCRIPTION
+github_repo <- description$URL
+
+# Base URL of the GitHub Pages site (GITHUB_PAGES field); used to build links
+github_pages_url <- description$GITHUB_PAGES
+
+# Pair each database with its portal, then keep only the row(s) whose database equals toupper(title)
+main_config_df <- data.frame(
+ portals = portals,
+ databases = databases
+) %>%
+dplyr::filter(
+ databases %in% toupper(!!title)
+)
+
+# Load the main submission config (YAML) that lists the metadata fields
+main_config_file <- yaml::read_yaml("../config/main_config.yaml")
+
+# Start the metadata table from SUBMISSION_PORTAL$COMMON_FIELDS: melt() the list, strip the characters * & ? # from the names in L1, and keep each value as the description
+metadata_df <- reshape2::melt(main_config_file$SUBMISSION_PORTAL$COMMON_FIELDS) %>%
+ dplyr::transmute(
+ Column_name = gsub("[*&?#]", "", L1),
+ Description = value
+ )
+
+# Append portal-level and database-level fields to metadata_df for every row of main_config_df
+for(d in seq_len(nrow(main_config_df))){
+  # seq_len() yields an empty sequence for a zero-row data frame, whereas 1:nrow() would iterate over c(1, 0)
+  database <- main_config_df$databases[d]
+  portal <- main_config_df$portals[which(main_config_df$databases %in% database)]
+
+  if("COMMON_FIELDS" %in% names(main_config_file$SUBMISSION_PORTAL$PORTAL_NAMES[[portal]])){
+    portal_fields <- reshape2::melt(main_config_file$SUBMISSION_PORTAL$PORTAL_NAMES[[portal]]$COMMON_FIELDS) %>%
+      dplyr::transmute(
+        Column_name = gsub("[*&?#]", "", L1),
+        Description = value
+      )
+
+    metadata_df <- metadata_df %>%
+      dplyr::bind_rows(portal_fields) %>%
+      dplyr::distinct(.keep_all = TRUE)
+
+  }
+
+  database_fields <- reshape2::melt(main_config_file$SUBMISSION_PORTAL$PORTAL_NAMES[[portal]]$DATABASE[[database]]) %>%
+    dplyr::transmute(
+      Column_name = gsub("[*&?#]", "", L1),
+      Description = value
+    )
+
+  metadata_df <- metadata_df %>%
+    dplyr::bind_rows(database_fields) %>%
+    dplyr::distinct(.keep_all = TRUE)
+
+}
+```
+
+## Overview
+
+**Sequence Read Archive (SRA)** data, available through multiple cloud providers and NCBI servers, is the largest publicly available repository of high throughput sequencing data. The archive accepts data from all branches of life as well as metagenomic and environmental surveys. **SRA** stores raw sequencing data and alignment information to enhance reproducibility and facilitate new discoveries through data analysis.
+
+Before submitters can upload sequence read archives to the **`r title`** database using ``r program``, they must ensure the requirement files (such as `config.yaml`, `metadata.csv`, `sequence.fasta`, `raw reads`, etc.) are prepared ahead of time and stored in a submission folder of choice (e.g., `submission_name`) within a parent submission directory (e.g., `submission_dir`). That way ``r program`` will be able to pick up the necessary files in that folder, generate the submission files, and then batch upload them to the submitting database(s) of choice.
+
+## Requirement files
+
+- [Config file](#config-file) in a `yaml` format
+- [Sequence read archives](#sequence-read-archives) in a `bam/sff/hdf5/fastq` format
+- [Metadata file](#metadata-file) in a `csv` format
+
+A quick look of where to store all of the requirement files
+
+![](images/submission_dir.png)
+
+### Config file
+
+The config file is a yaml file that provides a brief description of the submission and contains the user credentials that allow ``r program`` to authenticate with the database before uploading a submission.
+
+![](images/config_file.png)
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- To submit to NCBI only, one can remove the **GISAID Submission (b)** section from the config file. Vice versa, to submit to GISAID only, just remove the **NCBI Submission (a)** section.
+- **Submission_Position** determines the order in which the databases are submitted to. For instance, if GISAID is set as `1`, **_`r program`_** will submit to GISAID first; then, after all samples have been assigned a GISAID accession number, **_`r program`_** will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases.
+- **Username** and **Password** under the **NCBI Submission (a)** section are the credentials used to authenticate with the **NCBI FTP Server** (not to be confused with an individual NCBI account). See [PRE-REQUISITES](`r github_pages_url`/index.html#prerequisites) for more details.
+:::
+
+### Sequence read archives
+
+Currently, NCBI accepts binary files such as BAM, SFF, and HDF5 formats and text formats such as FASTQ. See [SRA Submit Formats](https://www.ncbi.nlm.nih.gov/sra/docs/submitformats/) for more details.
+
+:::{style="padding: 10px; border: 1px solid blue !important;"}
+ **NOTE:**
+
+- Sequence read archive for all samples must be stored in a subfolder called `raw_reads` inside a submission folder of choice
+:::
+
+
+
+### Metadata file
+
+The metadata worksheet is a comma-delimited (csv) file that contains required attributes that are useful for the rapid analysis and trace back of **`r paste0(organism, collapse=" or ")`** cases.
+
+Here is a short description about the fields in the metadata worksheet.
+
+```{r include=TRUE, echo=FALSE, message=FALSE, warning=FALSE}
+knitr::kable(metadata_df, format = "html", row.names = FALSE, escape = FALSE)
+```
+
+
+
+**NOTE:** The prefix **“`r prefix`”** is used to identify attributes for **`r title`** submissions
+
+To include additional attributes to **`r title`** submissions, just append ``r prefix`` in front of the desired attributes, e.g. ``r paste0( prefix_examples, collapse=", ")``, etc. See [SRA metadata section](https://www.ncbi.nlm.nih.gov/sra/docs/submitmeta/) for more details.
+
+
+
+
+[*You are now ready to install ``r program`` and batch upload your submission*](`r github_pages_url`/articles/local_installation.html)
+
+
+
+Any questions or issues? Please report them on our Github issue tracker.
+
+
+
diff --git a/.Rproj.user/shared/notebooks/24F5CC26-gisaid_flu_submission/1/39CB7C5D644ed55e/chunks.json b/.Rproj.user/shared/notebooks/24F5CC26-gisaid_flu_submission/1/39CB7C5D644ed55e/chunks.json
new file mode 100644
index 0000000..08ff90e
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/24F5CC26-gisaid_flu_submission/1/39CB7C5D644ed55e/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707160579}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/24F5CC26-gisaid_flu_submission/1/s/chunks.json b/.Rproj.user/shared/notebooks/24F5CC26-gisaid_flu_submission/1/s/chunks.json
new file mode 100644
index 0000000..08ff90e
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/24F5CC26-gisaid_flu_submission/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707160579}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/289371A6-genbank_submission/1/s/chunks.json b/.Rproj.user/shared/notebooks/289371A6-genbank_submission/1/s/chunks.json
new file mode 100644
index 0000000..38b3f11
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/289371A6-genbank_submission/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707160691}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/5ED2DC74-local_installation/1/39CB7C5D644ed55e/chunks.json b/.Rproj.user/shared/notebooks/5ED2DC74-local_installation/1/39CB7C5D644ed55e/chunks.json
new file mode 100644
index 0000000..b8dafdd
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/5ED2DC74-local_installation/1/39CB7C5D644ed55e/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707157055}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/5ED2DC74-local_installation/1/s/chunks.json b/.Rproj.user/shared/notebooks/5ED2DC74-local_installation/1/s/chunks.json
new file mode 100644
index 0000000..b8dafdd
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/5ED2DC74-local_installation/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707157055}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/67CC0C7B-docker_installation/1/39CB7C5D644ed55e/chunks.json b/.Rproj.user/shared/notebooks/67CC0C7B-docker_installation/1/39CB7C5D644ed55e/chunks.json
new file mode 100644
index 0000000..a3db2e2
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/67CC0C7B-docker_installation/1/39CB7C5D644ed55e/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707157101}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/67CC0C7B-docker_installation/1/s/chunks.json b/.Rproj.user/shared/notebooks/67CC0C7B-docker_installation/1/s/chunks.json
new file mode 100644
index 0000000..a3db2e2
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/67CC0C7B-docker_installation/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707157101}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/917137EC-compose_installation/1/39CB7C5D644ed55e/chunks.json b/.Rproj.user/shared/notebooks/917137EC-compose_installation/1/39CB7C5D644ed55e/chunks.json
new file mode 100644
index 0000000..d1a4f71
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/917137EC-compose_installation/1/39CB7C5D644ed55e/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707157120}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/917137EC-compose_installation/1/s/chunks.json b/.Rproj.user/shared/notebooks/917137EC-compose_installation/1/s/chunks.json
new file mode 100644
index 0000000..d1a4f71
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/917137EC-compose_installation/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707157120}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/9FCF16B4-singularity_installation/1/39CB7C5D644ed55e/chunks.json b/.Rproj.user/shared/notebooks/9FCF16B4-singularity_installation/1/39CB7C5D644ed55e/chunks.json
new file mode 100644
index 0000000..6d07b4a
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/9FCF16B4-singularity_installation/1/39CB7C5D644ed55e/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707157083}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/9FCF16B4-singularity_installation/1/s/chunks.json b/.Rproj.user/shared/notebooks/9FCF16B4-singularity_installation/1/s/chunks.json
new file mode 100644
index 0000000..6d07b4a
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/9FCF16B4-singularity_installation/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707157083}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/D111BEFE-biosample_submission/1/39CB7C5D644ed55e/chunks.json b/.Rproj.user/shared/notebooks/D111BEFE-biosample_submission/1/39CB7C5D644ed55e/chunks.json
new file mode 100644
index 0000000..85a5415
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/D111BEFE-biosample_submission/1/39CB7C5D644ed55e/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707160709}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/D111BEFE-biosample_submission/1/s/chunks.json b/.Rproj.user/shared/notebooks/D111BEFE-biosample_submission/1/s/chunks.json
new file mode 100644
index 0000000..85a5415
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/D111BEFE-biosample_submission/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707160709}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/D5A86BB8-gisaid_cov_submission/1/s/chunks.json b/.Rproj.user/shared/notebooks/D5A86BB8-gisaid_cov_submission/1/s/chunks.json
new file mode 100644
index 0000000..34b3a00
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/D5A86BB8-gisaid_cov_submission/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707160672}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/EAFD2BFE-sra_submission/1/s/chunks.json b/.Rproj.user/shared/notebooks/EAFD2BFE-sra_submission/1/s/chunks.json
new file mode 100644
index 0000000..4990000
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/EAFD2BFE-sra_submission/1/s/chunks.json
@@ -0,0 +1 @@
+{"chunk_definitions":[],"doc_write_time":1707160381}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/patch-chunk-names b/.Rproj.user/shared/notebooks/patch-chunk-names
new file mode 100644
index 0000000..e69de29
diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths
new file mode 100644
index 0000000..4b9eedf
--- /dev/null
+++ b/.Rproj.user/shared/notebooks/paths
@@ -0,0 +1,16 @@
+/home/snu3/Github/seqsender/LICENSE="68DAA5BB"
+/home/snu3/Github/seqsender/README.Rmd="45B22F6D"
+/home/snu3/Github/seqsender/inst/CITATION="94DDA62B"
+/home/snu3/Github/seqsender/vignettes/biosample_submission.Rmd="D111BEFE"
+/home/snu3/Github/seqsender/vignettes/compose_installation.Rmd="917137EC"
+/home/snu3/Github/seqsender/vignettes/docker_installation.Rmd="67CC0C7B"
+/home/snu3/Github/seqsender/vignettes/faqs.Rmd="57260CA3"
+/home/snu3/Github/seqsender/vignettes/genbank_submission.Rmd="289371A6"
+/home/snu3/Github/seqsender/vignettes/gisaid_cov_submission.Rmd="D5A86BB8"
+/home/snu3/Github/seqsender/vignettes/gisaid_flu_submission.Rmd="24F5CC26"
+/home/snu3/Github/seqsender/vignettes/gisaid_options.Rmd="EDA2E52D"
+/home/snu3/Github/seqsender/vignettes/local_installation.Rmd="5ED2DC74"
+/home/snu3/Github/seqsender/vignettes/prerequisites.Rmd="A849AD38"
+/home/snu3/Github/seqsender/vignettes/singularity_installation.Rmd="9FCF16B4"
+/home/snu3/Github/seqsender/vignettes/sra_options.Rmd="25888850"
+/home/snu3/Github/seqsender/vignettes/sra_submission.Rmd="EAFD2BFE"
diff --git a/.gitignore b/.gitignore
index f29e64d..ebbfb40 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,9 @@ submit.ready
*report.xml
test_input/test_metadata.tsv
upload_log.csv
+*.vscode
+*.Rproj
+.Rproj.user
+.Rhistory
+.Rbuildignore
+docker-compose-*.yaml
diff --git a/DESCRIPTION b/DESCRIPTION
index 6a2064f..1a7ddf7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: seqsender
Type: Package
Title: Public Database Submission Pipeline
-Version: 1.0.0
+Version: 1.1.0
Authors@R:
c(
person(given="Dakota", family="Howard", role=c("aut","cre"),
@@ -20,17 +20,17 @@ Authors@R:
email="ynh4@cdc.gov"),
person(given="Brian", family="Lee", role=c("aut"),
email="fya1@cdc.gov"),
- person(given="Ben", family="Rambo-Martin", role=c("aut"),
+ person(given="Benjamin", family="Rambo-Martin", role=c("aut"),
email="nbx0@cdc.gov", comment=c(ORCID="0000-0002-8591-3954"))
)
Description: seqsender is a Python program that is designed to automate the process of generating
- necessary submission files (e.g. submission.xml, submission.zip, etc.)
- and then bulk uploading them via FTP to NCBI archives such as Genbank, BioSample, and SRA.
- Additionally, the program can batch uploading submissions of meta- and sequence-data to GISAID
- using their Command Line Interface Tools (e.g., EpiFlu and EpiCoV CLI).
- Currently, the pipeline is capable of uploading Influenza A Virus and SARS-COV-2 data.
+ necessary submission files (e.g. submission.xml, submission.zip, etc.)
+ and then bulk uploading them via FTP to NCBI archives such as Genbank, BioSample, and SRA.
+ Additionally, the program can batch uploading submissions of meta- and sequence-data to GISAID
+ using their Command Line Interface Tools (e.g., EpiFlu and EpiCoV CLI).
+ Currently, the pipeline is capable of uploading Influenza A Virus and SARS-COV-2 data.
RoxygenNote: 7.2.3
-License: GPL-3 + file LICENSE
+License: Apache License (== 2.0) + file LICENSE
URL: https://github.com/CDCgov/seqsender
GITHUB_PAGES: https://cdcgov.github.io/seqsender
Docker: cdcgov/seqsender-dev:latest
diff --git a/NEWS.md b/NEWS.md
index 59f1eeb..b96c15c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-
-# MIRA 1.0.0
-* Github Repo: https://github.com/CDCgov/MIRA
-* Documentation: https://cdcgov.github.io/MIRA
+
+# seqsender 1.1.0
+* Github Repo: https://github.com/CDCgov/seqsender
+* Documentation: https://cdcgov.github.io/seqsender
diff --git a/README.Rmd b/README.Rmd
index d3047b9..5de7a98 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -68,7 +68,7 @@ Here is a quick look of where to store the downloaded **GISAID CLI** package.
## Requirement Files
-Before submitter can perform a submission using ``r program``, make sure the requirement files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) are already prepared and stored in a submission directory of choice.
+Before submitters can perform a batch submission using ``r program``, they must make sure the requirement files (such as *config.yaml*, *metadata.csv*, *sequence.fasta*, *raw reads*, etc.) are already prepared and stored in a submission directory of choice.
(a) To prep for FLU submissions, select one of the databases below to get started:
diff --git a/README.md b/README.md
index a44b250..b02dd5c 100644
--- a/README.md
+++ b/README.md
@@ -1,19 +1,14 @@
-
-
-
-
Public Database Submission Pipeline
-
**Beta Version**: 1.1.0. This pipeline is currently in Beta testing, and
issues could appear during submission. Please use it at your own risk.
-Feedback and suggestions are welcome\!
+Feedback and suggestions are welcome!
**General Disclaimer**: This repository was created for use by CDC
programs to collaborate on public health related projects in support of
@@ -36,7 +31,7 @@ organisms in future updates or requests.
## Prerequisites
- - **NCBI Submissions**
+- **NCBI Submissions**
`seqsender` utilizes an UI-Less Data Submission Protocol to bulk upload
submission files (e.g., *submission.xml*, *submission.zip*, etc.) to
@@ -68,11 +63,11 @@ FTP on the command line. Before attempting to submit a submission using
gb-admin@ncbi.nlm.nih.gov
to discuss requirements for submissions.
-5. Coordinate a NCBI namespace name (**spuid\_namespace**) that will be
+5. Coordinate a NCBI namespace name (**spuid_namespace**) that will be
used with Submitter Provided Unique Identifiers (**spuid**) in the
- submission. The liaison of **spuid\_namespace** and **spuid** is
- used to report back assigned accessions as well as for cross-linking
- objects within submission. The values of **spuid\_namespace** are up
+ submission. The liaison of **spuid_namespace** and **spuid** is used
+ to report back assigned accessions as well as for cross-linking
+ objects within submission. The values of **spuid_namespace** are up
to the submitter to decide but they must be unique and
well-coordinated prior to make a submission. For more information
about these two fields, see
@@ -83,9 +78,7 @@ FTP on the command line. Before attempting to submit a submission using
[GENBANK](https://cdcgov.github.io/seqsender/articles/genbank_submission.html#metadata)
metadata requirements.
-
-
- - **GISAID Submissions**
+- **GISAID Submissions**
`seqsender` makes use of GISAID’s Command Line Interface tools to bulk
uploading meta- and sequence-data to GISAID databases. Presently, the
@@ -119,10 +112,10 @@ package.
## Requirement Files
-Before submitter can perform a submission using `seqsender`, make sure
-the requirement files (such as *config.yaml*, *metadata.csv*,
-*sequence.fasta*, *raw reads*, etc.) are already prepared and stored in
-a submission directory of choice.
+Before submitters can perform a batch submission using `seqsender`, they
+must make sure the requirement files (such as *config.yaml*,
+*metadata.csv*, *sequence.fasta*, *raw reads*, etc.) are already
+prepared and stored in a submission directory of choice.
1) To prep for FLU submissions, select one of the databases below to
get started:
@@ -152,14 +145,14 @@ a submission directory of choice.
## Quick Start
- - [How to run seqsender
- locally](https://cdcgov.github.io/seqsender/articles/local_installation.html)
- - [How to run seqsender with
- Docker](https://cdcgov.github.io/seqsender/articles/docker_installation.html)
- - [How to run seqsender with
- Compose](https://cdcgov.github.io/seqsender/articles/compose_installation.html)
- - [How to run seqsender with
- Singularity](https://cdcgov.github.io/seqsender/articles/singularity_installation.html)
+- [How to run seqsender
+ locally](https://cdcgov.github.io/seqsender/articles/local_installation.html)
+- [How to run seqsender with
+ Docker](https://cdcgov.github.io/seqsender/articles/docker_installation.html)
+- [How to run seqsender with
+ Compose](https://cdcgov.github.io/seqsender/articles/compose_installation.html)
+- [How to run seqsender with
+ Singularity](https://cdcgov.github.io/seqsender/articles/singularity_installation.html)
## Public Domain Standard Notice
@@ -237,5 +230,3 @@ repository](https://github.com/CDCgov/template/blob/master/CONTRIBUTING.md),
disclaimers](https://github.com/CDCgov/template/blob/master/DISCLAIMER.md),
and [code of
conduct](https://github.com/CDCgov/template/blob/master/code-of-conduct.md).
-
-test
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 8512501..759910d 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -9,7 +9,7 @@ news:
navbar:
structure:
- right: [github]
+ right: [news, github]
components:
left:
- icon: fa-home
diff --git a/config/main_config.yaml b/config/main_config.yaml
index bb0f952..97393ec 100644
--- a/config/main_config.yaml
+++ b/config/main_config.yaml
@@ -104,6 +104,10 @@ SUBMISSION_PORTAL:
- text: 'E.g., "United Kingdom", "Japan", "China", "United States", etc.'
gs-Host:
- text: 'Host or source name., E.g. "human", "avian", "chicken", "Anas Acuta", "environment", etc.'
+ gs-Collection_Month:
+ - text: 'For incomplete collection dates, use this field instead of "Collection_Date". Month of year: "1" = Jan, "2" = Feb, so forth, "12" = Dec'
+ gs-Collection_Year:
+ - text: 'For incomplete collection dates, use this field instead of "Collection_Date". Four digit year as string: e.g. "2023"'
gs-Originating_Lab_Id:
- text: 'The numeric ID of the sample"s originating laboratory, e.g. "2698"'
COV:
diff --git a/docs/404.html b/docs/404.html
index 22e106b..8e20859 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -1,66 +1,27 @@
-
-
-
-
+
+
+
+
-
Page not found (404) • seqsender
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
-
-
-
-
-
Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright {yyyy} {name of copyright owner}
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
BioSample is a database containing aggregated information pertaining to reference samples and samples stored in the European Bioinformatics Institute assay databases.
-
Before one can upload experimental samples to BioSample database using seqsender, they must ensure the requirement files (such as config.yaml, metadata.csv, sequence.fasta, raw reads, etc.) are prepared ahead of time and stored in a submission directory of choice.
+
+
Overview
+
+
BioSample is a database containing aggregated
+information pertaining to reference samples and samples stored in the European Bioinformatics Institute
+assay databases.
+
Before submitters can upload their experimental samples to
+BioSample database using seqsender, they
+must ensure the requirement files (such as config.yaml,
+metadata.csv, sequence.fasta,
+raw reads, etc.) are already prepared ahead of time and
+stored in a submission folder of choice (e.g.,
+submission_name) within a parent submission directory
+(e.g., submission_dir). That way seqsender
+will be able to scoop up the necessary files in that folder, generate
+submission files, and then batch upload them to the submitting
+database of choices.
Config file is a yaml file that provides a brief description about the submission and contains user credentials that allow seqsender to authenticate the database prior to upload a submission.
+
A quick look of where to store all of the requirement files
+
+
+
Config file
+
+
Config file is a yaml file that provides a brief description about
+the submission and contains user credentials that allow
+seqsender to authenticate the database prior to upload a
+submission.
-
` NOTE:
+
+NOTE:
-
To submit to NCBI only, one can remove the GISAID Submission (b) section from the config file. Vice versa, to submit to GISAID only, just remove the NCBI Submission (a) section.
+
To submit to NCBI only, one can remove the GISAID Submission
+(b) section from the config file. Vice versa, to submit to
+GISAID only, just remove the NCBI Submission (a)
+section.
-Submission_Position determines the order of databases in which we will submit to first. For instance, if GISAID is set as Primary, seqsender will submit to GISAID first, then after all samples are assigned with a GISAID accession number, seqsender will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases.
+Submission_Position determines the order of
+databases in which we will submit to first. For instance, if GISAID is
+set as 1, seqsender will submit
+to GISAID first, then after all samples are assigned with a GISAID
+accession number, seqsender will proceed to
+submit to NCBI. This order of submission ensures samples are linked
+correctly between the two databases.
-Username and Password under the NCBI Submission (b) section are the credentials used to authenticate the NCBI FTP Server (not to mistake with individual NCBI account). See PRE-REQUISITES for more details.
+Username and Password under the
+NCBI Submission (b) section are the credentials used to
+authenticate the NCBI FTP Server (not to mistake with
+individual NCBI account). See PRE-REQUISITES
+for more details.
-
-
-Metadata file
-
Here is a short description about the fields in the metadata worksheet.
+
+
Metadata file
+
+
The metadata worksheet is a comma-delimited (csv) file that contains
+required attributes that are useful for the rapid analysis and trace
+back of Influenza A Virus or SARS-COV-2 cases.
+
Here is a short description about the fields in the metadata
+worksheet.
@@ -202,7 +243,8 @@
sequence_name
-Sequence identifier used in fasta file. This is used to create the fasta file for Genbank or GISAID.
+Sequence identifier used in fasta file. This is used to create the fasta
+file for Genbank or GISAID.
@@ -210,7 +252,12 @@
organism
-The most descriptive organism name for the samples. If relevant, you can search the organism name in the NCBI Taxonomy database. For FLU, organism must be “Influenza A Virus”. For COV, organism must be “Severe acute respiratory syndrome coronavirus 2”.
+The most descriptive organism name for the samples. If relevant, you can
+search the organism name in the
+NCBI
+Taxonomy database. For FLU, organism must be “Influenza A
+Virus”. For COV, organism must be “Severe acute respiratory
+syndrome coronavirus 2”.
@@ -218,7 +265,8 @@
collection_date
-The date on which the sample was collected; must be in the ISO format: YYYY-MM-DD. For example: 2020-03-25
+The date on which the sample was collected; must be in the ISO format:
+YYYY-MM-DD. For example: 2020-03-25
@@ -226,7 +274,9 @@
authors
-Citing authors. List of Last, First Middle, suffix separated by a semicolon “;” E.g.: “Baker, Howard Henry, Jr.; Powell, Earl Alexander, III.;”
+Citing authors. List of Last, First Middle, suffix separated by a
+semicolon “;” E.g.: “Baker, Howard Henry, Jr.; Powell, Earl Alexander,
+III.;”
@@ -234,7 +284,9 @@
ncbi-spuid
-Submitter Provided Unique Identifiers. This is used to report back assigned accessions as well as for cross-linking objects within submission.
+Submitter Provided Unique Identifiers. This is used to report back
+assigned accessions as well as for cross-linking objects within
+submission.
@@ -242,7 +294,9 @@
ncbi-spuid_namespace
-If SPUID is used, spuid_namespace has to be provided. The values of spuid_namespace are from controlled vocabulary and need to be coordinated with NCBI prior to submission.
+If SPUID is used, spuid_namespace has to be provided. The values of
+spuid_namespace are from controlled vocabulary and need to be
+coordinated with NCBI prior to submission.
@@ -258,7 +312,8 @@
bs-description
-A brief description about the sample, e.g. SARS-CoV-2 Sequencing Baseline Constellation.
+A brief description about the sample, e.g. SARS-CoV-2 Sequencing
+Baseline Constellation.
@@ -274,7 +329,12 @@
bs-geo_loc_name
-Geographical origin of the sample; use the appropriate name from this list. Use a colon to separate the country or ocean from more detailed information about the location, eg “Canada: Vancouver” or “Germany: halfway down Zugspitze, Alps”. Entering multiple localities in one attribute is not allowed.
+Geographical origin of the sample; use the appropriate name from
+this
+list. Use a colon to separate the country or ocean from more
+detailed information about the location, eg “Canada: Vancouver” or
+“Germany: halfway down Zugspitze, Alps”. Entering multiple localities in
+one attribute is not allowed.
@@ -282,7 +342,8 @@
bs-host
-The natural (as opposed to laboratory) host to the organism from which the sample was obtained. Use the full taxonomic name, eg, Homo sapiens.
+The natural (as opposed to laboratory) host to the organism from which
+the sample was obtained. Use the full taxonomic name, eg, Homo sapiens.
@@ -290,7 +351,11 @@
bs-host_disease
-Name of relevant disease, e.g. Salmonella gastroenteritis. Controlled vocabulary, please see Human Disease Ontology or MeSH
+Name of relevant disease, e.g. Salmonella gastroenteritis. Controlled
+vocabulary, please see
+Human
+Disease Ontology or
+MeSH
@@ -298,7 +363,8 @@
bs-isolate
-Identification or description of the specific individual from which this sample was obtained.
+Identification or description of the specific individual from which this
+sample was obtained.
@@ -306,7 +372,8 @@
bs-isolation_source
-Describes the physical, environmental and/or local geographical source of the biological sample from which the sample was derived.
+Describes the physical, environmental and/or local geographical source
+of the biological sample from which the sample was derived.
@@ -314,21 +381,30 @@
bs-lat_lon
-The geographical coordinates of the location where the sample was collected. Specify as degrees latitude and longitude in format “d[d.dddd] N|S d[dd.dddd] W|E”, eg, 38.98 N 77.11 W
+The geographical coordinates of the location where the sample was
+collected. Specify as degrees latitude and longitude in format
+“d[d.dddd] N|S d[dd.dddd] W|E”, eg, 38.98 N 77.11 W
-
NOTE: The prefix of “bs-” is used to identity attributes for BioSample submissions
-
To include additional attributes to BioSample submissions, just append bs- in front of the desired attributes, e.g. bs-host_age, bs-host_sex, etc. See Pathogen.cl.1.0 package for more attributes.
+
NOTE: The prefix of “bs-” is used
+to identify attributes for BioSample submissions
+
To include additional attributes to BioSample
+submissions, just append bs- in front of the desired
+attributes, e.g. bs-host_age, bs-host_sex, etc. See Pathogen.cl.1.0
+package for more attributes.
NOTE:source is the storage location of your local machine. This location will be mapped to /data directory inside the container. Here we are mounting the local $HOME directory to /data inside the container.
+
cd seqsender
+
Here is a quick look at the docker-compose.yaml
+file:
NOTE:source is the storage
+location of your local machine. This location will be mapped to
+/data directory inside the container. Here we are mounting
+the local $HOME directory to /data inside the
+container.
docker container ps
-
-
-CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
-b37b6b19c4e8 seqsender:latest "/bin/bash" 5 hours ago Up 5 hours seqsender
+
+
(4) Check if the container is running
+
+
docker container ps
+
+
+CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
+b37b6b19c4e8 seqsender:latest "/bin/bash" 5 hours ago Up 5 hours seqsender
-
-
-(5) See a list of commands in seqsender container
-t: allocate a pseudo-tty -i: keep STDIN open even if not attached -h, --help: show help messages and exit
-
usage: seqsender.py [-h]
-{prep,submit,check_submission_status,template,version}...
-
-Automate the process of batch uploading consensus sequences and metadata to
-databases of your choices
-
-positional arguments:
-{prep,submit,check_submission_status,template,version}
-
-optional arguments:
--h, --help show this help message and exit
-
Rather than hastily jump in and submit a production submission right away, we can utilize GISAID’s and NCBI’s “TEST-SERVER” to upload a test submission first. That way submitter can familiarize themselves with the submission process prior to make a real submission.
+
+
(5) See a list of commands in seqsender container
+
-t: allocate a pseudo-tty -i: keep STDIN open even if not attached -h, --help:
+show help messages and exit
+
usage: seqsender.py [-h]
+{prep,submit,check_submission_status,template,version} ...
+
+Automate the process of batch uploading consensus sequences and metadata to
+databases of your choices
+
+positional arguments:
+{prep,submit,check_submission_status,template,version}
+
+optional arguments:
+-h,--help show this help message and exit
+
Rather than hastily jump in and submit a production
+submission right away, we can utilize GISAID’s and NCBI’s
+“TEST-SERVER” to upload a test submission
+first. That way, submitters can familiarize themselves with the submission
+process prior to making a real submission.
+
Note: Duplicate test submissions will result in an
+error. Please create new sequence names each time you plan to run test
+submissions to avoid this issue.
Here we will go over the steps of preparing and batch uploading meta- and sequence-data to GISAID and NCBI databases using a pre-processed dataset provided with the software.
-
The template command will allow you to output examples of metadata and config files so you can base your submission on prior to upload a real submission. To get more help on the command, run
usage: seqsender.py template [-h] [--biosample] [--sra] [--genbank] [--gisaid]
- --organism {FLU,COV} --submission_dir
- SUBMISSION_DIR --submission_name SUBMISSION_NAME
-
-Return a set of files (e.g., config file, metadata file, fasta files, etc.)
-that are needed to make a submission
-
-optional arguments:
- -h, --help show this help message and exit
- --biosample, -b Submit to BioSample. (default: )
- --sra, -s Submit to SRA. (default: )
- --genbank, -n Submit to Genbank. (default: )
- --gisaid, -g Submit to GISAID. (default: )
- --organism {FLU,COV} Type of organism data (default: FLU)
- --submission_dir SUBMISSION_DIR
- Directory to where all required files (such as
- metadata, fasta, etc.) are stored (default: None)
- --submission_name SUBMISSION_NAME
- Name of the submission (default: None)
+
Here we will go over the steps of preparing and batch uploading meta-
+and sequence-data to GISAID and NCBI databases using a pre-processed
+dataset provided with the software.
+
The template command will allow you to output examples
+of metadata and config files that you can base your submission on before
+uploading a real submission. To get more help on the command, run
usage: seqsender.py template [-h][--biosample][--sra][--genbank][--gisaid]
+--organism{FLU,COV}--submission_dir
+SUBMISSION_DIR--submission_name SUBMISSION_NAME
+
+Return a set of files (e.g., config file, metadata file, fasta files, etc.)
+that are needed to make a submission
+
+optional arguments:
+-h,--help show this help message and exit
+--biosample,-b Submit to BioSample. (default:)
+--sra,-s Submit to SRA. (default:)
+--genbank,-n Submit to Genbank. (default:)
+--gisaid,-g Submit to GISAID. (default:)
+--organism{FLU,COV} Type of organism data (default: FLU)
+--submission_dir SUBMISSION_DIR
+Directory to where all required files (such as
+metadata, fasta, etc.)are stored (default: None)
+--submission_name SUBMISSION_NAME
+Name of the submission (default: None)
-
-1. Download the pre-processed meta- and sequence-data
---organism specifies the type of data to download. Currently, Influenza A Virus (FLU) and SARS-COV-2 (COV) are the only two options. Additional datasets for other organisms will be provided in future updates or requests.
+--organism specifies the type of data
+to download. Currently, Influenza A Virus (FLU) and
+SARS-COV-2 (COV) are the only two options. Additional
+datasets for other organisms will be provided in future updates or
+requests.
--bsng is a combination flag of databases: Biosample(-b or --biosample), SRA(-s or --sra), Genbank(-n or --genbank), and GISAID(-g or --gisaid). This combination flag tells seqsender to generate an unified meta- and sequence-data into one file so we can perform batch upload to all databases simultaneously.
+-bsng is a combination flag of
+databases: Biosample(-b or
+--biosample), SRA
+(-s or --sra),
+Genbank(-n or
+--genbank), and GISAID
+(-g or --gisaid). This combination
+flag tells seqsender to generate a unified meta- and
+sequence-data into one file so we can perform batch upload to all
+databases simultaneously.
---submission_dir is the directory where you would like to dump all the submission files (e.g. /data -> our $HOME directory).
+--submission_dir is the directory
+where you store all of the submission histories (e.g. /data
+-> our $HOME directory).
---submission_name is the submission folder inside the --submission_dir directory where it contains all necessary files (such as config.yaml, metadata.csv, sequence.fasta, raw reads, etc.) in order to make a submission.
+--submission_name is the submission
+folder inside the --submission_dir directory where it
+contains all necessary files (such as config.yaml,
+metadata.csv, sequence.fasta, raw reads,
+etc.) in order to make a submission.
After the template is downloaded in (1), you can find config.yaml in your local $HOME/flu-test-submission directory. The config.yaml yaml file provides a brief description about the submission and contains user credentials that allow seqsender to authenticate the database prior to upload a submission.
-
Open that file with a text editor of your choice and fill in the appropriate information about your submission.
+
After the template is downloaded in (1), you can find
+config.yaml in your local
+$HOME/flu-test-submission directory. The
+config.yaml file provides a brief description about
+the submission and contains user credentials that allow
+seqsender to authenticate with the database prior to uploading a
+submission.
+
Open that file with a text editor of your choice and fill in the
+appropriate information about your submission.
-
NOTE:
+
+NOTE:
-
To submit to NCBI only, one can remove the GISAID Submission (b) section from the config file. Vice versa, to submit to GISAID only, just remove the NCBI Submission (a) section.
-
-
-Submission_Position determines the order of the database in which we will submit first. For instance, if GISAID is set as 1, seqsender will submit to GISAID first, then after all samples are assigned with a GISAID accession number, seqsender will proceed to submit to NCBI. This order of submission ensures samples are linked correctly between the two databases after submission.
-
-
-Username and Password under the NCBI Submission (b) section are the credentials used to authenticate the NCBI FTP Server (not to mistake with individual NCBI account). See PRE-REQUISITES for more details.
+
To submit to NCBI only, one can remove the GISAID Submission
+(b) section from the config file. Vice versa, to submit to
+GISAID only, just remove the NCBI Submission (a)
+section.
+
+
+Submission_Position determines the order of the
+database in which we will submit first. For instance, if GISAID is set
+as 1, seqsender will submit to GISAID first,
+then after all samples are assigned with a GISAID accession number,
+seqsender will proceed to submit to NCBI. This order of
+submission ensures samples are linked correctly between the two
+databases after submission.
+
+
+Username and Password under the
+NCBI Submission (b) section are the credentials used to
+authenticate with the NCBI FTP Server (not to be confused with an
+individual NCBI account). See PRE-REQUISITES
+for more details.
-
ADDITIONAL REQUIREMENTS:
+
+ADDITIONAL REQUIREMENTS:
-
If SRA is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called raw_reads inside your submission directory of choice.
-
If GISAID is in your list of submitting databases, download the CLI package that associated with your organism of interest (e.g, Influenza A Virus (FLU) or SARS-COV-2 (COV)) from the GISAID platform and stored them in a subfolder called gisaid_cli inside your submission directory of choice.
+
If SRA is in your list of submitting databases, the
+raw reads for all samples must be provided and stored in a subfolder
+called raw_reads inside your submission directory of
+choice.
+
If GISAID is in your list of submitting databases,
+download the CLI package that is associated with your organism of interest
+(e.g.,
+Influenza A
+Virus (FLU) or
+SARS-COV-2
+(COV)) from the GISAID platform and store it in a subfolder
+called gisaid_cli inside your submission directory of
+choice.
-
A quick look of where to store the downloaded GISAID CLI package,
+
A quick look at where to store the downloaded GISAID
+CLI package,
-
Important: Make sure you binary CLI package are executable. To allow executable permissions, run
-
chmod a+x <your_gisaid_cli_file>
+
Important: Make sure your binary CLI package is
+executable. To allow executable permissions, run
---organism specifies the type of data to upload. Currently, Influenza A Virus (FLU) and SARS-COV-2 (COV) are the only two options.
+--organism specifies the type of data
+to upload. Currently, Influenza A Virus (FLU) and
+SARS-COV-2 (COV) are the only two options.
--bsng is a combination flag of databases: Biosample(-b or --biosample), SRA(-s or --sra), Genbank(-n or --genbank), and GISAID(-g or --gisaid). This combination flag tells seqsender to prep and submit to each given database. See docker exec -it seqsender bash seqsender-kickoff submit --help for more details.
+-bsng is a combination flag of
+databases: Biosample(-b or
+--biosample), SRA
+(-s or --sra),
+Genbank(-n or
+--genbank), and GISAID
+(-g or --gisaid). This combination
+flag tells seqsender to prep and submit to each given
+database. See
+docker exec -it seqsender bash seqsender-kickoff submit --help
+for more details.
---submission_dir is the directory where you store all of the submission histories (e.g. /data -> our $HOME directory).
+--submission_dir is the directory
+where you store all of the submission histories (e.g. /data
+-> our $HOME directory).
---submission_name is the submission folder inside the --submission_dir directory where it contains all necessary files (such as config.yaml, metadata.csv, sequence.fasta, raw reads, etc.) in order to make a submission.
+--submission_name is the submission
+folder inside the --submission_dir directory where it
+contains all necessary files (such as config.yaml,
+metadata.csv, sequence.fasta, raw reads,
+etc.) in order to make a submission.
---config_file is the config file inside the --submission_name directory.
+--config_file is the config file
+inside the --submission_name directory.
---metadata_file is the metadata file inside the --submission_name directory.
+--metadata_file is the metadata file
+inside the --submission_name directory.
---fasta_file is the fasta file inside the --submission_name directory.
+--fasta_file is the fasta file inside
+the --submission_name directory.
---test is used to submit to “TEST-SERVER ONLY” . For production submission, please remove this flag.
+--test is used to submit to
+“TEST-SERVER ONLY” . For production
+submission, please remove this flag.
A quick look at the standard output.
-
Creating submission files for BIOSAMPLE
-Files are stored at: /data/flu-test-submission/submission_files/BIOSAMPLE
-
-Creating submission files for SRA
-Files are stored at: /data/flu-test-submission/submission_files/SRA
-
-Creating submission files for GENBANK
-Files are stored at: /data/flu-test-submission/submission_files/GENBANK
-
-Creating submission files for GISAID
-Files are stored at: /data/flu-test-submission/submission_files/GISAID
-
-Uploading submission files to NCBI-BIOSAMPLE
-Performing a 'Test' submission
-If this is not a 'Test' submission, interrupts submission immediately.
-
-Connecting to NCBI FTP Server
-Submission name: rc-flu-test-submission
-Submitting 'rc-flu-test-submission'
-
-Uploading submission files to NCBI-SRA
-Performing a 'Test' submission
-If this is not a 'Test' submission, interrupts submission immediately.
-
-Connecting to NCBI FTP Server
-Submission name: rc-flu-test-submission
-Submitting 'rc-flu-test-submission'
-
-Uploading submission files to GISAID-FLU
-Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
-If this is not a 'Test' submission, interrupts submission immediately.
-
-Submission attempt: 1
-Uploading successfully
-Status report is stored at: /data/flu-test-submission/submission_report_status.csv
-Log file is stored at: /data/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
-
4. Check the status of a submission
-
After a submission is submitted, you can routinely check the status of the submission.
Creating submission files for BIOSAMPLE
+Files are stored at: /data/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /data/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /data/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /data/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting'flu-test-submission'
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /data/flu-test-submission/submission_report_status.csv
+Log file is stored at: /data/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+
+
+
4. Check the status of a submission
+
+
After a submission is submitted, you can routinely check the status
+of the submission.
---organism specifies the type of data. Currently, Influenza A Virus (FLU) and SARS-COV-2 (COV) are the only two options.
+--organism specifies the type of data.
+Currently, Influenza A Virus (FLU) and
+SARS-COV-2 (COV) are the only two options.
---submission_dir is the directory where you store all of the submission histories.
+--submission_dir is the directory
+where you store all of the submission histories.
---submission_name is the submission folder inside the --submission_dir directory where it contains all necessary files (such as config.yaml, metadata.csv, sequence.fasta, raw reads, etc.) in order to make a submission.
+--submission_name is the submission
+folder inside the --submission_dir directory where it
+contains all necessary files (such as config.yaml,
+metadata.csv, sequence.fasta, raw reads,
+etc.) in order to make a submission.
---test is used to submit to “TEST-SERVER ONLY” . For production submission, please remove this flag.
+--test is used to submit to
+“TEST-SERVER ONLY” . For production
+submission, please remove this flag.
Before you can perform a test submission with your own dataset, make sure you have the required files (such as config.yaml, metadata.csv, sequence.fasta, raw reads, etc.) already prepared and stored in the submission directory of your choice.
+
Before you can perform a test submission with your own
+dataset, make sure you have the required files (such as
+config.yaml, metadata.csv,
+sequence.fasta, raw reads, etc.)
+already prepared and stored in the submission directory of your
+choice.
-
-1. Assemble your meta- and sequence-data
+
+
1. Assemble your meta- and sequence-data
+
-
To prep for FLU submissions, select one of the databases below for more details
+
To prep for FLU submissions, select one of the databases below for
+more details
After you have finished prepping for your database of choices in (a) or (b), create a submission folder and store all your metadata and sequence files there.
+
After you have finished prepping for your database of choices in
+(a) or (b), create a submission folder and
+store all your metadata and sequence files there.
Here is a quick look at the folder structure
-
Finally, make sure additional requirements below are met before you can proceed to the next steps.
+
Finally, make sure additional requirements below are met before you
+can proceed to the next steps.
-
If SRA is in your list of submitting databases, the raw reads for all samples must be provided and stored in a subfolder called raw_reads inside your submission directory of choice.
-
If GISAID is in your list of submitting databases, download the CLI package that associated with your organism of interest (e.g, Influenza A Virus (FLU) or SARS-COV-2 (COV)) from the GISAID platform and stored them in a subfolder called gisaid_cli inside your submission directory of choice.
+
If SRA is in your list of submitting databases, the
+raw reads for all samples must be provided and stored in a subfolder
+called raw_reads inside your submission directory of
+choice.
+
If GISAID is in your list of submitting databases,
+download the CLI package that is associated with your organism of interest
+(e.g.,
+Influenza A
+Virus (FLU) or
+SARS-COV-2
+(COV)) from the GISAID platform and store it in a subfolder
+called gisaid_cli inside your submission directory of
+choice.
-
Here is an example of where to place the GISAID CLI package.
+
Here is an example of where to place the GISAID CLI
+package.
-
Important: Make sure you binary CLI package are executable. To allow executable permissions, run
-
chmod a+x <your_gisaid_cli_file>
+
Important: Make sure your binary CLI package is
+executable. To allow executable permissions, run
+
chmod a+x <your_gisaid_cli_binary>
-
2. Upload a test submission
-
After all files in (1) are prepared, we can go ahead and upload the submission
---organism specifies the type of data to upload. Currently, Influenza A Virus (FLU) and SARS-COV-2 (COV) are the only two options.
+--organism specifies the type of data
+to upload. Currently, Influenza A Virus (FLU) and
+SARS-COV-2 (COV) are the only two options.
--bsng is a combination flag of databases: Biosample(-b or --biosample), SRA(-s or --sra), Genbank(-n or --genbank), and GISAID(-g or --gisaid). This combination flag tells seqsender to prep and submit to each given database. See docker exec -it seqsender bash seqsender-kickoff submit --help for more details.
+-bsng is a combination flag of
+databases: Biosample(-b or
+--biosample), SRA
+(-s or --sra),
+Genbank(-n or
+--genbank), and GISAID
+(-g or --gisaid). This combination
+flag tells seqsender to prep and submit to each given
+database. See
+docker exec -it seqsender bash seqsender-kickoff submit --help
+for more details.
---submission_dir is the directory where you store all of the submission histories (e.g. /data -> our $HOME directory).
+--submission_dir is the directory
+where you store all of the submission histories (e.g. /data
+-> our $HOME directory).
---submission_name is the submission folder inside the --submission_dir directory where it contains all necessary files (such as config.yaml, metadata.csv, sequence.fasta, raw reads, etc.) in order to make a submission.
+--submission_name is the submission
+folder inside the --submission_dir directory where it
+contains all necessary files (such as config.yaml,
+metadata.csv, sequence.fasta, raw reads,
+etc.) in order to make a submission.
---config_file is the config file inside the --submission_name directory.
+--config_file is the config file
+inside the --submission_name directory.
---metadata_file is the metadata file inside the --submission_name directory.
+--metadata_file is the metadata file
+inside the --submission_name directory.
---fasta_file is the fasta file inside the --submission_name directory.
+--fasta_file is the fasta file inside
+the --submission_name directory.
---test is used to submit to “TEST-SERVER ONLY” . For production submission, please remove this flag.
+--test is used to submit to
+“TEST-SERVER ONLY” . For production
+submission, please remove this flag.
A quick look at the standard output.
-
Creating submission files for BIOSAMPLE
-Files are stored at: /data/flu-test-submission/submission_files/BIOSAMPLE
-
-Creating submission files for SRA
-Files are stored at: /data/flu-test-submission/submission_files/SRA
-
-Creating submission files for GENBANK
-Files are stored at: /data/flu-test-submission/submission_files/GENBANK
-
-Creating submission files for GISAID
-Files are stored at: /data/flu-test-submission/submission_files/GISAID
-
-Uploading submission files to NCBI-BIOSAMPLE
-Performing a 'Test' submission
-If this is not a 'Test' submission, interrupts submission immediately.
-
-Connecting to NCBI FTP Server
-Submission name: rc-flu-test-submission
-Submitting 'rc-flu-test-submission'
-
-Uploading submission files to NCBI-SRA
-Performing a 'Test' submission
-If this is not a 'Test' submission, interrupts submission immediately.
-
-Connecting to NCBI FTP Server
-Submission name: rc-flu-test-submission
-Submitting 'rc-flu-test-submission'
-
-Uploading submission files to GISAID-FLU
-Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
-If this is not a 'Test' submission, interrupts submission immediately.
-
-Submission attempt: 1
-Uploading successfully
-Status report is stored at: /data/flu-test-submission/submission_report_status.csv
-Log file is stored at: /data/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
-
3. Check the status of a submission
-
After a submission is submitted, you can routinely check the status of the submission.
Creating submission files for BIOSAMPLE
+Files are stored at: /data/flu-test-submission/submission_files/BIOSAMPLE
+
+Creating submission files for SRA
+Files are stored at: /data/flu-test-submission/submission_files/SRA
+
+Creating submission files for GENBANK
+Files are stored at: /data/flu-test-submission/submission_files/GENBANK
+
+Creating submission files for GISAID
+Files are stored at: /data/flu-test-submission/submission_files/GISAID
+
+Uploading submission files to NCBI-BIOSAMPLE
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting'flu-test-submission'
+
+Uploading submission files to NCBI-SRA
+Performing a 'Test' submission
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Connecting to NCBI FTP Server
+Submission name: flu-test-submission
+Submitting'flu-test-submission'
+
+Uploading submission files to GISAID-FLU
+Performing a 'Test' submission with Client-Id: TEST-EA76875B00C3
+If this is not a 'Test' submission, interrupts submission immediately.
+
+Submission attempt: 1
+Uploading successfully
+Status report is stored at: /data/flu-test-submission/submission_report_status.csv
+Log file is stored at: /data/flu-test-submission/submission_files/GISAID/gisaid_upload_log_attempt_1.txt
+
+
+
3. Check the status of a submission
+
+
After a submission is submitted, you can routinely check the status
+of the submission.
---organism specifies the type of data. Currently, Influenza A Virus (FLU) and SARS-COV-2 (COV) are the only two options.
+--organism specifies the type of data.
+Currently, Influenza A Virus (FLU) and
+SARS-COV-2 (COV) are the only two options.
---submission_dir is the directory where you store all of the submission histories (e.g. /data -> our $HOME directory).
+--submission_dir is the directory
+where you store all of the submission histories (e.g. /data
+-> our $HOME directory).
---submission_name is the submission folder inside the --submission_dir directory where it contains all necessary files (such as config.yaml, metadata.csv, sequence.fasta, raw reads, etc.) in order to make a submission.
+--submission_name is the submission
+folder inside the --submission_dir directory where it
+contains all necessary files (such as config.yaml,
+metadata.csv, sequence.fasta, raw reads,
+etc.) in order to make a submission.
---test is used to submit to “TEST-SERVER ONLY” . For production submission, please remove this flag.
+--test is used to submit to
+“TEST-SERVER ONLY” . For production
+submission, please remove this flag.