[
    {
        "id": "fastStructure",
        "name": "fastStructure",
        "article": "Anil Raj, Matthew Stephens, and Jonathan K. Pritchard. fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets, (Genetics) June 2014 197:573-589",
        "website": "https:\/\/github.com\/jashapiro\/fastStructure\/tree\/py3",
        "git": "https:\/\/github.com\/jashapiro\/fastStructure",
        "description": "A fast algorithm for inferring population structure from large SNP genotype data",
        "version": "1.0",
        "documentation": "",
        "multiqc": "custom",
        "commands": [
            {
                "name": "fastStructure",
                "command": "structure.py",
                "category": "genet_pop",
                "output_dir": "fastStructure",
                "inputs": [
                    {
                        "name": "vcf",
                        "type": "vcf_file",
                        "file": "",
                        "description": "Multisample vcf file"
                    },
                    {
                        "name": "popmap_file",
                        "type": "popmap",
                        "file": "",
                        "description": "Path to tsv file with samples group"
                    }
                ],
                "outputs": [
                    {
                        "name": "chooseK",
                        "type": "tsv",
                        "file": "chooseK.txt",
                        "description": "choosek on the posterior mean of admixture proportions for the range of K"
                    }
                ],
                "options": [
                    {
                        "name": "fastStructure_threads",
                        "prefix": "--threads",
                        "value": 16,
                        "min": 1,
                        "max": 64,
                        "step": 1,
                        "label": "Threads to use",
                        "type": "numeric"
                    },
                    {
                        "name": "fastStructure_Kmin",
                        "prefix": "-K",
                        "value": 2,
                        "min": 2,
                        "max": 64,
                        "step": 1,
                        "label": "number of populations range min value to test",
                        "type": "numeric"
                    },
                    {
                        "name": "fastStructure_Kmax",
                        "prefix": "-K",
                        "value": 4,
                        "min": 2,
                        "max": 64,
                        "step": 1,
                        "label": "number of populations range max value to test",
                        "type": "numeric"
                    },
                    {
                        "name": "fastStructure_cv",
                        "prefix": "--cv",
                        "value": 0,
                        "min": 0,
                        "max": 5,
                        "step": 1,
                        "label": "number of test sets for cross-validation, 0 implies no CV step",
                        "type": "numeric"
                    },
                    {
                        "name": "fastStructure_prior",
                        "prefix": "--prior",
                        "type": "select",
                        "choices": [
                            {
                                "simple": "simple"
                            },
                            {
                                "logistic": "logistic"
                            }
                        ],
                        "value": "simple",
                        "label": "perform inference for the simplest, independent-loci, admixture model, with this prior"
                    },
                    {
                        "name": "fastStructure_tol",
                        "prefix": "--tol",
                        "type": "select",
                        "choices": [
                            {
                                "1e_6": "1e-6"
                            },
                            {
                                "1e_7": "1e-7"
                            },
                            {
                                "1e_8": "1e-8"
                            }
                        ],
                        "value": "1e-6",
                        "label": "convergence criterion"
                    }
                ]
            }
        ],
        "prepare_report_script": "fastStructure.prepare.report.R",
        "prepare_report_outputs": [
            "distruct_plot_mqc.png"
        ],
        "install": [],
        "citations": {
            "SeqArray": [
                "Zheng X, Gogarten S, Lawrence M, Stilp A, Conomos M, Weir B, Laurie C, Levine D (2017). SeqArray - A storage-efficient high-performance data format for WGS variant calls. Bioinformatics. doi: 10.1093\/bioinformatics\/btx145."
            ],
            "numpy": [
                "S. van der Walt, S. C. Colbert and G. Varoquaux, 'The NumPy Array: A Structure for Efficient Numerical Computation,' in Computing in Science & Engineering, vol. 13, no. 2, pp. 22-30, March-April 2011, doi: 10.1109\/MCSE.2011.37."
            ],
            "scipy": [
                "Virtanen, P., Gommers, R., Oliphant, T.E. et al. SciPy 1.0: fundamental algorithms for scientific computing in Python. Nat Methods 17, 261-272 (2020). https:\/\/doi.org\/10.1038\/s41592-019-0686-2"
            ],
            "matplotlib": [
                "J. D. Hunter, 'Matplotlib: A 2D Graphics Environment,' in Computing in Science & Engineering, vol. 9, no. 3, pp. 90-95, May-June 2007, doi: 10.1109\/MCSE.2007.55."
            ],
            "fastStructure": [
                "Anil Raj, Matthew Stephens, and Jonathan K. Pritchard. fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets, (Genetics) June 2014 197:573-589. "
            ],
            "vcftools": [
                "The Variant Call Format and VCFtools, Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, Bioinformatics, 2011 http:\/\/dx.doi.org\/10.1093\/bioinformatics\/btr330"
            ],
            "plink": [
                " Second-generation PLINK: rising to the challenge of larger and richer datasets. Christopher C Chang, Carson C Chow, Laurent CAM Tellier, Shashaank Vattikuti, Shaun M Purcell, James J Lee. GigaScience, Volume 4, Issue 1, December 2015, 13742-015-0047-8"
            ],
            "structure_threader": [
                " PINA\u2010MARTINS, Francisco, SILVA, Diogo N., FINO, Joana, et al. Structure_threader: an improved method for automation and parallelization of programs structure, fastStructure and MavericK on multicore CPU systems. Molecular ecology resources, 2017, vol. 17, no 6, p. e268-e274."
            ]
        },
        "yaml": "{\n  id: fastStructure,\n  name: fastStructure,\n  article: \"Anil Raj, Matthew Stephens, and Jonathan K. Pritchard. fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets, (Genetics) June 2014 197:573-589\",\n  website: \"https:\/\/github.com\/jashapiro\/fastStructure\/tree\/py3\",\n  git: \"https:\/\/github.com\/jashapiro\/fastStructure\",\n  description: \"A fast algorithm for inferring population structure from large SNP genotype data\",\n  version: \"1.0\",\n  documentation: \"\",\n  multiqc: \"custom\",\n  commands:\n    [ \n      {\n        name: fastStructure,\n        command: \"structure.py\",\n        category: \"genet_pop\",\n        output_dir: fastStructure,\n        inputs: [\n                 { name: vcf, type: \"vcf_file\", file: \"\",description: \"Multisample vcf file\"} ,\n                 { name: popmap_file, type: \"popmap\", file: \"\", description: \"Path to tsv file with samples group\"}\n                ],\n        outputs: [\n          { name: chooseK, type: \"tsv\", file: \"chooseK.txt\", description: \"choosek on the posterior mean of admixture proportions for the range of K\" },\n          #{ name: distructPlot, type: \"svg\", file: \"distruct_plot_mqc.svg\", description: \"expected admixture proportions plot\" },\n        ],\n        options: [\n          {\n              name: \"fastStructure_threads\",\n              prefix: \"--threads\",\n              value: 16,\n              min: 1,\n              max: 64,\n              step: 1,\n              label: \"Threads to use\",\n              type: \"numeric\",\n          },\n          {\n              name: \"fastStructure_Kmin\",\n              prefix: \"-K\",\n              value: 2,\n              min: 2,\n              max: 64,\n              step: 1,\n              label: \"number of populations range min value to test\",\n              type: \"numeric\",\n          },\n          {\n              name: \"fastStructure_Kmax\",\n              prefix: \"-K\",\n              value: 4,\n              min: 2,\n              max: 64,\n              step: 1,\n              label: \"number of populations range max value to test\",\n              type: \"numeric\",\n          },\n          {\n              name: \"fastStructure_cv\",\n              prefix: \"--cv\",\n              value: 0,\n              min: 0,\n              max: 5,\n              step: 1,\n              label: \"number of test sets for cross-validation, 0 implies no CV step\",\n              type: \"numeric\",\n          },\n          {\n              name: \"fastStructure_prior\",\n              prefix: \"--prior\",\n              type: select,\n              choices: [\n                simple: \"simple\",\n                logistic: \"logistic\"\n              ],\n              value: \"simple\",\n              label: \"perform inference for the simplest, independent-loci, admixture model, with this prior\",\n          },  \n          {\n              name: \"fastStructure_tol\",\n              prefix: \"--tol\",\n              type: select,\n              choices: [\n                1e_6: \"1e-6\",\n                1e_7: \"1e-7\",\n                1e_8: \"1e-8\",\n              ],\n              value: \"1e-6\",\n              label: \"convergence criterion\",\n          },  \n        ],\n      },\n    ],\n  prepare_report_script:  fastStructure.prepare.report.R,\n  prepare_report_outputs: [\n    distruct_plot_mqc.png,\n  ],\n  install:\n    {\n      # numpy: [\n      #   \"pip3 install numpy\"\n      # ],\n      # scipy: [\n      #   \"pip3 install scipy\"\n      # ],\n      # matplotlib: [\n      #   \"pip3 install matplotlib\"\n      # ],\n      # cython: [\n      #   \"pip3 uninstall -y cython; pip3 install Cython==0.27.3\"\n      # ],\n      # gsl: [\n      #   \"wget http:\/\/gnu.mirror.vexxhost.com\/gsl\/gsl-2.4.tar.gz && tar -zxvf gsl-2.4.tar.gz\",\n      #   \"cd gsl-2.4\",\n      #   \".\/configure\",\n      #   \"make\",\n      #   \"sudo make install\",\n      #   \"cd ..\",\n      #   \"sudo rm -R gsl-2.4.tar.gz gsl-2.4\",\n      #   \"ln -s \/usr\/local\/lib\/libgsl* \/usr\/lib\/\"\n      # ],\n      # fastStructure: [\n      #   \"cd \/opt\/biotools\/bin \",\n      #   \"git clone https:\/\/github.com\/jashapiro\/fastStructure.git \",\n      #   \"cd \/opt\/biotools\/bin\/fastStructure && git checkout py3 \",\n      #   \"cd \/opt\/biotools\/bin\/fastStructure\/vars \",\n      #   \"python3 setup.py build_ext -f --inplace \",\n      #   \"cd \/opt\/biotools\/bin\/fastStructure \",\n      #   \"python3 setup.py build_ext -f  --inplace \",\n      #   \" sed -i '2iimport matplotlib as mpl' \/opt\/biotools\/bin\/fastStructure\/distruct.py\",\n      #   \" sed -i '3impl.use(\\\"svg\\\")' \/opt\/biotools\/bin\/fastStructure\/distruct.py \",\n      #   \"ENV PATH \/opt\/biotools\/bin\/fastStructure\",\n      #   \"pip3 install Cython --upgrade\"\n      # ],\n      # popHelper: [\n      #    'Rscript -e ''devtools::install_github(\"royfrancis\/pophelper\", Ncpus=8, upgrade =\"never\")'' '\n      # ],\n      # vcftools: [\n      #     \"apt-get install -y vcftools\"\n      #   ],\n      # plink: [\n      #     \"cd \/opt\/biotools\/bin\",\n      #     \"wget http:\/\/s3.amazonaws.com\/plink1-assets\/plink_linux_x86_64_20190304.zip \",\n      #     \"unzip plink_linux_x86_64_20190304.zip \",\n      #     \"rm -f plink_linux_x86_64_20190304.zip \"\n      # ]  \n      \n    },\n  citations:\n    {\n     SeqArray: [\n       \"Zheng X, Gogarten S, Lawrence M, Stilp A, Conomos M, Weir B, Laurie C, Levine D (2017). SeqArray - A storage-efficient high-performance data format for WGS variant calls. Bioinformatics. doi: 10.1093\/bioinformatics\/btx145.\" \n       ],\n     numpy: [\n      \"S. van der Walt, S. C. Colbert and G. Varoquaux, 'The NumPy Array: A Structure for Efficient Numerical Computation,' in Computing in Science & Engineering, vol. 13, no. 2, pp. 22-30, March-April 2011, doi: 10.1109\/MCSE.2011.37.\"\n     ],\n     scipy: [\n       \"Virtanen, P., Gommers, R., Oliphant, T.E. et al. SciPy 1.0: fundamental algorithms for scientific computing in Python. Nat Methods 17, 261-272 (2020). https:\/\/doi.org\/10.1038\/s41592-019-0686-2\"\n     ],\n     matplotlib: [\n      \"J. D. Hunter, 'Matplotlib: A 2D Graphics Environment,' in Computing in Science & Engineering, vol. 9, no. 3, pp. 90-95, May-June 2007, doi: 10.1109\/MCSE.2007.55.\"\n     ],\n    fastStructure: [\n      \"Anil Raj, Matthew Stephens, and Jonathan K. Pritchard. fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets, (Genetics) June 2014 197:573-589. \"\n    ],\n    vcftools: [\n      \"The Variant Call Format and VCFtools, Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, Bioinformatics, 2011 http:\/\/dx.doi.org\/10.1093\/bioinformatics\/btr330\"\n    ],\n    plink: [\n    \" Second-generation PLINK: rising to the challenge of larger and richer datasets. Christopher C Chang, Carson C Chow, Laurent CAM Tellier, Shashaank Vattikuti, Shaun M Purcell, James J Lee. GigaScience, Volume 4, Issue 1, December 2015, 13742-015-0047-8\"\n    ],\n    structure_threader: [\n    \" PINA\u2010MARTINS, Francisco, SILVA, Diogo N., FINO, Joana, et al. Structure_threader: an improved method for automation and parallelization of programs structure, fastStructure and MavericK on multicore CPU systems. Molecular ecology resources, 2017, vol. 17, no 6, p. e268-e274.\"\n    ]\n    },\n}\n"
    }
]