[
    {
        "id": "fastStructure_gds",
        "name": "fastStructure_gds",
        "article": "Anil Raj, Matthew Stephens, and Jonathan K. Pritchard. fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets, (Genetics) June 2014 197:573-589",
        "website": "https:\/\/github.com\/jashapiro\/fastStructure\/tree\/py3",
        "git": "https:\/\/github.com\/jashapiro\/fastStructure",
        "description": "A fast algorithm for inferring population structure from large SNP genotype data",
        "version": "",
        "documentation": "",
        "multiqc": "custom",
        "commands": [
            {
                "name": "fastStructure_gds",
                "command": " structure.py",
                "category": "genet_pop",
                "output_dir": "fastStructure_gds",
                "inputs": [
                    {
                        "name": "gds",
                        "type": "gds",
                        "file": "",
                        "description": "Multisample gds file"
                    },
                    {
                        "name": "popmap_file",
                        "type": "popmap",
                        "file": "",
                        "description": "Path to the TSV file listing the group of each sample"
                    }
                ],
                "outputs": [
                    {
                        "name": "chooseK",
                        "type": "tsv",
                        "file": "chooseK.txt",
                        "description": "chooseK results on the posterior mean of admixture proportions for the tested range of K"
                    }
                ],
                "options": [
                    {
                        "name": "fastStructure_gds_threads",
                        "prefix": "--threads",
                        "value": 16,
                        "min": 1,
                        "max": 64,
                        "step": 1,
                        "label": "Threads to use",
                        "type": "numeric"
                    },
                    {
                        "name": "fastStructure_gds_Kmin",
                        "max": 64,
                        "step": 1,
                        "label": "Minimum number of populations (K) to test",
                        "type": "numeric"
                    },
                    {
                        "name": "fastStructure_gds_Kmax",
                        "prefix": "-K",
                        "value": 4,
                        "min": 2,
                        "max": 64,
                        "step": 1,
                        "label": "Maximum number of populations (K) to test",
                        "type": "numeric"
                    },
                    {
                        "name": "fastStructure_gds_cv",
                        "prefix": "--cv",
                        "value": 0,
                        "min": 0,
                        "max": 5,
                        "step": 1,
                        "label": "Number of test sets for cross-validation (0 disables the CV step)",
                        "type": "numeric"
                    },
                    {
                        "name": "fastStructure_gds_prior",
                        "prefix": "--prior",
                        "type": "select",
                        "choices": [
                            {
                                "simple": "simple"
                            },
                            {
                                "logistic": "logistic"
                            }
                        ],
                        "value": "simple",
                        "label": "Prior used for inference under the simplest (independent-loci) admixture model"
                    },
                    {
                        "name": "fastStructure_gds_tol",
                        "prefix": "--tol",
                        "type": "select",
                        "choices": [
                            {
                                "1e_6": "1e-6"
                            },
                            {
                                "1e_7": "1e-7"
                            },
                            {
                                "1e_8": "1e-8"
                            }
                        ],
                        "value": "1e-6",
                        "label": "convergence criterion"
                    }
                ]
            }
        ],
        "prepare_report_script": "fastStructure_gds.prepare.report.R",
        "prepare_report_outputs": [
            "distruct_plot_mqc.png"
        ],
        "install": [],
        "citations": {
            "SeqArray": [
                "Zheng X, Gogarten S, Lawrence M, Stilp A, Conomos M, Weir B, Laurie C, Levine D (2017). SeqArray - A storage-efficient high-performance data format for WGS variant calls. Bioinformatics. doi: 10.1093\/bioinformatics\/btx145."
            ],
            "numpy": [
                "S. van der Walt, S. C. Colbert and G. Varoquaux, 'The NumPy Array: A Structure for Efficient Numerical Computation,' in Computing in Science & Engineering, vol. 13, no. 2, pp. 22-30, March-April 2011, doi: 10.1109\/MCSE.2011.37."
            ],
            "scipy": [
                "Virtanen, P., Gommers, R., Oliphant, T.E. et al. SciPy 1.0: fundamental algorithms for scientific computing in Python. Nat Methods 17, 261-272 (2020). https:\/\/doi.org\/10.1038\/s41592-019-0686-2"
            ],
            "matplotlib": [
                "J. D. Hunter, 'Matplotlib: A 2D Graphics Environment,' in Computing in Science & Engineering, vol. 9, no. 3, pp. 90-95, May-June 2007, doi: 10.1109\/MCSE.2007.55."
            ],
            "fastStructure": [
                "Anil Raj, Matthew Stephens, and Jonathan K. Pritchard. fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets, (Genetics) June 2014 197:573-589."
            ],
            "vcftools": [
                "The Variant Call Format and VCFtools, Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, Bioinformatics, 2011 http:\/\/dx.doi.org\/10.1093\/bioinformatics\/btr330"
            ]
        },
        "yaml": "{\n  id: fastStructure_gds,\n  name: fastStructure_gds,\n  article: \"Anil Raj, Matthew Stephens, and Jonathan K. Pritchard. fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets, (Genetics) June 2014 197:573-589\",\n  website: \"https:\/\/github.com\/jashapiro\/fastStructure\/tree\/py3\",\n  git: \"https:\/\/github.com\/jashapiro\/fastStructure\",\n  description: \"A fast algorithm for inferring population structure from large SNP genotype data\",\n  version: \"\",\n  documentation: \"\",\n  multiqc: \"custom\",\n  commands:\n    [ \n      {\n        name: fastStructure_gds,\n        command: \" structure.py\",\n        category: \"genet_pop\",\n        output_dir: fastStructure_gds,\n        inputs: [\n                 { name: gds, type: \"gds\", file: \"\",description: \"Multisample gds file\"} ,\n                 { name: popmap_file, type: \"popmap\", file: \"\", description: \"Path to tsv file with samples group\"}\n                ],\n        outputs: [\n          { name: chooseK, type: \"tsv\", file: \"chooseK.txt\", description: \"choosek on the posterior mean of admixture proportions for the range of K\" },\n          #{ name: distructPlot, type: \"svg\", file: \"distruct_plot_mqc.svg\", description: \"expected admixture proportions plot\" },\n        ],\n        options: [\n          { \n              name: \"fastStructure_gds_threads\",\n              prefix: \"--threads\",\n              value: 16,\n              min: 1,\n              max: 64,\n              step: 1,\n              label: \"Threads to use\",\n              type: \"numeric\",\n          },\n          {\n              name: \"fastStructure_gd\", \n              max: 64,\n              step: 1,\n              label: \"number of populations range min value to test\",\n              type: \"numeric\",\n          },\n          {\n              name: \"fastStructure_gds_Kmax\",\n              prefix: \"-K\",\n              value: 4,\n             
 min: 2,\n              max: 64,\n              step: 1,\n              label: \"number of populations range max value to test\",\n              type: \"numeric\",\n          },\n          {\n              name: \"fastStructure_gds_cv\",\n              prefix: \"--cv\",\n              value: 0,\n              min: 0,\n              max: 5,\n              step: 1,\n              label: \"number of test sets for cross-validation, 0 implies no CV step\",\n              type: \"numeric\",\n          },\n          {\n              name: \"fastStructure_gds_prior\",\n              prefix: \"--prior\",\n              type: select,\n              choices: [\n                simple: \"simple\",\n                logistic: \"logistic\"\n              ],\n              value: \"simple\",\n              label: \"perform inference for the simplest, independent-loci, admixture model, with this prior\",\n          },  \n          {\n              name: \"fastStructure_gds_tol\",\n              prefix: \"--tol\",\n              type: select,\n              choices: [\n                1e_6: \"1e-6\",\n                1e_7: \"1e-7\",\n                1e_8: \"1e-8\",\n              ],\n              value: \"1e-6\",\n              label: \"convergence criterion\",\n          },  \n        ],\n      },\n    ],\n  prepare_report_script:  fastStructure_gds.prepare.report.R,\n  prepare_report_outputs: [\n    distruct_plot_mqc.png,\n  ],\n  install:\n    {\n      # numpy: [\n      #   \"pip3 install numpy\"\n      # ],\n      # scipy: [\n      #   \"pip3 install scipy\"\n      # ],\n      # matplotlib: [\n      #   \"pip3 install matplotlib\"\n      # ],\n      # cython: [\n      #   \"pip3 install Cython\"\n      # ],\n      # gsl: [\n      #   \"wget http:\/\/gnu.mirror.vexxhost.com\/gsl\/gsl-2.4.tar.gz && tar -zxvf gsl-2.4.tar.gz\",\n      #   \"cd gsl-2.4\",\n      #   \".\/configure\",\n      #   \"make\",\n      #   \"sudo make install\",\n      #   \"cd ..\",\n      #   \"sudo rm -R 
gsl-latest.tar.gz gsl-2.4\",\n      #   \"ln -s \/usr\/local\/lib\/libgsl* \/usr\/lib\/\"\n      # ],\n      # fastStructure: [\n      #   \"pip3 uninstall -y Cyton && pip3 install Cython==0.27.3\",\n      #   \"cd \/opt\/biotools\/bin \",\n      #   \"git clone https:\/\/github.com\/jashapiro\/fastStructure.git \",\n      #   \"cd \/opt\/biotools\/bin\/fastStructure && git checkout py3 \",\n      #   \"cd \/opt\/biotools\/bin\/fastStructure\/vars \",\n      #   \"python3 setup.py build_ext -f --inplace \",\n      #   \"cd \/opt\/biotools\/bin\/fastStructure \",\n      #   \"python setup.py build_ext -f  --inplace \",\n      #   \" sed -i '2iimport matplotlib as mpl' \/opt\/biotools\/bin\/fastStructure\/distruct.py\",\n      #   \" sed -i '3impl.use(\\\"svg\\\")' \/opt\/biotools\/bin\/fastStructure\/distruct.py \",\n      #   \"ENV PATH \/opt\/biotools\/bin\/fastStructure\",\n      #   \"pip3 uninstall -y Cyton && pip3 install Cython\"\n      # ],\n      # popHelper: [\n      #    'Rscript -e ''devtools::install_github(\"royfrancis\/pophelper\", Ncpus=8, upgrade =\"never\")'' '\n      # ],\n      # SeqArray: [\n      #   'Rscript -e ''if (!requireNamespace(\"BiocManager\", quietly = TRUE))  install.packages(\"BiocManager\");\n      #    BiocManager::install(\"SeqArray\", update = TRUE, ask = FALSE)'' '\n      # ],\n      # vcftools: [\n      #     \"apt-get install -y vcftools\"\n      #   ]      \n    },\n  citations:\n    {\n     SeqArray: [\n       \"Zheng X, Gogarten S, Lawrence M, Stilp A, Conomos M, Weir B, Laurie C, Levine D (2017). SeqArray - A storage-efficient high-performance data format for WGS variant calls. Bioinformatics. doi: 10.1093\/bioinformatics\/btx145.\" \n       ],\n     numpy: [\n      \"S. van der Walt, S. C. Colbert and G. Varoquaux, 'The NumPy Array: A Structure for Efficient Numerical Computation,' in Computing in Science & Engineering, vol. 13, no. 2, pp. 
22-30, March-April 2011, doi: 10.1109\/MCSE.2011.37.\"\n     ],\n     scipy: [\n       \"Virtanen, P., Gommers, R., Oliphant, T.E. et al. SciPy 1.0: fundamental algorithms for scientific computing in Python. Nat Methods 17, 261-272 (2020). https:\/\/doi.org\/10.1038\/s41592-019-0686-2\"\n     ],\n     matplotlib: [\n      \"J. D. Hunter, 'Matplotlib: A 2D Graphics Environment,' in Computing in Science & Engineering, vol. 9, no. 3, pp. 90-95, May-June 2007, doi: 10.1109\/MCSE.2007.55.\"\n     ],\n    fastStructure: [\n      \"Anil Raj, Matthew Stephens, and Jonathan K. Pritchard. fastSTRUCTURE: Variational Inference of Population Structure in Large SNP Data Sets, (Genetics) June 2014 197:573-589. \"\n    ],\n    vcftools: [\n      \"The Variant Call Format and VCFtools, Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, Bioinformatics, 2011 http:\/\/dx.doi.org\/10.1093\/bioinformatics\/btr330\"\n    ],\n    },\n}\n"
    }
]