The schema below is auto-generated via the SocialGene Python Package.

# pip install socialgene
sg_schema --nodes  --markdown
sg_schema --rels  --markdown

Nodes

Label Description NF results subdirectory Neo4j header file Unique on properties
assembly Represents a single genome/assembly/BGC. If the input was a FASTA file, or if the assembly wasn’t in the GenBank metadata, then this will represent the file the data came from. genomic_info assembly.header uid [‘uid’, ‘altitude’, ‘bio_material’, ‘bioproject’, ‘biosample’, ‘cell_line’, ‘cell_type’, ‘chromosome’, ‘clone’, ‘clone_lib’, ‘collected_by’, ‘collection_date’, ‘country’, ‘cultivar’, ‘culture_collection’, ‘db_xref’, ‘dev_stage’, ‘ecotype’, ‘environmental_sample’, ‘focus’, ‘germline’, ‘haplogroup’, ‘haplotype’, ‘host’, ‘identified_by’, ‘isolate’, ‘isolation_source’, ‘lab_host’, ‘lat_lon’, ‘macronuclear’, ‘map’, ‘mating_type’, ‘metagenome_source’, ‘mol_type’, ‘note’, ‘organelle’, ‘organism’, ‘pcr_primers’, ‘plasmid’, ‘pop_variant’, ‘proviral’, ‘rearranged’, ‘segment’, ‘serotype’, ‘serovar’, ‘sex’, ‘specimen_voucher’, ‘strain’, ‘sub_clone’, ‘submitter_seqid’, ‘sub_species’, ‘sub_strain’, ‘tissue_lib’, ‘tissue_type’, ‘transgenic’, ‘type_material’, ‘variety’]
assembly Represents a single MIBiG entry genomic_info assembly.header uid [‘uid’]
chebi Represents a ChEBI term None None uid [‘uid’]
chembl Represents a ChEMBL term None None uid [‘uid’]
chemical_compound:substructure Represents a chemical substructure None None inchi, CanonicalSmiles [‘uid’, ‘MolWt’, ‘HeavyAtomMolWt’, ‘ExactMolWt’, ‘NumValenceElectrons’, ‘NumRadicalElectrons’, ‘HeavyAtomCount’, ‘NumAliphaticCarbocycles’, ‘NumAliphaticHeterocycles’, ‘NumAliphaticRings’, ‘NumAromaticCarbocycles’, ‘NumAromaticHeterocycles’, ‘NumAromaticRings’, ‘NumHAcceptors’, ‘NumHDonors’, ‘NumHeteroatoms’, ‘NumRotatableBonds’, ‘NumSaturatedCarbocycles’, ‘NumSaturatedHeterocycles’, ‘NumSaturatedRings’, ‘RingCount’, ‘MolLogP’, ‘MolMR’, ‘AnonymousGraph’, ‘ElementGraph’, ‘CanonicalSmiles’, ‘MurckoScaffold’, ‘ExtendedMurcko’, ‘MolFormula’, ‘AtomBondCounts’, ‘DegreeVector’, ‘Mesomer’, ‘HetAtomTautomer’, ‘HetAtomProtomer’, ‘RedoxPair’, ‘Regioisomer’, ‘NetCharge’, ‘SmallWorldIndexBR’, ‘SmallWorldIndexBRL’, ‘ArthorSubstructureOrder’, ‘HetAtomTautomerv2’, ‘inchi’]
chemical_compound Represents a chemical compound None None inchi, CanonicalSmiles [‘uid’, ‘MolWt’, ‘HeavyAtomMolWt’, ‘ExactMolWt’, ‘NumValenceElectrons’, ‘NumRadicalElectrons’, ‘HeavyAtomCount’, ‘NumAliphaticCarbocycles’, ‘NumAliphaticHeterocycles’, ‘NumAliphaticRings’, ‘NumAromaticCarbocycles’, ‘NumAromaticHeterocycles’, ‘NumAromaticRings’, ‘NumHAcceptors’, ‘NumHDonors’, ‘NumHeteroatoms’, ‘NumRotatableBonds’, ‘NumSaturatedCarbocycles’, ‘NumSaturatedHeterocycles’, ‘NumSaturatedRings’, ‘RingCount’, ‘MolLogP’, ‘MolMR’, ‘AnonymousGraph’, ‘ElementGraph’, ‘CanonicalSmiles’, ‘MurckoScaffold’, ‘ExtendedMurcko’, ‘MolFormula’, ‘AtomBondCounts’, ‘DegreeVector’, ‘Mesomer’, ‘HetAtomTautomer’, ‘HetAtomProtomer’, ‘RedoxPair’, ‘Regioisomer’, ‘NetCharge’, ‘SmallWorldIndexBR’, ‘SmallWorldIndexBRL’, ‘ArthorSubstructureOrder’, ‘HetAtomTautomerv2’, ‘inchi’]
chemical_compound:substrate MIBiG substrate (e.g. NRPS monomer) None None inchi, CanonicalSmiles [‘uid’, ‘MolWt’, ‘HeavyAtomMolWt’, ‘ExactMolWt’, ‘NumValenceElectrons’, ‘NumRadicalElectrons’, ‘HeavyAtomCount’, ‘NumAliphaticCarbocycles’, ‘NumAliphaticHeterocycles’, ‘NumAliphaticRings’, ‘NumAromaticCarbocycles’, ‘NumAromaticHeterocycles’, ‘NumAromaticRings’, ‘NumHAcceptors’, ‘NumHDonors’, ‘NumHeteroatoms’, ‘NumRotatableBonds’, ‘NumSaturatedCarbocycles’, ‘NumSaturatedHeterocycles’, ‘NumSaturatedRings’, ‘RingCount’, ‘MolLogP’, ‘MolMR’, ‘AnonymousGraph’, ‘ElementGraph’, ‘CanonicalSmiles’, ‘MurckoScaffold’, ‘ExtendedMurcko’, ‘MolFormula’, ‘AtomBondCounts’, ‘DegreeVector’, ‘Mesomer’, ‘HetAtomTautomer’, ‘HetAtomProtomer’, ‘RedoxPair’, ‘Regioisomer’, ‘NetCharge’, ‘SmallWorldIndexBR’, ‘SmallWorldIndexBRL’, ‘ArthorSubstructureOrder’, ‘HetAtomTautomerv2’, ‘inchi’]
chemical_fragment Represents a chemical fragment as defined by rdkit.Chem.Descriptors None None uid []
classyfire Represents a classyfire chemical ontology term None None uid [‘uid’, ‘name’, ‘definition’]
gnps_cluster Represents a GNPS molecular networking cluster None None cluster_index, workflow_uuid [‘workflow_uuid’, ‘defaultgroups’, ‘g1’, ‘g2’, ‘g3’, ‘g4’, ‘g5’, ‘g6’, ‘gnpslinkout_cluster’, ‘gnpslinkout_network’, ‘mqscore’, ‘mzerrorppm’, ‘massdiff’, ‘rtmean’, ‘rtmean_min’, ‘rtstderr’, ‘uniquefilesources’, ‘uniquefilesourcescount’, ‘cluster_index’, ‘componentindex’, ‘number_of_spectra’, ‘parent_mass’, ‘precursor_charge’, ‘precursor_mass’, ‘sumprecursor_intensity’]
gnps_library_spectrum Represents a GNPS library spectrum None None uid [‘uid’, ‘compound_name’, ‘compound_source’, ‘pi’, ‘data_collector’, ‘adduct’, ‘precursor_mz’, ‘exactmass’, ‘charge’, ‘cas_number’, ‘pubmed_id’, ‘smiles’, ‘inchi’, ‘inchi_aux’, ‘library_class’, ‘ionmode’, ‘libraryqualitystring’, ‘mqscore’, ‘tic_query’, ‘rt_query’, ‘mzerrorppm’, ‘sharedpeaks’, ‘massdiff’, ‘libmz’, ‘specmz’, ‘speccharge’, ‘moleculeexplorerdatasets’, ‘moleculeexplorerfiles’, ‘molecular_formula’, ‘inchikey’, ‘inchikey_planar’]
gnps_organism Represents an organism (as defined by GNPS) None None uid [‘uid’]
goterm Represents a GO term goterms goterms.header uid [‘uid’, ‘name’, ‘namespace’]
hmm Represents a single non-redundant HMM model hmm_info sg_hmm_nodes.header uid [‘uid’]
hmm_source Represents the source of an HMM model (e.g. PFAM) hmm_info hmm_source.header uid [‘uid’, ‘:LABEL’, ‘rel_path’, ‘name’, ‘acc’, ‘notes’, ‘description’, ‘date’, ‘hash’, ‘hash_used’, ‘model_length’, ‘super_category’, ‘category’, ‘subcategory’, ‘ga’, ‘tc’, ‘nc’]
instrument Represents an instrument None None uid [‘uid’]
ion_source Represents an ion source None None uid [‘uid’]
npatlas Represents a single NPAtlas entry None None uid [‘uid’, ‘original_name’, ‘mol_formula’, ‘mol_weight’, ‘exact_mass’, ‘inchikey’, ‘smiles’, ‘cluster_id’, ‘node_id’, ‘synonyms’, ‘inchi’, ‘m_plus_h’, ‘m_plus_na’, ‘genus’, ‘species’]
npclassifier_class Represents an NPClassifier class None None uid [‘uid’]
npclassifier_pathway Represents an NPClassifier pathway None None uid [‘uid’]
npclassifier_superclass Represents an NPClassifier superclass None None uid [‘uid’]
npmrd Represents a single NP-MRD entry None None uid [‘uid’]
nucleotide Represents a single nucleotide sequence (e.g. a contig/scaffold/chromosome) genomic_info locus.header uid [‘uid’, ‘external_id’, ‘altitude’, ‘bio_material’, ‘bioproject’, ‘biosample’, ‘cell_line’, ‘cell_type’, ‘chromosome’, ‘clone’, ‘clone_lib’, ‘collected_by’, ‘collection_date’, ‘country’, ‘cultivar’, ‘culture_collection’, ‘db_xref’, ‘dev_stage’, ‘ecotype’, ‘environmental_sample’, ‘focus’, ‘germline’, ‘haplogroup’, ‘haplotype’, ‘host’, ‘identified_by’, ‘isolate’, ‘isolation_source’, ‘lab_host’, ‘lat_lon’, ‘macronuclear’, ‘map’, ‘mating_type’, ‘metagenome_source’, ‘mol_type’, ‘note’, ‘organelle’, ‘organism’, ‘pcr_primers’, ‘plasmid’, ‘pop_variant’, ‘proviral’, ‘rearranged’, ‘segment’, ‘serotype’, ‘serovar’, ‘sex’, ‘specimen_voucher’, ‘strain’, ‘sub_clone’, ‘submitter_seqid’, ‘sub_species’, ‘sub_strain’, ‘tissue_lib’, ‘tissue_type’, ‘transgenic’, ‘type_material’, ‘variety’]
parameters Parameters and environment variables used during database creation parameters parameters.header uid [‘uid’, ‘SG_LOC_NEO4J’, ‘SG_LOC_HMMS’, ‘NEO4J_dbms_memory_pagecache_size’, ‘NEO4J_dbms_memory_heap_initial__size’, ‘NEO4J_dbms_memory_heap_max__size’, ‘HMMSEARCH_IEVALUE’, ‘HMMSEARCH_BACKGROUND’, ‘HMMSEARCH_BIASFILTER’, ‘HMMSEARCH_NULL2’, ‘HMMSEARCH_SEED’, ‘HMMSEARCH_Z’, ‘HMMSEARCH_DOMZ’, ‘HMMSEARCH_F1’, ‘HMMSEARCH_F2’, ‘HMMSEARCH_F3’, ‘HMMSEARCH_E’, ‘HMMSEARCH_DOME’, ‘HMMSEARCH_INCE’, ‘HMMSEARCH_INCDOME’, ‘HMMSEARCH_BITCUTOFFS’, ‘platform’, ‘architecture’, ‘py_executable’, ‘py_version’, ‘genome_download_command’]
protein Represents a non-redundant protein protein_info protein_ids.header uid [‘uid’, ‘crc64’, ‘sequence’]
publication Represents a publication None None doi [‘doi’, ‘pmid’, ‘authors’, ‘title’, ‘journal’, ‘year’]
publication Represents a publication None None doi [‘doi’, ‘pmid’, ‘authors’, ‘title’, ‘journal’, ‘year’]
spectrum Represents a GNPS molecular networking spectrum None None uid [‘uid’, ‘original_filename’, ‘parentmass’, ‘charge’, ‘rettime’, ‘assembly’]
taxid Represents a single taxon within NCBI taxonomy taxdump_process taxid.header uid [‘uid’, ‘name’, ‘rank’]
tigrfam_mainrole Represents a TIGRFAM main role tigrfam_info tigrfam_mainrole.header uid [‘uid’]
tigrfam_role Represents a TIGRFAM role tigrfam_info tigrfam_role.header uid [‘uid’]
tigrfam_subrole Represents a TIGRFAM sub role tigrfam_info tigrfam_subrole.header uid [‘uid’]

Relationships

Label Relationship NF results subdirectory Neo4j header file
ALTERNATIVE_PARENTS (npatlas)-[:ALTERNATIVE_PARENTS]->(classyfire) None None
ASSEMBLES_TO (nucleotide)-[:ASSEMBLES_TO]->(assembly) genomic_info assembly_to_locus.header
ANNOTATES (hmm)-[:ANNOTATES]->(protein) parsed_domtblout protein_to_hmm_header.header
BLASTP (protein)-[:BLASTP]->(protein) diamond_blastp blastp.header
CONTAINS (chemical_compound)-[:CONTAINS]->(chemical_fragment) None None
DIRECT_PARENT (npatlas)-[:DIRECT_PARENT]->(classyfire) None None
ENCODES (nucleotide)-[:ENCODES]->(protein) genomic_info locus_to_protein.header
FROM (gnps_library_spectrum)-[:FROM]->(instrument) None None
FROM (gnps_cluster)-[:FROM]->(assembly) None None
FROM (gnps_library_spectrum)-[:FROM]->(ion_source) None None
FROM (gnps_library_spectrum)-[:FROM]->(gnps_organism) None None
GO_ANN (hmm_source)-[:GO_ANN]->(goterm) tigrfam_info tigrfam_to_go.header
GOTERM_RELS (goterm)-[:GOTERM_RELS]->(goterm) goterms go_to_go.header
HAS (npatlas)-[:HAS]->(publication) None None
HAS (npatlas)-[:HAS]->(gnps_library_spectrum) None None
HAS (npatlas)-[:HAS]->(npmrd) None None
IS_A (npatlas)-[:IS_A]->(npclassifier_pathway) None None
IS_A (classyfire)-[:IS_A]->(classyfire) None None
IS_A (npatlas)-[:IS_A]->(npclassifier_class) None None
IS_A (gnps_library_spectrum)-[:IS_A]->(npclassifier_pathway) None None
IS_TAXON (assembly)-[:IS_TAXON]->(taxid) genomic_info assembly_to_taxid.header
IS_A (gnps_library_spectrum)-[:IS_A]->(npclassifier_superclass) None None
IS_A (gnps_library_spectrum)-[:IS_A]->(npclassifier_class) None None
IS_A (gnps_library_spectrum)-[:IS_A]->(chemical_compound) None None
IS_A (npatlas)-[:IS_A]->(chemical_compound) None None
INTERMEDIATE_NODES (npatlas)-[:INTERMEDIATE_NODES]->(classyfire) None None
IS_A (npatlas)-[:IS_A]->(npclassifier_superclass) None None
LIBRARY_HIT (gnps_cluster)-[:LIBRARY_HIT]->(gnps_library_spectrum) None None
LOWEST_CLASS (npatlas)-[:LOWEST_CLASS]->(classyfire) None None
MMSEQS2 (protein)-[:MMSEQS2]->(protein) mmseqs2_cluster mmseqs2.header
MOLECULAR_NETWORK (gnps_cluster)-[:MOLECULAR_NETWORK]->(gnps_cluster) None None
MAINROLE_ANN (tigrfam_role)-[:MAINROLE_ANN]->(tigrfam_mainrole) tigrfam_info tigrfamrole_to_mainrole.header
PRODUCES (taxid)-[:PRODUCES]->(npatlas) None None
PRODUCES (assembly)-[:PRODUCES]->(npatlas) None None
PROTEIN_TO_GO (protein)-[:PROTEIN_TO_GO]->(goterm) protein_info protein_to_go.header
ROLE_ANN (hmm_source)-[:ROLE_ANN]->(tigrfam_role) tigrfam_info tigrfam_to_role.header
SYNONYM (classyfire)-[:SYNONYM]->(chebi) None None
SOURCE_DB (hmm)-[:SOURCE_DB]->(hmm_source) hmm_info hmm_source_relationships.header
SIMILAR (chemical_compound)-[:SIMILAR]->(chemical_compound) None None
SUBROLE_ANN (tigrfam_role)-[:SUBROLE_ANN]->(tigrfam_subrole) tigrfam_info tigrfamrole_to_subrole.header
TAXON_PARENT (taxid)-[:TAXON_PARENT]->(taxid) taxdump_process taxid_to_taxid.header