Skip to content

Schema

The schema below is auto-generated via the SocialGene Python Package.

# pip install socialgene
sg_schema --nodes  --markdown
sg_schema --rels  --markdown

Nodes

Label Description NF results subdirectory Neo4j header file Unique on properties
assembly Represents a single genome/assembly/BGC. If the input was a FASTA file or if assembly wasn't in the genbank metadata then this will represent the file the data came from. genomic_info assembly.header uid ['uid', 'altitude', 'bio_material', 'bioproject', 'biosample', 'cell_line', 'cell_type', 'chromosome', 'clone', 'clone_lib', 'collected_by', 'collection_date', 'country', 'cultivar', 'culture_collection', 'db_xref', 'dev_stage', 'ecotype', 'environmental_sample', 'focus', 'germline', 'haplogroup', 'haplotype', 'host', 'identified_by', 'isolate', 'isolation_source', 'lab_host', 'lat_lon', 'macronuclear', 'map', 'mating_type', 'metagenome_source', 'mol_type', 'note', 'organelle', 'organism', 'pcr_primers', 'plasmid', 'pop_variant', 'proviral', 'rearranged', 'segment', 'serotype', 'serovar', 'sex', 'specimen_voucher', 'strain', 'sub_clone', 'submitter_seqid', 'sub_species', 'sub_strain', 'tissue_lib', 'tissue_type', 'transgenic', 'type_material', 'variety']
assembly:mibig Represents a single Mibig entry genomic_info assembly.header uid ['uid']
chebi Represents a ChEBI term None None uid ['uid', 'name']
chembl Represents a ChEMBL term None None uid ['uid']
chemical_compound:substrate Mibig substrate (e.g. NRPS monomer) None None inchi, CanonicalSmiles ['uid', 'MolWt', 'HeavyAtomMolWt', 'ExactMolWt', 'NumValenceElectrons', 'NumRadicalElectrons', 'HeavyAtomCount', 'NumAliphaticCarbocycles', 'NumAliphaticHeterocycles', 'NumAliphaticRings', 'NumAromaticCarbocycles', 'NumAromaticHeterocycles', 'NumAromaticRings', 'NumHAcceptors', 'NumHDonors', 'NumHeteroatoms', 'NumRotatableBonds', 'NumSaturatedCarbocycles', 'NumSaturatedHeterocycles', 'NumSaturatedRings', 'RingCount', 'MolLogP', 'MolMR', 'AnonymousGraph', 'ElementGraph', 'MurckoScaffold', 'ExtendedMurcko', 'MolFormula', 'AtomBondCounts', 'DegreeVector', 'Mesomer', 'HetAtomTautomer', 'HetAtomProtomer', 'RedoxPair', 'Regioisomer', 'NetCharge', 'SmallWorldIndexBR', 'SmallWorldIndexBRL', 'ArthorSubstructureOrder', 'HetAtomTautomerv2', 'inchi', 'CanonicalSmiles']
chemical_compound Represents a chemical compound None None inchi, CanonicalSmiles ['uid', 'MolWt', 'HeavyAtomMolWt', 'ExactMolWt', 'NumValenceElectrons', 'NumRadicalElectrons', 'HeavyAtomCount', 'NumAliphaticCarbocycles', 'NumAliphaticHeterocycles', 'NumAliphaticRings', 'NumAromaticCarbocycles', 'NumAromaticHeterocycles', 'NumAromaticRings', 'NumHAcceptors', 'NumHDonors', 'NumHeteroatoms', 'NumRotatableBonds', 'NumSaturatedCarbocycles', 'NumSaturatedHeterocycles', 'NumSaturatedRings', 'RingCount', 'MolLogP', 'MolMR', 'AnonymousGraph', 'ElementGraph', 'MurckoScaffold', 'ExtendedMurcko', 'MolFormula', 'AtomBondCounts', 'DegreeVector', 'Mesomer', 'HetAtomTautomer', 'HetAtomProtomer', 'RedoxPair', 'Regioisomer', 'NetCharge', 'SmallWorldIndexBR', 'SmallWorldIndexBRL', 'ArthorSubstructureOrder', 'HetAtomTautomerv2', 'inchi', 'CanonicalSmiles']
classyfire Represents a classyfire chemical ontology term None None uid ['uid', 'name', 'definition']
gnps_cluster Represents a GNPS molecular networking cluster None None cluster_index, workflow_uuid, task ['task', 'defaultgroups', 'g1', 'g2', 'g3', 'g4', 'g5', 'g6', 'gnpslinkout_cluster', 'gnpslinkout_network', 'mqscore', 'mzerrorppm', 'massdiff', 'rtmean', 'rtmean_min', 'rtstderr', 'uniquefilesources', 'uniquefilesourcescount', 'cluster_index', 'componentindex', 'number_of_spectra', 'parent_mass', 'precursor_charge', 'precursor_mass', 'sumprecursor_intensity', 'workflow_uuid']
gnps_library_spectrum Represents a GNPS library spectrum None None uid ['uid', 'compound_name', 'compound_source', 'pi', 'data_collector', 'adduct', 'precursor_mz', 'exactmass', 'charge', 'cas_number', 'pubmed_id', 'smiles', 'inchi', 'inchi_aux', 'library_class', 'ionmode', 'libraryqualitystring', 'mqscore', 'tic_query', 'rt_query', 'mzerrorppm', 'sharedpeaks', 'massdiff', 'libmz', 'specmz', 'speccharge', 'moleculeexplorerdatasets', 'moleculeexplorerfiles', 'molecular_formula', 'inchikey', 'inchikey_planar']
gnps_organism Represents an organism (as defined by GNPS) None None uid ['uid']
goterm Represents a GO term goterms goterms.header uid ['uid', 'name', 'namespace']
hmm Represents a single non-redundant HMM model hmm_info sg_hmm_nodes.header uid ['uid']
hmm_source Represents the source of an HMM model (e.g. PFAM) hmm_info hmm_source.header uid ['uid', ':LABEL', 'rel_path', 'name', 'acc', 'notes', 'description', 'date', 'hash', 'hash_used', 'model_length', 'super_category', 'category', 'subcategory', 'ga', 'tc', 'nc']
instrument Represents an instrument None None uid ['uid']
ion_source Represents an ion source None None uid ['uid']
mass_spectrum_file Represents a GNPS molecular networking spectrum file None None original_filename, workflow_uuid ['filename', 'gnps_filename', 'workflow_uuid']
mibig_activity Represents a single Mibig bioactivity None None uid ['uid']
mibig_biosynthetic_class Represents a single Mibig biosynthetic class None None uid ['uid']
mibig_compound Represents a single Mibig compound None None uid ['name', 'smiles', 'inchi']
ms2_spectrum Represents a GNPS molecular networking spectrum None None original_filename, specidx, workflow_uuid ['specidx', 'original_filename', 'parentmass', 'charge', 'rettime', 'workflow_uuid']
npatlas Represents a single NPAtlas entry None None uid ['uid', 'original_name', 'mol_formula', 'mol_weight', 'exact_mass', 'inchikey', 'smiles', 'cluster_id', 'node_id', 'synonyms', 'inchi', 'm_plus_h', 'm_plus_na', 'genus', 'species']
npclassifier_class Represents an NPClassifier class None None uid ['uid']
npclassifier_pathway Represents an NPClassifier pathway None None uid ['uid']
npclassifier_superclass Represents an NPClassifier superclass None None uid ['uid']
npmrd Represents a single NP-MRD entry None None uid ['uid']
nucleotide Represents a single nucleotide sequence (e.g. a contig/scaffold/chromosome) genomic_info locus.header uid ['uid', 'external_id', 'altitude', 'bio_material', 'bioproject', 'biosample', 'cell_line', 'cell_type', 'chromosome', 'clone', 'clone_lib', 'collected_by', 'collection_date', 'country', 'cultivar', 'culture_collection', 'db_xref', 'dev_stage', 'ecotype', 'environmental_sample', 'focus', 'germline', 'haplogroup', 'haplotype', 'host', 'identified_by', 'isolate', 'isolation_source', 'lab_host', 'lat_lon', 'macronuclear', 'map', 'mating_type', 'metagenome_source', 'mol_type', 'note', 'organelle', 'organism', 'pcr_primers', 'plasmid', 'pop_variant', 'proviral', 'rearranged', 'segment', 'serotype', 'serovar', 'sex', 'specimen_voucher', 'strain', 'sub_clone', 'submitter_seqid', 'sub_species', 'sub_strain', 'tissue_lib', 'tissue_type', 'transgenic', 'type_material', 'variety']
parameters Parameters and environmental variables used during database creation parameters parameters.header uid ['uid', 'SG_LOC_NEO4J', 'SG_LOC_HMMS', 'NEO4J_dbms_memory_pagecache_size', 'NEO4J_dbms_memory_heap_initial__size', 'NEO4J_dbms_memory_heap_max__size', 'HMMSEARCH_IEVALUE', 'HMMSEARCH_BACKGROUND', 'HMMSEARCH_BIASFILTER', 'HMMSEARCH_NULL2', 'HMMSEARCH_SEED', 'HMMSEARCH_Z', 'HMMSEARCH_DOMZ', 'HMMSEARCH_F1', 'HMMSEARCH_F2', 'HMMSEARCH_F3', 'HMMSEARCH_E', 'HMMSEARCH_DOME', 'HMMSEARCH_INCE', 'HMMSEARCH_INCDOME', 'HMMSEARCH_BITCUTOFFS', 'platform', 'architecture', 'py_executable', 'py_version', 'genome_download_command']
protein Represents a non-redundant protein protein_info protein_ids.header uid ['uid', 'crc64', 'sequence']
publication Represents a publication None None doi ['doi', 'pmid', 'authors', 'title', 'journal', 'year']
publication Represents a publication None None doi ['doi', 'pmid', 'authors', 'title', 'journal', 'year']
substructure Represents a chemical substructure None None inchi, CanonicalSmiles ['uid', 'inchi', 'CanonicalSmiles']
taxid Represents a single taxon within NCBI taxonomy taxdump_process taxid.header uid ['uid', 'name', 'rank']
tigrfam_mainrole Represents a TIGRFAM main role tigrfam_info tigrfam_mainrole.header uid ['uid']
tigrfam_role Represents a TIGRFAM role tigrfam_info tigrfam_role.header uid ['uid']
tigrfam_subrole Represents a TIGRFAM sub role tigrfam_info tigrfam_subrole.header uid ['uid']

Relationships

Label Relationship NF results subdirectory Neo4j header file
ANNOTATES (:hmm)-[:ANNOTATES]->(:protein) parsed_domtblout protein_to_hmm_header.header
ANALYSIS_OF (:mass_spectrum_file)-[:ANALYSIS_OF]->(:assembly) None None
ALTERNATIVE_PARENTS (:npatlas)-[:ALTERNATIVE_PARENTS]->(:classyfire) None None
ASSEMBLES_TO (:nucleotide)-[:ASSEMBLES_TO]->(:assembly) genomic_info assembly_to_locus.header
BLASTP (:protein)-[:BLASTP]->(:protein) diamond_blastp blastp.header
CLUSTERS_TO (:ms2_spectrum)-[:CLUSTERS_TO]->(:gnps_cluster) None None
DIRECT_PARENT (:npatlas)-[:DIRECT_PARENT]->(:classyfire) None None
ENCODES (:nucleotide)-[:ENCODES]->(:protein) genomic_info locus_to_protein.header
FROM (:gnps_library_spectrum)-[:FROM]->(:gnps_organism) None None
FROM (:gnps_library_spectrum)-[:FROM]->(:instrument) None None
FROM (:gnps_library_spectrum)-[:FROM]->(:ion_source) None None
GOTERM_RELS (:goterm)-[:GOTERM_RELS]->(:goterm) goterms go_to_go.header
GO_ANN (:hmm_source)-[:GO_ANN]->(:goterm) tigrfam_info tigrfam_to_go.header
HAS (:npatlas)-[:HAS]->(:gnps_library_spectrum) None None
HAS (:mass_spectrum_file)-[:HAS]->(:ms2_spectrum) None None
HAS (:npatlas)-[:HAS]->(:npmrd) None None
HAS (:npatlas)-[:HAS]->(:publication) None None
IS_A (:gnps_library_spectrum)-[:IS_A]->(:chemical_compound) None None
IS_A (:npatlas)-[:IS_A]->(:chebi) None None
IS_A (:npatlas)-[:IS_A]->(:chemical_compound) None None
IS_A (:npatlas)-[:IS_A]->(:npclassifier_superclass) None None
IS_A (:npatlas)-[:IS_A]->(:npclassifier_pathway) None None
IS_A (:npatlas)-[:IS_A]->(:npclassifier_class) None None
IS_A (:gnps_library_spectrum)-[:IS_A]->(:npclassifier_pathway) None None
IS_TAXON (:assembly)-[:IS_TAXON]->(:taxid) genomic_info assembly_to_taxid.header
IS_A (:classyfire)-[:IS_A]->(:classyfire) None None
IS_A (:gnps_library_spectrum)-[:IS_A]->(:npclassifier_superclass) None None
INTERMEDIATE_NODES (:npatlas)-[:INTERMEDIATE_NODES]->(:classyfire) None None
IS_A (:gnps_library_spectrum)-[:IS_A]->(:npclassifier_class) None None
LOWEST_CLASS (:npatlas)-[:LOWEST_CLASS]->(:classyfire) None None
LIBRARY_HIT (:gnps_cluster)-[:LIBRARY_HIT]->(:gnps_library_spectrum) None None
MAINROLE_ANN (:tigrfam_role)-[:MAINROLE_ANN]->(:tigrfam_mainrole) tigrfam_info tigrfamrole_to_mainrole.header
MOLECULAR_NETWORK (:gnps_cluster)-[:MOLECULAR_NETWORK]->(:gnps_cluster) None None
MCS_SIMILARITY (:chemical_compound)-[:MCS_SIMILARITY]->(:chemical_compound) None None
MMSEQS2 (:protein)-[:MMSEQS2]->(:protein) mmseqs2_cluster mmseqs2.header
PRODUCES (:taxid)-[:PRODUCES]->(:npatlas) None None
PROTEIN_TO_GO (:protein)-[:PROTEIN_TO_GO]->(:goterm) protein_info protein_to_go.header
PRODUCES (:assembly:mibig)-[:PRODUCES]->(:mibig_compound) None None
PRODUCES (:assembly:mibig)-[:PRODUCES]->(:npatlas) None None
ROLE_ANN (:hmm_source)-[:ROLE_ANN]->(:tigrfam_role) tigrfam_info tigrfam_to_role.header
SUBSTRUCTURE (:chemical_compound)-[:SUBSTRUCTURE]->(:substructure) None None
SOURCE_DB (:hmm)-[:SOURCE_DB]->(:hmm_source) hmm_info hmm_source_relationships.header
SYNONYM (:classyfire)-[:SYNONYM]->(:chebi) None None
SUBROLE_ANN (:tigrfam_role)-[:SUBROLE_ANN]->(:tigrfam_subrole) tigrfam_info tigrfamrole_to_subrole.header
TAXON_PARENT (:taxid)-[:TAXON_PARENT]->(:taxid) taxdump_process taxid_to_taxid.header
TANIMOTO_SIMILARITY (:chemical_compound)-[:TANIMOTO_SIMILARITY]->(:chemical_compound) None None