FASTA Hasher
<p>This tool hashes the sequence of each FASTA entry using sha512t24u (a SHA-512 digest truncated to 24 bytes and base64url-encoded), described in:</p>
<cite>Hart, R. K. & Prlić, A. SeqRepo: A system for managing local collections of biological sequences. PLoS One 15, (2020).</cite>
<br><br>
<textarea id="fastaInput" rows="10" cols="50" placeholder="Paste your FASTA sequence here"></textarea><br>
<button onclick="processFasta()">Generate Hashed FASTA</button>
<button onclick="clearAll()">Clear All</button>
<h2>FASTA with deflines replaced by their sha512t24u hash</h2>
<textarea id="fastaOutput" rows="10" cols="50" readonly></textarea><br>
<button onclick="downloadFasta()">Download FASTA</button>
<h2>TSV mapping input deflines to sha512t24u hashes</h2>
<textarea id="deflineMap" rows="10" cols="50" readonly></textarea><br>
<button onclick="downloadMap()">Download Defline Map</button>
<script>
/**
 * Computes a truncated, URL-safe SHA-512 digest of a string.
 *
 * The input is upper-cased, UTF-8 encoded and hashed with SHA-512; the first
 * 24 bytes of the digest are then base64-encoded using the URL-safe alphabet
 * ("-" and "_" in place of "+" and "/") with any trailing "=" padding removed.
 *
 * @param {string} input - Text to hash (case-insensitive: it is upper-cased first).
 * @returns {Promise<string>} The 32-character base64url digest.
 */
async function sha512t24u(input) {
  const upperCasedBytes = new TextEncoder().encode(input.toUpperCase());
  const digest = await crypto.subtle.digest('SHA-512', upperCasedBytes);

  // View (rather than copy) the leading 24 bytes of the 64-byte digest.
  const truncated = new Uint8Array(digest, 0, 24);

  // btoa() needs a "binary string": one character per byte.
  const binaryString = Array.from(truncated, (byte) => String.fromCharCode(byte)).join('');

  const standardBase64 = btoa(binaryString);
  const urlSafe = standardBase64.replace(/\+/g, '-').replace(/\//g, '_');
  return urlSafe.replace(/=+$/, '');
}
/**
 * Hashes every FASTA record in the #fastaInput textarea and fills the two
 * output textareas.
 *
 * - #fastaOutput receives the FASTA with each defline replaced by the
 *   sha512t24u digest of that record's sequence.
 * - #deflineMap receives one `defline<TAB>hash` line per record (no header row).
 *
 * Parsing rules:
 * - Every line is trimmed, so CRLF input is accepted.
 * - Text before the first ">" line is ignored.
 * - Records with no sequence lines are skipped.
 * - A record whose header has no text (a bare ">") is kept; its mapping line
 *   simply has an empty first column.
 * - The digest is computed over the sequence with every non-letter removed;
 *   the sequence written to the output is the concatenated input lines as typed.
 *
 * @returns {Promise<void>} Resolves once both output textareas are updated.
 */
async function processFasta() {
  const fastaText = document.getElementById("fastaInput").value;

  // Pass 1: split the text into { defline, sequence } records.
  const records = [];
  let current = null; // null (not "") means "no header seen yet"
  for (const rawLine of fastaText.split("\n")) {
    const line = rawLine.trim();
    if (line.startsWith(">")) {
      current = {
        // Drop the ">" marker; tabs are removed so the defline cannot break
        // the tab-separated mapping output.
        defline: line.slice(1).replace(/\t/g, "").trim(),
        sequence: "",
      };
      records.push(current);
    } else if (current !== null) {
      // Sequence lines of one record are joined into a single string.
      current.sequence += line;
    }
  }

  // Pass 2: hash every record that actually carries a sequence. The digests
  // are independent of each other, so they are computed together; Promise.all
  // keeps the results in input order.
  const hashedRecords = await Promise.all(
    records
      .filter(({ sequence }) => sequence !== "")
      .map(async ({ defline, sequence }) => {
        const normalizedSequence = sequence.replace(/[^A-Za-z]/g, "");
        const hash = await sha512t24u(normalizedSequence);
        return { defline, sequence, hash };
      }),
  );

  // Pass 3: render both views.
  document.getElementById("fastaOutput").value = hashedRecords
    .map(({ hash, sequence }) => `>${hash}\n${sequence}`)
    .join("\n");
  document.getElementById("deflineMap").value = hashedRecords
    .map(({ defline, hash }) => `${defline}\t${hash}`)
    .join("\n");
}
/**
 * Offers the current contents of the #fastaOutput textarea as a download
 * named `output.fasta` (MIME type text/plain).
 *
 * The text is wrapped in a Blob, exposed through a temporary object URL and
 * "clicked" via a throw-away anchor element. The object URL is revoked
 * afterwards so repeated downloads do not keep old blobs alive.
 */
function downloadFasta() {
  const fastaOutput = document.getElementById("fastaOutput").value;
  const blob = new Blob([fastaOutput], { type: 'text/plain' });
  const url = URL.createObjectURL(blob);

  const a = document.createElement('a');
  a.href = url;
  a.download = 'output.fasta';

  // The anchor is attached to the document only for the duration of the click.
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    document.body.removeChild(a);
    // Release the blob URL. Deferred by one task as a precaution so that
    // revocation cannot race the download the click has just started.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
/**
 * Offers the current contents of the #deflineMap textarea as a download
 * named `defline_map.tsv` (MIME type text/tab-separated-values).
 *
 * The text is wrapped in a Blob, exposed through a temporary object URL and
 * "clicked" via a throw-away anchor element. The object URL is revoked
 * afterwards so repeated downloads do not keep old blobs alive.
 */
function downloadMap() {
  const deflineMap = document.getElementById("deflineMap").value;
  const blob = new Blob([deflineMap], { type: 'text/tab-separated-values' });
  const url = URL.createObjectURL(blob);

  const a = document.createElement('a');
  a.href = url;
  a.download = 'defline_map.tsv';

  // The anchor is attached to the document only for the duration of the click.
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    document.body.removeChild(a);
    // Release the blob URL. Deferred by one task as a precaution so that
    // revocation cannot race the download the click has just started.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
}
/**
 * Resets the page by emptying the FASTA input textarea and both output
 * textareas (hashed FASTA and defline map).
 */
function clearAll() {
  const textareaIds = ["fastaInput", "fastaOutput", "deflineMap"];
  for (const id of textareaIds) {
    document.getElementById(id).value = "";
  }
}
</script>