I'm trying to download data from this URL with Python: https://pave.niaid.nih.gov/locus_viewer?seq_id=HPV99REF
Looking at the page source, I see that the AJAX queries are made by this script:
<script>
const seqID = "HPV99REF";
const structureUrl = "/api/most_related_structure";

// Page bootstrap: once the DOM is ready, fetch the genome record, fill the
// GenBank / FASTA / GFF3 panes from the sequence API, and wire up the
// download buttons and the display-format drop-down.
$(document).ready(function() {
  let genomeData;

  // Send the CSRF token from the page's <meta name="csrf-token"> tag with
  // every subsequent AJAX request.
  $.ajaxSetup({
    headers: { 'X-CSRF-TOKEN': $('meta[name="csrf-token"]').attr('content') }
  });

  // Fetch the genome record and populate the page with it.
  $.ajax("/api/genome/HPV99REF", {
    type: 'GET',
    contentType: 'application/json; charset=UTF-8',
    // Custom retry bookkeeping, read back through `this` in the error handler.
    tryCount : 0,
    retryLimit : 3,
    success: function(_res, _status, _xhr) {
      genomeData = _res;
      populatePageWithGenomeData(genomeData, "");
      // The outer quotes are single quotes so the href's double quotes do not
      // terminate the string literal.
      if (genomeData.host.scientific_name.toLowerCase() === "homo sapiens") {
        $('#orig_seq_link').html('<a href="/explore/reference_genomes/human_genomes">Original submitted sequence</a>');
      } else {
        $('#orig_seq_link').html('<a href="/explore/reference_genomes/animal_genomes">Original submitted sequence</a>');
      }
    },
    error: function(response) {
      // `this` is the settings object of the failed request, so re-issuing
      // `$.ajax(this)` repeats the same call with the updated tryCount.
      // NOTE(review): tryCount starts at 0 and the comparison is `<=`, so up to
      // retryLimit + 1 retries are attempted — confirm this is intended.
      if (response.status === 400 && this.tryCount <= this.retryLimit) {
        this.tryCount++;
        $.ajax(this);
      } else {
        window.location.href = '/ajax_500';
      }
    }
  });

  // populate the genbank, gff3, fasta panes
  // common query string parameters for all API calls
  let params = {
    id_type: "genomes",
    output_format: "response"
  };
  // sequence format name -> file extension used for the download
  let formats = {"genbank": "gb","fasta": "fa", "gff3": "gff"};

  // populate panes with each format
  for (const format of Object.keys(formats)) {
    // The query string is compiled synchronously below, so reusing and
    // mutating the shared params object per iteration is safe.
    params["sequence_format"] = format;
    $.ajax("/api/sequence" + "?" + compileQueryString(params), {
      type: 'POST',
      contentType: 'application/json; charset=UTF-8',
      data: JSON.stringify({
        id_list: ["HPV99REF"]
      }),
      success: function(_res, _status, _xhr) {
        // Convert line breaks to <br> first, then turn the remaining
        // whitespace into non-breaking spaces so the plain-text layout
        // survives being rendered as HTML.
        // NOTE(review): the regex escapes (\r, \n, \s) and the '&nbsp;'
        // replacement were restored from a paste that had lost them — confirm
        // against the live page source.
        $('#' + format + '-viewer-text').html(_res["HPV99REF"].replace(/(?:\r\n|\r|\n)/g, '<br>').replace(/\s/g, '&nbsp;'));
        // handler for download button
        $('#download'+ format.charAt(0).toUpperCase() + format.slice(1)).on('click', function(event) {
          event.stopPropagation(); //so we don't trigger the SVG download for some reason
          var blob = new Blob([_res["HPV99REF"]],{type:"text/plain;charset=utf-8"});
          saveAs(blob,"HPV99REF."+formats[format]);
        });
      },
      error: function(_res) {
        console.log("Error retrieving " + format + " download");
        console.log(_res);
      }
    });
  }

  // handler for drop down viewer selection: hide every viewer, then show the
  // one matching the selected option's text
  $('#displayFormatSelect').change(function() {
    $('#locus-viewer, #genbank-viewer, #gff3-viewer, #fasta-viewer').hide();
    const selectedText = $('#displayFormatSelect option:selected').text();
    if (selectedText === "Locus View") {$('#locus-viewer').show();}
    else if (selectedText === "GenBank") {$('#genbank-viewer').show();}
    else if (selectedText === "Fasta") {$('#fasta-viewer').show();}
    else if (selectedText === "GFF3") {$('#gff3-viewer').show();}
  });
});
</script>
So I tried to download the data with Python:
import requests
import json
# Download the GFF3 record for HPV99REF by replaying the page's own AJAX POST
# to /api/sequence: load the viewer page for the session cookie and CSRF
# token, then POST with the same headers a browser would send.
page_url = "https://pave.niaid.nih.gov/locus_viewer?seq_id=HPV99REF"

session = requests.Session()
# The Session stores any cookies set by this response automatically, so they
# do not need to be copied back into session.cookies by hand.
response = session.get(page_url)
response.raise_for_status()
# Pull the token out of <meta name="csrf-token" content="...">.
token = response.text.split('csrf-token" content="')[1].split('"')[0]
print(token)
print(response.cookies)

url = "https://pave.niaid.nih.gov/api/sequence"
headers = {
    'X-CSRF-TOKEN': token,
    'Content-Type': 'application/json; charset=UTF-8',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36',
    # A browser sends these on the same-origin AJAX POST; the original script
    # did not.
    # NOTE(review): CSRF protection on HTTPS sites commonly rejects a POST
    # that has no Referer header with 400 Bad Request (e.g. Flask-WTF's strict
    # SSL referrer check) — presumably that is what happens here; confirm
    # against the body of the 400 response.
    'Referer': page_url,
    'Origin': 'https://pave.niaid.nih.gov',
    'X-Requested-With': 'XMLHttpRequest',
}
params = {
    "id_type": "genomes",
    "output_format": "response",
    "sequence_format": "gff3"
}
data = {
    "id_list": ["HPV99REF"]
}
response = session.post(
    url,
    params=params,  # requests URL-encodes the query string
    data=json.dumps(data),
    headers=headers
)
urlnew = response.url
print(urlnew)
if not response.ok:
    # The error body usually states why the request was rejected.
    print(response.status_code, response.text)
response.raise_for_status()
sequence_data = response.json()['HPV99REF']
# print(sequence_data)
But the response is: `400 Bad Request`
I've already set the CSRF token and the cookies. What am I missing with Python requests?