python requests.post responding “400 Bad Request”

40 views · Tags: ajax, python, python requests, web crawler
0

I’m trying to download data from this URL with Python: https://pave.niaid.nih.gov/locus_viewer?seq_id=HPV99REF

Looking at the page source, I see that the data is loaded with these AJAX queries:

<script>
  // Page-level constants for the locus viewer of one reference genome.
  const seqID = "HPV99REF";
  const structureUrl = "/api/most_related_structure";
  
  // Once the DOM is ready: load the genome metadata, fill the GenBank / FASTA /
  // GFF3 panes from the sequence API, and wire up the viewer drop-down.
  $(document).ready(function() {
    var genomeData;
    // Send the CSRF token from the page's <meta name="csrf-token"> tag as an
    // X-CSRF-TOKEN header on every subsequent jQuery AJAX request.
    $.ajaxSetup({
        headers: { 'X-CSRF-TOKEN': $('meta[name="csrf-token"]').attr('content') }
    });
    // Fetch the genome metadata; tryCount / retryLimit are custom settings read
    // back through `this` in the error handler to retry on HTTP 400.
    $.ajax("/api/genome/HPV99REF", {
      type: 'GET',
      contentType: 'application/json; charset=UTF-8',
      tryCount : 0,
      retryLimit : 3,
      success: function(_res, _status, _xhr) {
        genomeData = _res;
        //console.log(genomeData);
        populatePageWithGenomeData(genomeData, "");
        
        // Link to the human or animal reference-genome listing depending on the host.
        // Single-quoted literals so the href's double quotes need no escaping.
        if (genomeData.host.scientific_name.toLowerCase() === "homo sapiens") {
            $('#orig_seq_link').html('<a href="/explore/reference_genomes/human_genomes">Original submitted sequence</a>');
        } else {
            $('#orig_seq_link').html('<a href="/explore/reference_genomes/animal_genomes">Original submitted sequence</a>');
        }
        
      },
      error: function(response) {
          //console.log(response);
          // Re-issue the same request (same settings object) on HTTP 400 until
          // the retry budget is used up; otherwise go to the error page.
          if (response.status == 400 && this.tryCount <= this.retryLimit) {
              this.tryCount++;
              $.ajax(this);
          } else {
              window.location.href = '/ajax_500';
          }
      }
    });
    // populate the genbank, gff3, fasta panes
    // common query string parameters for all API calls
    let params = {
      id_type: "genomes",
      output_format: "response"
    };
    // sequence_format value -> file extension used for the download
    let formats = {"genbank": "gb","fasta": "fa", "gff3": "gff"};
    // populate panes with each format
    for (const format of Object.keys(formats)) {
      params["sequence_format"] = format;
      // POST a JSON body {id_list: [...]}; the response is indexed by sequence id.
      $.ajax("/api/sequence" + "?" + compileQueryString(params), {
        type: 'POST',
        contentType: 'application/json; charset=UTF-8',
        data: JSON.stringify({
          id_list: ["HPV99REF"]
        }),
        success: function(_res, _status, _xhr) {
          // Render as HTML: line breaks become <br>, remaining whitespace becomes &nbsp;
          $('#' + format + '-viewer-text').html(_res["HPV99REF"].replace(/(?:\r\n|\r|\n)/g, '<br>').replace(/\s/g, '&nbsp;'));
          // handler for download button
          $('#download'+ format.charAt(0).toUpperCase() + format.slice(1)).on('click', function(event) {
              event.stopPropagation(); //so we don't trigger the SVG download for some reason
              var blob = new Blob([_res["HPV99REF"]],{type:"text/plain;charset=utf-8"});
              saveAs(blob,"HPV99REF."+formats[format]);
          });
        },
        error: function(_res) {
          console.log("Error retrieving " + format + " download");
          console.log(_res);
        }
      });
    }
    // handler for drop down viewer selection
    $('#displayFormatSelect').change(function() {
      // Hide every pane, then show the one matching the selected option text.
      $('#locus-viewer, #genbank-viewer, #gff3-viewer, #fasta-viewer').hide();
      const selectedText = $('#displayFormatSelect option:selected').text();
      if (selectedText === "Locus View") {$('#locus-viewer').show();}
      else if (selectedText === "GenBank") {$('#genbank-viewer').show();}
      else if (selectedText === "Fasta") {$('#fasta-viewer').show();}
      else if (selectedText === "GFF3") {$('#gff3-viewer').show();}
    });
  });
</script>

So I tried to download the data with Python:

import requests
import json

# Download the GFF3 record for HPV99REF by replaying the POST that the
# locus-viewer page sends to /api/sequence.

page_url = "https://pave.niaid.nih.gov/locus_viewer?seq_id=HPV99REF"

# A Session keeps the cookies set by the first response and sends them on
# later requests automatically, so they do not need to be copied by hand.
session = requests.Session()
response = session.get(page_url, timeout=30)
response.raise_for_status()

# The page exposes its CSRF token as <meta name="csrf-token" content="...">.
_, marker_found, after_marker = response.text.partition('csrf-token" content="')
if not marker_found:
    raise RuntimeError("csrf-token meta tag not found in the locus viewer page")
token = after_marker.split('"', 1)[0]
print(token)
print(response.cookies)

url = "https://pave.niaid.nih.gov/api/sequence"
headers = {
    'X-CSRF-TOKEN': token,
    'Content-Type': 'application/json; charset=UTF-8',
    # NOTE(review): the browser sends a same-origin Referer with this AJAX
    # call and the original script did not. CSRF middleware commonly rejects
    # HTTPS requests without one (HTTP 400) -- presumably the cause of the
    # "400 Bad Request" here; confirm against the server's behaviour.
    'Referer': page_url,
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.80 Safari/537.36',
}

params = {
    "id_type": "genomes",
    "output_format": "response",
    "sequence_format": "gff3",
}

data = {
    "id_list": ["HPV99REF"],
}

# Let requests build (and URL-encode) the query string and serialise the JSON
# body; the explicit Content-Type header above is kept as-is.
response = session.post(
    url,
    params=params,
    json=data,
    headers=headers,
    timeout=30,
)
urlnew = response.url
print(urlnew)

# Fail with the HTTP status instead of a confusing JSON decode error when the
# server answers with an error page.
response.raise_for_status()

# The response is a JSON object keyed by sequence id.
sequence_data = response.json()['HPV99REF']
# print(sequence_data)

But the response is `400 Bad Request`.

I’ve already set the CSRF token and the cookies. What am I missing with Python requests?