HTTP Additional Coverage

In [2]:
import requests
In [1]:
import os
import os.path
import sys
import importlib

if os.path.isdir(os.path.join("../../..", "modules")):
    module_dir = os.path.join("../../..", "modules")
else:
    module_dir = os.path.join("../..", "modules")

module_path = os.path.abspath(module_dir)
if not module_path in sys.path:
    sys.path.append(module_path)

import mysocket as sock
importlib.reload(sock)

import util
importlib.reload(util)
Out[1]:
<module 'util' from '/Users/tcbressoud/Dropbox/cs181-DataSystems/cs181-bressoud/f20_class/modules/util.py'>

Query String Examples

Important for controlling requests to be used in Web Scraping and in API requests for data.

Standalone Query String URL Encoding

In [3]:
import urllib.parse

qstring_value = 'Hellö Wörld@Python?'
urllib.parse.quote(qstring_value)
Out[3]:
'Hell%C3%B6%20W%C3%B6rld%40Python%3F'
In [4]:
urllib.parse.quote_plus(qstring_value)
Out[4]:
'Hell%C3%B6+W%C3%B6rld%40Python%3F'
In [11]:
paramD = {'q': 'Python URL encoding', 'as_sitesearch': 'www.urlencoder.io', 'foo': 'Hellö Wörld@Python?'}
urllib.parse.urlencode(paramD)
Out[11]:
'q=Python+URL+encoding&as_sitesearch=www.urlencoder.io&foo=Hell%C3%B6+W%C3%B6rld%40Python%3F'

Query String URL Encoding as Part of requests

Separate Generation of Request from Issue and Generation of Response

In [17]:
location = "httpbin.org"
resource = "/get"
url = util.buildURL(resource, location, protocol="https")
print(url)

paramD = {'q': 'Python URL encoding', 'foo': '?@/'}
request = requests.Request("GET", url, params=paramD)
prepared = request.prepare()
https://httpbin.org/get
In [19]:
print(prepared.path_url)
/get?q=Python+URL+encoding&foo=%3F%40%2F
In [20]:
s = requests.Session()
response = s.send(prepared)
print(response.status_code)
print(response.json())
200
{'args': {'foo': '?@/', 'q': 'Python URL encoding'}, 'headers': {'Accept-Encoding': 'identity', 'Host': 'httpbin.org', 'X-Amzn-Trace-Id': 'Root=1-5fa15908-5aefc3be1673c7e2092570a5'}, 'origin': '65.185.170.47', 'url': 'https://httpbin.org/get?q=Python+URL+encoding&foo=%3F%40%2F'}

Simple Interface

In [21]:
location = "httpbin.org"
resource = "/get"
url = util.buildURL(resource, location, protocol="https")

paramD = {'q': 'Python URL encoding', 'foo': '?@/'}
response = requests.get(url, params=paramD)
print(response.status_code)
print(response.json())
200
{'args': {'foo': '?@/', 'q': 'Python URL encoding'}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.24.0', 'X-Amzn-Trace-Id': 'Root=1-5fa1599f-4ff272b71fa01dab6a191375'}, 'origin': '65.185.170.47', 'url': 'https://httpbin.org/get?q=Python+URL+encoding&foo=%3F%40%2F'}

POST Examples

Socket

In [48]:
import json

data_string = """["foo", "bar", {"a": 1, "b": 2}]"""

request_line = 'POST /post?c=foo&d=baz HTTP/1.1\r\n'
host_line = 'Host: httpbin.org\r\n'
one_and_done = 'Connection: close\r\n'
content = 'Content-Type: application/json\r\n'
agent = 'User-Agent: curl/7.71.1\r\n'
empty_line = '\r\n'

request_message = request_line + host_line + \
                  one_and_done + content + agent + empty_line + data_string
In [49]:
print(request_message)
POST /post?c=foo&d=baz HTTP/1.1
Host: httpbin.org
Connection: close
Content-Type: application/json
User-Agent: curl/7.71.1

["foo", "bar", {"a": 1, "b": 2}]
In [50]:
connection = sock.makeConnection("httpbin.org", 80)
sock.sendString(connection, request_message)
reply = sock.receiveTillClose(connection)
connection.close()
print(reply)
HTTP/1.1 200 OK
Date: Tue, 03 Nov 2020 14:07:11 GMT
Content-Type: application/json
Content-Length: 384
Connection: close
Server: gunicorn/19.9.0
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true

{
  "args": {
    "c": "foo", 
    "d": "baz"
  }, 
  "data": "", 
  "files": {}, 
  "form": {}, 
  "headers": {
    "Content-Type": "application/json", 
    "Host": "httpbin.org", 
    "User-Agent": "curl/7.71.1", 
    "X-Amzn-Trace-Id": "Root=1-5fa1640f-032dd4396d74f6f451eed86b"
  }, 
  "json": null, 
  "origin": "65.185.170.47", 
  "url": "http://httpbin.org/post?c=foo&d=baz"
}

In [51]:
formD = {'q': 'Python URL encoding', 'foo': '?@/'}
data_string = urllib.parse.urlencode(formD)

request_line = 'POST /post?c=foo&d=baz HTTP/1.1\r\n'
host_line = 'Host: httpbin.org\r\n'
one_and_done = 'Connection: close\r\n'
empty_line = '\r\n'

request_message = request_line + host_line + \
                  one_and_done + empty_line + data_string
print(request_message)
POST /post?c=foo&d=baz HTTP/1.1
Host: httpbin.org
Connection: close

q=Python+URL+encoding&foo=%3F%40%2F
In [52]:
connection = sock.makeConnection("httpbin.org", 80)
sock.sendString(connection, request_message)
reply = sock.receiveTillClose(connection)
connection.close()
print(reply)
HTTP/1.1 200 OK
Date: Tue, 03 Nov 2020 14:07:16 GMT
Content-Type: application/json
Content-Length: 309
Connection: close
Server: gunicorn/19.9.0
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true

{
  "args": {
    "c": "foo", 
    "d": "baz"
  }, 
  "data": "", 
  "files": {}, 
  "form": {}, 
  "headers": {
    "Host": "httpbin.org", 
    "X-Amzn-Trace-Id": "Root=1-5fa16414-15485d3b373da44679321001"
  }, 
  "json": null, 
  "origin": "65.185.170.47", 
  "url": "http://httpbin.org/post?c=foo&d=baz"
}

curl

In [35]:
!curl -X POST -s -v -T postdata.json "http://httpbin.org/post"
*   Trying 18.208.255.250:80...
* Connected to httpbin.org (18.208.255.250) port 80 (#0)
> POST /post HTTP/1.1
> Host: httpbin.org
> User-Agent: curl/7.71.1
> Accept: */*
> Content-Length: 33
> Expect: 100-continue
> 
* Mark bundle as not supporting multiuse
< HTTP/1.1 100 Continue
* We are completely uploaded and fine
* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< Date: Tue, 03 Nov 2020 14:02:36 GMT
< Content-Type: application/json
< Content-Length: 454
< Connection: keep-alive
< Server: gunicorn/19.9.0
< Access-Control-Allow-Origin: *
< Access-Control-Allow-Credentials: true
< 
{
  "args": {}, 
  "data": "[\"foo\", \"bar\", {\"a\": 1, \"b\": 2}]\n", 
  "files": {}, 
  "form": {}, 
  "headers": {
    "Accept": "*/*", 
    "Content-Length": "33", 
    "Host": "httpbin.org", 
    "User-Agent": "curl/7.71.1", 
    "X-Amzn-Trace-Id": "Root=1-5fa162fc-3da6d6b702bc4d2b73bc5b8a"
  }, 
  "json": [
    "foo", 
    "bar", 
    {
      "a": 1, 
      "b": 2
    }
  ], 
  "origin": "65.185.170.47", 
  "url": "http://httpbin.org/post"
}
* Connection #0 to host httpbin.org left intact
In [54]:
!curl -X POST -s -v -d year=2001 -d newYear='Get+different+year' "http://httpbin.org/post?a=1&b=2"
*   Trying 35.170.225.136:80...
* Connected to httpbin.org (35.170.225.136) port 80 (#0)
> POST /post?a=1&b=2 HTTP/1.1
> Host: httpbin.org
> User-Agent: curl/7.71.1
> Accept: */*
> Content-Length: 36
> Content-Type: application/x-www-form-urlencoded
> 
* upload completely sent off: 36 out of 36 bytes
* Mark bundle as not supporting multiuse
< HTTP/1.1 200 OK
< Date: Tue, 03 Nov 2020 14:10:14 GMT
< Content-Type: application/json
< Content-Length: 504
< Connection: keep-alive
< Server: gunicorn/19.9.0
< Access-Control-Allow-Origin: *
< Access-Control-Allow-Credentials: true
< 
{
  "args": {
    "a": "1", 
    "b": "2"
  }, 
  "data": "", 
  "files": {}, 
  "form": {
    "newYear": "Get different year", 
    "year": "2001"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Content-Length": "36", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "curl/7.71.1", 
    "X-Amzn-Trace-Id": "Root=1-5fa164c6-00737d77087f315908fcd3cc"
  }, 
  "json": null, 
  "origin": "65.185.170.47", 
  "url": "http://httpbin.org/post?a=1&b=2"
}
* Connection #0 to host httpbin.org left intact

requests

In [63]:
location = "httpbin.org"
resource = "/post"

url = util.buildURL(resource, location, protocol="https")

paramD = {'a': 'foo', 'b': 'bar'}
formD = {'q': 'Python URL encoding', 'foo': '?@/'}
headerD = {'Accept': 'application/json'}
In [64]:
request = requests.Request("POST", url, params=paramD, data=formD, headers=headerD)
prepared = request.prepare()
In [65]:
print("URI:", prepared.path_url)
print("Request Headers:", prepared.headers)
print("Body:", prepared.body)
URI: /post?a=foo&b=bar
Request Headers: {'Accept': 'application/json', 'Content-Length': '35', 'Content-Type': 'application/x-www-form-urlencoded'}
Body: q=Python+URL+encoding&foo=%3F%40%2F
In [66]:
response = requests.post(url, params=paramD, data=formD, headers=headerD)
print(response.status_code)
print(response.json())
200
{'args': {'a': 'foo', 'b': 'bar'}, 'data': '', 'files': {}, 'form': {'foo': '?@/', 'q': 'Python URL encoding'}, 'headers': {'Accept': 'application/json', 'Accept-Encoding': 'gzip, deflate', 'Content-Length': '35', 'Content-Type': 'application/x-www-form-urlencoded', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.24.0', 'X-Amzn-Trace-Id': 'Root=1-5fa16835-44b7b2dc5305bddb5d134a7d'}, 'json': None, 'origin': '65.185.170.47', 'url': 'https://httpbin.org/post?a=foo&b=bar'}

Encoding -- Characters to Bytes

In [68]:
mystring = 'Hellö Wörld@Python?'
mybytes8 = mystring.encode('UTF-8')
mybytes16 = mystring.encode('UTF-16BE')
In [69]:
len(mystring)
Out[69]:
19
In [70]:
len(mybytes8)
Out[70]:
21
In [71]:
len(mybytes16)
Out[71]:
38
In [72]:
type(mybytes8)
Out[72]:
bytes
In [73]:
mybytes8.hex()
Out[73]:
'48656c6cc3b62057c3b6726c6440507974686f6e3f'
In [74]:
mybytes8
Out[74]:
b'Hell\xc3\xb6 W\xc3\xb6rld@Python?'
In [ ]:
mybytes8.decode()