HTTP 请求中,若服务器默认使用 GBK 编码而浏览器使用 UTF-8 编码,请求时不指定编码格式就会出现乱码。
python3 模拟POST请求时 “Content-Type”指定编码会消失的问题
如下代码所示
在外部指定了 “Content-Type”=“application/x-www-form-urlencoded; charset=UTF-8”,但抓包后发现 charset=UTF-8 消失了,变成了 “Content-Type”=“application/x-www-form-urlencoded”,导致服务器用 GBK 解码,出现乱码。
import gzip
import http.cookiejar
import json
import urllib.parse
import urllib.request
import zlib
def ungzip(data):
    """Return *data* gunzipped, or unchanged if it is not valid gzip.

    Args:
        data: Raw response body as bytes, possibly gzip-compressed.

    Returns:
        The decompressed bytes when *data* is a valid gzip stream,
        otherwise *data* itself.
    """
    try:
        return gzip.decompress(data)
    except (OSError, EOFError, zlib.error):
        # Best effort: the body was not compressed (or is truncated/corrupt),
        # so hand it back untouched. Only decompression errors are caught, so
        # unrelated failures are no longer hidden.
        return data
def getOpener(head):
    """Build a cookie-aware opener whose default headers come from *head*.

    Args:
        head: Mapping of header name to header value.

    Returns:
        An opener built with an ``HTTPCookieProcessor`` backed by a fresh
        ``CookieJar``; its ``addheaders`` list is replaced by the
        ``(name, value)`` pairs of *head*.

    Note:
        ``addheaders`` entries are only applied to a request that does not
        already have a header of that name, so a "Content-Type" passed here
        does not override the default one set for requests that carry data.
    """
    cookie_jar = http.cookiejar.CookieJar()
    cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
    opener = urllib.request.build_opener(cookie_handler)
    # dict.items() already yields (name, value) tuples in insertion order.
    opener.addheaders = list(head.items())
    return opener
# Headers handed to the opener; the charset is declared explicitly here.
header = {
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    # ... other header
}
opener = getOpener(header)

# Form fields sent as the POST body.
postDict = {
    "sql": "select '你好' from dual",
    "pageNum": "1",
    "pageSize": "100",
}
postData = urllib.parse.urlencode(postDict).encode("utf-8")

# NOTE(review): `url` and `timeout` are not defined in this snippet; they are
# expected to be supplied by the surrounding script.
op = opener.open(url, postData, timeout=timeout)

# Read the body, gunzip it if needed, decode it and parse it as JSON.
response = json.loads(ungzip(op.read()).decode("utf-8"))
分析调试发现:用上述方式(opener.addheaders)指定的“Content-Type”会被默认值“application/x-www-form-urlencoded”取代。
具体源码分析如下,见 urllib.request.AbstractHTTPHandler.do_request_():
# Excerpt of urllib.request.AbstractHTTPHandler.do_request_, quoted for analysis.
# It fills in headers the request is still missing, in this order:
# Content-type and Content-length (only when the request carries data), Host,
# and finally every entry of the parent opener's addheaders list.
# Returns the same request object.
# Raises URLError when the request has no host, TypeError when the data is a
# str, and ValueError when the data is an iterable that memoryview() rejects.
# NOTE(review): `collections.Iterable` reflects the Python version this was
# copied from; newer versions spell it `collections.abc.Iterable` - confirm
# against the interpreter in use.
def do_request_(self, request):
host = request.host
if not host:
raise URLError('no host given')
if request.data is not None: # POST
data = request.data
if isinstance(data, str):
msg = "POST data should be bytes or an iterable of bytes. " \
"It cannot be of type str."
raise TypeError(msg)
# label_A: unless Content-type was set directly on the request itself,
# the default value is installed here.
if not request.has_header('Content-type'):
request.add_unredirected_header(
'Content-type',
'application/x-www-form-urlencoded')
if not request.has_header('Content-length'):
try:
mv = memoryview(data)
except TypeError:
if isinstance(data, collections.Iterable):
raise ValueError("Content-Length should be specified "
"for iterable data of type %r %r" % (type(data),
data))
else:
request.add_unredirected_header(
'Content-length', '%d' % (len(mv) * mv.itemsize))
sel_host = host
if request.has_proxy():
scheme, sel = splittype(request.selector)
sel_host, sel_path = splithost(sel)
if not request.has_header('Host'):
request.add_unredirected_header('Host', sel_host)
for name, value in self.parent.addheaders:
name = name.capitalize()
# For a POST request that went through label_A above, Content-type is
# already present, so the opener's value is not applied here.
if not request.has_header(name):
request.add_unredirected_header(name, value)
return request
解决方案
方案1、把源码中的 if not request.has_header(name) 判断去掉即可(不推荐:这需要修改 Python 标准库源码,会影响所有使用 urllib 的程序,且升级 Python 后会失效)。
方案2、opener.open 的第一个参数既可以是 URL 字符串,也可以是 Request 对象;直接传入携带 header 的 Request 对象即可。这样进入 do_request_ 时 request.has_header('Content-type') 已为真,label_A 处不会再写入默认值。
# Before: op = opener.open(url, postData, timeout=timeout)
# Now the headers travel on the Request object itself instead of relying on
# the opener's addheaders list.
op = opener.open(
    urllib.request.Request(url, headers=header),
    data=postData,
    timeout=timeout,
)
更多推荐
python3 模拟POST请求时 “Content-Type”指定编码会消失的问题
发布评论