如何在python中获取会话cookie(How to get a session cookie in python)

我正在尝试使用 Python 从一个站点下载文件，这些文件只有在登录后才能下载。登录似乎工作正常！但是当我真正尝试下载文件时，只收到一个文本文件，提示我必须先登录。我认为需要获取 PHPSESSID cookie 并使用它，但不知道该怎么做。这是我的代码：

from BeautifulSoup import BeautifulSoup
import re
import requests
import sys


class LegendasTV(object):
    """Small client for the legendas.tv site: log in, search, download a file.

    NOTE(review): ``search`` reads ``self.LEGENDA_LANG`` and
    ``self.LEGENDA_TIPO``, which are not defined anywhere in this snippet.
    """

    # URL templates; the ``%s`` placeholder is filled with the search term
    # (URL_BUSCA) or with the download id (URL_DOWNLOAD).
    URL_BUSCA = 'http://legendas.tv/legenda/busca/%s/1'
    URL_DOWNLOAD = 'http://legendas.tv/downloadarquivo/%s'
    URL_LOGIN = 'http://legendas.tv/login'

    def __init__(self, usuario, senha):
        """Store the credentials and log in immediately.

        The return value of ``_login`` is ignored here.
        """
        self.usuario = usuario
        self.senha = senha
        self.cookie = None
        self._login()

    def _login(self):
        """POST the credentials to ``URL_LOGIN``.

        Returns 0 when the response body still contains the login page
        title, otherwise prints a message and returns 1.
        """
        # NOTE(review): this Session is a local variable; it is not stored
        # on the instance, so nothing else in the class can reuse it.
        s = requests.Session()
        url = self.URL_LOGIN
        payload = {'data[User][username]': self.usuario,
                   'data[User][password]': self.senha,
                   "data[lembrar]": "on"}
        r = s.post(url, payload)
        html = r.content
        if "<title>Login - Legendas TV</title>" in html:
            return 0
        else:
            print 'Success login!'
            return 1

    def _request(self, url, method='GET', data=None):
        """Issue a GET, or a POST carrying ``data``, and return the response.

        NOTE(review): this calls the module-level ``requests.get`` /
        ``requests.post`` rather than the Session created in ``_login``, so
        the requests are made outside of the logged-in session.
        NOTE(review): when neither branch runs (for example POST without
        data) ``r`` is never assigned and ``return r`` raises
        UnboundLocalError.
        """
        if method == 'GET':
            r = requests.get(url, stream=True)
        if method == 'POST' and data:
            r = requests.post(url, data=data)
        return r

    def search(self, q, lang='pt-br', tipo='release'):
        """POST a search for ``q`` and hand the response text to ``_parser``.

        The three validation branches below are placeholders that currently
        do nothing.
        """
        if not q:
            pass  # raise exception
        if not lang or not self.LEGENDA_LANG.get(lang):
            pass  # raise exception
        if not tipo or not self.LEGENDA_TIPO.get(tipo):
            pass  # raise exception
        busca = {
            'txtLegenda': q,
            'int_idioma': self.LEGENDA_LANG[lang],
            'selTipo': self.LEGENDA_TIPO[tipo]
        }
        r = self._request(self.URL_BUSCA % q, method='POST', data=busca)
        if r:
            legendas = self._parser(r.text)
        else:
            # NOTE(review): ``legendas`` stays unassigned on this path, so
            # the ``return`` below fails.
            pass  # raise exception
        return legendas

    def _parser(self, data):
        """Scan every ``<a>`` tag in ``data`` and build download URLs.

        Only links whose ``href`` contains the hard-coded "S09E16" are
        considered; the third "/"-separated piece of the href is used as
        the download id.

        NOTE(review): the computed ``url`` is neither appended to
        ``legendas`` nor returned, so as written this method returns None.
        """
        legendas = []
        html = BeautifulSoup(data)
        results = html.findAll("a")
        for result in results:
            if result.get("href") is not None and "S09E16" in result.get("href"):
                path_href = result.get("href").split("/")
                unique_id_download = path_href[2]
                url = self.URL_DOWNLOAD % unique_id_download

    def download(self, url_da_legenda):
        """GET ``url_da_legenda`` and write the response body to "teste.rar".

        Nothing is written when the response object is falsy.
        """
        r = self._request(url_da_legenda)
        if r:
            with open("teste.rar", 'wb') as handle:
                print u'Baixando legenda:', url_da_legenda
                handle.write(r.content)

以下是我尝试使用代码下载一个文件的方法：

$ python Python 2.7.6 (default, Jun 22 2015, 17:58:13) [GCC 4.8.2] on linux2 Type "help", "copyright", "credits" or "license" for more information. >>> >>> from download_legenda import * >>> legendas_tv = LegendasTV("Login", "Pass") Success login! >>> >>> legendas_tv.download("http://legendas.tv/downloadarquivo/56c76ce239291") Baixando legenda: http://legendas.tv/downloadarquivo/56c76ce239291 >>>

我将不胜感激任何帮助。

I'm trying to use Python to download a file from a site that only allows downloads after you log in. The login itself seems to be working OK! But when I actually try to download the file, I only receive a text file saying I must log in. I believe I need to get the PHPSESSID cookie and use it, but I can't figure out how to do it. Here is my code:

from BeautifulSoup import BeautifulSoup
import re
import requests
import sys


class LegendasTV(object):
    """Small client for the legendas.tv site: log in, search, download a file.

    NOTE(review): ``search`` reads ``self.LEGENDA_LANG`` and
    ``self.LEGENDA_TIPO``, which are not defined anywhere in this snippet.
    """

    # URL templates; the ``%s`` placeholder is filled with the search term
    # (URL_BUSCA) or with the download id (URL_DOWNLOAD).
    URL_BUSCA = 'http://legendas.tv/legenda/busca/%s/1'
    URL_DOWNLOAD = 'http://legendas.tv/downloadarquivo/%s'
    URL_LOGIN = 'http://legendas.tv/login'

    def __init__(self, usuario, senha):
        """Store the credentials and log in immediately.

        The return value of ``_login`` is ignored here.
        """
        self.usuario = usuario
        self.senha = senha
        self.cookie = None
        self._login()

    def _login(self):
        """POST the credentials to ``URL_LOGIN``.

        Returns 0 when the response body still contains the login page
        title, otherwise prints a message and returns 1.
        """
        # NOTE(review): this Session is a local variable; it is not stored
        # on the instance, so nothing else in the class can reuse it.
        s = requests.Session()
        url = self.URL_LOGIN
        payload = {'data[User][username]': self.usuario,
                   'data[User][password]': self.senha,
                   "data[lembrar]": "on"}
        r = s.post(url, payload)
        html = r.content
        if "<title>Login - Legendas TV</title>" in html:
            return 0
        else:
            print 'Success login!'
            return 1

    def _request(self, url, method='GET', data=None):
        """Issue a GET, or a POST carrying ``data``, and return the response.

        NOTE(review): this calls the module-level ``requests.get`` /
        ``requests.post`` rather than the Session created in ``_login``, so
        the requests are made outside of the logged-in session.
        NOTE(review): when neither branch runs (for example POST without
        data) ``r`` is never assigned and ``return r`` raises
        UnboundLocalError.
        """
        if method == 'GET':
            r = requests.get(url, stream=True)
        if method == 'POST' and data:
            r = requests.post(url, data=data)
        return r

    def search(self, q, lang='pt-br', tipo='release'):
        """POST a search for ``q`` and hand the response text to ``_parser``.

        The three validation branches below are placeholders that currently
        do nothing.
        """
        if not q:
            pass  # raise exception
        if not lang or not self.LEGENDA_LANG.get(lang):
            pass  # raise exception
        if not tipo or not self.LEGENDA_TIPO.get(tipo):
            pass  # raise exception
        busca = {
            'txtLegenda': q,
            'int_idioma': self.LEGENDA_LANG[lang],
            'selTipo': self.LEGENDA_TIPO[tipo]
        }
        r = self._request(self.URL_BUSCA % q, method='POST', data=busca)
        if r:
            legendas = self._parser(r.text)
        else:
            # NOTE(review): ``legendas`` stays unassigned on this path, so
            # the ``return`` below fails.
            pass  # raise exception
        return legendas

    def _parser(self, data):
        """Scan every ``<a>`` tag in ``data`` and build download URLs.

        Only links whose ``href`` contains the hard-coded "S09E16" are
        considered; the third "/"-separated piece of the href is used as
        the download id.

        NOTE(review): the computed ``url`` is neither appended to
        ``legendas`` nor returned, so as written this method returns None.
        """
        legendas = []
        html = BeautifulSoup(data)
        results = html.findAll("a")
        for result in results:
            if result.get("href") is not None and "S09E16" in result.get("href"):
                path_href = result.get("href").split("/")
                unique_id_download = path_href[2]
                url = self.URL_DOWNLOAD % unique_id_download

    def download(self, url_da_legenda):
        """GET ``url_da_legenda`` and write the response body to "teste.rar".

        Nothing is written when the response object is falsy.
        """
        r = self._request(url_da_legenda)
        if r:
            with open("teste.rar", 'wb') as handle:
                print u'Baixando legenda:', url_da_legenda
                handle.write(r.content)

and here is how I am trying to use the code to download one file:

$ python Python 2.7.6 (default, Jun 22 2015, 17:58:13) [GCC 4.8.2] on linux2 Type "help", "copyright", "credits" or "license" for more information. >>> >>> from download_legenda import * >>> legendas_tv = LegendasTV("Login", "Pass") Success login! >>> >>> legendas_tv.download("http://legendas.tv/downloadarquivo/56c76ce239291") Baixando legenda: http://legendas.tv/downloadarquivo/56c76ce239291 >>>

I would appreciate any help.

最满意答案

在这个答案的帮助下，我终于明白了！

https://stackoverflow.com/a/12737874/1718174

我之前试图直接使用 cookie，但似乎 Session 对象已经替我们完成了这部分繁重的工作。以下是我的代码中需要更新的部分：

def _login(self):
    """Log in and keep the authenticated session on the instance.

    Posts the stored credentials to ``URL_LOGIN`` through a new
    ``requests.Session``. A response that still contains the login page
    title is treated as a failed login.

    Returns:
        1 when the login succeeded (``self.session`` holds the session),
        0 otherwise (``self.session`` is set to ``None``).
    """
    s = requests.Session()
    payload = {
        'data[User][username]': self.usuario,
        'data[User][password]': self.senha,
        "data[lembrar]": "on",
    }
    r = s.post(self.URL_LOGIN, payload)

    # ``r.text`` is the decoded body, so the substring test works whether
    # the interpreter treats string literals as bytes or as text.
    if "<title>Login - Legendas TV</title>" in r.text:
        # Always define the attribute so _request can check it safely
        # instead of failing with AttributeError after a failed login.
        self.session = None
        return 0

    print('Success on login!')
    self.session = s
    return 1


def _request(self, url, method='GET', data=None):
    """Send a request through the logged-in session and return the response.

    The session stored by ``_login`` carries the login cookies, so no
    explicit ``cookies=`` argument is passed.

    Args:
        url: Address to request.
        method: ``'GET'`` (streamed) or ``'POST'``.
        data: Form payload; required for ``'POST'``.

    Returns:
        The response object returned by the session.

    Raises:
        RuntimeError: If there is no logged-in session.
        ValueError: If ``method`` is unsupported, or ``'POST'`` is used
            without ``data``.
    """
    session = getattr(self, 'session', None)
    if session is None:
        raise RuntimeError('not logged in: no active session')

    if method == 'GET':
        return session.get(url, stream=True)
    if method == 'POST':
        if not data:
            raise ValueError('POST requests require data')
        return session.post(url, data=data)
    raise ValueError('unsupported method: %r' % (method,))

With the help of this answer I finally figured it out!

https://stackoverflow.com/a/12737874/1718174

I was trying to use cookies directly, but it seems the session already does the heavy lifting and takes care of that for us. Below are the parts of my code that needed to be updated:

def _login(self):
    """Log in and keep the authenticated session on the instance.

    Posts the stored credentials to ``URL_LOGIN`` through a new
    ``requests.Session``. A response that still contains the login page
    title is treated as a failed login.

    Returns:
        1 when the login succeeded (``self.session`` holds the session),
        0 otherwise (``self.session`` is set to ``None``).
    """
    s = requests.Session()
    payload = {
        'data[User][username]': self.usuario,
        'data[User][password]': self.senha,
        "data[lembrar]": "on",
    }
    r = s.post(self.URL_LOGIN, payload)

    # ``r.text`` is the decoded body, so the substring test works whether
    # the interpreter treats string literals as bytes or as text.
    if "<title>Login - Legendas TV</title>" in r.text:
        # Always define the attribute so _request can check it safely
        # instead of failing with AttributeError after a failed login.
        self.session = None
        return 0

    print('Success on login!')
    self.session = s
    return 1


def _request(self, url, method='GET', data=None):
    """Send a request through the logged-in session and return the response.

    The session stored by ``_login`` carries the login cookies, so no
    explicit ``cookies=`` argument is passed.

    Args:
        url: Address to request.
        method: ``'GET'`` (streamed) or ``'POST'``.
        data: Form payload; required for ``'POST'``.

    Returns:
        The response object returned by the session.

    Raises:
        RuntimeError: If there is no logged-in session.
        ValueError: If ``method`` is unsupported, or ``'POST'`` is used
            without ``data``.
    """
    session = getattr(self, 'session', None)
    if session is None:
        raise RuntimeError('not logged in: no active session')

    if method == 'GET':
        return session.get(url, stream=True)
    if method == 'POST':
        if not data:
            raise ValueError('POST requests require data')
        return session.post(url, data=data)
    raise ValueError('unsupported method: %r' % (method,))

更多推荐

如何在python中获取会话cookie(How to get a session cookie in python)

最满意答案

发布评论取消回复

最近发表

热门文章

标签列表