From b950c0d70a1d2c95da8d60aca7af02bc50542a2b Mon Sep 17 00:00:00 2001 From: Stefan Israelsson Tampe Date: Wed, 5 Sep 2018 23:18:19 +0200 Subject: format strings added --- modules/language/python/module/urllib/request.py | 6 +++--- .../language/python/module/urllib/robotparser.py | 25 +++++++++++----------- 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'modules/language/python/module/urllib') diff --git a/modules/language/python/module/urllib/request.py b/modules/language/python/module/urllib/request.py index ff79318..d5372e5 100644 --- a/modules/language/python/module/urllib/request.py +++ b/modules/language/python/module/urllib/request.py @@ -82,7 +82,7 @@ f = urllib.request.urlopen('http://www.python.org/') # Possible extensions: # complex proxies XXX not sure what exactly was meant by this # abstract factory for opener - +pk(1) import base64 import bisect import email @@ -100,7 +100,7 @@ import collections import tempfile import contextlib import warnings - +pk(2) from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( @@ -109,7 +109,7 @@ from urllib.parse import ( splitattr, splitquery, splitvalue, splittag, to_bytes, unquote_to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook - +pk(3) # check for SSL try: import ssl diff --git a/modules/language/python/module/urllib/robotparser.py b/modules/language/python/module/urllib/robotparser.py index f110d80..cde8b47 100644 --- a/modules/language/python/module/urllib/robotparser.py +++ b/modules/language/python/module/urllib/robotparser.py @@ -1,4 +1,4 @@ -module(urllib.robotparser) +module(urllib,robotparser) """ robotparser.py @@ -13,8 +13,9 @@ module(urllib.robotparser) """ import collections -import urllib.parse -import urllib.request +import urllib.parse as uparse +import urllib.error as error +import urllib.request as request __all__ = ["RobotFileParser"] @@ -55,13 +56,13 @@ class RobotFileParser: def set_url(self, url): """Sets the URL referring to a robots.txt file.""" self.url = url - self.host, self.path = urllib.parse.urlparse(url)[1:3] + self.host, self.path = uparse.urlparse(url)[1:3] def read(self): """Reads the robots.txt URL and feeds it to the parser.""" try: - f = urllib.request.urlopen(self.url) - except urllib.error.HTTPError as err: + f = request.urlopen(self.url) + except error.HTTPError as err: if err.code in (401, 403): self.disallow_all = True elif err.code >= 400 and err.code < 500: @@ -112,7 +113,7 @@ class RobotFileParser: line = line.split(':', 1) if len(line) == 2: line[0] = line[0].strip().lower() - line[1] = urllib.parse.unquote(line[1].strip()) + line[1] = uparse.unquote(line[1].strip()) if line[0] == "user-agent": if state == 2: self._add_entry(entry) @@ -160,10 +161,10 @@ class RobotFileParser: return False # search for given user agent matches # the first match counts - parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url)) - url = urllib.parse.urlunparse(('','',parsed_url.path, + parsed_url = uparse.urlparse(uparse.unquote(url)) + url = uparse.urlunparse(('','',parsed_url.path, parsed_url.params,parsed_url.query, parsed_url.fragment)) - url = urllib.parse.quote(url) + url = uparse.quote(url) if not url: url = "/" for entry in self.entries: @@ -202,8 +203,8 @@ class RuleLine: if path == '' and not allowance: # an empty value means allow all allowance = True - path = urllib.parse.urlunparse(urllib.parse.urlparse(path)) - self.path = urllib.parse.quote(path) + path = uparse.urlunparse(uparse.urlparse(path)) + self.path = uparse.quote(path) self.allowance = allowance def applies_to(self, filename): -- cgit v1.2.3