summary | refs | log | tree | commit | diff
path: root/modules/language/python/module/urllib/robotparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'modules/language/python/module/urllib/robotparser.py')
-rw-r--r-- modules/language/python/module/urllib/robotparser.py 25
1 files changed, 13 insertions, 12 deletions
diff --git a/modules/language/python/module/urllib/robotparser.py b/modules/language/python/module/urllib/robotparser.py
index f110d80..cde8b47 100644
--- a/modules/language/python/module/urllib/robotparser.py
+++ b/modules/language/python/module/urllib/robotparser.py
@@ -1,4 +1,4 @@
-module(urllib.robotparser)
+module(urllib,robotparser)
""" robotparser.py
@@ -13,8 +13,9 @@ module(urllib.robotparser)
"""
import collections
-import urllib.parse
-import urllib.request
+import urllib.parse as uparse
+import urllib.error as error
+import urllib.request as request
__all__ = ["RobotFileParser"]
@@ -55,13 +56,13 @@ class RobotFileParser:
def set_url(self, url):
"""Sets the URL referring to a robots.txt file."""
self.url = url
- self.host, self.path = urllib.parse.urlparse(url)[1:3]
+ self.host, self.path = uparse.urlparse(url)[1:3]
def read(self):
"""Reads the robots.txt URL and feeds it to the parser."""
try:
- f = urllib.request.urlopen(self.url)
- except urllib.error.HTTPError as err:
+ f = request.urlopen(self.url)
+ except error.HTTPError as err:
if err.code in (401, 403):
self.disallow_all = True
elif err.code >= 400 and err.code < 500:
@@ -112,7 +113,7 @@ class RobotFileParser:
line = line.split(':', 1)
if len(line) == 2:
line[0] = line[0].strip().lower()
- line[1] = urllib.parse.unquote(line[1].strip())
+ line[1] = uparse.unquote(line[1].strip())
if line[0] == "user-agent":
if state == 2:
self._add_entry(entry)
@@ -160,10 +161,10 @@ class RobotFileParser:
return False
# search for given user agent matches
# the first match counts
- parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url))
- url = urllib.parse.urlunparse(('','',parsed_url.path,
+ parsed_url = uparse.urlparse(uparse.unquote(url))
+ url = uparse.urlunparse(('','',parsed_url.path,
parsed_url.params,parsed_url.query, parsed_url.fragment))
- url = urllib.parse.quote(url)
+ url = uparse.quote(url)
if not url:
url = "/"
for entry in self.entries:
@@ -202,8 +203,8 @@ class RuleLine:
if path == '' and not allowance:
# an empty value means allow all
allowance = True
- path = urllib.parse.urlunparse(urllib.parse.urlparse(path))
- self.path = urllib.parse.quote(path)
+ path = uparse.urlunparse(uparse.urlparse(path))
+ self.path = uparse.quote(path)
self.allowance = allowance
def applies_to(self, filename):