|
@@ -6,21 +6,21 @@ from scrapy.spiders import CrawlSpider, Rule
|
|
|
|
|
|
class PingSpider(CrawlSpider):
    """Crawl spider that walks a site starting from a single URL.

    Three link-extraction rules are installed per instance:

    * page links, followed only when they begin with the start URL;
    * ``src`` attributes of ``img`` tags;
    * ``href`` attributes of ``link`` tags.

    None of the rules sets a callback.
    """

    name = "ping"

    custom_settings = {
        "DOWNLOAD_TIMEOUT": 30,
        # Mapping a scheme to None switches that download handler off
        # (see Scrapy's DOWNLOAD_HANDLERS setting documentation).
        "DOWNLOAD_HANDLERS": {"s3": None},
    }

    def __init__(self, start_url, *args, **kwargs):
        """Configure the start URL and the crawl rules.

        Args:
            start_url: URL the crawl begins at. It is also regex-escaped and
                used as the required prefix for followed page links.
            *args: Extra positional arguments, forwarded to the parent
                initializer.
            **kwargs: Extra keyword arguments (e.g. additional spider
                arguments), forwarded to the parent initializer instead of
                raising ``TypeError``.
        """
        self.start_urls = [start_url]
        # Assigned before the parent initializer runs, as in the original.
        # NOTE(review): CrawlSpider is expected to read ``self.rules`` during
        # its own __init__ -- confirm against the Scrapy version in use.
        self.rules = (
            # Anchor with "^" and escape so the URL is matched literally as a
            # prefix rather than interpreted as a regular expression.
            Rule(LinkExtractor(allow="^" + re.escape(start_url))),
            # An empty deny_extensions set replaces the extractor's default
            # ignore list, so no URL is skipped because of its file extension.
            Rule(LinkExtractor(tags="img", attrs="src", deny_extensions=set())),
            Rule(LinkExtractor(tags="link", attrs="href", deny_extensions=set())),
        )
        super(PingSpider, self).__init__(*args, **kwargs)
|