|
@@ -0,0 +1,17 @@
|
|
|
+import re
|
|
|
+
|
|
|
+from scrapy.linkextractors import LinkExtractor
|
|
|
+from scrapy.spiders import CrawlSpider, Rule
|
|
|
+
|
|
|
+
|
|
|
class PingSpider(CrawlSpider):
    """Crawl spider that starts from a single caller-supplied URL.

    Each instance installs three link-extraction rules:

    * anchors whose URL begins with the start URL,
    * the ``src`` attribute of ``<img>`` tags,
    * the ``href`` attribute of ``<link>`` tags.

    None of the rules sets a callback, so Scrapy's default handling for
    matched links applies.
    """

    name = 'ping'

    def __init__(self, start_url, *args, **kwargs):
        """Configure the start URL and crawl rules for this run.

        Args:
            start_url: URL at which the crawl begins. It is also used,
                regex-escaped and anchored with ``^``, as the prefix that
                page links must match to be extracted by the first rule.
            *args: Extra positional arguments, forwarded unchanged to the
                base spider initializer.
            **kwargs: Extra keyword arguments (for example additional
                ``-a key=value`` spider arguments), forwarded unchanged to
                the base spider initializer.
        """
        self.start_urls = [start_url]

        # Escape the URL so characters such as '.' and '?' are matched
        # literally rather than interpreted as regex operators.
        same_prefix_pattern = '^' + re.escape(start_url)

        # The rules are assigned before the base initializer is invoked,
        # preserving the original ordering (the base class is expected to
        # process ``self.rules`` during initialization).
        self.rules = (
            Rule(LinkExtractor(allow=same_prefix_pattern)),
            # An empty ``deny_extensions`` replaces the extractor's default
            # ignore list, so resource URLs (images, stylesheets, icons)
            # are not dropped because of their file extension.
            Rule(LinkExtractor(tags='img', attrs='src', deny_extensions=set())),
            Rule(LinkExtractor(tags='link', attrs='href', deny_extensions=set())),
        )

        # Forward any remaining arguments: Scrapy hands spider arguments to
        # the constructor as keyword arguments, and dropping them here would
        # make every extra argument raise TypeError.
        super(PingSpider, self).__init__(*args, **kwargs)
|