import re

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class PingSpider(CrawlSpider):
    """Crawl spider that requests every page under a start URL plus its assets.

    None of the rules has a callback, so nothing is scraped: the spider only
    issues requests for the links it extracts.
    """

    name = 'ping'

    def __init__(self, start_url, *args, **kwargs):
        """Configure the crawl for a single start URL.

        Args:
            start_url: URL the crawl begins at. Page links are only extracted
                when they start with this exact string.
            *args: Extra positional arguments, forwarded to ``CrawlSpider``.
            **kwargs: Extra spider arguments (for example additional
                ``-a key=value`` options, or the ``_job`` keyword that
                Scrapyd passes), forwarded to ``CrawlSpider``.
        """
        self.start_urls = [start_url]
        # The rules must be assigned before the base constructor runs,
        # because CrawlSpider.__init__ compiles ``self.rules``.
        self.rules = (
            # Page links, restricted to URLs that begin with start_url
            # (escaped so it is matched literally, not as a regex).
            Rule(LinkExtractor(allow='^' + re.escape(start_url))),
            # <img src=...>; the empty deny_extensions replaces the default
            # ignore list so image URLs are not filtered out.
            Rule(LinkExtractor(tags='img', attrs='src', deny_extensions=set())),
            # <link href=...> (stylesheets, icons, ...), likewise unfiltered.
            Rule(LinkExtractor(tags='link', attrs='href', deny_extensions=set())),
        )
        # Forward remaining arguments: Scrapy instantiates spiders with
        # cls(*args, **kwargs), so swallowing them would raise TypeError.
        super(PingSpider, self).__init__(*args, **kwargs)