in scrapy, process_request of custommiddleware(selenium) in middleware.py didnt work properly
for a while, My code couldn’t download images. But i find out the reason of malfunction. When I erase "process_request" of Seleniummiddleware in middlewares.py, finally i can download images. But I still need to use selenium, so I wanna know what’s wrong with process_request method.
here is seleniummiddleware.
class SeleniumMiddleware(object): @classmethod # must be written! def from_crawler(cls, crawler): middleware = cls() crawler.signals.connect(middleware.spider_opened, signals.spider_opened) crawler.signals.connect(middleware.spider_closed, signals.spider_closed) return middleware def spider_opened(self, spider): CHROMEDRIVER_PATH = './scraper/spiders/chromedriver' chrome_options = Options() chrome_options.add_argument("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36") browser = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, chrome_options=chrome_options) self.driver = browser def spider_closed(self, spider): # pass self.driver.close() def process_request(self, request, spider): # self.driver.get('https://www.google.com') # make some cookies # self.driver.get('https://www.google.com/search?q=matchesfashion') # make some cookies self.driver.get(request.url) WebDriverWait(self.driver, 10).until(lambda driver: driver.execute_script('return document.readyState') == 'complete') # delay until driver receive everything body = to_bytes(text=self.driver.page_source) return HtmlResponse(url=request.url, body=body, encoding='utf-8', request=request) def process_response(self, request, response, spider): return response
which part should I fix or add for downloading images???