Selenium/PhantomJS Utility Class

The Python class below is used by both the web scraper and the web crawler.

import uuid
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

class SeleniumCrawler:
    """Thin wrapper around a headless PhantomJS browser driven by Selenium.

    The wrapper owns one browser instance, injects a fixed set of helper
    scripts after every navigation, and offers small conveniences for
    running JavaScript files and taking screenshots.

    It can be used as a context manager so the PhantomJS process is always
    shut down::

        with SeleniumCrawler() as crawler:
            crawler.navigate('http://example.com')
            crawler.screenshot()

    NOTE(review): PhantomJS support was deprecated in later Selenium
    releases -- confirm the pinned Selenium version still ships
    ``webdriver.PhantomJS``.
    """

    DEFAULT_USER_AGENT = \
        'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0'
    DEFAULT_EXECUTABLE_PATH = \
        '/usr/local/lib/node_modules/phantomjs/lib/phantom/bin/phantomjs'
    DEFAULT_WINDOW_SIZE = (1920, 1080)

    def __init__(self, executable_path=None, user_agent=None, window_size=None):
        """Start a PhantomJS browser.

        Args:
            executable_path: Path to the ``phantomjs`` binary. Defaults to
                ``DEFAULT_EXECUTABLE_PATH``.
            user_agent: User-agent string reported by the browser. Defaults
                to ``DEFAULT_USER_AGENT``.
            window_size: ``(width, height)`` of the browser window in
                pixels. Defaults to ``DEFAULT_WINDOW_SIZE``.
        """
        if executable_path is None:
            executable_path = self.DEFAULT_EXECUTABLE_PATH
        if user_agent is None:
            user_agent = self.DEFAULT_USER_AGENT
        if window_size is None:
            window_size = self.DEFAULT_WINDOW_SIZE

        # Work on a copy: DesiredCapabilities.PHANTOMJS is a module-level
        # dict shared by every driver created in this process.
        capabilities = dict(DesiredCapabilities.PHANTOMJS)
        capabilities['phantomjs.page.settings.userAgent'] = user_agent

        self.browser = webdriver.PhantomJS(
            executable_path=executable_path,
            desired_capabilities=capabilities,
        )
        width, height = window_size
        self.browser.set_window_size(width, height)

    def __enter__(self):
        """Return the crawler itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Shut the browser down; exceptions from the block propagate."""
        self.close()

    def close(self):
        """Quit the underlying browser and release its process."""
        self.browser.quit()

    def navigate(self, url):
        """Load ``url``, inject the helper scripts and return the browser."""
        self.browser.get(url)
        self.setup_scripts()
        return self.browser

    def setup_scripts(self):
        """Inject the helper scripts into the currently loaded page.

        The script paths are relative, so they are resolved against the
        current working directory.
        """
        self.run_multiple_js(['lodash.min.js', 'jquery-2.1.4.min.js', 'whitebg.js'])

    def run_js(self, js_path):
        """Execute the JavaScript file at ``js_path`` in the current page.

        Returns:
            Whatever the browser reports for ``window.WebScrapeNS.data``
            after the script has run.
        """
        with open(js_path, 'r') as js_file:
            js_text = js_file.read()
        # The file is closed before the (potentially slow) browser call.
        # NOTE(review): the appended return assumes the page already has a
        # ``window.WebScrapeNS`` object -- confirm the injected scripts
        # define it, otherwise the browser will report a script error.
        return self.browser.execute_script(js_text + '; return window.WebScrapeNS.data;')

    def run_multiple_js(self, js_paths):
        """Run every script in ``js_paths`` in order; return their results as a list."""
        return [self.run_js(js_path) for js_path in js_paths]

    def screenshot(self):
        """Save a PNG screenshot under ``screenshots/`` with a random name.

        Returns:
            The relative path the screenshot was written to, so callers can
            locate the file afterwards.
        """
        file_path = 'screenshots/' + str(uuid.uuid4()) + '.png'
        self.browser.save_screenshot(file_path)
        return file_path

Comments

Leave a comment

What color are brown eyes? (spam prevention)
Submit
Code under MIT License unless otherwise indicated.
© 2020, Downranked, LLC.