pyefun.seleniumUtil.browserUtil 源代码

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import requests
from pyefun import *
import logging
import hashlib

# Root logger (getLogger() is called without a name); 输出() writes to it via logger.info.
logger = logging.getLogger()


def 取md5(内容):
    """Return the hexadecimal MD5 digest of ``内容``.

    Args:
        内容: Text to hash (encoded as UTF-8 first), or a ``bytes`` /
            ``bytearray`` value, which is hashed as-is.

    Returns:
        The 32-character lowercase hexadecimal digest string.
    """
    # Only text needs encoding; binary input is passed straight to hashlib.
    if isinstance(内容, str):
        内容 = 内容.encode("utf8")
    return hashlib.md5(内容).hexdigest()
def 浏览器_获取远程chrome(server_url="http://127.0.0.1:4444/wd/hub"):
    """Open a Chrome session on a remote WebDriver server.

    Args:
        server_url: Address handed to ``webdriver.Remote`` as
            ``command_executor``.

    Returns:
        The ``webdriver.Remote`` instance that was created.
    """
    chrome_args = [
        # Switches left disabled by the author: "--headless", "window-size=1366,768"
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
    ]

    # Work on a copy so the shared DesiredCapabilities.CHROME dict is not mutated.
    caps = DesiredCapabilities.CHROME.copy()
    caps['goog:chromeOptions'] = {"args": chrome_args}

    return webdriver.Remote(
        command_executor=server_url,
        desired_capabilities=caps,
    )
def 浏览器_获取本地chrome():
    """Start a local Chrome through ``webdriver.Chrome`` and return the driver.

    The browser is started with ``--disable-gpu`` and a fixed desktop
    user-agent string.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    opt = webdriver.ChromeOptions()
    # Switches left disabled by the author: '--headless',
    # "blink-settings=imagesEnabled=false", '--proxy-server=http://127.0.0.1:11111'
    opt.add_argument('--disable-gpu')
    # The value is given without surrounding quotes, matching the other
    # Chrome builders in this module. NOTE(review): arguments are not
    # shell-parsed, so embedded quotes would presumably end up inside the
    # User-Agent header itself.
    opt.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36")
    return webdriver.Chrome(options=opt)
def 浏览器_获取本地Firefox():
    """Start a local Firefox through ``webdriver.Firefox`` and return the driver.

    Returns:
        The ``webdriver.Firefox`` instance that was created.
    """
    # NOTE(review): apart from '--headless' these look like Chrome-style
    # switches; confirm whether Firefox honours them.
    launch_args = (
        '--headless',
        '--disable-gpu',
        "blink-settings=imagesEnabled=false",
        "user-agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'",
    )

    opt = webdriver.FirefoxOptions()
    for arg in launch_args:
        opt.add_argument(arg)

    return webdriver.Firefox(options=opt)
def 浏览器_是否就绪(远程浏览器地址="http://127.0.0.1:4444/wd/hub", 超时秒数=5):
    """Report whether the remote WebDriver server says it is ready.

    Sends ``GET <远程浏览器地址>/status`` and returns the ``value.ready``
    field of the JSON reply.

    Example::

        while not 浏览器_是否就绪(远程浏览器地址):
            延时(1)
            print("browser not ready")

    Args:
        远程浏览器地址: Base URL of the WebDriver hub.
        超时秒数: Timeout in seconds passed to ``requests.get`` so that an
            unresponsive server cannot block the caller indefinitely.

    Returns:
        The ``ready`` value from the status reply, or ``False`` when the
        request fails or the reply does not have the expected shape.
    """
    try:
        response = requests.get(远程浏览器地址 + "/status", timeout=超时秒数)
        return response.json()['value']['ready']
    except Exception:
        # Best-effort probe: any ordinary failure means "not ready".
        # KeyboardInterrupt / SystemExit are deliberately NOT caught, so a
        # polling loop around this function can still be interrupted.
        return False
def 浏览器窗口最大化(driver):
    """Maximize the browser window, falling back to a fixed size.

    Args:
        driver: Object exposing ``maximize_window()`` and
            ``set_window_size(width, height)``.
    """
    fallback_width, fallback_height = 1920, 1080
    try:
        driver.maximize_window()
    except WebDriverException:
        # Maximizing failed, so use an explicit 1920x1080 window instead.
        driver.set_window_size(fallback_width, fallback_height)
def get_path_exists(path):
    """Return the first candidate in ``path`` that exists on disk.

    Args:
        path: Iterable of filesystem paths, checked in order with
            ``os.path.exists``.

    Returns:
        The first existing path, or ``''`` when none of them exists.
    """
    return next((candidate for candidate in path if os.path.exists(candidate)), '')
# Locations of the chromedriver executable and the Chrome binary.
# Both start empty; 浏览器_函数计算环境初始化() fills them in and
# 浏览器_自动获取chrome() reads them.
chromeDriverPath = chromeBinaryLocation = ""
def 浏览器_函数计算环境初始化():
    """Locate chromedriver and headless-chromium and publish their paths.

    Searches a fixed list of locations (current directory and ``/opt``,
    optionally under ``chrome86`` / ``chrome69``) for both executables. If
    either one is not executable, both are copied to ``/tmp`` and given mode
    ``0o700``. The resulting paths are stored in the module globals
    ``chromeDriverPath`` and ``chromeBinaryLocation``.

    The globals are only assigned once both files have been found and
    prepared, so a failed attempt never leaves a half-initialised state that
    a later call would mistake for "already loaded".

    Returns:
        ``False`` when ``系统_是否为window系统()`` is truthy (nothing is done),
        ``"浏览器环境已加载"`` when the paths were already set by an earlier
        call, otherwise ``"浏览器环境初始化完成"``.

    Raises:
        SystemExit: If the driver or the browser binary cannot be found.
    """
    global chromeDriverPath
    global chromeBinaryLocation

    if 系统_是否为window系统():
        return False
    if chromeDriverPath != "":
        return "浏览器环境已加载"

    # Debugging hints from the original author: run
    # `<chromedriver> --version` to check the driver, or list /opt to check
    # that the files were deployed.
    driver_path = get_path_exists([
        "./chromedriver",
        "/opt/chromedriver",
        "./chrome86/chromedriver",
        "/opt/chrome86/chromedriver",
        "./chrome69/chromedriver",
        "/opt/chrome69/chromedriver",
    ])
    binary_path = get_path_exists([
        "./headless-chromium",
        "/opt/headless-chromium",
        "./chrome86/headless-chromium",
        "/opt/chrome86/headless-chromium",
        "./chrome69/headless-chromium",
        "/opt/chrome69/headless-chromium",
    ])

    if driver_path == "" or binary_path == "":
        # Same exception the `exit()` helper raises, without depending on the
        # `site` module having injected that helper.
        raise SystemExit("浏览器或驱动不存在")

    if not (os.access(driver_path, os.X_OK) and os.access(binary_path, os.X_OK)):
        # Without execute permission in place, copy both files to /tmp and
        # grant the permission there.
        tmp_driver_path = "/tmp/chromedriver"
        tmp_binary_path = "/tmp/headless-chromium"
        shutil.copyfile(driver_path, tmp_driver_path)
        shutil.copyfile(binary_path, tmp_binary_path)
        os.chmod(tmp_driver_path, 0o0700)
        os.chmod(tmp_binary_path, 0o0700)
        driver_path = tmp_driver_path
        binary_path = tmp_binary_path

    # Commit only after every step above succeeded.
    chromeDriverPath = driver_path
    chromeBinaryLocation = binary_path
    return "浏览器环境初始化完成"
def 输出(str):
    """Emit a message with ``print`` or with ``logger.info``.

    ``print`` is used when ``系统_是否为window系统()`` is truthy (presumably
    when running on Windows); otherwise the module logger is used.

    Args:
        str: The message to emit. The name shadows the builtin but is kept
            so existing keyword callers keep working.
    """
    emit = print if 系统_是否为window系统() else logger.info
    emit(str)
def 浏览器_自动获取chrome():
    """Return a Chrome driver suited to the current platform.

    When ``系统_是否为window系统()`` is truthy this simply returns
    ``浏览器_获取本地chrome()``. Otherwise it starts a headless Chrome using
    the module globals ``chromeBinaryLocation`` and ``chromeDriverPath``.
    According to the original author this makes it easy to deploy Chrome for
    Selenium on Alibaba Cloud Function Compute, Tencent Cloud Functions, or
    plain Linux / Docker.

    Returns:
        The ``webdriver.Chrome`` instance that was created.
    """
    if 系统_是否为window系统():
        return 浏览器_获取本地chrome()

    # Switches left disabled by the author: --user-data-dir=/tmp/user-data,
    # --enable-logging, --log-level=0, --data-path=/tmp/data-path,
    # --homedir=/tmp, --disk-cache-dir=/tmp/cache-dir
    headless_flags = (
        '--headless',
        '--no-sandbox',
        '--disable-gpu',
        '--window-size=1366x768',
        '--hide-scrollbars',
        '--single-process',
        '--ignore-certificate-errors',
        'blink-settings=imagesEnabled=false',
        'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
    )

    chrome_options = webdriver.ChromeOptions()
    for flag in headless_flags:
        chrome_options.add_argument(flag)
    chrome_options.binary_location = chromeBinaryLocation

    return webdriver.Chrome(chromeDriverPath, options=chrome_options)
if __name__ == '__main__':
    # Manual smoke test: open a page in a local Chrome and print its text.
    #
    # To try a remote browser instead, wait for the hub and then connect:
    #   远程浏览器地址 = "http://127.0.0.1:4444/wd/hub"
    #   while 浏览器_是否就绪(远程浏览器地址) == False:
    #       延时(1)
    #       print("浏览器未就绪")
    #   driver = 浏览器_获取远程chrome(远程浏览器地址)
    driver = 浏览器_获取本地chrome()
    try:
        driver.get("https://www.baidu.com")
        print(driver.find_elements_by_xpath("//html")[0].text)
    finally:
        # Always shut the browser down, even if navigation or lookup failed,
        # so no browser/driver process is left behind.
        driver.quit()