Google 浏览器驱动程序下载地址
# 一、安装
# 二、常用
| from selenium import webdriver |
| from selenium.webdriver.common.by import By |
| from selenium.webdriver import ActionChains |
| from selenium.webdriver.common.keys import Keys |
| from selenium.webdriver.support.ui import WebDriverWait |
| from selenium.webdriver.support import expected_conditions as EC |
| CHROME_DRIVER = r'D:\software\anaconda3\Scripts\chromedriver.exe' |
| chrome_options = webdriver.ChromeOptions() |
| |
| |
| chrome_options.add_argument("window-size=1366,768") |
| |
| chrome_options.add_argument("-no-sandbox") |
| |
| |
| chrome_options.add_experimental_option('excludeSwitches', ['enable-automation']) |
| chrome_options.add_experimental_option('prefs', { |
| "download.default_directory": config.wgbgd_save, |
| "download.prompt_for_download": False, |
| "download.directory_upgrade": True, |
| "plugins.always_open_pdf_externally": True |
| }) |
| driver = webdriver.Chrome(CHROME_DRIVER, chrome_options=chrome_options) |
| wait = WebDriverWait(driver, 40) |
| |
| wait.until(EC.presence_of_element_located((By.ID, 'cardLogin'))) |
| wait.until(EC.element_to_be_clickable((By.ID, 'cusAdvancedQuery'))) |
| |
| wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '*'))) |
| |
| driver.get(config.bgd_url) |
| handles = driver.window_handles |
| driver.switch_to.window(handles[-1]) |
| |
| driver.close() |
# 三、入门
| from selenium import webdriver |
| from selenium.webdriver.common.keys import Keys |
| |
| |
| |
| chrome_options = webdriver.ChromeOptions() |
| |
| |
| |
| path = r"D:\software\anaconda3\Scripts\chromedriver.exe" |
| |
| driver = webdriver.Chrome(executable_path=path, options=chrome_options) |
| driver.get('http://www.baidu.com/') |
| print(driver.page_source) |
# 3.1 查找元素
查找以下元素: <input type="text" name="passwd" id="passwd-id" />
| element = driver.find_element_by_id("passwd-id") |
| element = driver.find_element_by_name("passwd") |
| element = driver.find_element_by_xpath("//input[@id='passwd-id']") |
| |
| find_element_by_link_text |
| find_element_by_partial_link_text |
| find_element_by_tag_name |
| find_element_by_class_name |
| find_element_by_css_selector |
# 3.2.1 获取元素的属性值
| element.get_attribute("name") |
# 3.2.2 获取 css 属性值
| element.value_of_css_property('display') |
# 3.3 弹出对话框处理,弹窗
| alert = driver.switch_to_alert() |
| |
| alert = driver.switch_to.alert |
| |
| alert.accept() |
这个接口对 alert, confirm, prompt 对话框效果相同
# 3.4 访问浏览器历史记录
driver.get("http://www.example.com")
在浏览历史中前进和后退你可以使用:
| driver.forward() |
| driver.back() |
# 3.5 获取当前窗口的截图
| from selenium import webdriver |
| |
| driver = webdriver.Firefox() |
| driver.get('http://www.python.org/') |
| driver.save_screenshot('screenshot.png') |
| driver.quit() |
# 3.6 避免自动操作检测
| driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { |
| "source": """ |
| Object.defineProperty(navigator, 'webdriver', { |
| get: () => undefined |
| }) |
| """ |
| }) |
# 3.7 窗口最大化
# 3.8 切换 iframe
| iframe = driver.find_element_by_id('menu') |
| driver.switch_to.frame(iframe) |
# 3.8.1 通过元素的 id 或 name 属性切换
| driver.switch_to.frame('') |
# 3.8.2 通过元素索引切换
#通过索引切换 注意索引从 0 开始 注意 只有同级可以使用 索引切换 如果是嵌套的 frame 是不能使用索引切换的
| driver.switch_to.frame(1) |
# 3.8.3 通过定位元素的方式切换
| driver.switch_to.frame(driver.find_elements_by_tag_name("iframe")[0]) |
# 3.8.4 切换嵌套 iframe
同上 可以先切换到父 iframe 然后再切换到子 iframe
# 3.8.5 从子 iframe 切换到父级 iframe
driver.switch_to.parent_frame()
# 3.8.6 切换回主文档 ,即跳出所有 iframe
| driver.switch_to.default_content() |
————————————————
版权声明:本文为 CSDN 博主「词穷墨尽」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/weixin_41167150/article/details/109522685
# 四、等待
显式等待会让 WebDriver 等待满足一定的条件以后再进一步的执行。
隐式等待设置一个全局的最长等待时间,在查找元素时等待一段时间,如果元素在这个时间内找到了就立即执行操作,如果超过了这个时间还没有找到元素,则抛出异常。
# 4.1 显式等待
| from selenium import webdriver |
| from selenium.webdriver.common.by import By |
| from selenium.webdriver.support.ui import WebDriverWait |
| from selenium.webdriver.support import expected_conditions as EC |
| |
| driver = webdriver.Firefox() |
| driver.get("http://somedomain/url_that_delays_loading") |
| try: |
| |
| element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "myDynamicElement"))) |
| |
| element = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'cusAdvancedQuery'))) |
| finally: |
| driver.quit() |
等待 10s,如果 10s 内 id=myDynamicElement 的元素加载成功,则直接进入下一步。
# 4.2 隐式等待
| from selenium import webdriver |
| |
| driver = webdriver.Firefox() |
| driver.implicitly_wait(10) |
| driver.get("http://somedomain/url_that_delays_loading") |
| myDynamicElement = driver.find_element_by_id("myDynamicElement") |
隐式等待可以理解成在规定的时间范围内,WebDriver 在不停地轮询页面 DOM 查找元素(并不会刷新页面),直到找到相关元素或者时间结束。
# 关闭
关闭浏览器窗口,可以使用 quit 方法代替 close 方法, quit 将关闭整个浏览器,而 close 只会关闭一个标签页, 如果你只打开了一个标签页,大多数浏览器的默认行为是关闭浏览器。
| element.clear() |
| driver.quit() |
# 五、截图
# 1、指定元素截图
| img = driver.find_element_by_id('image1') |
| img.screenshot('./code.png') |
# 2、网页全屏截图
| driver.save_screenshot('./full_snap.png') |
# 六、模拟鼠标 / 键盘操作
# 6.1 输入内容
模拟键盘输入操作
| element.send_keys("some text") |
还可以通过 “Keys” 类来模拟输入方向键
| element.send_keys(" and some", Keys.ARROW_DOWN) |
1、回车键:Keys.ENTER
2、删除键:Keys.BACK_SPACE
3、空格键:Keys.SPACE
4、制表键:Keys.TAB
5、退出键(Esc):Keys.ESCAPE
6、刷新键:Keys.F5
7、删除输入框内结尾的单个字符:Keys.BACK_SPACE
# Keys.LEFT 光标左移
dr.find_element_by_id("kw").send_keys(Keys.LEFT * 3)
# shift + right 右移光标选中
dr.find_element_by_id("kw").send_keys(Keys.SHIFT, Keys.RIGHT * 3)
常用组合键操作 Ctrl+__
| 1、全选:send_keys(Keys.CONTROL,'a') |
| 2、复制:send_keys(Keys.CONTROL,'c') |
| 3、粘贴:send_keys(Keys.CONTROL,'v') |
| 4、剪切:send_keys(Keys.CONTROL,'x') |
使用 ActionChains 类可以达到相同的效果
| driver.find_element_by_id("passwd-id") |
| |
| action = ActionChains(driver) |
| |
| action.key_down(Keys.CONTROL).send_keys('a').key_up(Keys.CONTROL).perform() |
| |
| action.key_down(Keys.CONTROL).send_keys('c').key_up(Keys.CONTROL).perform() |
| |
| action.key_down(Keys.CONTROL).send_keys('v').key_up(Keys.CONTROL).perform() |
# 6.2 多选框选择操作
| from selenium.webdriver.support.ui import Select |
| select = Select(driver.find_element_by_name('name')) |
| select.select_by_index(index) |
| select.select_by_visible_text("text") |
| select.select_by_value(value) |
# 6.2.1 取消已经选择的一些元素
| select = Select(driver.find_element_by_id('id')) |
| select.deselect_all() |
# 6.2.2 列出所有已经选择的选项
| select = Select(driver.find_element_by_xpath("xpath")) |
| all_selected_options = select.all_selected_options |
# 6.2.3 获得所有选项
# 6.3 拖放
可以使用拖放,无论是移动一个元素,或放到另一个元素内:
| element = driver.find_element_by_name("source") |
| target = driver.find_element_by_name("target") |
| |
| from selenium.webdriver import ActionChains |
| action_chains = ActionChains(driver) |
| action_chains.drag_and_drop(element, target).perform() |
# 6.4 页面滚动
# 6.4.1 滚动到页面底部
| driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") |
# 6.4.2 滚动到指定位置
| driver.execute_script("document.documentElement.scrollTop = 11245") |
# 6.4.3 循环滚动到指定位置
| for i in range(1, 10767, 10): |
| js_code = f'document.documentElement.scrollTop = {i}' |
| driver.execute_script(js_code) |
# 6.5 鼠标悬浮
| from selenium.webdriver.common.action_chains import ActionChains |
| |
| app_top = driver.find_element_by_id('AppTop_currentli') |
| |
| ActionChains(driver).move_to_element(app_top).perform() |
# 6.6 鼠标点击事件
| from selenium.webdriver.common.action_chains import ActionChains |
# 6.6.1 右击
| rc = driver.find_element_by_id("rightclickid") |
| ActionChains(driver).context_click(rc).perform() |
# 6.6.2 双击
| rc = driver.find_element_by_id("rightclickid") |
| ActionChains(driver).double_click(rc).perform() |
# 6.6.3 移动
| |
| element = driver.find_element_by_name("source") |
| |
| target = driver.find_element_by_name("target") |
| |
| ActionChains(driver).drag_and_drop(element, target).perform() |
# 6.7 模拟鼠标滚动
# 6.7.1 滚动到页面底部
| js = "window.scrollTo(0, document.body.scrollHeight)" |
| driver.execute_script(js) |
# 6.7.2 滚动到页面顶部
| js = "window.scrollTo(0, 0)" |
| driver.execute_script(js) |
# 6.7.3 指定滚动距离
| js = "document.documentElement.scrollTop=7000" |
| driver.execute_script(js) |
# 七、运行 js 代码
# 7.1 获取浏览器中某个变量的值
| driver.execute_script("return 变量名") |
# 7.2 获取网页中的图片
获取 canvas 标签生成的图片,
| img_bs64 = driver.execute_script("return document.getElementsByClassName('图片元素的class')[0].toDataURL('image/png')") |
| img_base = img_bs64.split(',')[1] |
| img_base = base64.b64decode(img_base) |
| with open('bg.png', 'wb') as f: |
| f.write(img_base) |