Selenium (Python) でページを画像も含めて取得するサンプル
よくわからないのでChatGPTさんに書いてもらったのをメモ(動作確認してません)
シンタックスハイライトされたのをそのまま貼れると便利だけど(gist 使えば良いのでは)
import os
import time
import urllib.parse
import urllib.request

from selenium import webdriver
from selenium.webdriver.common.by import By

# specify the URL to be acquired
url = "https://www.example.com"
# Interval (in seconds) between two acquisitions of the page.
interval = 60
# Directory the downloaded images are written to (created on demand).
image_dir = "images"


def image_filename(image_url, fallback="image"):
    """Return a local file name for *image_url*.

    Only the path component of the URL is considered, so a query string or
    fragment (``logo.png?v=2#top``) never ends up in the file name.  When the
    path has no final segment (``https://host/`` or ``https://host``),
    *fallback* is returned so the caller never tries to write to a bare
    directory path.
    """
    path = urllib.parse.urlsplit(image_url).path
    return os.path.basename(path) or fallback


def save_page(driver, page_url, directory=image_dir):
    """Load *page_url* in *driver*, save its HTML and download its images.

    The page source is written to ``page.html`` (overwritten on every call)
    and every ``<img>`` whose ``src`` starts with ``http`` is downloaded into
    *directory*.  A failed image download is reported and skipped.
    """
    driver.get(page_url)

    # Explicit encoding: the platform default codec may not be able to encode
    # every character of the page.
    with open("page.html", "w", encoding="utf-8") as file:
        file.write(driver.page_source)

    os.makedirs(directory, exist_ok=True)

    # Selenium 4 removed find_elements_by_tag_name(); use the By locator.
    for index, image in enumerate(driver.find_elements(By.TAG_NAME, "img")):
        image_url = image.get_attribute("src")
        # get_attribute() returns None for an <img> without src; non-http
        # sources (e.g. data: URIs) cannot be fetched with urlretrieve.
        if not image_url or not image_url.startswith("http"):
            continue
        target = os.path.join(
            directory, image_filename(image_url, f"image_{index}")
        )
        try:
            urllib.request.urlretrieve(image_url, target)
        except OSError as error:
            # URLError/HTTPError are OSError subclasses.  One broken image
            # must not abort the whole periodic capture.
            print(f"failed to download {image_url}: {error}")


def main():
    """Capture ``url`` every ``interval`` seconds until interrupted."""
    driver = webdriver.Chrome()
    try:
        while True:
            save_page(driver, url)
            time.sleep(interval)
    finally:
        # The loop only ends through an exception (e.g. Ctrl+C), so cleanup
        # has to live in ``finally``; quit() also stops the driver process.
        driver.quit()


if __name__ == "__main__":
    main()