Selenium (Python) でページを画像も含めて取得するサンプル


よくわからないのでChatGPTさんに書いてもらったのをメモ(動作確認してません)

シンタックスハイライトされたのをそのまま貼れると便利だけど(gist 使えば良いのでは)

import os
import time
import urllib.parse
import urllib.request

from selenium import webdriver
from selenium.webdriver.common.by import By

# URL of the page to fetch.
url = "https://www.example.com"

# Seconds to wait between successive fetches.
interval = 60

# Where the page source and the downloaded images are written.
page_file = "page.html"
image_dir = "images"


def image_filename(image_url):
    """Return the local file name for *image_url*, or "" if it has none.

    Only the last segment of the URL path is used, so query strings and
    fragments never end up in the file name.
    """
    return os.path.basename(urllib.parse.urlparse(image_url).path)


def download_images(driver):
    """Download every http(s) <img> of the current page into image_dir.

    A failed download is reported and skipped so that one broken image
    does not stop the polling loop.
    """
    os.makedirs(image_dir, exist_ok=True)
    # find_elements_by_tag_name() was removed in Selenium 4.3.
    for image in driver.find_elements(By.TAG_NAME, "img"):
        image_url = image.get_attribute("src")
        # src may be absent, or a data:/blob: URL that cannot be fetched.
        if not image_url or not image_url.startswith("http"):
            continue
        name = image_filename(image_url)
        if not name:
            # URL path ends with "/": there is no file name to save under.
            continue
        target = os.path.join(image_dir, name)
        try:
            urllib.request.urlretrieve(image_url, target)
        except OSError as exc:  # URLError and HTTPError are OSError subclasses
            print(f"failed to download {image_url}: {exc}")


def fetch_page(driver):
    """Load url, save its source to page_file and download its images."""
    driver.get(url)
    # Explicit encoding: the platform default may be unable to encode the page.
    with open(page_file, "w", encoding="utf-8") as file:
        file.write(driver.page_source)
    download_images(driver)


def main():
    """Fetch the page every `interval` seconds until interrupted (Ctrl+C)."""
    driver = webdriver.Chrome()
    try:
        while True:
            fetch_page(driver)
            time.sleep(interval)
    except KeyboardInterrupt:
        # Ctrl+C is the intended way to stop the loop; exit quietly.
        pass
    finally:
        # quit() ends the whole browser session; close() only closes a window.
        driver.quit()


if __name__ == "__main__":
    main()