Selenium (Python) でページを画像も含めて取得するサンプル

よくわからないのでChatGPTさんに書いてもらったのをメモ(動作確認してません)

シンタックスハイライトされたのをそのまま貼れると便利だけど(gist 使えば良いのでは)


import os
import time
import urllib.parse
import urllib.request

from selenium import webdriver
from selenium.webdriver.common.by import By
# Periodically fetch a page with Selenium, saving its HTML source to
# "page.html" and every HTTP(S) image it references into the image directory.
# The loop runs until the process is interrupted (e.g. Ctrl+C).

# specify the URL to be acquired
url = "https://www.example.com"

# specify the interval (in seconds) at which to acquire the page
interval = 60

# directory the downloaded images are written to
image_dir = "images"

# urlretrieve() does not create missing directories, so make sure it exists
os.makedirs(image_dir, exist_ok=True)

# create a new Chrome web driver
driver = webdriver.Chrome()

try:
    while True:
        # acquire the page
        driver.get(url)

        # save the page source to a file; an explicit encoding avoids
        # UnicodeEncodeError on platforms whose default codec is not UTF-8
        with open("page.html", "w", encoding="utf-8") as file:
            file.write(driver.page_source)

        # download all images on the page
        # (find_elements_by_tag_name was removed in Selenium 4.3)
        for image in driver.find_elements(By.TAG_NAME, "img"):
            image_url = image.get_attribute("src")

            # get_attribute() returns None when <img> has no src; non-HTTP
            # sources such as data: URIs cannot be fetched with urlretrieve
            if not image_url or not image_url.startswith("http"):
                continue

            # name the file after the last path segment, ignoring any query
            # string or fragment; skip URLs that end in "/" (no file name)
            file_name = os.path.basename(urllib.parse.urlsplit(image_url).path)
            if not file_name:
                continue

            try:
                urllib.request.urlretrieve(
                    image_url, os.path.join(image_dir, file_name)
                )
            except OSError as error:
                # URLError/HTTPError are OSError subclasses; a single broken
                # image should not stop the whole polling loop
                print(f"failed to download {image_url}: {error}")

        # wait for the specified interval
        time.sleep(interval)
finally:
    # always shut the browser down, even when the loop is interrupted;
    # quit() ends the whole session, close() would only close the window
    driver.quit()