로딩 중이에요... 🐣

[코담] 웹개발·실전 프로젝트·AI까지, 파이썬·장고의 모든것을 담아낸 강의와 개발 노트

04 요약정리 | ✅ 저자: 이유정(박사)

🔍 Jupyter를 사용하는 대표적인 이유

코드를 셀 단위로 실행 가능
- 오류 발생 지점을 빠르게 파악 가능
- 디버깅, 테스트, 로깅에 유리함
중간 상태 확인 용이
- print(), 변수 출력 등을 통해 실행 결과를 바로 확인 가능
데이터 분석 및 시각화에 강력
- Matplotlib, Pandas, Seaborn 등과 결합할 때 특히 유용
인터랙티브한 개발 흐름
- 반복 실행이나 변수 실험에 매우 적합

필수 패키지 설치

pip install selenium
pip install webdriver-manager
pip install beautifulsoup4
pip install pandas

kakaomap_scrap.py

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec


def get_data_from_kakaomap(keyword="강남구 카페", max_pages=6):
    """Scrape place listings from Kakao Map search results.

    Opens Chrome through Selenium, searches Kakao Map for ``keyword``,
    expands the place list and walks the result pages, passing each page's
    list HTML to ``get_items``.

    Args:
        keyword: Search phrase typed into the search box. The default is the
            phrase the original script hard-coded.
        max_pages: Number of result pages to visit. The default of 6 matches
            the original ``while page_count <= 5`` loop.

    Returns:
        The list filled by ``get_items``, one dict per parsed place.

    Raises:
        Exception: Any error raised while driving the browser is printed and
            re-raised. The browser is closed in every case.
    """
    driver = None
    try:
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service)
        # A single 10-second waiter is reused for every explicit wait below.
        wait = WebDriverWait(driver, 10)

        # Open the site and wait for the search box to become visible.
        driver.get("https://map.kakao.com/")
        wait.until(ec.visibility_of_element_located((By.ID, "search.keyword.query")))

        # Type the search phrase and submit it.
        search_input = driver.find_element(By.ID, "search.keyword.query")
        search_input.send_keys(keyword)
        search_input.send_keys(Keys.ENTER)

        wait.until(ec.element_to_be_clickable((By.ID, "info.search.place.more")))
        # Re-class the 'dimmedLayer' element before clicking; presumably it is
        # an overlay that would otherwise intercept the click.
        driver.execute_script(
            """
        var element = document.getElementById('dimmedLayer');
        if (element) {
            element.className = 'DimmedLayer HIDDEN';
        }
        """
        )

        sleep(1)
        driver.find_element(By.ID, "info.search.place.more").click()
        wait.until(ec.visibility_of_element_located((By.ID, "info.search.page")))

        page_count = 0
        items = []

        while page_count < max_pages:
            # Page buttons are addressed as no1..no5, so after every fifth
            # page the "next" button is clicked to move to the following
            # group of pages.
            if page_count != 0 and page_count % 5 == 0:
                driver.find_element(By.ID, "info.search.page.next").click()
                wait.until(
                    ec.visibility_of_element_located((By.ID, "info.search.place.list"))
                )

            page_count += 1
            page_num = page_count % 5 if page_count % 5 != 0 else 5
            driver.find_element(By.ID, f"info.search.page.no{page_num}").click()
            wait.until(
                ec.visibility_of_element_located((By.ID, "info.search.place.list"))
            )

            place_list = driver.find_element(By.ID, "info.search.place.list")
            get_items(place_list.get_attribute("innerHTML"), items)
            sleep(2)

        return items
    except Exception as e:
        print(e)
        raise
    finally:
        # Always close the browser, including when a wait times out or an
        # element is missing, so no Chrome process is left behind.
        if driver is not None:
            driver.quit()


def get_items(html: str, parsed_items: list):
    """Parse place entries from a result-list HTML fragment.

    Every ``li.PlaceItem.clickArea`` element in ``html`` becomes a dict with
    the keys ``name``, ``score``, ``address`` and ``hour``, which is appended
    to ``parsed_items``.

    Args:
        html: Inner HTML of the place list.
        parsed_items: List that receives the parsed dicts (mutated in place).

    Returns:
        The same ``parsed_items`` list, for convenience.
    """
    from bs4 import BeautifulSoup

    def text_of(node, tag, data_id):
        # A place may lack a field (e.g. no score or opening hours); return
        # an empty string instead of failing on ``None.text``.
        found = node.find(tag, {"data-id": data_id})
        return found.text if found is not None else ""

    soup = BeautifulSoup(html, "html.parser")
    for place in soup.select("li.PlaceItem.clickArea"):
        parsed_items.append(
            {
                "name": text_of(place, "span", "screenOutName"),
                "score": text_of(place, "em", "scoreNum"),
                "address": text_of(place, "p", "address"),
                "hour": text_of(place, "a", "periodTxt"),
            }
        )

    return parsed_items


# Script entry point: run the scraper and print the collected place dicts.
if __name__ == "__main__":
    items = get_data_from_kakaomap()
    print(items)

✅ kakaomap_scrap.py 파일을 python kakaomap_scrap.py 로 실행해도 정상적으로 데이터를 추출할 수 있습니다.

✅ 그러나 어디에서 오류가 발생하는지 파악하기 어렵기 때문에 Jupyter를 설치해 실행합니다.

2. 주피터를 설치한 상태에서 디렉토리 생성 후 실행 과정

📜 kakaomap_scrap.py

# 1. 필요한 모듈 임포트
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
from bs4 import BeautifulSoup

# 2. Start the Chrome driver (webdriver-manager supplies the driver path)
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# 3. Open Kakao Map and wait (up to 10 s) for the search box to be visible
driver.get("https://map.kakao.com/")
wait = WebDriverWait(driver, 10)
wait.until(ec.visibility_of_element_located((By.ID, "search.keyword.query")))

# 4. Type the search phrase and press Enter
search_input = driver.find_element(By.ID, "search.keyword.query")
search_input.send_keys("강남구 카페")
search_input.send_keys(Keys.ENTER)
wait = WebDriverWait(driver, 10)

# 5. Wait for the "more" button, then click it
wait.until(ec.element_to_be_clickable((By.ID, "info.search.place.more")))
# Re-class the 'dimmedLayer' element first; presumably it is an overlay that
# would otherwise intercept the click (verify against the live page).
driver.execute_script("""
    var element = document.getElementById('dimmedLayer');
    if (element) {
        element.className = 'DimmedLayer HIDDEN';
    }
""")
sleep(1)
show_more_btn = driver.find_element(By.ID, "info.search.place.more")
show_more_btn.click()

# 6. Wait for the pagination element to become visible
wait = WebDriverWait(driver, 10)
wait.until(ec.visibility_of_element_located((By.ID, "info.search.page")))

# 7. Parsing helper, defined once up front instead of on every loop iteration
def get_items(html: str, parsed_items: list):
    """Parse place entries from a result-list HTML fragment.

    Every ``li.PlaceItem.clickArea`` element in ``html`` becomes a dict with
    the keys ``name``, ``score``, ``address`` and ``hour``, which is appended
    to ``parsed_items``. The same list is returned.
    """

    def text_of(node, tag, data_id):
        # A place may lack a field (e.g. no score or opening hours); return
        # an empty string instead of failing on ``None.text``.
        found = node.find(tag, {"data-id": data_id})
        return found.text if found is not None else ""

    soup = BeautifulSoup(html, "html.parser")
    for place in soup.select("li.PlaceItem.clickArea"):
        parsed_items.append(
            {
                "name": text_of(place, "span", "screenOutName"),
                "score": text_of(place, "em", "scoreNum"),
                "address": text_of(place, "p", "address"),
                "hour": text_of(place, "a", "periodTxt"),
            }
        )
    return parsed_items


# 8. Walk the result pages and collect the data
page_count = 0
items = []

while page_count <= 5:
    # Page buttons are addressed as no1..no5, so after every fifth page the
    # "next" button is clicked to move to the following group of pages.
    if page_count != 0 and page_count % 5 == 0:
        next_btn = driver.find_element(By.ID, "info.search.page.next")
        next_btn.click()
        wait.until(ec.visibility_of_element_located((By.ID, "info.search.place.list")))

    page_count += 1
    page_num = page_count % 5 if page_count % 5 != 0 else 5
    page_btn = driver.find_element(By.ID, f"info.search.page.no{page_num}")
    page_btn.click()
    wait.until(ec.visibility_of_element_located((By.ID, "info.search.place.list")))

    place_list = driver.find_element(By.ID, "info.search.place.list")
    shop_list = place_list.get_attribute("innerHTML")

    get_items(shop_list, items)
    sleep(2)

# 9. Close the driver and show the results
driver.quit()
print(items)

1. vscode jupyter kakaomap_scrap.py 파일 생성 후 전체 실행

1) kakaomap_scrap 디렉토리 생성 후 __init__.py 파일 생성

첫번째

2) kakaomap_scrap.py 파일 생성 및 kakaomap_scrap.ipynb 생성

두번째

3) jupyter 실행

import sys, os
sys.path.append(os.getcwd())

from kakaomap_scrap import get_data_from_kakaomap as get_data_v2

get_data_v2()

🖨️ 실행결과

실행결과

2. vscode jupyter 단계별 실행

임의 ipynb 파일을 생성한다.

그리고 이미지와 같이

코드를 셀마다 단계별로 붙여 넣고 하나씩 실행하면, 오류를 확인하면서 결과를 볼 수 있다.

🖼️ jupyter 단계별 실행 시 카카오맵 단계별 실행 결과를 확인할 수 있다

3. jupyter 웹 브라우저 실행

jupyter notebook --no-browser --port 8888

가상환경에는 당연히 지도 데이터를 가져오는 데 필요한 필수 패키지(라이브러리)가 설치되어 있어야 합니다.

http://127.0.0.1:8888/tree

브라우저 Jupyter에서 File -> New -> Notebook 으로 임의 파일 실행

🖼️ jupyter 실행

← 이전: 03 요약정리

다음 →: 01 공공데이터 API

💡 AI 인사이트

댓글 커뮤니티

검색