웹 크롤링을 할 때 로그인이 필요한 경우가 있는데요.
로그인 과정에만 브라우저(Selenium)가 필요하고, 속도 문제 등으로 실제 크롤링은 HttpClient 등으로 수행하려 할 때 사용할 수 있는 방법입니다.
1. 라이브러리
implementation group: 'org.apache.httpcomponents', name: 'fluent-hc', version: '4.5.13'
implementation group: 'org.seleniumhq.selenium', name: 'selenium-java', version: '4.0.0-beta-3'
2. 샘플
/**
 * Logs in through a headless Chrome session driven by Selenium, copies the browser's
 * cookies into an HttpClient cookie store, and prints the body of a follow-up GET
 * request made with those cookies.
 *
 * The driver path, URL, element ids and credentials are the hard-coded sample values below.
 *
 * @throws java.io.UncheckedIOException if the follow-up HTTP request fails
 */
public void run()
{
    // The chromedriver binary must match the Chrome version installed on this PC.
    System.setProperty("webdriver.chrome.driver", "C:\\Users\\ddss6565\\Desktop\\eclipse-jee-2020-12-R-win32-x86_64\\workspace\\cookie\\src\\main\\resources\\chromedriver.exe");
    ChromeOptions options = new ChromeOptions();
    options.addArguments("headless");
    WebDriver webDriver = new ChromeDriver(options);
    Set<Cookie> cookies;
    try
    {
        webDriver.manage().timeouts().implicitlyWait(Duration.ofSeconds(30));
        webDriver.get("https://www.naver.com");
        webDriver.findElement(By.id("username")).sendKeys("username");
        webDriver.findElement(By.id("password")).sendKeys("password!");
        webDriver.findElement(By.id("login")).click();
        // NOTE(review): cookies are read immediately after the click; if the login
        // completes asynchronously an explicit wait may be needed here - confirm.
        cookies = webDriver.manage().getCookies();
    }
    finally
    {
        // Always close the browser, even when navigation or an element lookup fails,
        // so the browser session is not leaked.
        webDriver.quit();
    }
    CookieStore cookieStore = convertBrowserCookie(cookies);
    Executor executor = Executor.newInstance();
    try
    {
        System.out.println(executor.use(cookieStore).execute(Request.Get("https://www.naver.com")).returnContent().asString(Charset.forName("UTF-8")));
    }
    catch (java.io.IOException e)
    {
        // execute()/returnContent() throw a checked IOException; wrap it (keeping the
        // cause) so the method signature stays unchanged for callers.
        throw new java.io.UncheckedIOException("GET https://www.naver.com with browser cookies failed", e);
    }
}
/**
 * Copies cookies obtained from the browser into a new HttpClient cookie store.
 *
 * For each browser cookie a {@code BasicClientCookie} is created with the same name and
 * value, and its domain, secure flag, expiry date and path are carried over.
 *
 * @param browserCookies cookies read from the browser session
 * @return a new {@code BasicCookieStore} to which each converted cookie has been added
 */
public CookieStore convertBrowserCookie(Set<Cookie> browserCookies)
{
    CookieStore store = new BasicCookieStore();
    browserCookies.forEach(source ->
    {
        String domain = source.getDomain();
        BasicClientCookie copy = new BasicClientCookie(source.getName(), source.getValue());
        copy.setPath(source.getPath());
        copy.setExpiryDate(source.getExpiry());
        copy.setSecure(source.isSecure());
        copy.setDomain(domain);
        // The domain is also recorded as an explicit attribute; presumably HttpClient's
        // domain matching relies on it for sub-domain requests - verify against the cookie spec.
        copy.setAttribute(BasicClientCookie.DOMAIN_ATTR, domain);
        store.addCookie(copy);
    });
    return store;
}
반응형
댓글