- 프로젝트 만들고 main, about, movie\list cgv lotte megabox 페이지와 이들을 연결할 서블릿을 mainservlet, movieservlet으로 하도록 하자.
- selenium을 이용한 데이터를 가지고 오도록 하자.
- selenium을 이용한 크롤링을 해보겠다.
https://chromedriver.chromium.org/downloads
- create new crawlservlet.java
package controller;
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
// path는 중복되면 안된다.
@WebServlet(value = { "/cgv.json", "/lotte.json", "/megabox.json" })
public class CrawlServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
protected void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
PrintWriter out = response.getWriter();
response.setContentType("text/html;charset=UTF-8");
switch (request.getServletPath()) {
case "/cgv.json":
break;
case "/lotte.json":
break;
case "/maegabox.json":
break;
}
}
protected void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
}
}
- 크롬 드라이버를 사용할 수 있게 셋팅해주겠다.
- 대량의 크롤링의 경우 selenium을 사용하지만 이외에는 어제의 jsoup으로 하자.
// path는 중복되면 안된다.
@WebServlet(value = {"/cgv.json", "/lotte.json", "/megabox.json"})
public class CrawlServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
/**
 * Starts a Chrome browser through Selenium and, for {@code /cgv.json},
 * navigates it to the CGV movie page. The other paths do nothing yet.
 *
 * @param request  the incoming GET request; its servlet path picks the branch
 * @param response the response whose writer will carry the output
 * @throws ServletException declared by the servlet contract
 * @throws IOException      if the response writer cannot be obtained
 */
protected void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    // The content type must be set BEFORE getWriter(); a charset set
    // afterwards is ignored by the container.
    response.setContentType("text/html;charset=UTF-8");
    PrintWriter out = response.getWriter();
    // Tell Selenium where the chromedriver executable is located.
    System.setProperty("webdriver.chrome.driver", "c:/data/web/chromedriver.exe");
    ChromeOptions options = new ChromeOptions();
    // NOTE(review): the driver is never quit here, so the browser window stays
    // open after the request (useful for watching this step run, but every
    // request leaves a Chrome process behind).
    WebDriver driver = new ChromeDriver(options);
    switch (request.getServletPath()) {
    case "/cgv.json":
        driver.get("http://www.cgv.co.kr/movies/");
        break;
    case "/lotte.json":
        break;
    case "/megabox.json":
        break;
    }
}
- 이제 RUN하면 크롬드라이버를 통해 지정한 웹이 열린다.
- 더보기 버튼을 통해서 출력된 영화들을 가지고 오기 위해서 다음과 같이 하겠다.
case "/cgv.json":
// Load the CGV movie page in the Selenium-driven browser.
driver.get("http://www.cgv.co.kr/movies/");
// Find the element with class "link-more" (presumably the "show more" button) and click it
// so that the rest of the movie list is rendered on the page.
WebElement more = driver.findElement(By.className("link-more"));
more.click();
break;
ㄴ 웹에서 더보기 버튼을 직접 클릭하지 않아도 셀레늄이 대신 클릭해 주며, 서버를 run하면 그 동작을 확인할 수 있다.
- 이제 데이터를 가지고 오는 작업을 하겠다.
switch (request.getServletPath()) {
case "/cgv.json":
driver.get("http://www.cgv.co.kr/movies/");
WebElement more = driver.findElement(By.className("link-more"));
more.click();
List<WebElement> list = driver.findElement(By.className("sect-movie-chart"))
.findElements(By.tagName("li"));
for (WebElement e : list) {
WebElement wTitle = e.findElement(By.className("title"));
String title = wTitle.getText();
WebElement wImge = e.findElement(By.tagName("img"));
String image = wImge.getAttribute("src");
WebElement wPercent = e.findElement(By.className("percent"));
String percent = wPercent.getText();
WebElement wRelease = e.findElement(By.className("txt-info"))
.findElement(By.tagName("strong"));
String release = wRelease.getText();
System.out.println(title+" : "+image+" : "+percent+" : "+release);
}
break;
- 이제 json 데이터로 가지고오는 작업을 하겠다.
protected void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
response.setContentType("text/html;charset=UTF-8");
PrintWriter out = response.getWriter();
System.setProperty("webdriver.chrome.driver", "c:/data/web/chromedriver.exe");
ChromeOptions options = new ChromeOptions();
options.addArguments("headless");
WebDriver driver = new ChromeDriver(options);
switch (request.getServletPath()) {
case "/cgv.json":
driver.get("http://www.cgv.co.kr/movies/");
WebElement more = driver.findElement(By.className("link-more"));
more.click();
List<WebElement> list = driver.findElement(By.className("sect-movie-chart"))
.findElements(By.tagName("li"));
JSONArray array = new JSONArray();
for (WebElement e : list) {
WebElement wTitle = e.findElement(By.className("title"));
String title = wTitle.getText();
WebElement wImge = e.findElement(By.tagName("img"));
String image = wImge.getAttribute("src");
WebElement wPercent = e.findElement(By.className("percent"))
.findElement(By.tagName("span"));
String percent = wPercent.getText();
WebElement wRelease = e.findElement(By.className("txt-info"))
.findElement(By.tagName("strong"));
String release = wRelease.getText();
JSONObject object = new JSONObject();
object.put("title", title);
object.put("image", image);
object.put("percent", percent);
object.put("release", release);
array.add(object);
}
out.println(array);
break;
- 매번 셀레늄을 이용해서 실시간 데이터를 불러오기에는 컴퓨터 사양이 좋지 못해 DB 테이블에 넣어주도록 하겠다.
- go to mysql
-- 2021.08.27
-- One row per movie scraped from a cinema site by the crawl servlet.
create table movies(
-- Surrogate key, generated by the database.
id int auto_increment primary key,
-- Source site label; the crawler stores "CGV", "LotteCinema" or "MEGABOX".
type nvarchar(20),
-- Crawl date as text in yyyy-MM-dd form (10 characters).
wdate nchar(10),
-- Movie title as scraped.
title nvarchar(500),
-- Booking-rate text as scraped (kept as a string, not a number).
percent nvarchar(100),
-- Release information text as scraped.
rinfo nvarchar(100),
-- Poster image URL (the src attribute of the scraped img element).
image nvarchar(1000)
);
-- Show the resulting column definitions.
desc movies;
- model\ database.java 파일을 복사해서 넣는다.
- create new MovieVO.java
package model;
/**
 * Plain mutable value object holding one movie record: an integer id plus
 * six text fields (type, wdate, title, percent, rinfo, image).
 */
public class MovieVO {
    private int id;
    private String type;
    private String wdate;
    private String title;
    private String percent;
    private String rinfo;
    private String image;

    /** @return the record id */
    public int getId() {
        return this.id;
    }

    /** @param id the record id */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the type value */
    public String getType() {
        return this.type;
    }

    /** @param type the type value */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the wdate value */
    public String getWdate() {
        return this.wdate;
    }

    /** @param wdate the wdate value */
    public void setWdate(String wdate) {
        this.wdate = wdate;
    }

    /** @return the title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the percent text */
    public String getPercent() {
        return this.percent;
    }

    /** @param percent the percent text */
    public void setPercent(String percent) {
        this.percent = percent;
    }

    /** @return the rinfo text */
    public String getRinfo() {
        return this.rinfo;
    }

    /** @param rinfo the rinfo text */
    public void setRinfo(String rinfo) {
        this.rinfo = rinfo;
    }

    /** @return the image value */
    public String getImage() {
        return this.image;
    }

    /** @param image the image value */
    public void setImage(String image) {
        this.image = image;
    }

    /**
     * Renders every field as {@code MovieVO [id=..., type=..., ...]};
     * unset text fields appear as {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("MovieVO [");
        text.append("id=").append(id);
        text.append(", type=").append(type);
        text.append(", wdate=").append(wdate);
        text.append(", title=").append(title);
        text.append(", percent=").append(percent);
        text.append(", rinfo=").append(rinfo);
        text.append(", image=").append(image);
        return text.append("]").toString();
    }
}
- create new MovieDAO.java (class)
- 영화등록 메서드를 만들겠다.
package model;
import java.sql.PreparedStatement;
/** Data-access object for the {@code movies} table. */
public class MovieDAO {
    // 1. Register a movie
    /**
     * Inserts one row into {@code movies} from the given value object.
     * Failures are not propagated: the exception is printed to standard
     * output and the method returns normally.
     *
     * @param vo supplies title, type, image, rinfo, percent and wdate
     */
    public void insert(MovieVO vo) {
        String sql = "insert into movies(title,type,image,rinfo,percent,wdate) values(?,?,?,?,?,?)";
        // try-with-resources closes the statement even when execution fails,
        // so repeated inserts do not leak statement handles.
        try (PreparedStatement ps = Database.CON.prepareStatement(sql)) {
            ps.setString(1, vo.getTitle());
            ps.setString(2, vo.getType());
            ps.setString(3, vo.getImage());
            ps.setString(4, vo.getRinfo());
            ps.setString(5, vo.getPercent());
            ps.setString(6, vo.getWdate());
            ps.execute();
        } catch (Exception e) {
            System.out.println("insert : " + e.toString());
        }
    }
}
- 크롤링한 데이터를 넣는 작업을 하겠다.
- crawlservlet.java
@WebServlet(value = {"/cgv.json", "/lotte.json", "/megabox.json"})
public class CrawlServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
MovieDAO dao = new MovieDAO();
protected void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
response.setContentType("text/html;charset=UTF-8");
PrintWriter out = response.getWriter();
System.setProperty("webdriver.chrome.driver", "c:/data/web/chromedriver.exe");
ChromeOptions options = new ChromeOptions();
options.addArguments("headless");
WebDriver driver = new ChromeDriver(options);
switch (request.getServletPath()) {
case "/cgv.json":
driver.get("http://www.cgv.co.kr/movies/");
WebElement more = driver.findElement(By.className("link-more"));
more.click();
List<WebElement> list = driver.findElement(By.className("sect-movie-chart"))
.findElements(By.tagName("li"));
JSONArray array = new JSONArray();
for (WebElement e : list) {
WebElement wTitle = e.findElement(By.className("title"));
String title = wTitle.getText();
WebElement wImge = e.findElement(By.tagName("img"));
String image = wImge.getAttribute("src");
WebElement wPercent = e.findElement(By.className("percent"))
.findElement(By.tagName("span"));
String percent = wPercent.getText();
WebElement wRelease = e.findElement(By.className("txt-info"))
.findElement(By.tagName("strong"));
String release = wRelease.getText();
JSONObject object = new JSONObject();
object.put("title", title);
object.put("image", image);
object.put("percent", percent);
object.put("release", release);
array.add(object);
MovieVO vo = new MovieVO();
vo.setTitle(title);
vo.setImage(image);
vo.setPercent(percent);
vo.setRinfo(release);
vo.setType("CGV");
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
vo.setWdate(sdf.format(new Date()));
dao.insert(vo);
}
out.println(array);
break;
- 이제 해당 서블릿을 실행하면 DB에서 입력된 데이터를 확인할 수 있다.
- 메가박스, 롯데시네마도 크롤링 해보겠다.
package controller;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import model.MovieDAO;
import model.MovieVO;
// path는 중복되면 안된다.
@WebServlet(value = { "/lotte.json", "/megabox.json", "/cgv.json" })
public class CrawlServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
MovieDAO dao = new MovieDAO();
protected void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
response.setContentType("text/html;charset=UTF-8");
PrintWriter out = response.getWriter();
System.setProperty("webdriver.chrome.driver", "c:/data/web/chromedriver.exe");
ChromeOptions options = new ChromeOptions();
// options.addArguments("headless");
WebDriver driver = new ChromeDriver(options);
switch (request.getServletPath()) {
case "/lotte.json":
driver.get("https://www.lottecinema.co.kr/NLCHS/Movie/List?flag=1");
WebElement lmore = driver.findElement(By.cssSelector(".btn_txt_more")).findElement(By.tagName("span"));
lmore.click();
// 1초 대기
try {
Thread.sleep(1000);
} catch (Exception e) {
e.printStackTrace();
}
List<WebElement> lList = driver.findElements(By.className("screen_add_box"));
// JSONArray lArray = new JSONArray();
for (WebElement e : lList) {
try {
WebElement wTitle = e.findElement(By.className("tit_info"));
String title = wTitle.getText();
WebElement wImage = e.findElement(By.tagName("img"));
String image = wImage.getAttribute("src");
WebElement wPercent = e.findElement(By.className("rate_info"))
.findElement(By.tagName("em"));
String percent = wPercent.getText();
System.out.println(title+":"+image+":"+percent);
MovieVO vo = new MovieVO();
vo.setTitle(title);
vo.setImage(image);
vo.setPercent(percent);
vo.setType("LotteCinema");
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
vo.setWdate(sdf.format(new Date()));
vo.setRinfo(sdf.format(new Date()));
dao.insert(vo);
} catch (Exception error) {
System.out.println(error.toString());
}
}
break;
case "/megabox.json":
driver.get("https://www.megabox.co.kr/movie");
List<WebElement> mList = driver.findElement(By.id("movieList")).findElements(By.tagName("li"));
// JSONArray mArray = new JSONArray();
for (WebElement e : mList) {
WebElement wTitle = e.findElement(By.className("tit-area")).findElement(By.className("tit"));
String title = wTitle.getText();
WebElement wImage = e.findElement(By.tagName("img"));
String image = wImage.getAttribute("src");
WebElement wPercent = e.findElement(By.className("rate-date")).findElement(By.className("rate"));
String percent = wPercent.getText();
WebElement wInfo = e.findElement(By.className("rate-date")).findElement(By.className("date"));
String release = wInfo.getText();
percent = percent.substring(4);
System.out.println(title + ":" + image + ":" + percent + ":" + release);
MovieVO vo = new MovieVO();
vo.setTitle(title);
vo.setImage(image);
vo.setPercent(percent);
vo.setRinfo(release);
vo.setType("MEGABOX");
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
vo.setWdate(sdf.format(new Date()));
dao.insert(vo);
}
break;
case "/cgv.json":
driver.get("http://www.cgv.co.kr/movies/");
WebElement more = driver.findElement(By.className("link-more"));
more.click();
List<WebElement> list = driver.findElement(By.className("sect-movie-chart")).findElements(By.tagName("li"));
JSONArray array = new JSONArray();
for (WebElement e : list) {
WebElement wTitle = e.findElement(By.className("title"));
String title = wTitle.getText();
WebElement wImge = e.findElement(By.tagName("img"));
String image = wImge.getAttribute("src");
WebElement wPercent = e.findElement(By.className("percent")).findElement(By.tagName("span"));
String percent = wPercent.getText();
WebElement wRelease = e.findElement(By.className("txt-info")).findElement(By.tagName("strong"));
String release = wRelease.getText();
JSONObject object = new JSONObject();
object.put("title", title);
object.put("image", image);
object.put("percent", percent);
object.put("release", release);
array.add(object);
MovieVO vo = new MovieVO();
vo.setTitle(title);
vo.setImage(image);
vo.setPercent(percent);
vo.setRinfo(release);
vo.setType("CGV");
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
vo.setWdate(sdf.format(new Date()));
dao.insert(vo);
}
out.println(array);
break;
}
}
protected void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
}
}
- 새로운 프로젝트를 만들어 ex09에서 사용하였던 각 필요요소들을 가지고 오고 dao를 통해 db에서 데이터를 가지고 오는 메서드를 만들자.
- MovieDAO.java
// 2. List movies
/**
 * Loads every row of {@code movies} whose type column equals {@code type}.
 * Failures are not propagated: the exception is printed to standard output
 * and an object without the {@code array} key is returned.
 *
 * @param type value matched against the type column
 * @return a JSON object whose {@code array} entry holds one object per row
 *         (title, wdate, percent, rinfo, image)
 */
public JSONObject list(String type) {
    JSONObject object = new JSONObject();
    String sql = "select * from movies where type=?";
    // try-with-resources closes the statement and the result set even when
    // the query fails, so repeated calls do not leak JDBC handles.
    try (PreparedStatement ps = Database.CON.prepareStatement(sql)) {
        ps.setString(1, type);
        try (ResultSet rs = ps.executeQuery()) {
            JSONArray array = new JSONArray();
            while (rs.next()) {
                JSONObject obj = new JSONObject();
                obj.put("title", rs.getString("title"));
                obj.put("wdate", rs.getString("wdate"));
                obj.put("percent", rs.getString("percent"));
                obj.put("rinfo", rs.getString("rinfo"));
                obj.put("image", rs.getString("image"));
                array.add(obj);
            }
            object.put("array", array);
        }
    } catch (Exception e) {
        System.out.println("list : " + e.toString());
    }
    return object;
}
- MovieServlet.java
package controller;
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import model.MovieDAO;
/**
 * Serves the movie pages: {@code /movie/list.json} writes the DAO result for
 * the requested type, every other mapped path forwards to the shared layout
 * {@code /main.jsp} with the content page stored in the "page" attribute.
 */
@WebServlet(value = { "/movie/list", "/movie/cgv", "/movie/lotte", "/movie/megabox", "/movie/list.json" })
public class MovieServlet extends HttpServlet {
    private static final long serialVersionUID = 1L;

    MovieDAO dao = new MovieDAO();

    /**
     * Dispatches on the servlet path.
     *
     * @param request  the incoming GET request; "type" is read for the JSON path
     * @param response receives the JSON text or the forwarded page
     * @throws ServletException if the forward fails
     * @throws IOException      if writing or forwarding fails
     */
    protected void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        response.setContentType("text/html;charset=UTF-8");
        PrintWriter out = response.getWriter();

        // Content page to embed in the layout, chosen by path.
        String contentPage;
        switch (request.getServletPath()) {
        case "/movie/list.json":
            out.println(dao.list(request.getParameter("type")));
            return;
        case "/movie/list":
            contentPage = "/movie/list.jsp";
            break;
        case "/movie/cgv":
            contentPage = "/movie/cgv.jsp";
            break;
        case "/movie/lotte":
            contentPage = "/movie/lotte.jsp";
            break;
        case "/movie/megabox":
            contentPage = "/movie/megabox.jsp";
            break;
        default:
            return;
        }

        request.setAttribute("page", contentPage);
        RequestDispatcher layout = request.getRequestDispatcher("/main.jsp");
        layout.forward(request, response);
    }

    /** POST is not handled; intentionally empty. */
    protected void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
    }
}
- create new cgv.jsp, lotte.jsp, megabox.jsp
- 틀은 같고 ajax의 type 변수값만 변경하면 된다.
<%@ page language="java" contentType="text/html; charset=UTF-8"
pageEncoding="UTF-8"%>
<style>
/* Container for the movie cards; overflow:hidden makes it wrap the floated items. */
#movies {
	overflow: hidden;
	margin: 0px auto;
	text-align: center;
	padding-top: 20px;
	background-color: black;
}
/* One movie card, laid out left-to-right by floating. */
.item {
	float: left;
	width: 200px;
	margin-bottom: 30px;
	margin-left: 15px;
	padding: 3px;
	height: 330px;
	background: white;
}
/* Applies to every img on the page; inline styles can still override the width. */
img {
	width: 190px;
}
/* Keep long titles on one line and cut them off with an ellipsis. */
.title {
	overflow: hidden;
	text-overflow: ellipsis;
	white-space: nowrap;
}
</style>
<h1>
<img style="text-align: center; width: 300px;"
src="https://w.namu.la/s/ceb7b90e35425e24ef31d720c6b1eb518200d123ab852acf1ef19ba0119f8b237e2301be9e89da2e4a834aa686d10b925ad475e5710b1d0d3e1409439b9b1e02f0534d8cb4ec9ac693c0ca32b4a9ec35e1703f8d43c98a21e4378eaa5fbc036c" />
</h1>
<div id="movies"></div>
<script id="temp" type="text/x-handlebars-template">
{{#each array}}
<div class="item">
<img src="{{image}}"/>
<div class="info">
<div class="title">제목 : {{title}}</div>
<div class="rinfo">개봉일 : {{rinfo}}</div>
<div class="percent">예매율 : {{percent}}</div>
</div>
</div>
{{/each}}
</script>
<script>
getList();

/**
 * Requests the stored movies of one cinema type from /movie/list.json and
 * renders them into #movies with the Handlebars template in #temp.
 *
 * @param {string} [type] value sent as the "type" query parameter;
 *        defaults to 'CGV' when omitted, so other pages only need to pass
 *        a different value.
 */
function getList(type) {
	type = type || 'CGV';
	$.ajax({
		type : 'get',
		url : '/movie/list.json',
		dataType : 'json',
		data : {
			"type" : type
		},
		success : function(data) {
			var temp = Handlebars.compile($('#temp').html());
			$('#movies').html(temp(data));
		},
		// Report a failed request instead of leaving the list silently empty.
		error : function(xhr, status, err) {
			console.error('movie list request failed: ' + status, err);
		}
	});
}
</script>
- 셀레늄 라이브러리를 제외한 프로젝트 폴더이니 확인바란다.
ex09.zip
1.37MB
ex10.zip
0.69MB
'ICIA 수업일지' 카테고리의 다른 글
2021.08.31 수업일지(Spring, Servlet, MVC, Mysql, PL/SQL, JAVA) (0) | 2021.08.31 |
---|---|
2021.08.30 수업일지(Servlet, Spring, Mysql) (0) | 2021.08.30 |
2021.08.26 수업일지(Servlet, Spring, Mysql) (0) | 2021.08.26 |
2021.08.25 수업일지(Servlet, Spring, Mysql) (0) | 2021.08.25 |
2021.08.24 수업일지(Servlet, Mysql, Spring) (0) | 2021.08.24 |