万方数据检索:Java 自动检索

Shaka 5月前 ⋅ 417 阅读

 

万方数据检索核心代码


/**
 * Queries the Wanfang search endpoint once per filter term of the given company and
 * saves every returned article that is not already stored.
 *
 * <p>For each filter term the method requests the first result page, reads
 * {@code pageTotal} from the JSON response, walks the remaining pages, converts every
 * row of {@code pageRow} into a {@link TrPeriodical} and passes the new records of that
 * term to {@code periodicalService.save}. A record counts as a duplicate when
 * {@code periodicalService.getByLink} already returns an entry for its detail URL.
 *
 * <p>An {@link IOException} raised while fetching or decoding a page is logged and ends
 * the processing of the current term only; records collected before the failure are
 * still saved.
 *
 * @param request   current HTTP request; used to resolve the user recorded as creator
 * @param corp      company whose filter terms are searched
 * @param tradeName optional trade name; when blank, {@code getNormalList(corp)} supplies
 *                  the filter terms, otherwise {@code getListByTradeName(tradeName, corp)}
 * @return a JSON object string with a single {@code message} field describing the outcome
 * @throws UnsupportedEncodingException if building the first-page query string fails
 */
public String getWanFangInfo(HttpServletRequest request, String corp, String tradeName) throws UnsupportedEncodingException {
    Date startTime = new Date();
    JSONObject root = new JSONObject();
    String message = "获取失败!";

    // Search window: from the year of the date seven days ago up to the current year.
    Calendar search = Calendar.getInstance();
    String endSearchYear = String.valueOf(search.get(Calendar.YEAR));
    // add() moves the calendar back seven days; set(Calendar.DATE, -7) would instead
    // assign day-of-month -7, which resolves to a date late in the previous month.
    search.add(Calendar.DATE, -7);
    String beginSearchYear = String.valueOf(search.get(Calendar.YEAR));

    // Load every filter term configured for the company.
    List<TrFilterTerm> filterList;
    if (StringUtils.isNotBlank(tradeName)) {
        filterList = filterTermService.getListByTradeName(tradeName, corp);
    } else {
        filterList = filterTermService.getNormalList(corp);
    }
    logger.info("筛选条件:" + filterList.size() + "个");
    if (filterList.isEmpty()) {
        message = "未获取到检索条件!";
    }

    // One search per filter term.
    for (TrFilterTerm filterTerm : filterList) {
        List<TrPeriodical> list = new ArrayList<TrPeriodical>();

        // NOTE(review): the first request uses pageNum 0 while later pages use the
        // 1-based loop index; confirm the endpoint treats 0 as the first page.
        String data = fillWanFangData(new StringBuffer(), filterTerm, beginSearchYear, endSearchYear, 0);
        try {
            JSONObject json = fetchWanFangPage(data, "请求网址:");

            if (json != null) {
                int total = Integer.parseInt(json.get("pageTotal") != null ? json.get("pageTotal").toString() : "0");
                logger.info("总页数:" + total + "页,总条数:" + (json.get("pageTotalRows") != null ? json.get("pageTotalRows").toString() : "0") + "通用名:" + filterTerm.getGeneralName() + "-商品名:" + filterTerm.getTradeName());
                if (total == 0) {
                    if (filterList.size() == 1) {
                        message = filterTerm.getTradeName() + "-" + filterTerm.getGeneralName() + "条件未检索到结果";
                    }
                    continue;
                }
                for (int i = 1; i <= total; i++) {
                    logger.info("进入第" + i + "页...");
                    // Page 1 is already in hand from the request that returned pageTotal.
                    if (i > 1) {
                        data = fillWanFangData(new StringBuffer(), filterTerm, beginSearchYear, endSearchYear, i);
                        json = fetchWanFangPage(data, "拼接网址:");
                    }
                    JSONArray jsonArray = JSONArray.fromObject(json.get("pageRow"));

                    // Convert and collect every row of this page.
                    int j = 0;
                    for (Object jsonList : jsonArray) {
                        j++;
                        JSONObject rowData = JSONObject.fromObject(jsonList);

                        // The article id is part of the link used for duplicate detection,
                        // so a row without one cannot be stored.
                        Object articleId = rowData.get("article_id");
                        if (articleId == null) {
                            logger.warn("缺少article_id,跳过第" + i + "页第" + j + "条数据");
                            continue;
                        }
                        String id = articleId.toString();

                        String title = rowData.get("title") != null ? stripWanFangMarkup(rowData.get("title").toString()) : "";
                        String abstractText = rowData.get("summary") != null ? stripWanFangMarkup(rowData.get("summary").toString()) : "";
                        String journal = rowData.get("perio_title") != null ? rowData.get("perio_title").toString() : "";
                        String englishJournal = rowData.get("perio_title_en") != null ? rowData.get("perio_title_en").toString() : "";
                        String year = rowData.get("publish_year") != null && !rowData.get("publish_year").equals("null") ? rowData.get("publish_year").toString() : "";
                        String authorsUnit = rowData.get("authors_unit") != null ? rowData.get("authors_unit").toString() : "";
                        String vol = rowData.get("issue_num") != null && !rowData.get("issue_num").equals("null") ? rowData.get("issue_num").toString() : "";
                        String keyWord = rowData.get("keywords") != null ? stripWanFangMarkup(rowData.get("keywords").toString()) : "";
                        String page = rowData.get("page_cnt") != null ? rowData.get("page_cnt").toString() : "";
                        String pageRange = rowData.get("page_range") != null ? rowData.get("page_range").toString() : "";
                        String author = rowData.get("authors_name") != null ? rowData.get("authors_name").toString() : "";

                        // abst_webdate carries a nested "time" value that is fed to new Date(long);
                        // when absent the publication date falls back to new Date(0).
                        long pubMillis = 0L;
                        if (rowData.get("abst_webdate") != null) {
                            pubMillis = Long.parseLong(JSONObject.fromObject(rowData.get("abst_webdate")).get("time").toString());
                        }
                        Date pubDate = new Date(pubMillis);

                        // The "id" field decides which detail page type the link points to;
                        // a missing value keeps the default "perio".
                        String typeId = rowData.get("id") != null ? rowData.get("id").toString() : "";
                        String type = "perio";
                        if (typeId.contains("degree")) {
                            type = "degree";
                        } else if (typeId.contains("conf")) {
                            type = "conf";
                        }

                        // Duplicate detection is done on the detail link.
                        String saveUrl = "http://g.wanfangdata.com.cn/details/detail.do?_type=" + type + "&id=" + id;
                        TrPeriodical per = periodicalService.getByLink(saveUrl, corp, tradeName);
                        if (per != null) {
                            logger.info("重复记录跳过," + "第" + i + "页第" + j + "条数据,文件名为:" + title);
                            continue;
                        }

                        per = new TrPeriodical();
                        per.setLinkUrl(saveUrl);
                        per.setTitle(title);
                        // Abstracts longer than 1300 characters are cut to 1250 plus a marker.
                        per.setAbstractText(abstractText.length() > 1300 ? abstractText.substring(0, 1250) + "已截断" : abstractText);
                        per.setAuthor(author);
                        per.setAuthorsUnit(authorsUnit);
                        per.setJournal(journal);
                        per.setEnglishJournal(englishJournal);
                        per.setYear(year);
                        per.setVol(vol);
                        per.setPubDate(pubDate);
                        per.setDrugName(filterTerm.getTradeName());
                        per.setGeneralName(filterTerm.getGeneralName());
                        per.setPage(page + (StringUtils.isNotBlank(pageRange) ? "," + pageRange : ""));
                        per.setKeyWord(keyWord);
                        per.setSearchCompany(filterTerm.getCorp());
                        per.setCreated(new Date());
                        per.setCreatedBy(getUser(request).getUserId());
                        per.setSourceWeb("万方");
                        list.add(per);
                    }
                    message = "保存成功";
                }
            }
        } catch (IOException e) {
            // Keep going with the next term; whatever was collected so far is still saved below.
            logger.error("万方检索请求失败,通用名:" + filterTerm.getGeneralName() + "-商品名:" + filterTerm.getTradeName(), e);
        }
        // Save once per filter term.
        logger.info("保存药品名:[" + filterTerm.getTradeName() + "]的检索结果." + list.size() + "条");
        periodicalService.save(list);
    }
    root.put("message", message);
    long elapsedMillis = new Date().getTime() - startTime.getTime();
    logger.info("用时:" + elapsedMillis / 1000 / 60 + "分," + elapsedMillis / 1000 % 60 + "秒");
    return root.toString();
}

/**
 * Requests one result page from the Wanfang search endpoint and parses the body as JSON.
 *
 * <p>Before URL-decoding the body, every {@code %} that is not followed by two hex digits
 * is rewritten to {@code %25} so that {@link URLDecoder} does not reject it.
 *
 * @param query     query string appended after {@code getCoreSearch.do?}
 * @param logPrefix text logged in front of the requested URL
 * @return the parsed response
 * @throws IOException if the request fails or the body cannot be decoded
 */
private JSONObject fetchWanFangPage(String query, String logPrefix) throws IOException {
    String url = "http://g.wanfangdata.com.cn/searchResult/getCoreSearch.do?" + query;
    logger.info(logPrefix + url);
    Document res = Jsoup.connect(url).timeout(120000).get();
    String body = res.body().text().replaceAll("%(?![0-9a-fA-F]{2})", "%25");
    return JSONObject.fromObject(URLDecoder.decode(body, "utf-8"));
}

/** Removes named character entities (such as {@code &nbsp;}) and tags from a text value. */
private static String stripWanFangMarkup(String text) {
    return text.replaceAll("\\&[a-zA-Z]{0,9};", "").replaceAll("<[^>]*>", "");
}

/**
 * Appends the Wanfang search query string for one filter term to {@code sb}.
 *
 * <p>The {@code paramStrs} value contains one clause per comma-separated entry of
 * {@code tt.getField()}, joined by {@code %2B}. Each clause has the form
 * {@code field%3A(%22tradeName%22%2B%22generalName%22)}, where a blank trade or general
 * name is left out together with its separator. Field names and both names are
 * URL-encoded as UTF-8.
 *
 * @param sb        buffer the query string is appended to; existing content is kept
 * @param tt        filter term supplying the field list, trade name and general name
 * @param startDate value written to the {@code startDate} parameter
 * @param endDate   value written to the {@code endDate} parameter
 * @param pageNum   value written to the {@code pageNum} parameter
 * @return the full content of {@code sb} after appending
 * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
 */
private String fillWanFangData(StringBuffer sb, TrFilterTerm tt, String startDate, String endDate, Integer pageNum) throws UnsupportedEncodingException {
    String enTradeName = "";
    String generalName = "";
    if (StringUtils.isNotBlank(tt.getTradeName())) {
        enTradeName = URLEncoder.encode(tt.getTradeName(), "utf-8");
    }
    if (StringUtils.isNotBlank(tt.getGeneralName())) {
        generalName = URLEncoder.encode(tt.getGeneralName(), "utf-8");
    }
    boolean hasTradeName = StringUtils.isNotBlank(enTradeName);
    boolean hasGeneralName = StringUtils.isNotBlank(generalName);

    sb.append("paramStrs=");
    // split(",") already returns a one-element array when there is no comma,
    // so no separate single-field branch is needed.
    String[] fields = tt.getField().split(",");
    for (int i = 0; i < fields.length; i++) {
        if (i != 0) {
            sb.append("%2B");
        }
        sb.append(URLEncoder.encode(fields[i], "utf-8"));
        sb.append("%3A(");
        if (hasTradeName) {
            sb.append("%22").append(enTradeName).append("%22");
            if (hasGeneralName) {
                sb.append("%2B");
            }
        }
        if (hasGeneralName) {
            sb.append("%22").append(generalName).append("%22");
        }
        sb.append(")");
    }
    sb.append("&startDate=").append(startDate);
    sb.append("&endDate=").append(endDate);
    sb.append("&classType=perio-perio_artical%2Cdegree-degree_artical%2Cconference-conf_artical");
    sb.append("&pageNum=").append(pageNum);
    sb.append("&pageSize=50");
    sb.append("&isSearchSecond=false");
    logger.info("拼接参数:" + sb.toString());
    return sb.toString();
}

/**
 * Returns the values of {@code filterTermService.getCorp()} as a JSON array string.
 *
 * @return JSON array with one string element per value returned by the service
 */
@ResponseBody
@RequestMapping(name = "获取客户公司", value = "/getCorp", method = RequestMethod.GET, produces = "application/json;charset=UTF-8")
public String getInfo() {
    JSONArray json = new JSONArray();
    // The element type must be declared: iterating a raw List as String does not compile.
    List<String> corps = filterTermService.getCorp();
    for (String corpName : corps) {
        json.add(corpName);
    }
    return json.toString();
}

注意:本文归作者所有,未经作者允许,不得转载。

全部评论: 0

    我有话说: