`

抓取篮球球员图片-利用xpath匹配xml

    博客分类:
  • java
 
阅读更多
package com.hym.io;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.Node;

/**
 * Batch-downloads basketball player photos.
 *
 * <p>For every player bet ID in a fixed range, the photo path is looked up in a
 * local XML file via XPath, the image is fetched from {@link #WEB_SITE} and
 * stored under {@link #LOCAL_PATH} as {@code <betId><extension>}. IDs for which
 * no usable photo path is found are collected and written to
 * {@link #PLAYER_NO_PHOTO_PATH} by {@link #main(String[])}.
 */
public class GetNewBKPlayerPhoto {
	/** Text file of bet IDs; only referenced by the disabled file-based ID source noted in {@link #getNewPlayerBetId()}. */
	private static final String PLAYER_BET_ID_PATH = "E:\\work\\2014\\basketball\\20140116_photo\\PlayerBetId2.txt";
	/** XML file that is searched for each player's photo path. */
	private static final String PLAYER_XML_PATH = "E:\\work\\2014\\basketball\\20140116_photo\\LqPlayer_xml.xml";
	/** Output file receiving the bet IDs that had no usable photo. */
	private static final String PLAYER_NO_PHOTO_PATH = "E:\\work\\2014\\basketball\\20140116_photo\\noPhoto3.txt";
	/** Directory the downloaded photos are written to. */
	private static final String LOCAL_PATH = "E:\\work\\2014\\basketball\\20140116_photo\\bkPlayerPhoto3";
	/** Site prefix prepended to every photo path taken from the XML. */
	private static final String WEB_SITE = "http://nba.xxx.com";

	/** First bet ID (inclusive) of the range to process. */
	private static final int FIRST_BET_ID = 3791;
	/** Last bet ID (inclusive) of the range to process. */
	private static final int LAST_BET_ID = 4013;

	/** Connection timeout so a single unreachable host cannot stall the whole batch. */
	private static final int CONNECT_TIMEOUT_MS = 10000;
	/** Read timeout so a single stalled transfer cannot stall the whole batch. */
	private static final int READ_TIMEOUT_MS = 30000;

	/**
	 * Runs the batch download and writes the IDs without a photo to
	 * {@link #PLAYER_NO_PHOTO_PATH}.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		System.out.println("downloaded begin ...... ");
		GetNewBKPlayerPhoto getPhoto = new GetNewBKPlayerPhoto();
		List<String> noPhotoList = getPhoto.downLoadPhoto();
		try {
			FileUtils.writeLines(new File(PLAYER_NO_PHOTO_PATH), noPhotoList);
		} catch (IOException e) {
			e.printStackTrace();
		}
		System.out.println("downloaded end ...... ");
	}

	/**
	 * Builds the list of bet IDs to process: every integer from
	 * {@link #FIRST_BET_ID} to {@link #LAST_BET_ID} inclusive, as strings.
	 *
	 * <p>An earlier variant read the IDs line by line from
	 * {@link #PLAYER_BET_ID_PATH}; that source is currently disabled.
	 *
	 * @return the bet IDs in ascending order; never {@code null}
	 */
	public List<String> getNewPlayerBetId() {
		List<String> betIds = new ArrayList<String>();
		for (int id = FIRST_BET_ID; id <= LAST_BET_ID; id++) {
			betIds.add(String.valueOf(id));
		}
		return betIds;
	}

	/**
	 * Looks up the photo path of every bet ID in the player XML file.
	 *
	 * @return a map from bet ID to photo path; the value is {@code null} when no
	 *         matching node exists. The map is empty when the XML file cannot
	 *         be read or parsed (the error is printed, not thrown).
	 */
	public Map<String, String> getBetIdPhotoMap() {
		Map<String, String> betIdPhotoMap = new HashMap<String, String>();

		// Defensive: the method is public and overridable, so do not assume non-null.
		List<String> playerBetIdList = getNewPlayerBetId();
		if (playerBetIdList == null || playerBetIdList.isEmpty()) {
			return betIdPhotoMap;
		}

		String xml;
		try {
			xml = FileUtils.readFileToString(new File(PLAYER_XML_PATH), "utf-8");
		} catch (IOException e) {
			e.printStackTrace();
			return betIdPhotoMap;
		}

		Element root = getRoot(xml);
		if (root == null) {
			// getRoot has already reported the parse error; nothing can be looked up.
			return betIdPhotoMap;
		}

		for (String betId : playerBetIdList) {
			betIdPhotoMap.put(betId, getNodeAttribute(root, "i", "Photo", "id", betId));
		}
		return betIdPhotoMap;
	}

	/**
	 * Downloads the photo of every player that has one.
	 *
	 * @return the bet IDs that were skipped because their photo path was empty
	 *         or looked like a placeholder; never {@code null}
	 */
	public List<String> downLoadPhoto() {
		List<String> noPhotoList = new ArrayList<String>();
		Map<String, String> betIdPhotoMap = getBetIdPhotoMap();
		if (betIdPhotoMap == null) {
			return noPhotoList;
		}

		for (Map.Entry<String, String> entry : betIdPhotoMap.entrySet()) {
			String betId = entry.getKey();
			String photo = entry.getValue();

			// NOTE(review): presumably the feed marks a missing photo with a
			// placeholder whose path contains "no". This substring test also
			// skips any real photo whose path happens to contain "no" --
			// confirm against the XML data before tightening it.
			if (StringUtils.isEmpty(photo) || photo.indexOf("no") > -1) {
				noPhotoList.add(betId);
				continue;
			}

			// Only report success when the file was really written.
			if (download(betId, photo)) {
				System.out.println("downloaded betId = " + betId + ", photo = " + photo);
			} else {
				System.err.println("download failed betId = " + betId + ", photo = " + photo);
			}
		}
		return noPhotoList;
	}

	/**
	 * Parses an XML string and returns its root element.
	 *
	 * @param xml the XML text
	 * @return the root element, or {@code null} if the text cannot be parsed
	 *         (the error is printed)
	 */
	private Element getRoot(String xml) {
		try {
			Document doc = DocumentHelper.parseText(xml);
			return doc.getRootElement();
		} catch (DocumentException e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Finds the first {@code nodeName} node anywhere in the document whose
	 * {@code key} equals {@code value} and returns the string value of
	 * {@code attrName} evaluated relative to that node.
	 *
	 * <p>Both {@code key} and {@code attrName} are used as plain XPath steps, so
	 * they address child elements. NOTE(review): if the feed stores them as XML
	 * attributes the caller must pass {@code "@id"} / {@code "@Photo"} --
	 * confirm against the XML file.
	 *
	 * @param root     element to search from; {@code null} yields {@code null}
	 * @param nodeName name of the node to search for
	 * @param attrName XPath (relative to the matched node) of the value to return
	 * @param key      XPath (relative to the candidate node) compared against {@code value}
	 * @param value    the value to match
	 * @return the string value, or {@code null} when no node matches
	 */
	public String getNodeAttribute(Element root, String nodeName,
			String attrName, String key, String value) {
		if (root == null) {
			return null;
		}
		String xpath = "//" + nodeName + "[" + key + "=" + toXPathLiteral(value) + "]";
		List<Node> nodes = root.selectNodes(xpath);
		if (nodes == null || nodes.isEmpty()) {
			return null;
		}
		return nodes.get(0).valueOf(attrName);
	}

	/**
	 * Downloads one player's photo and stores it as
	 * {@code <betId><extension>} under {@link #LOCAL_PATH}. Failures are printed
	 * and otherwise ignored so that one bad photo does not abort a batch.
	 *
	 * @param betId the player's bet ID, used as the local file name
	 * @param photo the photo path, appended to {@link #WEB_SITE}
	 */
	public void downBKPlayerPhoto(String betId, String photo) {
		download(betId, photo);
	}

	/**
	 * Performs the actual download for {@link #downBKPlayerPhoto(String, String)}.
	 *
	 * @return {@code true} if the file was written, {@code false} on any failure
	 */
	private boolean download(String betId, String photo) {
		if (photo == null) {
			System.err.println("no photo path for betId = " + betId);
			return false;
		}
		try {
			// Download the photos one at a time and write each to local disk.
			URL url = new URL(WEB_SITE + photo);
			File file = new File(LOCAL_PATH, betId + extensionOf(photo));
			FileUtils.copyURLToFile(url, file, CONNECT_TIMEOUT_MS, READ_TIMEOUT_MS);
			return true;
		} catch (Exception e) {
			// Deliberately broad: this is a best-effort batch, keep going.
			e.printStackTrace();
			return false;
		}
	}

	/**
	 * Extracts the file extension (including the leading dot) from a photo
	 * path, ignoring any query string or fragment and any dot that belongs to a
	 * directory name.
	 *
	 * @return the extension such as {@code ".jpg"}, or {@code ""} if there is none
	 */
	private static String extensionOf(String photo) {
		String path = photo;
		int cut = path.indexOf('?');
		if (cut >= 0) {
			path = path.substring(0, cut);
		}
		cut = path.indexOf('#');
		if (cut >= 0) {
			path = path.substring(0, cut);
		}
		int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
		int lastDot = path.lastIndexOf('.');
		return lastDot > lastSeparator ? path.substring(lastDot) : "";
	}

	/**
	 * Renders a string as an XPath 1.0 string literal. XPath 1.0 has no escape
	 * syntax, so the quote style is chosen to avoid the quotes in the value and
	 * {@code concat()} is used when the value contains both kinds.
	 */
	private static String toXPathLiteral(String value) {
		String text = String.valueOf(value);
		if (text.indexOf('\'') < 0) {
			return "'" + text + "'";
		}
		if (text.indexOf('"') < 0) {
			return "\"" + text + "\"";
		}
		StringBuilder literal = new StringBuilder("concat(");
		String[] parts = text.split("'", -1);
		for (int i = 0; i < parts.length; i++) {
			if (i > 0) {
				literal.append(", \"'\", ");
			}
			literal.append('\'').append(parts[i]).append('\'');
		}
		return literal.append(')').toString();
	}
}

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics