package com.javaweb.spider; import com.javaweb.common.utils.StringUtils; import com.javaweb.spider.domain.ExitWayEnum; import com.javaweb.spider.domain.SpiderConfig; import com.javaweb.spider.domain.SpiderException; import us.codecraft.webmagic.Request; import us.codecraft.webmagic.Spider; import us.codecraft.webmagic.processor.PageProcessor; import java.util.concurrent.ConcurrentHashMap; /** * 爬虫入口 */ public class MyConfigurableSpider extends Spider { /** * 用来保存正在运行的所有Spider,key要求唯一,一般为用户ID,需要调用方生成 */ public static final ConcurrentHashMap SPIDER_BUCKET = new ConcurrentHashMap<>(); private SpiderConfig config; /** * 唯一的key,一般为用户ID,需要调用方生成 */ private String missionId; private volatile long startTime = 0L; private MyConfigurableSpider(PageProcessor pageProcessor, SpiderConfig config, String missionId) { super(pageProcessor); this.config = config; this.missionId = missionId; SPIDER_BUCKET.put(missionId, this); } public static MyConfigurableSpider create(PageProcessor pageProcessor, SpiderConfig config, String missionId) { return new MyConfigurableSpider(pageProcessor, config, missionId); } public static MyConfigurableSpider getSpider(String missionId) { if (StringUtils.isEmpty(missionId)) { throw new SpiderException("missionId:[" + missionId + "]为空,请指定missionId"); } MyConfigurableSpider spider = SPIDER_BUCKET.get(missionId); if (null == spider) { throw new SpiderException("当前没有正在运行的爬虫!missionId:[" + missionId + "]"); } return spider; } @Override protected void onSuccess(Request request) { super.onSuccess(request); if (this.getStatus() == Status.Running && ExitWayEnum.DURATION.toString().equals(config.getExitWay())) { if (startTime < System.currentTimeMillis()) { this.stop(); } } } @Override public void run() { if (ExitWayEnum.DURATION.toString().equals(config.getExitWay())) { startTime = System.currentTimeMillis() + config.getCount() * 1000; } super.run(); } @Override protected void onError(Request request) { super.onError(request); } @Override public void close() { super.close(); SPIDER_BUCKET.remove(this.missionId); } @Override public void stop() { Status status = this.getStatus(); if (status.equals(Status.Running)) { super.stop(); SPIDER_BUCKET.remove(this.missionId); } else if (status.equals(Status.Init)) { throw new SpiderException("爬虫正在初始化!missionId:[" + this.missionId + "]"); } else { throw new SpiderException("当前没有正在运行的爬虫!missionId:[" + this.missionId + "]"); } } }