package cn.edu.hfut.dmic.webcollector.example;

import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.plugin.berkeley.BreadthCrawler;
import java.util.Iterator;

/* loaded from: input_file:cn/edu/hfut/dmic/webcollector/example/DemoDepthCrawler.class */
/**
 * Example crawler that tracks how many link hops separate each page from a seed.
 *
 * <p>Every crawl task carries a {@code "depth"} metadata entry holding a decimal
 * number as a string. Seeds are registered with depth 1, and each task handed to
 * {@link #afterParse(Page, CrawlDatums)} is stamped with the depth of the page it
 * came from plus one.
 */
public class DemoDepthCrawler extends BreadthCrawler {
    /** Metadata key under which a task's depth is stored. */
    private static final String DEPTH_KEY = "depth";

    /** Depth given to seeds, and assumed for pages whose depth is missing or unreadable. */
    private static final int INITIAL_DEPTH = 1;

    /**
     * Creates the crawler by forwarding both arguments to the {@link BreadthCrawler}
     * constructor.
     *
     * @param str forwarded unchanged; {@code main} passes {@code "depth_crawler"}
     *            (presumably the crawl storage path/name; confirm against BreadthCrawler)
     * @param z   forwarded unchanged; {@code main} passes {@code true}
     *            (presumably toggles automatic link parsing; confirm against BreadthCrawler)
     */
    public DemoDepthCrawler(String str, boolean z) {
        super(str, z);
    }

    /**
     * Prints the URL and the raw {@code "depth"} metadata of the visited page to
     * standard output. No follow-up tasks are added here.
     *
     * @param page        the page being visited
     * @param crawlDatums follow-up task collection; not used by this implementation
     */
    @Override // cn.edu.hfut.dmic.webcollector.fetcher.Visitor
    public void visit(Page page, CrawlDatums crawlDatums) {
        System.out.println("visiting:" + page.getUrl() + "\tdepth=" + page.meta(DEPTH_KEY));
    }

    /**
     * Stamps every task in {@code crawlDatums} with a depth one greater than the
     * depth of {@code page}.
     *
     * @param page        the page whose depth is the baseline
     * @param crawlDatums tasks to tag; each one receives the same depth value
     */
    @Override // cn.edu.hfut.dmic.webcollector.crawler.AutoParseCrawler
    protected void afterParse(Page page, CrawlDatums crawlDatums) {
        // The value is identical for every task, so build the string once.
        String nextDepth = String.valueOf(depthOf(page) + 1);
        for (CrawlDatum datum : crawlDatums) {
            datum.meta(DEPTH_KEY, nextDepth);
        }
    }

    /**
     * Reads the depth recorded on {@code page}.
     *
     * @param page the page to inspect
     * @return the parsed depth, or {@link #INITIAL_DEPTH} when the metadata entry is
     *         absent or is not a valid decimal integer
     */
    private static int depthOf(Page page) {
        String rawDepth = page.meta(DEPTH_KEY);
        if (rawDepth == null) {
            // A seed may have been added without depth metadata; treat it as a seed.
            return INITIAL_DEPTH;
        }
        try {
            return Integer.parseInt(rawDepth.trim());
        } catch (NumberFormatException ignored) {
            // Unreadable metadata is handled like missing metadata instead of
            // letting the exception escape from the post-parse hook.
            return INITIAL_DEPTH;
        }
    }

    /**
     * Runs the demo: registers five list pages as seeds with depth 1, adds three
     * URL rules, then starts the crawl.
     *
     * @param strArr command-line arguments; ignored
     * @throws Exception if starting or running the crawler fails
     */
    public static void main(String[] strArr) throws Exception {
        DemoDepthCrawler demoDepthCrawler = new DemoDepthCrawler("depth_crawler", true);
        for (int i = 1; i <= 5; i++) {
            String seedUrl = "http://news.hfut.edu.cn/list-1-" + i + ".html";
            demoDepthCrawler.addSeed(
                    new CrawlDatum(seedUrl).meta(DEPTH_KEY, String.valueOf(INITIAL_DEPTH)));
        }
        demoDepthCrawler.addRegex("http://news.hfut.edu.cn/show-.*html");
        // NOTE(review): the leading '-' on the next two rules presumably marks them as
        // exclusions (image links and URLs containing '#'); confirm against addRegex docs.
        demoDepthCrawler.addRegex("-.*\\.(jpg|png|gif).*");
        demoDepthCrawler.addRegex("-.*#.*");
        demoDepthCrawler.setTopN(5);
        // NOTE(review): 3 is presumably the number of crawl rounds; confirm against start().
        demoDepthCrawler.start(3);
    }
}
