package com.bxm.spider.prod.job;

import com.alibaba.fastjson.JSONObject;
import com.bxm.spider.cache.RedisClient;
import com.bxm.spider.cache.constant.TaskKeyConstant;
import com.bxm.spider.constant.json.JsonObjectParaConstant;
import com.bxm.spider.constant.monitor.ErrorEnum;
import com.bxm.spider.constant.monitor.MonitorConstant;
import com.bxm.spider.constant.monitor.MonitorHelper;
import com.bxm.spider.constant.processor.PretreatmentEnum;
import com.bxm.spider.constant.proxy.ProxyFlagEnum;
import com.bxm.spider.constant.url.UrlTypeEnum;
import com.bxm.spider.prod.integration.service.DownLoadIntegrationService;
import com.bxm.spider.prod.model.dao.UrlConfig;
import com.bxm.spider.prod.param.DownloadParam;
import com.bxm.spider.prod.param.LoginAccountParam;
import com.bxm.spider.prod.common.constants.Constant;
import com.bxm.spider.prod.utils.ProdServiceUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.quartz.JobDataMap;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

import java.util.List;

/**
 * Default scheduling job for list URLs.
 *
 * <p>On every trigger the job pops up to {@code EXECUTE_RATE} entries from the
 * task's list queue in Redis, resolves the {@link UrlConfig} stored in the job
 * data map for each entry, and hands the download request to the job thread pool.
 *
 * @ClassName SpiderQueueJob
 * @CopyRright (c) 2018-bxm: Hangzhou Weicai Network Technology Co., Ltd.
 * @Author kk.xie
 * @Date 2018/10/24 16:39
 * @Version 1.0
 * @Modifier kk.xie
 * @Modify Date 2018/10/24 16:39
 **/
public class SpiderQueueJob extends ProdJob {
    private static final Logger logger = LoggerFactory.getLogger(SpiderQueueJob.class);

    /** Number of queue entries processed per trigger when the job data holds no usable rate. */
    private static final int DEFAULT_EXECUTE_RATE = 1;

    @Autowired
    private RedisClient redisClient;

    @Autowired
    private DownLoadIntegrationService downLoadIntegrationService;

    @Autowired
    private JobThreadPool jobThreadPool;

    /**
     * Pops queued list URLs for the task and dispatches one download per URL.
     *
     * <p>A queue entry is either a seed URL on its own, or a seed URL joined to a
     * parsed list URL with {@code Constant.URL_JOINT_CHAR}. The seed URL is the key
     * under which the matching {@link UrlConfig} is stored in the job data map.
     * Any exception is logged to the monitor log and not rethrown.
     *
     * @param context Quartz execution context carrying the job data map
     * @throws JobExecutionException declared by the overridden method; not thrown here
     */
    @Override
    public void execute(JobExecutionContext context) throws JobExecutionException {
        String serialNum = "";
        try {
            JobDataMap data = context.getJobDetail().getJobDataMap();
            // Serial number identifying the task
            serialNum = data.getString(Constant.SERIAL_NUM);

            // Redis key of the list queue for this task
            String queueKey = TaskKeyConstant.getQueueList(serialNum);
            // How many entries to process in this run
            int executeRate = DEFAULT_EXECUTE_RATE;
            try {
                executeRate = data.getInt(Constant.EXECUTE_RATE);
            } catch (Exception e) {
                // The second placeholder is the serial number, so log serialNum (not the rate).
                logger.error("SpiderQueueJob获取执行频率失败，使用默认值 {}, 流水号：{}", DEFAULT_EXECUTE_RATE, serialNum, e);
            }

            for (int i = 0; i < executeRate; i++) {
                String queueUrl = redisClient.lpop(queueKey);
                if (StringUtils.isEmpty(queueUrl)) {
                    logger.debug("queue job:{}, pop url: null", context.getJobDetail().getKey());
                    continue;
                }
                logger.info(MonitorConstant.MONITOR, MonitorHelper.ofSuccessLog(MonitorConstant.PROD_START, serialNum, UrlTypeEnum.URL_LIST));

                UrlConfig urlConfig;
                String[] urls = queueUrl.split(Constant.URL_JOINT_CHAR);
                if (urls.length == 1) {
                    // One part: the entry is the seed URL itself
                    urlConfig = (UrlConfig) data.get(urls[0]);
                } else if (urls.length == 2) {
                    // Two parts: seed URL followed by the parsed list URL to request
                    urlConfig = (UrlConfig) data.get(urls[0]);
                    queueUrl = urls[1];
                } else {
                    // Anything else is malformed
                    logger.warn("出现非法请求url: {}, data:{}", queueUrl, JSONObject.toJSONString(data));
                    continue;
                }

                if (urlConfig == null) {
                    logger.warn("urlconfig 不存在！出现非法请求url: {}, data: {}", queueUrl, JSONObject.toJSONString(data));
                    continue;
                }

                // queueUrl is reassigned above, so capture a final copy for the lambdas
                final String requestUrl = queueUrl;
                Integer proxyFlag = urlConfig.getProxyFlag();
                // Proxied requests all go through the scheduled (delayed) executor
                if (ProxyFlagEnum.PROXY_OPEN.getCode().equals(proxyFlag)) {
                    jobThreadPool.executeScheduled(() -> executeDown(urlConfig, context, requestUrl, queueKey));
                } else {
                    jobThreadPool.execute(() -> executeDown(urlConfig, context, requestUrl, queueKey));
                }

            }
        } catch (Exception e) {
            logger.error(MonitorConstant.MONITOR, "【执行队列下载出错】{} exception:", MonitorHelper.ofFailLog(
                    MonitorConstant.PROD_PROGRESS, serialNum, UrlTypeEnum.URL_LIST, ErrorEnum.PROD_ERROR, e.getMessage()), e);
        }

    }

    /**
     * Builds the download request for one list URL and submits it to the download service.
     *
     * <p>Nothing is downloaded when the task has no entry in the executing-task hash,
     * or when the depth recorded for the seed URL exceeds the configured queue depth.
     * On success the URL is added to the task's crawled-queue set; on failure it is
     * pushed back onto the queue.
     *
     * @param urlConfig configuration of the seed URL this request belongs to
     * @param context   Quartz execution context, used for logging and login-account lookup
     * @param queueUrl  the list URL to download
     * @param queueKey  Redis key of the list queue the URL was popped from
     */
    private void executeDown(UrlConfig urlConfig, JobExecutionContext context, String queueUrl, String queueKey) {
        String serialNum = urlConfig.getSerialNum();
        if (StringUtils.isEmpty(redisClient.hGet(TaskKeyConstant.getTaskExecutingHash(), serialNum))) {
            // Return regardless of the log level: the early exit must not depend on debug logging.
            logger.debug("任务已停止，不执行列表调度操作！serialNum: {}", serialNum);
            return;
        }

        // Per-URL overrides stored alongside the queue entry; the hash entry is removed on read.
        String jsonStr = redisClient.hgetAnddel(TaskKeyConstant.getUrlObjectHash(serialNum), queueUrl);
        JSONObject jsonObject = JSONObject.parseObject(jsonStr);
        String cookie = urlConfig.getCookie();
        String userAgent = urlConfig.getUserAgent();
        String referer = urlConfig.getReferer();
        Integer queueDepth = urlConfig.getQueueDepth();
        String channel = urlConfig.getChannel();
        String originUrl = urlConfig.getUrl();
        String charset = urlConfig.getCharset();
        String pretreatmentParam = urlConfig.getPretreatmentParam();
        PretreatmentEnum pretreatmentType = StringUtils.isBlank(urlConfig.getPretreatmentType())
                ? null : PretreatmentEnum.valueOf(urlConfig.getPretreatmentType());
        String processorType = overrideOrDefault(jsonObject, JsonObjectParaConstant.PROCESSOR, urlConfig.getProcessorType());
        String persistenceType = overrideOrDefault(jsonObject, JsonObjectParaConstant.PERSISTENCE, urlConfig.getPersistenceType());
        // Null-safe comparison, consistent with the routing decision in execute():
        // a missing flag means "no proxy" instead of a NullPointerException.
        ProxyFlagEnum proxyFlag = ProxyFlagEnum.PROXY_OPEN.getCode().equals(urlConfig.getProxyFlag())
                ? ProxyFlagEnum.PROXY_OPEN : ProxyFlagEnum.PROXY_CLOSE;
        // Login account information (helper is not defined in this class; presumably inherited from ProdJob)
        List<LoginAccountParam> loginAccountList = getLoginAccountParamList(context);

        // Check whether the seed URL has reached its crawl-depth limit.
        // toInt falls back to -1 for null, blank, or any value that is not a plain int
        // (isCreatable also accepted "1.5", "0x1F", "10L", which Integer.valueOf rejects).
        String originDepth = redisClient.hGet(TaskKeyConstant.getDepthCatchHash(serialNum), originUrl);
        int currentDepth = NumberUtils.toInt(originDepth, -1);

        logger.debug("execute job: {}, origin url depth: {}", context.getJobDetail().getKey(), currentDepth);

        if (currentDepth > queueDepth) {
            logger.info("已达到爬取深度上限: {}，停止执行列表url爬取, 流水号：{}", queueDepth, serialNum);
            return;
        }
        String extraJson = null == jsonObject ? "" : jsonObject.getString(JsonObjectParaConstant.JSONOBJECT);
        DownloadParam downLoadParam = ProdServiceUtils.ofDownloadDto(queueUrl, originUrl, cookie, userAgent, referer,
                channel, serialNum, UrlTypeEnum.URL_LIST.getValue(), processorType, charset, pretreatmentParam,
                pretreatmentType, proxyFlag, persistenceType, extraJson, loginAccountList);
        Boolean success = downLoadIntegrationService.httpDownLoad(downLoadParam);

        logger.debug("execute job: {}, download params:{}", context.getJobDetail().getKey(), downLoadParam);

        // Boolean.TRUE.equals treats a null result as a failure instead of throwing on unboxing.
        if (Boolean.TRUE.equals(success)) {
            // The download center accepted the list URL: record it in the crawled-queue set
            redisClient.sadd(TaskKeyConstant.getQueueCatchSet(serialNum), queueUrl);
            logger.info("execute queue url success ,serialNum: {}, redis key: {}, url:{} ", serialNum, queueKey, queueUrl);
        } else {
            logger.error("execute queue url: {} error ,serialNum: {}, push to the end of the list, redis key: {}",
                    queueUrl, serialNum, queueKey);
            // NOTE(review): the message says "end of the list", but lpush is paired with the lpop
            // used in execute(), so the URL is presumably retried first - confirm the intended end.
            // NOTE(review): the per-URL JSON was removed by hgetAnddel above and is not restored
            // here, so a retry appears to run without its overrides - confirm.
            redisClient.lpush(queueKey, queueUrl);

        }
        logger.info(MonitorConstant.MONITOR, MonitorHelper.ofSuccessLog(MonitorConstant.PROD_END, serialNum, UrlTypeEnum.URL_LIST));
    }

    /**
     * Returns the per-URL override stored under {@code key}, or {@code fallback} when the
     * JSON object is absent, has no value for the key, or the value renders as an empty string.
     */
    private static String overrideOrDefault(JSONObject jsonObject, String key, String fallback) {
        if (jsonObject == null) {
            return fallback;
        }
        Object value = jsonObject.get(key);
        if (value == null) {
            return fallback;
        }
        String text = String.valueOf(value);
        return text.isEmpty() ? fallback : text;
    }
}
