package com.omni.ads.model.adssearchkeyword;

import java.util.Locale;
import java.util.regex.Pattern;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;

@Slf4j
public class NormalizeKeywordUtils {

    /**
     * 归一化关键词 - 1. 转小写  2.去空格制表符等 3. 去标点,符号分隔符等
     *
     * @param keyword 原始关键词
     * @return 归一化后的关键词
     */
    public static String normalizeKeyword(String keyword) {
        if (keyword == null) {
            return StringUtils.EMPTY;
        }
        try {
            return keyword.toLowerCase().replaceAll("[^a-zA-Z0-9\\u4e00-\\u9fa5]", StringUtils.EMPTY);
        } catch (Exception ex) {
            log.error("normalize the keyword failed, keyword = " + keyword, ex);
        }
        return keyword;
    }

}
