package org.apache.mahout.utils.regex;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.lucene.analysis.Analyzer;
import org.apache.mahout.common.ClassUtils;

/* loaded from: input_file:BOOT-INF/lib/mahout-integration-0.12.2.jar:org/apache/mahout/utils/regex/RegexMapper.class */
public class RegexMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    public static final String REGEX = "regex";
    public static final String GROUP_MATCHERS = "regex.groups";
    public static final String TRANSFORMER_CLASS = "transformer.class";
    public static final String FORMATTER_CLASS = "formatter.class";
    private Pattern regex;
    private List<Integer> groupsToKeep;
    private RegexTransformer transformer = RegexUtils.IDENTITY_TRANSFORMER;
    private RegexFormatter formatter = RegexUtils.IDENTITY_FORMATTER;
    public static final String ANALYZER_NAME = "analyzerName";

    /* JADX INFO: Access modifiers changed from: protected */
    @Override // org.apache.hadoop.mapreduce.Mapper
    public void setup(Mapper<LongWritable, Text, LongWritable, Text>.Context context) throws IOException, InterruptedException {
        this.groupsToKeep = new ArrayList();
        Configuration configuration = context.getConfiguration();
        this.regex = Pattern.compile(configuration.get(REGEX));
        String[] strings = configuration.getStrings(GROUP_MATCHERS);
        if (strings != null) {
            for (String str : strings) {
                this.groupsToKeep.add(Integer.valueOf(Integer.parseInt(str)));
            }
        }
        this.transformer = (RegexTransformer) ClassUtils.instantiateAs(configuration.get(TRANSFORMER_CLASS, IdentityTransformer.class.getName()), RegexTransformer.class);
        String str2 = configuration.get("analyzerName");
        if (str2 != null && (this.transformer instanceof AnalyzerTransformer)) {
            ((AnalyzerTransformer) this.transformer).setAnalyzer((Analyzer) ClassUtils.instantiateAs(str2, Analyzer.class));
        }
        this.formatter = (RegexFormatter) ClassUtils.instantiateAs(configuration.get(FORMATTER_CLASS, IdentityFormatter.class.getName()), RegexFormatter.class);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    @Override // org.apache.hadoop.mapreduce.Mapper
    public void map(LongWritable longWritable, Text text, Mapper<LongWritable, Text, LongWritable, Text>.Context context) throws IOException, InterruptedException {
        String extract = RegexUtils.extract(text.toString(), this.regex, this.groupsToKeep, " ", this.transformer);
        if (extract.isEmpty()) {
            return;
        }
        context.write(longWritable, new Text(this.formatter.format(extract)));
    }
}
