/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.analysis.lang;

import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.parse.Parse;

/**
 * Indexing filter that adds a {@code lang} field to each document and, when a
 * language whitelist is configured, rejects documents whose language is not
 * on that list.
 *
 * <p>The language is looked up in this order:
 * <ol>
 *   <li>the parse metadata entry {@code language};</li>
 *   <li>the content metadata entry {@code Content-Language};</li>
 *   <li>the literal {@code unknown} when neither yields a non-empty value.</li>
 * </ol>
 *
 * <p>The whitelist is read from the configuration property
 * {@code lang.index.languages} (comma-separated). An empty whitelist means
 * every language is accepted.
 */
public class LanguageIndexingFilter implements IndexingFilter {
    /** Parse metadata key holding the detected language. */
    private static final String PARSE_META_LANGUAGE = "language";
    /** Content metadata key consulted when the parse metadata has no language. */
    private static final String CONTENT_META_LANGUAGE = "Content-Language";
    /** Value used when no language could be determined. */
    private static final String UNKNOWN_LANGUAGE = "unknown";
    /** Name of the document field that receives the language. */
    private static final String LANG_FIELD = "lang";
    /** Configuration property listing the languages that may be indexed. */
    private static final String INDEX_LANGUAGES_KEY = "lang.index.languages";

    private Configuration conf;

    // Starts out empty ("accept everything") so that filter() does not fail
    // with a NullPointerException if it is invoked before setConf().
    private Set<String> indexLangs = new HashSet<String>();

    /**
     * Tags {@code doc} with its language and applies the language whitelist.
     *
     * @param doc     document being built; receives the {@code lang} field
     * @param parse   parse result whose parse/content metadata is inspected
     * @param url     not used by this filter
     * @param datum   not used by this filter
     * @param inlinks not used by this filter
     * @return {@code doc} with the {@code lang} field added, or {@code null}
     *         when a whitelist is configured and does not contain the
     *         document's language (presumably signalling the caller to skip
     *         the document -- confirm against the IndexingFilter contract)
     * @throws IndexingException declared by the interface; not thrown here
     */
    public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) throws IndexingException {
        String lang = parse.getData().getParseMeta().get(PARSE_META_LANGUAGE);
        if (lang == null) {
            lang = parse.getData().getContentMeta().get(CONTENT_META_LANGUAGE);
        }
        if (lang == null || lang.length() == 0) {
            lang = UNKNOWN_LANGUAGE;
        }
        // A non-empty whitelist restricts indexing to the listed languages.
        // The comparison is an exact, case-sensitive string match.
        if (!this.indexLangs.isEmpty() && !this.indexLangs.contains(lang)) {
            return null;
        }
        // Explicit cast kept from the original so overload resolution on
        // NutchDocument.add is unchanged.
        doc.add(LANG_FIELD, (Object) lang);
        return doc;
    }

    /**
     * Stores the configuration and rebuilds the language whitelist from the
     * {@code lang.index.languages} property.
     *
     * <p>Each entry is trimmed and blank entries are discarded, so a value
     * written as {@code "en, de"} matches {@code de} rather than {@code " de"}.
     *
     * @param conf configuration to read from; must not be {@code null}
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
        Set<String> langs = new HashSet<String>();
        for (String entry : conf.getStringCollection(INDEX_LANGUAGES_KEY)) {
            if (entry == null) {
                continue;
            }
            String code = entry.trim();
            if (code.length() > 0) {
                langs.add(code);
            }
        }
        // Assign once the set is fully built so filter() never observes a
        // partially populated whitelist through this field.
        this.indexLangs = langs;
    }

    /**
     * Returns the configuration last passed to {@link #setConf(Configuration)}.
     *
     * @return the stored configuration, or {@code null} if none was set
     */
    public Configuration getConf() {
        return this.conf;
    }
}

