package org.wikidata.query.rdf.tool.rdf;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ListMultimap;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.model.vocabulary.RDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikidata.query.rdf.common.WikibasePoint;
import org.wikidata.query.rdf.common.uri.OWL;
import org.wikidata.query.rdf.common.uri.Ontolex;
import org.wikidata.query.rdf.common.uri.Ontology;
import org.wikidata.query.rdf.common.uri.PropertyType;
import org.wikidata.query.rdf.common.uri.Provenance;
import org.wikidata.query.rdf.common.uri.RDFS;
import org.wikidata.query.rdf.common.uri.SKOS;
import org.wikidata.query.rdf.common.uri.SchemaDotOrg;
import org.wikidata.query.rdf.common.uri.UrisScheme;
import org.wikidata.query.rdf.tool.exception.ContainedException;

/* loaded from: input_file:org/wikidata/query/rdf/tool/rdf/Munger.class */
public final class Munger {
    private final UrisScheme uris;
    private final Set<String> limitLabelLanguages;
    private final List<String> singleLabelModeLanguages;
    private final boolean removeSiteLinks;
    private final boolean keepTypes;
    private String dumpFormatVersion;
    private final Map<String, FormatHandler> formatHandlers;
    private static final Logger log = LoggerFactory.getLogger((Class<?>) Munger.class);
    private static final Set<String> SKIPPED_TYPES = ImmutableSet.of(Ontology.ITEM, Ontology.Lexeme.LEXEME, Ontology.Lexeme.FORM, Ontology.Lexeme.SENSE);

    /* loaded from: input_file:org/wikidata/query/rdf/tool/rdf/Munger$BadSubjectException.class */
    public static class BadSubjectException extends ContainedException {
        private static final long serialVersionUID = -4869053066714948939L;

        /**
         * Creates the exception with a message listing the offending subjects
         * together with the entity-data and entity URI prefixes of the scheme.
         *
         * @param set subjects that could not be recognized
         * @param urisScheme scheme whose prefixes are quoted in the message
         */
        public BadSubjectException(Set<String> set, UrisScheme urisScheme) {
            super(describe(set, urisScheme));
        }

        /** Builds the exception message; locale-independent formatting. */
        private static String describe(Set<String> subjects, UrisScheme scheme) {
            return String.format(
                    Locale.ROOT,
                    "Unrecognized subjects:  %s.  Expected only sitelinks and subjects starting with %s and %s",
                    subjects,
                    scheme.entityData(),
                    scheme.entityURIs());
        }
    }

    /* loaded from: input_file:org/wikidata/query/rdf/tool/rdf/Munger$Builder.class */
    public static final class Builder {
        private final UrisScheme uris;
        private Collection<String> limitLabelLanguages;
        private List<String> singleLabelModeLanguages;
        private boolean removeSiteLinks;
        private boolean keepTypes;
        private final Map<String, FormatHandler> formatHandlers = new HashMap<>();

        /**
         * Starts a builder for the given URI scheme and registers a
         * {@link PointCoordinateSwitcher} for one dump format version: "0.0.2"
         * when the default coordinate order is LAT_LONG, "0.0.1" otherwise.
         */
        Builder(UrisScheme urisScheme) {
            this.uris = urisScheme;
            if (WikibasePoint.DEFAULT_ORDER == WikibasePoint.CoordinateOrder.LAT_LONG) {
                addFormatHandler("0.0.2", new PointCoordinateSwitcher());
            } else {
                addFormatHandler("0.0.1", new PointCoordinateSwitcher());
            }
        }

        /** Makes the built munger drop site link statements. */
        public Builder removeSiteLinks() {
            this.removeSiteLinks = true;
            return this;
        }

        /** Varargs convenience for {@link #limitLabelLanguages(Collection)}. */
        public Builder limitLabelLanguages(String... strArr) {
            return limitLabelLanguages(Arrays.asList(strArr));
        }

        /**
         * Restricts labels, descriptions and aliases to the given languages.
         * The collection is copied when {@link #build()} is called.
         */
        public Builder limitLabelLanguages(Collection<String> collection) {
            this.limitLabelLanguages = collection;
            return this;
        }

        /** Varargs convenience for {@link #singleLabelMode(List)}. */
        public Builder singleLabelMode(String... strArr) {
            return singleLabelMode(Arrays.asList(strArr));
        }

        /**
         * Enables single label mode: only one label and one description are
         * kept per entity, chosen by position of their language in this list.
         */
        public Builder singleLabelMode(List<String> list) {
            this.singleLabelModeLanguages = list;
            return this;
        }

        /** Sets whether type statements that are normally skipped are kept. */
        public Builder keepTypes(boolean z) {
            this.keepTypes = z;
            return this;
        }

        /** Registers (or replaces) the handler used for a dump format version. */
        @VisibleForTesting
        Builder addFormatHandler(String str, FormatHandler formatHandler) {
            this.formatHandlers.put(str, formatHandler);
            return this;
        }

        /**
         * Builds the munger from immutable snapshots of the configured values.
         *
         * @return a new {@link Munger}
         */
        public Munger build() {
            Set<String> labelLanguages =
                    this.limitLabelLanguages == null ? null : ImmutableSet.copyOf(this.limitLabelLanguages);
            // The list is reversed because the selection logic treats a higher
            // index as better, so the first configured language ends up winning.
            List<String> singleLabelLanguages =
                    this.singleLabelModeLanguages == null
                            ? null
                            : ImmutableList.copyOf(this.singleLabelModeLanguages).reverse();
            Map<String, FormatHandler> handlers = ImmutableMap.copyOf(this.formatHandlers);
            return new Munger(
                    this.uris, labelLanguages, singleLabelLanguages, this.removeSiteLinks, this.keepTypes, handlers);
        }
    }

    /* loaded from: input_file:org/wikidata/query/rdf/tool/rdf/Munger$FormatHandler.class */
    /**
     * Hook applied to every incoming statement for a specific dump format
     * version before the statement is filtered.
     */
    @FunctionalInterface
    public interface FormatHandler {
        /**
         * Transforms a statement.
         *
         * @param statement the incoming statement
         * @return {@code null} to drop the statement, a statement equal to the
         *         input to leave it untouched, or a different statement to
         *         replace it
         */
        Statement handle(Statement statement);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/wikidata/query/rdf/tool/rdf/Munger$MungeOperation.class */
    public class MungeOperation {
        private final String entityUri;
        private final Collection<Statement> statements;
        private final Resource entityUriImpl;
        private final SingleLabelModeWork singleLabelModeWorkForLabel;
        private final SingleLabelModeWork singleLabelModeWorkForDescription;
        private final NamespaceStatementPredicates predicates;
        private Literal revisionId;
        private Literal lastModified;
        private FormatHandler formatHandler;
        private final List<Statement> restoredStatements = new ArrayList();
        private final Set<String> subEntities = new HashSet();
        private final Set<String> siteLinks = new HashSet();
        private final Set<String> extraValidSubjects = new HashSet();
        private final ListMultimap<String, Statement> unknownSubjects = ArrayListMultimap.create();
        private final Set<Pair<URI, Literal>> dataStatements = new HashSet();

        /* JADX INFO: Access modifiers changed from: private */
        /* loaded from: input_file:org/wikidata/query/rdf/tool/rdf/Munger$MungeOperation$SingleLabelModeWork.class */
        public class SingleLabelModeWork {
            /** Best candidate seen so far, or null when none qualified. */
            private Statement bestStatement;
            /** Index of the best candidate's language; -1 means "none yet". */
            private int bestIndex = -1;

            private SingleLabelModeWork() {
            }

            /**
             * Offers a candidate statement. It is remembered only when its
             * language sits at a strictly higher index in the configured
             * language list than the current best.
             *
             * @return always false; the winner is re-added by
             *         {@link #addBestStatement(Collection)}
             */
            public boolean statement(Statement statement) {
                String language = MungeOperation.this.objectAsLiteral(statement).getLanguage();
                int rank = Munger.this.singleLabelModeLanguages.indexOf(language);
                if (rank > this.bestIndex) {
                    this.bestIndex = rank;
                    this.bestStatement = statement;
                }
                return false;
            }

            /** Appends the remembered best statement, if any, to the collection. */
            public void addBestStatement(Collection<Statement> collection) {
                if (this.bestStatement == null) {
                    return;
                }
                collection.add(this.bestStatement);
            }
        }

        MungeOperation(String str, Collection<Statement> collection) {
            this.statements = collection;
            this.entityUri = Munger.this.uris.entityIdToURI(str);
            this.entityUriImpl = new URIImpl(this.entityUri);
            if (Munger.this.singleLabelModeLanguages != null) {
                this.singleLabelModeWorkForLabel = new SingleLabelModeWork();
                this.singleLabelModeWorkForDescription = new SingleLabelModeWork();
            } else {
                this.singleLabelModeWorkForLabel = null;
                this.singleLabelModeWorkForDescription = null;
            }
            setFormatVersion(Munger.this.dumpFormatVersion);
            this.predicates = new NamespaceStatementPredicates(Munger.this.uris);
        }

        /**
         * Selects the format handler registered for the given version; the
         * handler becomes null when no handler is registered for it.
         */
        private void setFormatVersion(String str) {
            FormatHandler handlerForVersion = Munger.this.formatHandlers.get(str);
            this.formatHandler = handlerForVersion;
        }

        /* JADX INFO: Access modifiers changed from: private */
        /**
         * @return the revision id parsed from the captured literal, or -1 when
         *         no revision literal has been seen
         */
        public long getRevisionId() {
            return this.revisionId == null ? -1L : Long.parseLong(this.revisionId.stringValue());
        }

        /**
         * Filters and rewrites the statement collection in place.
         *
         * <p>Each statement is passed through the format handler (if any),
         * then through the subject-based filter, and finally through the
         * literal length check. Rewritten statements are parked in
         * {@code restoredStatements} and merged back at the end so the
         * collection is never added to while it is being iterated.
         *
         * <p>Every branch that removes the current element moves on to the
         * next one immediately: {@link Iterator#remove()} may only be called
         * once per {@code next()}, and a statement must be offered to the
         * filter only once because the filter records state.
         */
        public void munge() {
            Iterator<Statement> it = this.statements.iterator();
            while (it.hasNext()) {
                Statement current = it.next();
                if (this.formatHandler != null) {
                    Statement handled = this.formatHandler.handle(current);
                    if (handled == null) {
                        // The handler asked for the statement to be dropped.
                        it.remove();
                        continue;
                    }
                    if (!handled.equals(current)) {
                        // The handler replaced the statement: drop the original and
                        // keep the replacement only if it passes the filter.
                        // NOTE(review): replacements are not length-checked here — confirm
                        // whether handlers can produce over-long literals.
                        it.remove();
                        if (statement(handled)) {
                            this.restoredStatements.add(handled);
                        }
                        continue;
                    }
                }
                if (!statement(current)) {
                    it.remove();
                    continue;
                }
                Statement truncated = checkObjectLength(current);
                if (truncated != null) {
                    it.remove();
                    this.restoredStatements.add(truncated);
                }
            }
            finishSingleLabelMode();
            finishCommon();
        }

        /**
         * Produces a shortened copy of a statement whose literal object is too long.
         *
         * @param statement statement to inspect
         * @return a new statement with the literal cut to at most 32767 chars
         *         (language or datatype preserved), or {@code null} when the
         *         object is not a literal or is already short enough
         */
        private Statement checkObjectLength(Statement statement) {
            if (!(statement.getObject() instanceof Literal)) {
                return null;
            }
            final int maxLength = 32767;
            Literal literal = (Literal) statement.getObject();
            String label = literal.stringValue();
            if (label.length() <= maxLength) {
                return null;
            }
            int cut = maxLength;
            if (Character.isHighSurrogate(label.charAt(cut - 1))) {
                // Do not leave half of a surrogate pair at the end of the label.
                cut--;
            }
            String shortened = label.substring(0, cut);
            // Constant-first comparison: tolerates a literal without a datatype.
            Literal replacement = RDF.LANGSTRING.equals(literal.getDatatype())
                    ? new LiteralImpl(shortened, literal.getLanguage())
                    : new LiteralImpl(shortened, literal.getDatatype());
            return new StatementImpl(statement.getSubject(), statement.getPredicate(), replacement);
        }

        /**
         * Routes a statement to the filter matching its subject.
         *
         * <p>Checks run in a fixed order: entity data, statement, reference
         * and value namespaces, then entity URIs, then claim-property
         * subjects. Blank-node subjects are always kept; anything else goes
         * to the unknown-subject handler.
         *
         * @return true when the statement should be kept
         */
        private boolean statement(Statement statement) {
            String subject = statement.getSubject().stringValue();
            if (this.predicates.subjectInEntityDataNS(statement)) {
                return entityDataStatement(statement);
            } else if (this.predicates.subjectInStatementNS(statement)) {
                return entityStatementStatement(statement);
            } else if (this.predicates.subjectInReferenceNS(statement)) {
                return entityReferenceStatement(statement);
            } else if (this.predicates.subjectInValueNS(statement)) {
                return entityValueStatement(statement);
            } else if (Munger.this.uris.isEntityURI(subject)) {
                return entityStatement(statement);
            } else if (subject.startsWith(Munger.this.uris.property(PropertyType.CLAIM))) {
                return propertyStatement(statement);
            }
            boolean blankNodeSubject = statement.getSubject() instanceof BNode;
            return blankNodeSubject || unknownStatement(statement);
        }

        /**
         * Filter for statements whose subject is a property URI.
         *
         * @return true when the subject starts with the no-value property
         *         prefix or the predicate is rdf:type
         */
        private boolean propertyStatement(Statement statement) {
            String subject = statement.getSubject().stringValue();
            boolean noValueSubject = subject.startsWith(Munger.this.uris.property(PropertyType.NOVALUE));
            return noValueSubject
                    || statement.getPredicate().stringValue().equals(org.wikidata.query.rdf.common.uri.RDF.TYPE);
        }

        /**
         * Handles statements whose subject is in the entity-data namespace.
         *
         * <p>Captures the revision id and last-modified literals, switches the
         * format handler when a software version is announced, and records
         * version, date-modified and ontology-namespace predicates so that
         * {@code finishCommon} can re-add them with the entity as subject.
         *
         * @return always false: the original statement is never kept
         */
        private boolean entityDataStatement(Statement statement) {
            String predicate = statement.getPredicate().stringValue();
            boolean copyToEntity;
            switch (predicate) {
                case SchemaDotOrg.VERSION:
                    this.revisionId = objectAsLiteral(statement);
                    copyToEntity = true;
                    break;
                case SchemaDotOrg.DATE_MODIFIED:
                    this.lastModified = objectAsLiteral(statement);
                    copyToEntity = true;
                    break;
                case SchemaDotOrg.SOFTWARE_VERSION:
                    // Only affects how later statements are handled; not copied.
                    setFormatVersion(objectAsLiteral(statement).stringValue());
                    copyToEntity = false;
                    break;
                default:
                    copyToEntity = predicate.startsWith(Ontology.NAMESPACE);
                    break;
            }
            if (copyToEntity) {
                this.dataStatements.add(
                        new ImmutablePair<URI, Literal>(statement.getPredicate(), objectAsLiteral(statement)));
            }
            return false;
        }

        /**
         * Filter for statements whose subject is an entity URI.
         *
         * <p>Only the entity being munged and its registered sub-entities are
         * accepted; statements about any other entity are dropped.
         *
         * @return true when the statement should be kept
         */
        private boolean entityStatement(Statement statement) {
            String subject = statement.getSubject().stringValue();
            if (!subject.equals(this.entityUri) && !this.subEntities.contains(subject)) {
                return false;
            }
            switch (statement.getPredicate().stringValue()) {
                case org.wikidata.query.rdf.common.uri.RDF.TYPE:
                    return Munger.this.keepTypes
                            || !Munger.SKIPPED_TYPES.contains(statement.getObject().stringValue());
                case SchemaDotOrg.NAME:
                case SKOS.PREF_LABEL:
                    return false;
                case RDFS.LABEL:
                    // Not kept for ids starting with "L" or for sub-entities.
                    return !Munger.this.uris.entityURItoId(subject).startsWith("L")
                            && !this.subEntities.contains(subject)
                            && limitLabelLanguage(statement)
                            && singleLabelMode(this.singleLabelModeWorkForLabel, statement);
                case SchemaDotOrg.DESCRIPTION:
                    return limitLabelLanguage(statement)
                            && singleLabelMode(this.singleLabelModeWorkForDescription, statement);
                case SKOS.ALT_LABEL:
                    return limitLabelLanguage(statement);
                case OWL.SAME_AS:
                    return true;
                case Ontolex.LEXICAL_FORM:
                case Ontolex.SENSE_PREDICATE:
                    // The object becomes a sub-entity whose statements are accepted too.
                    this.subEntities.add(statement.getObject().stringValue());
                    return true;
                default:
                    return entityStatementWithUnrecognizedPredicate(statement);
            }
        }

        /**
         * Fallback for entity statements with a predicate that has no special
         * handling. When the statement is a reification statement, its object
         * is registered as an additional valid subject.
         *
         * @return always true
         */
        private boolean entityStatementWithUnrecognizedPredicate(Statement statement) {
            if (this.predicates.reificationStatement(statement)) {
                registerExtraValidSubject(statement.getObject().stringValue());
            }
            return true;
        }

        /**
         * Filter for statements whose subject is in the statement namespace.
         *
         * <p>Statements with a subject that has not been registered as valid
         * yet are parked in {@code unknownSubjects} (except constraint
         * violations, which are dropped) so they can be restored if the
         * subject is registered later.
         *
         * @return true when the statement should be kept
         */
        private boolean entityStatementStatement(Statement statement) {
            String subject = statement.getSubject().stringValue();
            switch (statement.getPredicate().stringValue()) {
                case org.wikidata.query.rdf.common.uri.RDF.TYPE:
                    if (Munger.this.keepTypes) {
                        return true;
                    }
                    if (statement.getObject().stringValue().equals(Ontology.STATEMENT)) {
                        return false;
                    }
                    // Other types fall through to the generic handling below.
                    break;
                case Provenance.WAS_DERIVED_FROM:
                    if (this.predicates.objectInReferenceNS(statement)) {
                        registerExtraValidSubject(statement.getObject().stringValue());
                    }
                    return true;
                default:
                    break;
            }
            if (this.extraValidSubjects.contains(subject)) {
                if (this.predicates.objectInValueNS(statement)) {
                    registerExtraValidSubject(statement.getObject().stringValue());
                }
                return true;
            }
            if (statement.getPredicate().stringValue().equals(Ontology.CONSTRAINT_VIOLATION)) {
                return false;
            }
            this.unknownSubjects.put(subject, statement);
            return false;
        }

        /**
         * Filter for statements whose subject is in the reference namespace.
         *
         * <p>Type statements are kept when types are retained and dropped
         * when they are reference-type statements. Otherwise the subject
         * must already be registered as valid; if not, the statement is
         * parked in {@code unknownSubjects}.
         *
         * @return true when the statement should be kept
         */
        private boolean entityReferenceStatement(Statement statement) {
            String object = statement.getObject().stringValue();
            String subject = statement.getSubject().stringValue();
            boolean isType = StatementPredicates.typeStatement(statement);
            if (isType && Munger.this.keepTypes) {
                return true;
            }
            if (isType && StatementPredicates.referenceTypeStatement(statement)) {
                return false;
            }
            if (this.extraValidSubjects.contains(subject)) {
                if (this.predicates.tripleRefValue(statement)) {
                    registerExtraValidSubject(object);
                }
                return true;
            }
            this.unknownSubjects.put(subject, statement);
            return false;
        }

        /**
         * Filter for statements whose subject is in the value namespace.
         *
         * <p>Type statements are always kept. A normalized-quantity statement
         * registers its object as a valid subject. Every non-type statement
         * is kept only when its own subject is registered as valid; otherwise
         * it is parked in {@code unknownSubjects}.
         *
         * @return true when the statement should be kept
         */
        private boolean entityValueStatement(Statement statement) {
            String subject = statement.getSubject().stringValue();
            switch (statement.getPredicate().stringValue()) {
                case org.wikidata.query.rdf.common.uri.RDF.TYPE:
                    return true;
                case Ontology.Quantity.NORMALIZED:
                    registerExtraValidSubject(statement.getObject().stringValue());
                    break;
                default:
                    break;
            }
            if (this.extraValidSubjects.contains(subject)) {
                return true;
            }
            this.unknownSubjects.put(subject, statement);
            return false;
        }

        /**
         * Filter for statements whose subject matched none of the known
         * namespaces.
         *
         * <p>Wikigroup statements are kept. Statements about known site links
         * are kept unless site links are being removed. Statements about
         * registered valid subjects are kept. A schema:Article type statement
         * declares its subject a site link and releases any statements
         * previously parked for that subject. Everything else is parked in
         * {@code unknownSubjects}.
         *
         * @return true when the statement should be kept
         */
        private boolean unknownStatement(Statement statement) {
            String predicate = statement.getPredicate().stringValue();
            String subject = statement.getSubject().stringValue();
            if (predicate.equals(Ontology.WIKIGROUP)) {
                return true;
            }
            if (this.siteLinks.contains(subject)) {
                return !Munger.this.removeSiteLinks;
            }
            if (this.extraValidSubjects.contains(subject)) {
                return true;
            }
            boolean declaresSiteLink = predicate.equals(org.wikidata.query.rdf.common.uri.RDF.TYPE)
                    && statement.getObject().stringValue().equals(SchemaDotOrg.ARTICLE);
            if (!declaresSiteLink) {
                this.unknownSubjects.put(subject, statement);
                return false;
            }
            this.siteLinks.add(subject);
            // Statements parked for this subject are no longer unknown in either case.
            List<Statement> parked = this.unknownSubjects.removeAll(subject);
            if (Munger.this.removeSiteLinks) {
                return false;
            }
            this.restoredStatements.addAll(parked);
            return true;
        }

        /**
         * Applies the optional language restriction to a literal statement.
         *
         * @return true when no restriction is configured, or when the
         *         literal's language is non-null and in the configured set
         */
        private boolean limitLabelLanguage(Statement statement) {
            Set<String> allowed = Munger.this.limitLabelLanguages;
            if (allowed != null) {
                String language = objectAsLiteral(statement).getLanguage();
                return language != null && allowed.contains(language);
            }
            return true;
        }

        /**
         * Offers the statement to the single-label tracker.
         *
         * @return true when no tracker is given; otherwise whatever the
         *         tracker returns for the statement
         */
        private boolean singleLabelMode(SingleLabelModeWork singleLabelModeWork, Statement statement) {
            if (singleLabelModeWork == null) {
                return true;
            }
            return singleLabelModeWork.statement(statement);
        }

        /**
         * Final step of a munge: reports subjects that stayed unknown, checks
         * that mandatory metadata was seen, and merges the recorded data
         * statements and restored statements back into the collection.
         *
         * @throws BadSubjectException when unknown subjects remain and nothing
         *         else survived the munge
         * @throws ContainedException when no revision id or no last-modified
         *         date was found
         */
        private void finishCommon() {
            if (!this.unknownSubjects.isEmpty()) {
                boolean nothingKept = this.statements.isEmpty() && this.restoredStatements.isEmpty();
                if (nothingKept) {
                    throw new BadSubjectException(this.unknownSubjects.keySet(), Munger.this.uris);
                }
                Munger.log.info("Unrecognized subjects: {} while processing {}.  Expected only sitelinks and subjects starting with {} and {}", this.unknownSubjects.keySet(), this.entityUri, Munger.this.uris.entityData(), Munger.this.uris.entityURIs());
                // Log a bounded sample of the offending statements.
                int logged = 0;
                for (Map.Entry<String, Statement> unknown : this.unknownSubjects.entries()) {
                    if (logged >= 20) {
                        break;
                    }
                    Statement offending = unknown.getValue();
                    Munger.log.info("Unrecognized statement: s:{} p:{} o:{}", offending.getSubject(), offending.getPredicate(), offending.getObject());
                    logged++;
                }
                if (this.unknownSubjects.size() > 20) {
                    Munger.log.info("More than 20 unrecognized statements, further statements not logged.");
                }
            }
            if (this.revisionId == null) {
                throw new ContainedException("Didn't get a revision id for " + this.statements);
            }
            if (this.lastModified == null) {
                throw new ContainedException("Didn't get a last modified date for " + this.statements);
            }
            // Entity-data facts are re-emitted with the entity itself as subject.
            for (Pair<URI, Literal> fact : this.dataStatements) {
                Statement rewritten = new StatementImpl(this.entityUriImpl, fact.getLeft(), fact.getRight());
                this.statements.add(rewritten);
            }
            this.statements.addAll(this.restoredStatements);
        }

        /**
         * When single label mode is configured, adds the best label and the
         * best description (if any were found) back to the statements.
         */
        private void finishSingleLabelMode() {
            if (Munger.this.singleLabelModeLanguages == null) {
                return;
            }
            this.singleLabelModeWorkForLabel.addBestStatement(this.statements);
            this.singleLabelModeWorkForDescription.addBestStatement(this.statements);
        }

        /**
         * Marks a subject as valid and restores every statement that was
         * parked as unknown for it.
         */
        private void registerExtraValidSubject(String str) {
            this.extraValidSubjects.add(str);
            List<Statement> parked = this.unknownSubjects.removeAll(str);
            this.restoredStatements.addAll(parked);
        }

        /* JADX INFO: Access modifiers changed from: private */
        /**
         * Returns the object of a statement as a {@link Literal}.
         *
         * @param statement statement whose object is expected to be a literal
         * @return the object, or {@code null} when the statement has no object
         * @throws ContainedException when the object is present but is not a literal
         */
        public Literal objectAsLiteral(Statement statement) {
            Object object = statement.getObject();
            // Explicit type check instead of catching ClassCastException; a null
            // object is passed through exactly as the plain cast did.
            if (object != null && !(object instanceof Literal)) {
                throw new ContainedException("Expected Literal in object position of:  " + statement);
            }
            return (Literal) object;
        }
    }

    /**
     * Creates a munger; instances are obtained through the {@link Builder}.
     * All arguments are stored as given, without copying.
     */
    private Munger(
            UrisScheme uris,
            Set<String> limitLabelLanguages,
            List<String> singleLabelModeLanguages,
            boolean removeSiteLinks,
            boolean keepTypes,
            Map<String, FormatHandler> formatHandlers) {
        this.uris = uris;
        this.limitLabelLanguages = limitLabelLanguages;
        this.singleLabelModeLanguages = singleLabelModeLanguages;
        this.removeSiteLinks = removeSiteLinks;
        this.keepTypes = keepTypes;
        this.formatHandlers = formatHandlers;
    }

    /**
     * Sets the dump format version used as the starting format for
     * subsequent munge operations.
     *
     * @param str format version string
     */
    public void setFormatVersion(String str) {
        dumpFormatVersion = str;
    }

    /** @return the dump format version currently set, possibly null */
    @VisibleForTesting
    String getDumpFormatVersion() {
        return dumpFormatVersion;
    }

    /**
     * Munges the statements of one entity in place.
     *
     * @param str id of the entity the statements describe
     * @param collection statements to munge; modified in place
     * @return the revision id found while munging
     */
    public long munge(String str, Collection<Statement> collection) {
        MungeOperation operation = new MungeOperation(str, collection);
        operation.munge();
        long revision = operation.getRevisionId();
        return revision;
    }

    /**
     * Entry point for configuring a {@link Munger}.
     *
     * @param urisScheme URI scheme the munger will use
     * @return a fresh {@link Builder} bound to the scheme
     */
    public static Builder builder(UrisScheme urisScheme) {
        return new Builder(urisScheme);
    }
}
