package org.apache.iceberg.mr.mapreduce;

import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataTask;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.Partitioning;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.avro.Avro;
import org.apache.iceberg.common.DynMethods;
import org.apache.iceberg.data.CachingDeleteLoader;
import org.apache.iceberg.data.DeleteLoader;
import org.apache.iceberg.data.GenericDeleteFilter;
import org.apache.iceberg.data.IdentityPartitionConverters;
import org.apache.iceberg.data.avro.DataReader;
import org.apache.iceberg.data.orc.GenericOrcReader;
import org.apache.iceberg.data.parquet.GenericParquetReaders;
import org.apache.iceberg.encryption.EncryptedFiles;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.hive.HiveVersion;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.mapping.NameMappingParser;
import org.apache.iceberg.mr.hive.HiveIcebergInputFormat;
import org.apache.iceberg.mr.hive.IcebergAcidUtil;
import org.apache.iceberg.orc.ORC;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.util.PartitionUtil;

/* loaded from: input_file:org/apache/iceberg/mr/mapreduce/IcebergRecordReader.class */
/**
 * Record reader that serves the rows of every {@link FileScanTask} contained in the task group of an
 * {@link IcebergSplit} through one concatenated iterator.
 *
 * <p>Depending on the in-memory data model reported by {@code getInMemoryDataModel()}, a task is read
 * either through the reflectively resolved Hive vectorized reader or through the generic Avro / ORC /
 * Parquet readers with delete filtering applied.
 *
 * @param <T> type of the values handed out by {@link #getCurrentValue()}
 */
public final class IcebergRecordReader<T> extends AbstractIcebergRecordReader<T> {
    private static final String HIVE_VECTORIZED_READER_CLASS = "org.apache.iceberg.mr.hive.vector.HiveVectorizedReader";

    /**
     * Handle on the static {@code reader} method of {@link #HIVE_VECTORIZED_READER_CLASS}; {@code null}
     * unless {@code HiveVersion.min(HiveVersion.HIVE_3)} holds.
     */
    private static final DynMethods.StaticMethod HIVE_VECTORIZED_READER_BUILDER;

    static {
        // The vectorized reader is looked up by class name, and only for Hive 3 or later.
        if (HiveVersion.min(HiveVersion.HIVE_3)) {
            HIVE_VECTORIZED_READER_BUILDER = DynMethods.builder("reader")
                    .impl(HIVE_VECTORIZED_READER_CLASS, Table.class, Path.class, FileScanTask.class, Map.class,
                            TaskAttemptContext.class, Expression.class, Schema.class)
                    .buildStatic();
        } else {
            HIVE_VECTORIZED_READER_BUILDER = null;
        }
    }

    /** Tasks of the split being read; assigned in {@link #initialize}. */
    private Iterable<FileScanTask> tasks;

    /** Iterator over the rows of all tasks; stays {@code null} until {@link #initialize} has completed. */
    private CloseableIterator<T> currentIterator;

    /** Value most recently fetched by {@link #nextKeyValue()}. */
    private T current;

    /**
     * Initializes the parent reader, then captures the tasks of the split and opens the row iterator.
     *
     * @param inputSplit split to read; cast to {@link IcebergSplit}
     * @param taskAttemptContext context of the running task attempt
     */
    @Override // org.apache.iceberg.mr.mapreduce.AbstractIcebergRecordReader
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
        super.initialize(inputSplit, taskAttemptContext);
        this.tasks = ((IcebergSplit) inputSplit).taskGroup().tasks();
        this.currentIterator = nextTask();
    }

    /**
     * Builds the iterator over the concatenation of the iterables that {@link #open} yields for each
     * task. The result is wrapped in an {@code IcebergAcidUtil.VirtualColumnAwareIterator} when virtual
     * columns are requested and the execution is not vectorized.
     */
    private CloseableIterator<T> nextTask() {
        CloseableIterator<T> rows = CloseableIterable.concat(
                Iterables.transform(this.tasks, task -> open(task, this.expectedSchema))).iterator();
        // Same short-circuit order as before: the vectorization flag is only consulted when virtual
        // columns are requested.
        if (isFetchVirtualColumns() && !Utilities.getIsVectorized(this.conf)) {
            return new IcebergAcidUtil.VirtualColumnAwareIterator(rows, this.expectedSchema, this.conf, this.table);
        }
        return rows;
    }

    /**
     * Advances to the next value. Once the iterator is exhausted it is closed and {@code false} is
     * returned.
     *
     * @return {@code true} if a value is available through {@link #getCurrentValue()}
     * @throws IOException if closing the exhausted iterator fails
     */
    public boolean nextKeyValue() throws IOException {
        if (!this.currentIterator.hasNext()) {
            this.currentIterator.close();
            return false;
        }
        this.current = this.currentIterator.next();
        return true;
    }

    /** Returns the value fetched by the last successful {@link #nextKeyValue()} call. */
    public T getCurrentValue() {
        return this.current;
    }

    /**
     * Closes the underlying iterator, if one was opened.
     *
     * @throws IOException if the iterator fails to close
     */
    public void close() throws IOException {
        // The iterator is only assigned at the end of initialize(); closing a reader whose
        // initialization never ran or failed must not raise a NullPointerException.
        if (this.currentIterator != null) {
            this.currentIterator.close();
        }
    }

    /**
     * Opens a task through the Hive vectorized reader and applies residual filtering to the result.
     *
     * @throws IllegalArgumentException if the task's file is Avro, or if the Hive version is below 3
     */
    private CloseableIterable<T> openVectorized(FileScanTask fileScanTask, Schema schema) {
        Preconditions.checkArgument(!fileScanTask.file().format().equals(FileFormat.AVRO), "Vectorized execution is not yet supported for Iceberg avro tables. Please turn off vectorization and retry the query.");
        Preconditions.checkArgument(HiveVersion.min(HiveVersion.HIVE_3), "Vectorized read is unsupported for Hive 2 integration.");
        Path dataFilePath = new Path(fileScanTask.file().path().toString());
        Map<Integer, ?> constants = constantsMap(fileScanTask, HiveIdentityPartitionConverters::convertConstant);
        Expression residual = HiveIcebergInputFormat.residualForTask(fileScanTask, getContext().getConfiguration());
        return applyResidualFiltering(
                (CloseableIterable) HIVE_VECTORIZED_READER_BUILDER.invoke(
                        this.table, dataFilePath, fileScanTask, constants, getContext(), residual, schema),
                residual,
                schema);
    }

    /**
     * Opens a task with the generic readers. A data task is served from the rows carried by the task
     * itself, wrapped by an {@code IcebergInternalRecordWrapper}; any other task is read from its
     * decrypted data file with the reader matching the file format.
     *
     * <p>The return type is left raw because the caller passes the result to
     * {@code GenericDeleteFilter.filter} before casting it to {@code CloseableIterable<T>}.
     *
     * @throws UnsupportedOperationException if the file format is not Avro, ORC or Parquet
     */
    private CloseableIterable openGeneric(FileScanTask fileScanTask, Schema schema) {
        if (fileScanTask.isDataTask()) {
            IcebergInternalRecordWrapper wrapper =
                    new IcebergInternalRecordWrapper(this.table.schema().asStruct(), schema.asStruct());
            CloseableIterable<StructLike> taskRows = ((DataTask) fileScanTask).rows();
            return CloseableIterable.transform(taskRows, wrapper::wrap);
        }
        DataFile dataFile = fileScanTask.file();
        InputFile inputFile = this.table.encryption().decrypt(
                EncryptedFiles.encryptedInput(this.table.io().newInputFile(dataFile.path().toString()), dataFile.keyMetadata()));
        switch (dataFile.format()) {
            case AVRO:
                return newAvroIterable(inputFile, fileScanTask, schema);
            case ORC:
                return newOrcIterable(inputFile, fileScanTask, schema);
            case PARQUET:
                return newParquetIterable(inputFile, fileScanTask, schema);
            default:
                throw new UnsupportedOperationException(String.format("Cannot read %s file: %s", dataFile.format().name(), dataFile.path()));
        }
    }

    /**
     * Opens a single task according to the configured in-memory data model: {@code HIVE} goes through
     * {@link #openVectorized}, {@code GENERIC} through {@link #openGeneric} with a
     * {@link GenericDeleteFilter} applied on top.
     *
     * @throws UnsupportedOperationException for the {@code PIG} model and for any unknown model
     */
    private CloseableIterable<T> open(FileScanTask fileScanTask, Schema schema) {
        switch (getInMemoryDataModel()) {
            case PIG:
                throw new UnsupportedOperationException("Pig and Hive object models are not supported.");
            case HIVE:
                return openVectorized(fileScanTask, schema);
            case GENERIC:
                GenericDeleteFilter deleteFilter = new GenericDeleteFilter(this.table.io(), fileScanTask, this.table.schema(), schema) {
                    @Override // org.apache.iceberg.data.DeleteFilter
                    protected DeleteLoader newDeleteLoader() {
                        return new CachingDeleteLoader(this::loadInputFile, IcebergRecordReader.this.conf);
                    }
                };
                // The data file is read with the schema the delete filter requires, not the requested one.
                Schema requiredSchema = deleteFilter.requiredSchema();
                return (CloseableIterable<T>) deleteFilter.filter(openGeneric(fileScanTask, requiredSchema));
            default:
                throw new UnsupportedOperationException("Unsupported memory model");
        }
    }

    /** Builds an Avro read of the task's split of {@code inputFile} and applies residual filtering. */
    private CloseableIterable<T> newAvroIterable(InputFile inputFile, FileScanTask fileScanTask, Schema schema) {
        Expression residual = HiveIcebergInputFormat.residualForTask(fileScanTask, getContext().getConfiguration());
        Avro.ReadBuilder builder = Avro.read(inputFile)
                .project(schema)
                .split(fileScanTask.start(), fileScanTask.length());
        if (isReuseContainers()) {
            builder.reuseContainers();
        }
        if (getNameMapping() != null) {
            builder.withNameMapping(NameMappingParser.fromJson(getNameMapping()));
        }
        // The constants map is computed when the reader function is invoked, not here.
        builder.createReaderFunc((icebergSchema, avroSchema) ->
                DataReader.create(icebergSchema, avroSchema, constantsMap(fileScanTask, IdentityPartitionConverters::convertConstant)));
        return applyResidualFiltering(builder.build(), residual, schema);
    }

    /**
     * Builds a Parquet read of the task's split of {@code inputFile}, passing the residual expression
     * to the Parquet builder as a filter, and applies residual filtering to the rows.
     */
    private CloseableIterable<T> newParquetIterable(InputFile inputFile, FileScanTask fileScanTask, Schema schema) {
        Expression residual = HiveIcebergInputFormat.residualForTask(fileScanTask, getContext().getConfiguration());
        Parquet.ReadBuilder builder = Parquet.read(inputFile)
                .project(schema)
                .filter(residual)
                .caseSensitive(isCaseSensitive())
                .split(fileScanTask.start(), fileScanTask.length());
        if (isReuseContainers()) {
            builder.reuseContainers();
        }
        if (getNameMapping() != null) {
            builder.withNameMapping(NameMappingParser.fromJson(getNameMapping()));
        }
        // The constants map is computed when the reader function is invoked, not here.
        builder.createReaderFunc(fileSchema ->
                GenericParquetReaders.buildReader(schema, fileSchema, constantsMap(fileScanTask, IdentityPartitionConverters::convertConstant)));
        return applyResidualFiltering(builder.build(), residual, schema);
    }

    /**
     * Builds an ORC read of the task's split of {@code inputFile} and applies residual filtering. The
     * projection handed to ORC excludes constant and metadata columns (see
     * {@link #schemaWithoutConstantsAndMeta}), while the reader is built against the full
     * {@code schema} together with the constants map.
     */
    private CloseableIterable<T> newOrcIterable(InputFile inputFile, FileScanTask fileScanTask, Schema schema) {
        Map<Integer, ?> constants = constantsMap(fileScanTask, IdentityPartitionConverters::convertConstant);
        Schema fileProjection = schemaWithoutConstantsAndMeta(schema, constants);
        Expression residual = HiveIcebergInputFormat.residualForTask(fileScanTask, getContext().getConfiguration());
        ORC.ReadBuilder builder = ORC.read(inputFile)
                .project(fileProjection)
                .filter(residual)
                .caseSensitive(isCaseSensitive())
                .split(fileScanTask.start(), fileScanTask.length());
        if (getNameMapping() != null) {
            builder.withNameMapping(NameMappingParser.fromJson(getNameMapping()));
        }
        builder.createReaderFunc(fileSchema -> GenericOrcReader.buildReader(schema, fileSchema, constants));
        return applyResidualFiltering(builder.build(), residual, schema);
    }

    /**
     * Returns the constants map for the task, or an empty map when the expected schema selects neither
     * an identity partition source column of the task's spec nor the partition metadata column.
     *
     * @param fileScanTask task whose constants are computed
     * @param biFunction conversion passed through to {@code PartitionUtil.constantsMap}
     */
    private Map<Integer, ?> constantsMap(FileScanTask fileScanTask, BiFunction<Type, Object, Object> biFunction) {
        boolean projectsIdentityColumn =
                !TypeUtil.select(this.expectedSchema, fileScanTask.spec().identitySourceIds()).columns().isEmpty();
        boolean projectsPartitionStruct = this.expectedSchema.findField(MetadataColumns.PARTITION_COLUMN_ID) != null;
        if (projectsPartitionStruct || projectsIdentityColumn) {
            return PartitionUtil.constantsMap(fileScanTask, Partitioning.partitionType(this.table), biFunction);
        }
        return Collections.emptyMap();
    }

    /**
     * Returns {@code schema} without the fields whose ids are keys of {@code map}, without the fields
     * listed by {@code MetadataColumns.metadataFieldIds()}, and without the fields nested in the
     * partition metadata struct when that struct is part of {@code schema}.
     */
    private static Schema schemaWithoutConstantsAndMeta(Schema schema, Map<Integer, ?> map) {
        // Ids of the fields nested in the partition metadata struct; empty when the struct is absent.
        Set<Integer> partitionStructFieldIds = Optional.ofNullable(schema.findField(MetadataColumns.PARTITION_COLUMN_ID))
                .map(partitionField -> partitionField.type())
                .map(Type::asStructType)
                .map(structType -> structType.fields())
                .map(fields -> fields.stream().map(field -> field.fieldId()).collect(Collectors.toSet()))
                .orElseGet(Collections::emptySet);

        Set<Integer> excludedIds = Stream.of(map.keySet(), MetadataColumns.metadataFieldIds(), partitionStructFieldIds)
                .flatMap(Set::stream)
                .collect(Collectors.toSet());

        return TypeUtil.selectNot(schema, excludedIds);
    }
}
