package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperation;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandlerException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceAllocator;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.DockerLinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDevice;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu.GpuDiscoverer;
import org.apache.hadoop.yarn.server.nodemanager.nodelabels.AbstractNodeLabelsProvider;

/* loaded from: input_file:org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/gpu/GpuResourceHandlerImpl.class */
/**
 * {@link ResourceHandler} that assigns GPU devices to containers through a
 * {@link GpuResourceAllocator} and manages a per-container cgroup under the
 * {@code DEVICES} cgroup controller.
 *
 * <p>{@link #preStart(Container)} and {@link #postComplete(ContainerId)} are
 * {@code synchronized} on this handler; the other methods are not.
 */
public class GpuResourceHandlerImpl implements ResourceHandler {
    static final Log LOG = LogFactory.getLog(GpuResourceHandlerImpl.class);

    /** Argument name preceding the comma-joined minor numbers of the GPUs denied to a container. */
    public static final String EXCLUDED_GPUS_CLI_OPTION = "--excluded_gpus";

    /** Argument name preceding the container id in the privileged GPU operation. */
    public static final String CONTAINER_ID_CLI_OPTION = "--container_id";

    private final GpuResourceAllocator gpuAllocator;
    private final CGroupsHandler cGroupsHandler;
    private final PrivilegedOperationExecutor privilegedOperationExecutor;

    /**
     * Creates the handler and a fresh {@link GpuResourceAllocator} bound to {@code context}.
     *
     * @param context NodeManager context handed to the allocator
     * @param cGroupsHandler handler used to initialize, create and delete device cgroups
     * @param privilegedOperationExecutor executor used to run the privileged GPU operation
     */
    public GpuResourceHandlerImpl(Context context, CGroupsHandler cGroupsHandler, PrivilegedOperationExecutor privilegedOperationExecutor) {
        this.cGroupsHandler = cGroupsHandler;
        this.privilegedOperationExecutor = privilegedOperationExecutor;
        this.gpuAllocator = new GpuResourceAllocator(context);
    }

    /**
     * Registers every GPU reported by {@link GpuDiscoverer} with the allocator and
     * initializes the {@code DEVICES} cgroup controller.
     *
     * @param configuration not read by this implementation
     * @return always {@code null}
     * @throws ResourceHandlerException if no usable GPU is reported, or if GPU
     *         discovery fails with a {@link YarnException}
     */
    @Override
    public List<PrivilegedOperation> bootstrap(Configuration configuration) throws ResourceHandlerException {
        try {
            List<GpuDevice> usableGpus = GpuDiscoverer.getInstance().getGpusUsableByYarn();
            if (usableGpus == null || usableGpus.isEmpty()) {
                String message = "GPU is enabled on the NodeManager, but couldn't find any usable GPU devices, please double check configuration.";
                LOG.error(message);
                throw new ResourceHandlerException(message);
            }
            for (GpuDevice gpu : usableGpus) {
                this.gpuAllocator.addGpu(gpu);
            }
            this.cGroupsHandler.initializeCGroupController(CGroupsHandler.CGroupController.DEVICES);
            return null;
        } catch (YarnException e) {
            LOG.error("Exception when trying to get usable GPU device", e);
            throw new ResourceHandlerException(e);
        }
    }

    /**
     * Assigns GPUs to the container, creates its device cgroup and, for containers
     * that did not request Docker, runs the privileged GPU operation that passes the
     * denied GPUs' minor numbers.
     *
     * @param container the container about to be started
     * @return {@code null} when a Docker container was requested; otherwise a
     *         single-element list holding the {@code ADD_PID_TO_CGROUP} operation
     *         for the container's device cgroup tasks path
     * @throws ResourceHandlerException if the privileged GPU operation fails (the
     *         container's device cgroup is deleted first), or if a delegate throws it
     */
    @Override
    public synchronized List<PrivilegedOperation> preStart(Container container) throws ResourceHandlerException {
        String containerId = container.getContainerId().toString();
        GpuResourceAllocator.GpuAllocation allocation = this.gpuAllocator.assignGpus(container);
        this.cGroupsHandler.createCGroup(CGroupsHandler.CGroupController.DEVICES, containerId);

        // Docker containers get the cgroup but no privileged GPU operation from this handler.
        // NOTE(review): presumably the Docker runtime applies device isolation itself - confirm.
        if (DockerLinuxContainerRuntime.isDockerContainerRequested(container.getLaunchContext().getEnvironment())) {
            return null;
        }

        try {
            PrivilegedOperation gpuOperation = new PrivilegedOperation(
                    PrivilegedOperation.OperationType.GPU,
                    Arrays.asList(CONTAINER_ID_CLI_OPTION, containerId));

            // The excluded-GPUs argument is only appended when at least one GPU is denied.
            if (!allocation.getDeniedGPUs().isEmpty()) {
                List<Integer> deniedMinorNumbers = new ArrayList<Integer>();
                for (GpuDevice denied : allocation.getDeniedGPUs()) {
                    deniedMinorNumbers.add(Integer.valueOf(denied.getMinorNumber()));
                }
                gpuOperation.appendArgs(Arrays.asList(
                        EXCLUDED_GPUS_CLI_OPTION,
                        StringUtils.join(AbstractNodeLabelsProvider.NODE_LABELS_SEPRATOR, deniedMinorNumbers)));
            }
            this.privilegedOperationExecutor.executePrivilegedOperation(gpuOperation, true);

            List<PrivilegedOperation> operations = new ArrayList<PrivilegedOperation>();
            operations.add(new PrivilegedOperation(
                    PrivilegedOperation.OperationType.ADD_PID_TO_CGROUP,
                    PrivilegedOperation.CGROUP_ARG_PREFIX
                            + this.cGroupsHandler.getPathForCGroupTasks(CGroupsHandler.CGroupController.DEVICES, containerId)));
            return operations;
        } catch (PrivilegedOperationException e) {
            // Remove the cgroup created above so a failed start does not leave it behind.
            this.cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, containerId);
            LOG.warn("Could not update cgroup for container", e);
            throw new ResourceHandlerException(e);
        }
    }

    /**
     * Returns the allocator created by this handler's constructor.
     *
     * @return the GPU allocator backing this handler
     */
    public GpuResourceAllocator getGpuAllocator() {
        return this.gpuAllocator;
    }

    /**
     * Asks the allocator to recover the GPUs assigned to the given container.
     *
     * @param containerId id of the container being reacquired
     * @return always {@code null}
     * @throws ResourceHandlerException if the allocator throws it
     */
    @Override
    public List<PrivilegedOperation> reacquireContainer(ContainerId containerId) throws ResourceHandlerException {
        this.gpuAllocator.recoverAssignedGpus(containerId);
        return null;
    }

    /**
     * Cleans up the container's GPU assignment in the allocator and deletes its
     * device cgroup.
     *
     * @param containerId id of the completed container
     * @return always {@code null}
     * @throws ResourceHandlerException if a delegate throws it
     */
    @Override
    public synchronized List<PrivilegedOperation> postComplete(ContainerId containerId) throws ResourceHandlerException {
        this.gpuAllocator.cleanupAssignGpus(containerId);
        this.cGroupsHandler.deleteCGroup(CGroupsHandler.CGroupController.DEVICES, containerId.toString());
        return null;
    }

    /**
     * Does nothing.
     *
     * @return always {@code null}
     */
    @Override
    public List<PrivilegedOperation> teardown() throws ResourceHandlerException {
        return null;
    }
}
