Shortcuts

Source code for hfai.distributed.nccl_opt_control

import os
import subprocess

class HFAI_NCCL_OPT_LEVEL(object):
    """Named integer levels selecting how much NCCL tuning is applied.

    The values are plain ints so that callers can compare them with the
    usual ordering operators.
    """

    # Disable every NCCL optimization option.
    DISABLED = 0

    # Pick NCCL optimizations automatically, conservative strategy.
    AUTO = 1

    # Pick NCCL optimizations automatically, aggressive strategy: turn on
    # every optimization that can be enabled.
    FULL = 3

    # Start from AUTO and let the user override individual options.
    COVER_AUTO = 10

    # Reserved interface: the user chooses the optimization combination.
    CUSTOM = 101

# OPT GRAPH config
def set_nccl_graph_topo_opt(custom_graph_file=None):
    """Point ``NCCL_GRAPH_FILE`` at a graph description file.

    Args:
        custom_graph_file: Path of the graph file to use. Anything that is
            not a non-empty ``str`` selects the bundled default, a file
            named ``graph1.txt`` located next to this module.
    """
    use_custom = isinstance(custom_graph_file, str) and bool(custom_graph_file)
    if use_custom:
        chosen_path = custom_graph_file
    else:
        # Resolve symlinks so the default is found beside the real module file.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        chosen_path = os.path.join(module_dir, "graph1.txt")
    os.environ['NCCL_GRAPH_FILE'] = chosen_path

def set_nccl_min_nchannels(param=None):
    """Set ``NCCL_MIN_NCHANNELS`` in the process environment.

    Args:
        param: Channel count, either as a ``str`` (used verbatim) or as an
            ``int`` (converted with ``str()``). Any other value, including
            ``None`` and ``bool``, selects the default ``'1'``.
    """
    if isinstance(param, int) and not isinstance(param, bool):
        # A channel count is naturally an integer; convert it instead of
        # silently replacing it with the default.
        param = str(param)
    elif not isinstance(param, str):
        param = '1'
    os.environ['NCCL_MIN_NCHANNELS'] = param

def set_nccl_max_nchannels(param=None):
    """Set ``NCCL_MAX_NCHANNELS`` in the process environment.

    Args:
        param: Channel count, either as a ``str`` (used verbatim) or as an
            ``int`` (converted with ``str()``). Any other value, including
            ``None`` and ``bool``, selects the default ``'1'``.
    """
    if isinstance(param, int) and not isinstance(param, bool):
        # A channel count is naturally an integer; convert it instead of
        # silently replacing it with the default.
        param = str(param)
    elif not isinstance(param, str):
        param = '1'
    os.environ['NCCL_MAX_NCHANNELS'] = param

def set_nccl_algorithm(param=None):
    """Set ``NCCL_ALGO`` in the process environment.

    Args:
        param: Algorithm name as a ``str``, used verbatim. Any non-``str``
            value (including the default ``None``) selects ``'Ring'``.
    """
    if not isinstance(param, str):
        param = 'Ring'
    os.environ['NCCL_ALGO'] = param

def set_nccl_proto(param=None):
    """Set ``NCCL_PROTO`` in the process environment.

    Args:
        param: Protocol name as a ``str``, used verbatim. Any non-``str``
            value (including the default ``None``) selects ``'Simple'``.
    """
    if not isinstance(param, str):
        param = 'Simple'
    os.environ['NCCL_PROTO'] = param

# GPU Direct RDMA 
def set_nccl_GPU_direct_rdma(flag=True):
    """Switch the NCCL GPU Direct RDMA environment settings.

    Args:
        flag: Truthy to enable: sets ``NCCL_NET_GDR_LEVEL`` to ``'SYS'`` and
            ``NCCL_NET_GDR_READ`` to ``'1'``. Falsy to disable GDR read:
            sets ``NCCL_NET_GDR_READ`` to ``'0'`` and leaves
            ``NCCL_NET_GDR_LEVEL`` untouched.
    """
    if flag:
        os.environ.update(NCCL_NET_GDR_LEVEL='SYS', NCCL_NET_GDR_READ='1')
        return
    # Disable GDR_READ only.
    os.environ['NCCL_NET_GDR_READ'] = '0'

def check_hardware_compatibility(topo_output=None):
    """Report whether the node topology matches the layout the tuning targets.

    The connectivity matrix printed by ``nvidia-smi topo -mp`` is scanned
    row by row (the header row and everything from the blank line before
    ``Legend:`` onwards are skipped). The node qualifies only when all of
    the following hold:

    * exactly 8 rows start with ``GPU``;
    * exactly 1 row starts with ``mlx5`` (the IB card);
    * on an ``mlx5`` row, at least 4 ``SYS`` cells appear before the ``X``
      cell. The original author's note reads this as "the IB card is on the
      second socket" and marks it as valid only in NPS1 mode;
    * the 6th and 7th GPU rows (GPU5 and GPU6) both contain ``PHB``.

    Args:
        topo_output (str, optional): Text of an already captured
            ``nvidia-smi topo -mp`` run. When ``None`` (the default) the
            command is executed here.

    Returns:
        bool: ``True`` when every condition above holds. ``False`` otherwise,
        including when ``nvidia-smi`` cannot be run, exits with an error,
        prints bytes that are not valid UTF-8, or prints no ``Legend:`` line.
    """
    if topo_output is None:
        try:
            raw = subprocess.check_output(['nvidia-smi', 'topo', '-mp'])
            topo_output = raw.decode('utf-8')
        except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
            # Best effort: a missing or failing nvidia-smi means "not compatible".
            return False

    lines = topo_output.split('\n')
    try:
        legend_idx = lines.index('Legend:')
    except ValueError:
        # Unexpected output format: nothing reliable to parse.
        return False
    # Drop the header row (index 0) and the blank line just before "Legend:".
    rows = lines[1:max(legend_idx - 1, 1)]

    gpu_cnt = 0
    ib_cnt = 0
    ib_in_second_socket = False
    phb_cnt = 0
    for row in rows:
        if row.startswith('GPU'):
            gpu_cnt += 1
        if row.startswith('mlx5'):
            ib_cnt += 1
            # Count the SYS cells that come before the card's own "X" cell.
            sys_cnt = 0
            for cell in row.split('\t'):
                cell = cell.strip()
                if cell == 'SYS':
                    sys_cnt += 1
                elif cell == 'X':
                    if sys_cnt >= 4:  # only meaningful in NPS1 mode
                        ib_in_second_socket = True
                    break
        # gpu_cnt is 6 while on GPU5's row and 7 while on GPU6's row.
        if gpu_cnt in (6, 7) and 'PHB' in row:
            phb_cnt += 1

    # Enable condition: 1 IB card, 8 GPUs, IB on the second socket,
    # GPU5 and GPU6 rows both showing PHB.
    return (
        ib_cnt == 1
        and gpu_cnt == 8
        and ib_in_second_socket
        and phb_cnt == 2
    )

# Dispatch table from option name to the setter that applies it.
# All option values are expected to be of type str or boolean.
HFAI_NCCL_OPT_CONFIG = {
    # True/False: force GPU Direct RDMA on.
    "GDR": set_nccl_GPU_direct_rdma,
    # Not supported yet.
    "GDR_LEVEL": None,
    # Not supported yet.
    "GDR_READ": None,
    # Custom GRAPH; the value is the path of a user-supplied graph file.
    "GRAPH_OPT": set_nccl_graph_topo_opt,
    # MIN_NCHANNELS: a str holding a number greater than or equal to '1'.
    "MIN_NCHANNELS": set_nccl_min_nchannels,
    # MAX_NCHANNELS: a str holding a number greater than or equal to '1'.
    "MAX_NCHANNELS": set_nccl_max_nchannels,
    # NCCL communication topology algorithm: Ring / Tree.
    "NCCL_ALGO": set_nccl_algorithm,
    # NCCL communication protocol: LL / LL128 / Simple
    # (low latency / 128-byte low latency / regular).
    "NCCL_PROTO": set_nccl_proto,
}

def set_nccl_opt_level(OPT_LEVEL=0, CUSTOM_CONFIG=None):
    """Set the NCCL optimization level and custom options for the Fire-Flyer 2 cluster.

    Args:
        OPT_LEVEL (int): One of the values defined by ``HFAI_NCCL_OPT_LEVEL``:
            ``DISABLED``, ``AUTO``, ``FULL``, ``COVER_AUTO`` or ``CUSTOM``.
        CUSTOM_CONFIG (dict, optional): Custom optimization options. Only
            consulted when ``OPT_LEVEL`` lies between ``COVER_AUTO`` and
            ``CUSTOM`` (inclusive).

    The levels are:

    .. code-block:: python

        class HFAI_NCCL_OPT_LEVEL(object):
            DISABLED = 0     # no optimization, the default
            AUTO = 1         # automatic, conservative: picks usable options from the node information
            FULL = 3         # automatic, aggressive: all best optimizations; known to conflict with sub groups
            COVER_AUTO = 10  # AUTO plus user overrides for individual options
            CUSTOM = 101     # user-selected combination passed through CUSTOM_CONFIG

    An example ``CUSTOM_CONFIG`` (a Python ``dict``):

    .. code-block:: python

        CUSTOM_CONFIG = {
            'GRAPH_OPT': 'path/to/your/graph.txt',  # path of a custom GRAPH file
            'NCCL_ALGO': 'Ring',                    # NCCL topology algorithm: Ring/Tree
            'NCCL_PROTO': 'Simple',                 # NCCL protocol: LL/LL128/Simple
            'GDR': True,                            # True/False, force GPU Direct RDMA
            'MIN_NCHANNELS': '1',                   # str holding a number >= '1'
            'MAX_NCHANNELS': '1',                   # str holding a number >= '1'
        }

    Keys that are unknown, or whose entry in ``HFAI_NCCL_OPT_CONFIG`` is
    ``None``, are ignored silently.

    ``AUTO`` or ``FULL`` is recommended in general. ``FULL`` is currently
    known to conflict with sub groups, so use it with care.

    The level that ended up being applied is recorded in the
    ``HFAI_NCCL_OPT_LEVEL`` environment variable as ``'AUTO'``, ``'FULL'``
    or ``'CUSTOM'``; it is left unset when nothing was applied.

    Examples:

    .. code-block:: python

        import hfai
        hfai.distributed.set_nccl_opt_level(hfai.distributed.HFAI_NCCL_OPT_LEVEL.AUTO)
        # ... regular training code follows

    """
    # No optimization at all.
    if OPT_LEVEL <= HFAI_NCCL_OPT_LEVEL.DISABLED:
        return

    # The hardware probe spawns nvidia-smi, so run it at most once and only
    # for the levels whose behaviour depends on it.
    auto_applicable = (
        OPT_LEVEL <= HFAI_NCCL_OPT_LEVEL.COVER_AUTO
        and check_hardware_compatibility()
    )

    if auto_applicable:
        # AUTO: applied when the node passes the hardware check.
        # NCCL_PROTO and the graph file are not set at this level.
        set_nccl_GPU_direct_rdma(True)
        set_nccl_algorithm()
        set_nccl_min_nchannels()
        set_nccl_max_nchannels()
        os.environ['HFAI_NCCL_OPT_LEVEL'] = 'AUTO'

        # FULL: additionally load the graph topology file.
        # NOTE(review): COVER_AUTO (10) is >= FULL (3), so COVER_AUTO also
        # takes this branch even though it is described as "AUTO plus
        # overrides" -- confirm this is intended.
        if OPT_LEVEL >= HFAI_NCCL_OPT_LEVEL.FULL:
            set_nccl_graph_topo_opt()
            os.environ['HFAI_NCCL_OPT_LEVEL'] = 'FULL'

    # Custom options; these override whatever AUTO/FULL set above.
    wants_custom = (
        HFAI_NCCL_OPT_LEVEL.COVER_AUTO <= OPT_LEVEL <= HFAI_NCCL_OPT_LEVEL.CUSTOM
    )
    if wants_custom and isinstance(CUSTOM_CONFIG, dict):
        os.environ['HFAI_NCCL_OPT_LEVEL'] = 'CUSTOM'
        for option, value in CUSTOM_CONFIG.items():
            setter = HFAI_NCCL_OPT_CONFIG.get(option)
            if setter is not None:
                setter(value)