# Source code for hfai.distributed.nccl_opt_control
import os
import subprocess
# The HFAI_NCCL_OPT_LEVEL environment variable records which optimization
# setting has been applied.
class HFAI_NCCL_OPT_LEVEL(object):
    """Integer constants naming the available NCCL optimization levels."""

    # Turn every NCCL optimization off.
    DISABLED = 0
    # Pick NCCL optimizations automatically: conservative.
    AUTO = 1
    # Pick NCCL optimizations automatically: aggressive, switch on as many
    # optimizations as can be enabled.
    FULL = 3
    # AUTO plus user-defined values for selected options.
    COVER_AUTO = 10
    # Reserved hook: the user chooses the optimization combination.
    CUSTOM = 101
# OPT GRAPH config
def set_nccl_graph_topo_opt(custom_graph_file=None):
    """Export ``NCCL_GRAPH_FILE`` so that it names a topology graph file.

    Args:
        custom_graph_file: Path of the graph file to use. Any value that is
            not a non-empty ``str`` selects ``graph1.txt`` located in the
            directory of this module.
    """
    use_custom = isinstance(custom_graph_file, str) and len(custom_graph_file) > 0
    if use_custom:
        chosen_path = custom_graph_file
    else:
        # Fall back to the graph file that sits next to this module.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        chosen_path = os.path.join(module_dir, "graph1.txt")
    os.environ['NCCL_GRAPH_FILE'] = chosen_path
def set_nccl_min_nchannels(param=None):
    """Export ``NCCL_MIN_NCHANNELS``.

    Args:
        param: Value to export. Anything that is not a ``str`` is replaced
            by ``'1'``.
    """
    if not isinstance(param, str):
        param = '1'
    os.environ['NCCL_MIN_NCHANNELS'] = param
def set_nccl_max_nchannels(param=None):
    """Export ``NCCL_MAX_NCHANNELS``.

    Args:
        param: Value to export. Anything that is not a ``str`` is replaced
            by ``'1'``.
    """
    if not isinstance(param, str):
        param = '1'
    os.environ['NCCL_MAX_NCHANNELS'] = param
def set_nccl_algorithm(param=None):
    """Export ``NCCL_ALGO``.

    Args:
        param: Algorithm name to export. Anything that is not a ``str`` is
            replaced by ``'Ring'``.
    """
    if not isinstance(param, str):
        param = 'Ring'
    os.environ['NCCL_ALGO'] = param
def set_nccl_proto(param=None):
    """Export ``NCCL_PROTO``.

    Args:
        param: Protocol name to export. Anything that is not a ``str`` is
            replaced by ``'Simple'``.
    """
    if not isinstance(param, str):
        param = 'Simple'
    os.environ['NCCL_PROTO'] = param
# GPU Direct RDMA
def set_nccl_GPU_direct_rdma(flag=True):
    """Export the GPU Direct RDMA variables according to ``flag``.

    Args:
        flag: When truthy, ``NCCL_NET_GDR_LEVEL`` is set to ``'SYS'`` and
            ``NCCL_NET_GDR_READ`` to ``'1'``. When falsy, only
            ``NCCL_NET_GDR_READ`` is written (``'0'``); an existing
            ``NCCL_NET_GDR_LEVEL`` is left as it is.
    """
    if flag:
        os.environ['NCCL_NET_GDR_LEVEL'] = 'SYS'
        os.environ['NCCL_NET_GDR_READ'] = '1'
        return
    # Disable GDR_READ.
    os.environ['NCCL_NET_GDR_READ'] = '0'
def _topo_matrix_rows(topo):
    """Return the matrix rows of ``nvidia-smi topo`` output (no header, no legend)."""
    lines = topo.split('\n')
    # Compare stripped text so trailing whitespace or '\r' cannot hide the marker.
    legend_at = next(
        (idx for idx, line in enumerate(lines) if line.strip() == 'Legend:'),
        None,
    )
    if legend_at is None:
        # No legend section: keep everything after the header row instead of
        # failing with ValueError.
        return lines[1:]
    # The line directly above 'Legend:' is dropped as well (presumably the
    # blank separator line). Clamp so an early marker cannot yield a negative
    # slice end.
    return lines[1:max(legend_at - 1, 1)]


def _topo_is_compatible(topo):
    """Evaluate the compatibility conditions on decoded ``nvidia-smi topo`` text."""
    gpu_rows = 0
    ib_rows = 0
    phb_rows = 0
    ib_in_second_socket = False
    for row in _topo_matrix_rows(topo):
        if row.startswith('GPU'):
            gpu_rows += 1
        if row.startswith('mlx5'):
            ib_rows += 1
            # Count the GPUs that are not in the same NUMA node as this
            # adapter: the SYS cells that precede the adapter's own 'X' cell.
            sys_before_self = 0
            for cell in row.split('\t'):
                cell = cell.strip()
                if cell == 'SYS':
                    sys_before_self += 1
                elif cell == 'X':
                    if sys_before_self >= 4:  # only meaningful in the NPS1 case
                        ib_in_second_socket = True
                    break
        # While the running GPU count is 6 or 7 the current row is the GPU5 or
        # GPU6 row (in a contiguous matrix); both are expected to show PHB.
        if gpu_rows in (6, 7) and 'PHB' in row:
            phb_rows += 1
    # Enable condition: 1 IB adapter, 8 GPUs, IB adapter on socket 1,
    # GPU5/GPU6 under the same PHB.
    return ib_rows == 1 and gpu_rows == 8 and ib_in_second_socket and phb_rows == 2


def check_hardware_compatibility():
    """Report whether this node has the topology the NCCL presets expect.

    Runs ``nvidia-smi topo -mp`` and inspects the connection matrix. The node
    qualifies only when all of the following hold:

    * exactly 8 rows start with ``GPU``;
    * exactly 1 row starts with ``mlx5``;
    * in that ``mlx5`` row at least 4 ``SYS`` cells precede the ``X`` cell;
    * exactly 2 rows contain ``PHB`` while the running GPU count is 6 or 7.

    Returns:
        bool: ``True`` when every condition holds. ``False`` otherwise,
        including when ``nvidia-smi`` cannot be executed, exits with an
        error, or produces output that cannot be decoded as UTF-8.
    """
    try:
        raw = subprocess.check_output(['nvidia-smi', 'topo', '-mp'])
        topo = raw.decode('utf-8')
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        # Best-effort probe: a missing or failing tool means "not compatible",
        # while KeyboardInterrupt / SystemExit are allowed to propagate.
        return False
    return _topo_is_compatible(topo)
# Maps each supported CUSTOM_CONFIG key to the setter that applies it.
# All option values are of type str or bool. A ``None`` entry marks a key
# that is not supported yet.
HFAI_NCCL_OPT_CONFIG = {
    # True/False: force GPU Direct RDMA on.
    "GDR": set_nccl_GPU_direct_rdma,
    # Not supported yet.
    "GDR_LEVEL": None,
    # Not supported yet.
    "GDR_READ": None,
    # Custom GRAPH; the optional parameter GRAPH_OPT_CUSTOM_FILE takes the
    # path of a user-supplied GRAPH file.
    "GRAPH_OPT": set_nccl_graph_topo_opt,
    # Set MIN_NCHANNELS: a str greater than or equal to '1'.
    "MIN_NCHANNELS": set_nccl_min_nchannels,
    # Set MAX_NCHANNELS: a str greater than or equal to '1'.
    "MAX_NCHANNELS": set_nccl_max_nchannels,
    # NCCL communication topology algorithm: Ring / Tree.
    "NCCL_ALGO": set_nccl_algorithm,
    # NCCL communication protocol: LL / LL128 / Simple
    # (low latency / 128-byte low latency / regular).
    "NCCL_PROTO": set_nccl_proto,
}
def set_nccl_opt_level(OPT_LEVEL=0, CUSTOM_CONFIG=None):
    """
    Set the NCCL optimization level and custom configuration for the Fire-Flyer 2 cluster.

    Args:
        OPT_LEVEL (int): one of the values defined by ``HFAI_NCCL_OPT_LEVEL``: ``DISABLED`` , ``AUTO`` , ``FULL`` , ``COVER_AUTO`` , ``CUSTOM``
        CUSTOM_CONFIG (dict, optional): [optional] custom optimization configuration

    ``OPT_LEVEL`` is the optimization level. There are 5 of them: ``DISABLED`` , ``AUTO`` , ``FULL`` , ``COVER_AUTO`` , ``CUSTOM`` ; use the attributes already defined on the ``HFAI_NCCL_OPT_LEVEL`` class:

    .. code-block:: python

        class HFAI_NCCL_OPT_LEVEL(object):
            DISABLED = 0     # no optimization, the default
            AUTO = 1         # pick NCCL optimizations automatically, conservative strategy: usable parameters are chosen from the node information
            FULL = 3         # automatic optimization, aggressive strategy: enables all of the best optimizations; known to conflict with sub groups
            COVER_AUTO = 10  # AUTO plus user-defined values for selected options
            CUSTOM = 101     # the user chooses the optimization combination and passes the concrete options through the second argument CUSTOM_CONFIG

    ``CUSTOM_CONFIG`` is optional and carries the concrete optimization options. An example custom configuration (a Python dict):

    .. code-block:: python

        CUSTOM_CONFIG={
            'GRAPH_OPT': 'path/to/your/graph.txt',  # custom GRAPH; optional parameter GRAPH_OPT_CUSTOM_FILE takes the path of a custom GRAPH file
            'NCCL_ALGO': 'Ring',                    # NCCL communication topology algorithm: Ring/Tree
            'NCCL_PROTO': 'Simple',                 # NCCL communication protocol LL/LL128/Simple: low latency / 128-byte low latency / regular
            'GDR': True,                            # True/False: force GPU Direct RDMA on
            'MIN_NCHANNELS': '1',                   # set MIN_NCHANNELS: a str greater than or equal to '1'
            'MAX_NCHANNELS': '1',                   # set MAX_NCHANNELS: a str greater than or equal to '1'
        }

    In general, the ``AUTO`` or ``FULL`` level is recommended. ``FULL`` is currently known to conflict with sub groups, so use it with care.

    Examples:

    .. code-block:: python

        import hfai
        hfai.distributed.set_nccl_opt_level(hfai.distributed.HFAI_NCCL_OPT_LEVEL.AUTO)
        # regular code follows...

    """
    # No optimization at all: record the level and drop every variable the
    # other levels may have exported.
    if OPT_LEVEL <= HFAI_NCCL_OPT_LEVEL.DISABLED:
        os.environ['HFAI_NCCL_OPT_LEVEL'] = 'DISABLED'
        for name in (
            'NCCL_GRAPH_FILE',
            'NCCL_MIN_NCHANNELS',
            'NCCL_MAX_NCHANNELS',
            'NCCL_ALGO',
            'NCCL_PROTO',
            'NCCL_NET_GDR_LEVEL',
            'NCCL_NET_GDR_READ',
        ):
            os.environ.pop(name, None)
        return

    # AUTO options: applied when the hardware check recognizes the node.
    # The check spawns nvidia-smi, so it is evaluated once and its result is
    # shared by the AUTO and FULL steps.
    if OPT_LEVEL <= HFAI_NCCL_OPT_LEVEL.COVER_AUTO and check_hardware_compatibility():
        set_nccl_GPU_direct_rdma(True)
        set_nccl_algorithm()
        # NCCL_PROTO is intentionally not set by the automatic levels.
        set_nccl_min_nchannels()
        set_nccl_max_nchannels()
        os.environ['HFAI_NCCL_OPT_LEVEL'] = 'AUTO'

        # The graph topology optimization belongs to the FULL level.
        # NOTE(review): COVER_AUTO (10) also satisfies this test, so it gets
        # the FULL graph file even though it is described as "AUTO plus
        # overrides" - confirm whether that is intended.
        if OPT_LEVEL >= HFAI_NCCL_OPT_LEVEL.FULL:
            set_nccl_graph_topo_opt()
            os.environ['HFAI_NCCL_OPT_LEVEL'] = 'FULL'

    # Custom configuration; it also overrides whatever the automatic levels
    # set above.
    custom_allowed = (
        HFAI_NCCL_OPT_LEVEL.COVER_AUTO <= OPT_LEVEL <= HFAI_NCCL_OPT_LEVEL.CUSTOM
    )
    if custom_allowed and isinstance(CUSTOM_CONFIG, dict):
        os.environ['HFAI_NCCL_OPT_LEVEL'] = 'CUSTOM'
        for key, value in CUSTOM_CONFIG.items():
            # Unknown keys and keys mapped to None (unsupported) are ignored
            # silently.
            setter = HFAI_NCCL_OPT_CONFIG.get(key)
            if setter is not None:
                setter(value)
def get_recommend_opt_level():
    """Return ``AUTO`` when the hardware check passes, ``DISABLED`` otherwise."""
    compatible = check_hardware_compatibility()
    return HFAI_NCCL_OPT_LEVEL.AUTO if compatible else HFAI_NCCL_OPT_LEVEL.DISABLED
def get_user_role():
    """Return the ``MARSV2_USER_ROLE`` environment variable, or ``''`` if unset."""
    return os.getenv("MARSV2_USER_ROLE", '')
def get_nccl_opt_level():
    """Return the ``HFAI_NCCL_OPT_LEVEL`` environment variable, or ``''`` if unset."""
    try:
        return os.environ["HFAI_NCCL_OPT_LEVEL"]
    except KeyError:
        return ''
def is_external_user():
    """Return ``True`` when ``get_user_role()`` reports the role ``'external'``."""
    role = get_user_role()
    return role == 'external'
def set_nccl_user_default_values():
    """Apply the recommended NCCL level for external users without a level set.

    Does nothing when ``HFAI_NCCL_OPT_LEVEL`` already has a non-empty value,
    when the user is not external, or when the recommended level is
    ``DISABLED``.
    """
    # An already recorded level wins; the role is only consulted otherwise.
    already_configured = bool(get_nccl_opt_level())
    if already_configured or not is_external_user():
        return
    level = get_recommend_opt_level()
    if level != HFAI_NCCL_OPT_LEVEL.DISABLED:
        set_nccl_opt_level(level)