# Source code for hfai.datasets.ljspeech

from typing import Callable, Optional

import pickle
from ffrecord import FileReader
from .base import (
    BaseDataset,
    get_data_dir,
    register_dataset,
)

"""
Expected file organization:

    [data_dir]
        LJSpeech.ffr

"""


@register_dataset
class LJSpeech(BaseDataset):
    """
    A dataset for speech recognition.

    The dataset consists of 13,100 short audio clips of a single speaker
    reading passages from 7 non-fiction books. For more information see:
    https://keithito.com/LJ-Speech-Dataset

    Args:
        transform (Callable): transform function applied to each sample; it
            takes one sample as input and returns the transformed sample
        check_data (bool): whether to verify the checksum of every sample
            (default: ``True``)
        miniset (bool): whether to use the mini subset (default: ``False``)

    Returns:
        wav_id, sample_rate, wavefrom, text, norm_text (str, int, np.ndarray, str, str):
            each returned sample is a 5-tuple containing the id of the original
            wav file, the sample rate, the wav data, the transcript of the
            audio, and the normalized transcript

    Examples:

    .. code-block:: python

        from hfai.datasets import LJSpeech

        def transform(wav_id, sample_rate, wavefrom, text, norm_text):
            ...

        dataset = LJSpeech(transform)
        loader = dataset.loader(batch_size=64, num_workers=4)

        for wav_id, sample_rate, wavefrom, text, norm_text in loader:
            # training model
    """

    def __init__(
        self,
        transform: Optional[Callable] = None,
        check_data: bool = True,
        miniset: bool = False,
    ) -> None:
        super().__init__()
        # LJSpeech ships as a single split; kept for interface parity with
        # other datasets that expose a ``split`` attribute.
        self.split = None

        data_dir = get_data_dir()
        if miniset:
            data_dir = data_dir / "mini"
        self.fname = data_dir / "LJSpeech" / "LJSpeech.ffr"
        self.reader = FileReader(self.fname, check_data)
        self.transform = transform

    def __len__(self) -> int:
        # Number of samples stored in the ffrecord file.
        return self.reader.n

    def __getitem__(self, indices):
        """Read and deserialize the samples at the given indices.

        Args:
            indices: indices of the samples to read, as accepted by
                ``FileReader.read``

        Returns:
            list: the (optionally transformed) samples
        """
        bytes_ = self.reader.read(indices)
        samples = [pickle.loads(x) for x in bytes_]
        # Hoist the loop-invariant transform check out of the per-sample loop.
        if self.transform is None:
            return samples
        return [self.transform(s) for s in samples]