# Source code for hfai.datasets.ljspeech
from typing import Callable, Optional
import pickle
from ffrecord import FileReader
from .base import (
BaseDataset,
get_data_dir,
register_dataset,
)
"""
Expected file organization:
[data_dir]
LJSpeech.ffr
"""
@register_dataset
class LJSpeech(BaseDataset):
    """
    LJSpeech dataset for speech tasks.

    The dataset consists of 13100 short audio clips of a single speaker
    reading passages from 7 non-fiction books. For more information see:
    https://keithito.com/LJ-Speech-Dataset

    Args:
        transform (Callable): optional transform applied to each sample; it
            receives one sample as input and returns the transformed sample
        check_data (bool): whether to verify the checksum of every sample
            (default: ``True``)
        miniset (bool): whether to use the mini subset (default: ``False``)

    Returns:
        wav_id, sample_rate, waveform, text, norm_text (str, int, np.ndarray, str, str):
            each sample is a 5-tuple of the original wav file id, the sample
            rate, the waveform data, the transcript, and the normalized
            transcript

    Examples:

    .. code-block:: python

        from hfai.datasets import LJSpeech

        def transform(wav_id, sample_rate, waveform, text, norm_text):
            ...

        dataset = LJSpeech(transform)
        loader = dataset.loader(batch_size=64, num_workers=4)

        for wav_id, sample_rate, waveform, text, norm_text in loader:
            # training model
            ...
    """

    def __init__(
        self,
        transform: Optional[Callable] = None,
        check_data: bool = True,
        miniset: bool = False,
    ) -> None:
        super().__init__()
        # LJSpeech ships as a single ffrecord file with no train/val split.
        self.split = None
        data_dir = get_data_dir()
        if miniset:
            data_dir = data_dir / "mini"
        self.fname = data_dir / "LJSpeech" / "LJSpeech.ffr"
        self.reader = FileReader(self.fname, check_data)
        self.transform = transform

    def __len__(self) -> int:
        # Number of samples stored in the ffrecord file.
        return self.reader.n

    def __getitem__(self, indices):
        """
        Read a batch of samples by index.

        Args:
            indices: indices of the samples to read

        Returns:
            list: one (possibly transformed) sample per requested index
        """
        bytes_ = self.reader.read(indices)
        # NOTE(review): pickle.loads on external data is unsafe in general;
        # acceptable here only because the ffrecord dataset file is a trusted,
        # locally managed artifact.
        samples = [pickle.loads(x) for x in bytes_]
        if self.transform is None:
            return samples
        return [self.transform(s) for s in samples]