Shortcuts

Source code for sentarget.datasets.nonlpl

r"""
``NoNLPL`` is a dataset instance used to load pre-trained embeddings.
"""

import os
from torchtext.vocab import Vectors

from ._utils import download_from_url, extract_to_dir


class NoNLPL(Vectors):
    r"""Pre-trained Norwegian Bokmal word embeddings from the NLPL repository.

    The Norwegian Bokmal NLPL dataset contains more than 1,000,000
    pre-trained word embeddings from the Norwegian language.

    Examples::

        >>> vectors = NoNLPL.load()

    """

    # Archive(s) to fetch; each one is extracted into ``<root>/<dirname>/<name>``.
    urls = ['http://vectors.nlpl.eu/repository/20/58.zip']
    # Name of the sub-folder the archive is extracted to.
    name = '58'
    # Folder (under the cache root) holding the downloaded archives.
    dirname = 'nlpl-vectors'

    def __init__(self, filepath):
        r"""Build the vectors from an embeddings file.

        Args:
            filepath (string): path of the embeddings file, forwarded
                unchanged to ``Vectors.__init__``.

        """
        super().__init__(filepath)

    @classmethod
    def load(cls, data='model.txt', root='.vector_cache'):
        r"""Load pre-trained word embeddings, downloading them if needed.

        Args:
            data (string): name of the file containing the pre-trained
                word embeddings, relative to the extracted folder.
            root (string): root folder where vectors are saved.

        Returns:
            NoNLPL: loaded dataset.

        """
        path = os.path.join(root, cls.dirname, cls.name)
        # Maybe download
        if not os.path.isdir(path):
            path = cls.download(root)
        filepath = os.path.join(path, data)
        # Instantiate through ``cls`` so subclasses get an instance of
        # their own type rather than a plain ``NoNLPL``.
        return cls(filepath)

    @classmethod
    def download(cls, root):
        r"""Download and extract the archive(s) listed in ``urls``.

        Nothing is done when the extraction folder already exists.
        An archive already present on disk is not downloaded again, but
        it is still extracted. Supported archive formats are whatever
        ``extract_to_dir`` handles.

        Args:
            root (str): Folder to download data to.

        Returns:
            string: Path to extracted dataset.

        """
        path_dirname = os.path.join(root, cls.dirname)
        path_name = os.path.join(path_dirname, cls.name)

        # Guard on the extraction folder itself, not its parent: the parent
        # may already exist (interrupted run, other cached vectors) while
        # this dataset has never been extracted.
        if not os.path.isdir(path_name):
            os.makedirs(path_dirname, exist_ok=True)
            for url in cls.urls:
                filename = os.path.basename(url)
                zpath = os.path.join(path_dirname, filename)
                if not os.path.isfile(zpath):
                    print(f'Download {filename} from {url} to {zpath}')
                    download_from_url(url, zpath)
                extract_to_dir(zpath, path_name)

        return path_name

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Check the GitHub page and contribute to the project

View GitHub