dataverse

View on PyPI | Reverse Dependencies (0)

1.0.5 dataverse-1.0.5-py3-none-any.whl

Wheel Details

Project: dataverse
Version: 1.0.5
Filename: dataverse-1.0.5-py3-none-any.whl
Download: [link]
Size: 88001
MD5: 487d8a0b53c29895db8255e3c9d74bcc
SHA256: 602fcd1b6612713726a49b0407db83f92c901b76c286b7ce6d74e7d08285b26a
Uploaded: 2024-04-04 08:23:38 +0000

dist-info

METADATA

Metadata-Version: 2.1
Name: dataverse
Version: 1.0.5
Summary: An open-source simplifies ETL workflow with Python based on Spark
Author: Dataverse Team
Author-Email: dataverse[at]upstage.ai
License: Apache License 2.0
Requires-Dist: requests
Requires-Dist: numpy
Requires-Dist: pandas
Requires-Dist: fasttext-wheel
Requires-Dist: omegaconf
Requires-Dist: pyarrow (==14.0.1)
Requires-Dist: datasets
Requires-Dist: pyspark
Requires-Dist: scipy
Requires-Dist: trafilatura
Requires-Dist: html2text
Requires-Dist: faker
Requires-Dist: awscli
Requires-Dist: boto3
Requires-Dist: pre-commit (==3.6.0)
Requires-Dist: botocore
Requires-Dist: rsa
Requires-Dist: s3transfer
Requires-Dist: isort
Requires-Dist: pytest
License-File: LICENSE
[No description]

WHEEL

Wheel-Version: 1.0
Generator: bdist_wheel (0.41.2)
Root-Is-Purelib: true
Tag: py3-none-any

RECORD

Path Digest Size
dataverse/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/api/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/api/cli.py sha256=XC2d2PBjlCvVDjwL16IZeO5b4KfbcsVh5GecpQ36u-o 413
dataverse/api/emr.py sha256=yclJdmwbEbkQcSRbbw8wTiPg8QBEQdf3CSETrluCrg8 1350
dataverse/config/__init__.py sha256=pMSFHIzM8VhlUucLswiBhSGm9efPbOA9yqP0XigP9U8 30
dataverse/config/interface.py sha256=neC7ALn-6piNV3a_rp1NOwdoHgZjYwJbpVbZOJUbKdg 8299
dataverse/etl/__init__.py sha256=pST076vRaTjGNG8-i7UHeBOLsKnbbxC1awplHaNyEbY 133
dataverse/etl/pipeline.py sha256=noz1uBduhjMYm5UvzpeYppEujbaDuK7RW3Y4_96zQGc 15812
dataverse/etl/registry.py sha256=7nM0FaMGy-MZ7gp3RF-0EOVYYzrHYI4cgPfVIGNkcec 14324
dataverse/etl/__sample/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/__sample/ducky.py sha256=mAYV3iSjKJyMCZzKXkm7BTTcsKzH3BVB_fH3H64R1mA 374
dataverse/etl/__sample/github.py sha256=NvZbBE8IMz7a0UD-vwSCZUYOaqBIgPQ8bTxF6VfC9Ew 1437
dataverse/etl/bias/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/cleaning/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/cleaning/char.py sha256=OhWPNXnmIS51_0CKBZ02PJIGf7fVRKkBHmd39VU-aWM 3759
dataverse/etl/cleaning/document.py sha256=yqQhV_J6uTV5keBAEyy4JM-hyMCBl-XnKrevIIRC538 2656
dataverse/etl/cleaning/html.py sha256=JyiH6oJ0EsoP-aHWK70qe4Y8s4bEvfxPd1HksaA5kGI 2421
dataverse/etl/cleaning/korean.py sha256=PJAbG91rW5cbs_27LAuI153K7Ed29P4GoA_RXtKmk8c 9955
dataverse/etl/cleaning/length.py sha256=kxC5a75vT7I5V617GciFRvTPUwzxqYsA_eGB7MYs4x4 2998
dataverse/etl/cleaning/number.py sha256=JinQd02ZDWf4y682Vk6LW9NbF-AtViUdIr_CpFaDrzw 1816
dataverse/etl/cleaning/table.py sha256=IoTDmQPTxdxCiMb3wbFgyTgcQX_OdgGOUcLYBAmNTJE 2318
dataverse/etl/cleaning/unicode.py sha256=JOJiPVIS21thR1jbKbbTH6MAsefp12smmO0s2fIc3LQ 3415
dataverse/etl/data_ingestion/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/data_ingestion/arrow.py sha256=lsf-u3We4QNiPBOTlLOKpI5eOV9Gf8_0ECL601_OLMg 5615
dataverse/etl/data_ingestion/common_crawl.py sha256=yOKOu4P4wkBlAbO1ZERa9P4Zw-B756F0lQMWoA6PnYA 15117
dataverse/etl/data_ingestion/csv.py sha256=6raFal_g5p1rSyQ716d-Yv7toXzf4XhZ39espkXyEmw 1051
dataverse/etl/data_ingestion/cultura_x.py sha256=6XtvPXsTFbSfQ1FcDqDfPgdDnjm346HcKAIwFUwRBKw 991
dataverse/etl/data_ingestion/huggingface.py sha256=zBtSOImKcOuGZ7dpO3nvicgNbe15cMN6O5QWAiOs1yg 1590
dataverse/etl/data_ingestion/parquet.py sha256=S-HrXf82WvnsZ5glKq_JczgC2AM45jQN6hV37siGj3k 836
dataverse/etl/data_ingestion/red_pajama.py sha256=ItiyFWoVvNrDDeGCxm-WoAwn3yfkRuNu-8t38FrTMzA 3535
dataverse/etl/data_ingestion/slim_pajama.py sha256=ERru6Vbc81ZlZSqaQm5j4_R1oNGcYt7AcecqLsarG8I 1617
dataverse/etl/data_ingestion/test.py sha256=vrI-IQqfFZu7fuD7dvrMSfUg2EFJc-E5bjKz414eP4o 1506
dataverse/etl/data_load/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/data_load/aws.py sha256=rzr-T4P2MCM6hVnE7muHGrQZsEl_UcuKH5_ohyzvFvc 140
dataverse/etl/data_load/huggingface.py sha256=ChQ_JWQNQqcvya6TkKsNy1uyD0q6BVZ4umD9l2hSmRM 2525
dataverse/etl/data_load/parquet.py sha256=KdER_47ZXw-El0pI1zny0H0RDeCnc4FGiUK8nhjtLtw 1250
dataverse/etl/data_save/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/data_save/aws.py sha256=hDt17oP9enctTjbazt_UUDqtmCErxJVbgNvKpr7ZsmE 139
dataverse/etl/data_save/huggingface.py sha256=xSc79-fiqf8NOW1cV-A2RbtsT8gaG80tfrsgW0jNETQ 2524
dataverse/etl/data_save/parquet.py sha256=lGi3YRi_m_36-MvcaOL6y27eUPvFIn_O_-DA6xu1c8M 1249
dataverse/etl/decontamination/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/deduplication/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/deduplication/common_crawl.py sha256=n7cuYKaEbXICx2YzlO6P-65DHIxyZhpxZwfWxkaaCeI 2586
dataverse/etl/deduplication/exact.py sha256=vADsMnuDumubN0LjGMkJv81Z3MBJTOZRZK--qudrYVg 903
dataverse/etl/deduplication/minhash.py sha256=B3mqK4OMO9SL_a1tSf5puVokG-XqCQDGRtDLgmd0Pps 14139
dataverse/etl/deduplication/polyglot.py sha256=rLBfpSTvB_OlbN_MonoS8VuZ1AY_-nVWwKOR2bBlYuk 5331
dataverse/etl/pii/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/pii/card.py sha256=dWgXsxnUwiMFkG57jP5WqaeV-hE_tWKAsD-n7fmK82I 3099
dataverse/etl/pii/nin.py sha256=7wDlMgNhw5WkkcO6dzZZWXRywuNe0rvTLPkXXPoI1vI 3392
dataverse/etl/quality/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/quality/language.py sha256=SKA9kTtmKi3c2jULZlAwpVEJ8QZ5Vx0nPax8AxYDxKw 6078
dataverse/etl/toxicity/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/utils/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/etl/utils/log.py sha256=hIU_CWxJgrSSWmy1oPbOMxPWsfyy1u0aGBfK2M2jIZg 890
dataverse/etl/utils/sampling.py sha256=mSexR5eNzDzeCKjA7F5lbdtVVJC2TdpoRTNzfanWWK8 1375
dataverse/etl/utils/statistics.py sha256=Ziyhk7cpiCy7ck1G25cQOIf8XNV_3B6S-nfzxtZdB9M 2185
dataverse/lab/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/utils/__init__.py sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU 0
dataverse/utils/analyze/__init__.py sha256=hAS7XGdorLRrvX9PbScsfabwbtaIYBNq96qCibEsnfo 86
dataverse/utils/analyze/pip.py sha256=R4iN3H97qJPSQSTZhpOuJeZmBhNoOYjWchnidWltwvQ 296
dataverse/utils/analyze/python.py sha256=nchHNEbl-Iqm23VhICYLUnJJDUxCp6FjNTOP2HiPv1w 1036
dataverse/utils/api/__init__.py sha256=pgAJdND7egHR1gWshxXPqJgNjq5w0j5-Rk5Sjp-j4Ws 1603
dataverse/utils/api/aws.py sha256=e9SEtzfvMtuu3AnOVty3c5wGJlqbAlAWTrTcI0eK7nQ 68144
dataverse/utils/format/__init__.py sha256=kPoH1GQBG_nvIHtxtjRVqQJWt2x40W5OEcbQq22R6pU 151
dataverse/utils/format/huggingface.py sha256=sqfXx_6cM0OHSD0Ldx0j0I0uRWY1bSCPLfTsBBavopg 3319
dataverse/utils/format/ufl.py sha256=fIBYyV5Rue-kkBHLSO8h4rRhi01G_B7jPU0HZpZb5_s 144
dataverse/utils/setting/__init__.py sha256=v7-MJJgJgK3c9IZ6V-TfJKjjrMffxhpKKjmwzIWgAQ0 110
dataverse/utils/setting/system.py sha256=jhHNPS-m2ugAy7mSoyickmYD0t7mqTb1Cxj3DOg_XUY 7283
dataverse/utils/setting/user.py sha256=TJfVJbAgeViMxvNgH-1Xt-5fRCmxRzLWLe1MQRnprjI 6019
dataverse-1.0.5.dist-info/LICENSE sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ 11357
dataverse-1.0.5.dist-info/METADATA sha256=pY6joUmq3WunZZ_AimuZta6V3LHfstTWT3vDtCJyOsw 721
dataverse-1.0.5.dist-info/WHEEL sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A 92
dataverse-1.0.5.dist-info/entry_points.txt sha256=2JG3fmkHIRftHHC4pKRQb9TT0cl8jKkjwzw-6I03hqw 53
dataverse-1.0.5.dist-info/top_level.txt sha256=DQthQD-a7flZ1h_SzQ_2X0ZD9T4hl_K8UW43H1y_t6k 10
dataverse-1.0.5.dist-info/RECORD

top_level.txt

dataverse

entry_points.txt

dataverse = dataverse.api.cli:main