opuscleaner
View on PyPI — Reverse Dependencies (0)
0.4.1 | opuscleaner-0.4.1-py3-none-any.whl |
Wheel Details
Project: | opuscleaner |
Version: | 0.4.1 |
Filename: | opuscleaner-0.4.1-py3-none-any.whl |
Download: | [link] |
Size: | 340634 |
MD5: | ad46aef37d9c783ea152d00e48f7d6f2 |
SHA256: | 7b0b76f2e73deb1293e916804cb4578cbb5d1379ab456ceb5ed24ccb872bed5a |
Uploaded: | 2024-02-19 14:29:20 +0000 |
dist-info
METADATA · WHEEL · RECORD · entry_points.txt
METADATA
WHEEL
Wheel-Version: | 1.0 |
Generator: | hatchling 1.21.1 |
Root-Is-Purelib: | true |
Tag: | py3-none-any |
RECORD
Path | Digest | Size |
---|---|---|
opuscleaner/__about__.py | sha256=Z4S0cUqfWFIxSvanoTDQh676x2hjV4pKRi-FEpPk-k4 | 18 |
opuscleaner/__init__.py | sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU | 0 |
opuscleaner/_util.py | sha256=hLgqtZ4WwElrlqByJjmnaiBb2KbEUtHPTzuWCJTmxTY | 3500 |
opuscleaner/categories.py | sha256=NnVJEtWwWi1rNer6L0pWxxrCypD9Hlq0nF_vmfKMoiY | 1707 |
opuscleaner/clean.py | sha256=Dy_mIE0_ipZ_2bmtptbpqzfU3Xf40uOnoedfJ73p-Rg | 26758 |
opuscleaner/col.py | sha256=SlzLRRgqZWUUMNVTXTjPCX5EgbW0IZYxuiyNUoeg4sY | 3163 |
opuscleaner/config.py | sha256=DrxOxIpQYnvDSsm-adl-a07_w_bkTl3O1oO1pjvm2ws | 1490 |
opuscleaner/datasets.py | sha256=O0gNy_7tszJuMvi0fU_oSQMozEVEj_gILKV-YM6iZQE | 1424 |
opuscleaner/download.py | sha256=HqYnOmd4EW9_FfSdhAMB7qnpdO-wEuARMH7DX9Efxy0 | 13710 |
opuscleaner/filters.py | sha256=FD3tqwpAzLuBt544VS194wfymXnCUWXHxEYeC0A18gM | 7901 |
opuscleaner/logging.py | sha256=CnRmBEtlPrtQI0bjKPfc0SxGJmqAV0QmA6RVHxn-OxA | 7339 |
opuscleaner/opusfilter_compat.py | sha256=pb1MjCyKmYKlvogm_Y2fVyRDmyrFdegpm1nmRAgo7Lc | 6452 |
opuscleaner/sample.py | sha256=eXv9_BdZPL4Nx32xLWV2TzRMXjOUWtmkuVCSRixCmAw | 4794 |
opuscleaner/server.py | sha256=EdTXI44YfsHXzOKqJg5s1SZhPxLtx1jz0m6BwdQ8UHk | 17712 |
opuscleaner/threshold.py | sha256=OBHTNV2IHrWCtxj7fILHsNTUrpCiMbvxPdBuSMHouAA | 8153 |
opuscleaner/filters/alpha_ratio.json | sha256=xOO9gsN0sLLuFOwHgFgPsrk0RcQHTtkVvPQSXF_Ijow | 3391 |
opuscleaner/filters/alpha_ratio.py | sha256=vN2S1JCVsku3AW74_VbhQReOvC27t88aUWiaSdvWUXo | 3981 |
opuscleaner/filters/bicleaner_hardrules.json | sha256=PLZm502DA1FGuFz9baJcwvaQFyRKcNUuVtmlsj_PMPI | 1261 |
opuscleaner/filters/bifixer.json | sha256=k5SoI1PupsJEJhDf5mZ6GPjFap6kCxrgPV_SwV8OYrY | 3714 |
opuscleaner/filters/bifixer_dedupe.py | sha256=hp6vs3pFZ7FFioSbPfjLOWWukeo9udCnX6mfoKyJ4mk | 454 |
opuscleaner/filters/clean_common.py | sha256=j2ang53GRHVRcc1760AOBnM6yT608JyK__YIvpU4ZtE | 2440 |
opuscleaner/filters/deescape-special-chars.json | sha256=aSnzlrENPRaoI4Z-94sRj1EWLsGmT7hvTgJs0KwJsug | 496 |
opuscleaner/filters/deescape-special-chars.perl | sha256=YrUz1T7J91dGJHngP3oUksnAej2f_0ps1GJR5HwUscI | 631 |
opuscleaner/filters/deescape_tsv.json | sha256=cRrF55vUpp2lbThJ2zDsxwMhhOVkGqb-WHjjgZS57xA | 190 |
opuscleaner/filters/deescape_tsv.py | sha256=Gi0kTOdv-4CyeG1vzhhIsDPg_QHYuYUtZgheQ2NZgb8 | 363 |
opuscleaner/filters/detokenizer.json | sha256=iMsyKuh0IoIAMwzjsV9pdGy7J-96gSNtK_w7a9P_d_g | 1232 |
opuscleaner/filters/detokenizer.perl | sha256=hnz-0bP9zu_moUhamYZu5ZIGXvtg1Rx0GLowQWb4liw | 12473 |
opuscleaner/filters/fasttext_filter.json | sha256=JX9U08q9ouDDWc-dr7fB4RR3mM467KVIqzoTHSG0O4c | 8692 |
opuscleaner/filters/fasttext_filter.py | sha256=rczY_wI2DJgZWxYuvLdmmobM6WwdqleVikoWFhb8Xn0 | 3173 |
opuscleaner/filters/fix_elitr_eca.json | sha256=WpUpFsA2iLEdpETVr5nNUuv5GvaQ67VCAr6FtjUL9TU | 180 |
opuscleaner/filters/fix_elitr_eca.py | sha256=X4eAKNz8bvgIuWlHfCopvWHESjpUmlMyHW86w5jExyI | 1473 |
opuscleaner/filters/fix_quotes.json | sha256=AYf3yOd8UO5gC9vTTVSO4vpF3UNeqqapYF7pYnxm9hI | 116 |
opuscleaner/filters/fix_quotes.py | sha256=xcH-Gc6cFrSWzhUMrqDOl1Ih6J8EN4camWyS5_TcNUs | 322 |
opuscleaner/filters/fix_sent_final_punct.json | sha256=z23-yiglyJ5u7E5emJtfr4CsscYONiFTFErrGguLTj4 | 299 |
opuscleaner/filters/fix_sent_final_punct.py | sha256=7l5vfoJcQXAGJeGEs7L98hQrBLbX-KgtnlNKxksuKCw | 2191 |
opuscleaner/filters/fix_un_chinese.json | sha256=6uQ9UBASDSiR6MK9KfrOTwE246skhOr4T_rvRm--6p4 | 218 |
opuscleaner/filters/fix_un_chinese.py | sha256=iBrtt0T0sBcx1zEsnryTUI7BT4K1vqijg7_qRTUi2_k | 354 |
opuscleaner/filters/fix_wiki.json | sha256=Cxud6fNmB_n1xTwZQoT-GiAYL862hXz8Q9m9TsYZHm8 | 1344 |
opuscleaner/filters/fix_wiki.py | sha256=R6zcrBjEKNLL7nRBoD1LDFYbB-qUVw7e2krDNYELxUA | 3569 |
opuscleaner/filters/langid.json | sha256=X5PQVJI3GlgWM57ujdvKQskeyXvcHD0M_ijZDIJZSWA | 689 |
opuscleaner/filters/langid.py | sha256=KYPc_2grpqs1Q439rV3iPPYP2KPOytKMbsAyVNHLiUc | 3019 |
opuscleaner/filters/laser_similarity.json | sha256=Ip8UqavUb0Ul_QpwNdMxTklfyYDlb0fgPkFAvvrXrYU | 853 |
opuscleaner/filters/laser_similarity.py | sha256=QPTHjs40ffGxKfbUsNa2X06IMbXpbKD18OpkPrI81Kc | 4769 |
opuscleaner/filters/max_length.json | sha256=lMKoVux4rsqZnX3L6BzsH9hGYVR2cE7darWXY5hamkM | 452 |
opuscleaner/filters/max_length.py | sha256=DbQuoVfwkvVSCpfuTU2GWe7-DTIBM-V2sd5I2AYd_1M | 1545 |
opuscleaner/filters/max_word_length.json | sha256=uKYYTF2hvP5_WoNF33Q1D1zlMJwg4h89Uayp-pZ2Q-8 | 446 |
opuscleaner/filters/max_word_length.py | sha256=fInOT6qosziYL4tnWO5lIRgBHed5KAi0ckjg5HK0-W4 | 869 |
opuscleaner/filters/normalize_whitespace.json | sha256=XwMP9kpaeGdqnK8qqxO2eopEHdZa1G6zCxk2S7ecf9E | 325 |
opuscleaner/filters/normalize_whitespace.py | sha256=WMuwWODLL6LgkPemnL8DYb3P_bbSLGdmnnLVswiib8A | 806 |
opuscleaner/filters/num_mismatch.json | sha256=XDwhewISTNxeEoi0aPIVWi4Bwqgc5toa6uUoUCbB0I8 | 493 |
opuscleaner/filters/num_mismatch.py | sha256=ojIpv6OygCBWD42tsyY5dGL2TrTrWOuSqYU4nFmxoIQ | 1868 |
opuscleaner/filters/regexp.json | sha256=xXySs9Jlh5a1CZMk_K6o2rQtmgVYtumtZ_66lO3AnXo | 216 |
opuscleaner/filters/remove_empty_lines.json | sha256=NsBSbv_SFCYBFPstsQm5tJhJL3cKRwhBQFGbmGaiVvQ | 176 |
opuscleaner/filters/remove_empty_lines.py | sha256=XUQeK8ce6y9rjR5B9kayeeD__UmtHZI8fPj5kJttJ0w | 341 |
opuscleaner/filters/remove_frequent_patterns.json | sha256=FwZNO5kbpny-v-sm9oVRCYKSg-r_jvOmtNsMaElsssI | 402 |
opuscleaner/filters/remove_frequent_patterns.py | sha256=2zBx3qZ88CqKXyKuT-yYDb0w-1_1s8C4aepsdwYTm50 | 2700 |
opuscleaner/filters/remove_frequent_patterns.txt | sha256=H1L0x6sbKI-TazzJW_5AP809StDh7_vDwyfMLTCmEOo | 417 |
opuscleaner/filters/sed.json | sha256=qJIA6oqGywsrW7saokrh3R6ydJAeW8jfUk_D_f8IQQw | 335 |
opuscleaner/filters/segment_chinese.json | sha256=l_yNMmSLFandBvCpiYfIgKEEv4BoM3nO3qmqOpv2CvI | 240 |
opuscleaner/filters/segment_chinese.py | sha256=XYM0ARBFNcCKphVgzElp9hPe5OSskK6uRmhDrgLcAWo | 200 |
opuscleaner/filters/segment_japanese.json | sha256=_cLMEMueM6ZrNjMhX8Qpe2h7yDtekdUhrv4lqNpvpeY | 432 |
opuscleaner/filters/segment_japanese.py | sha256=hMTkxWTXiJPZm98PjziXBfMvRtgebwpkMvOXTBL9cZ8 | 588 |
opuscleaner/filters/simplify_chinese.json | sha256=OZVuscTAsg2NGp6ASQ_EzeN6mODE2A4BpUAFypvBJak | 154 |
opuscleaner/filters/split_sentences.json | sha256=OjcJMMj15oCIdrDFrH65uUhNL9ZbskdLtyWBYAh0TrE | 576 |
opuscleaner/filters/split_sentences.py | sha256=EkRHf93Zb4UWTo9iPdwy473DdyMEe_Vk8t_REd1Jnmg | 1059 |
opuscleaner/filters/src_trg_ratio.json | sha256=41Hy5zGMq9hHtz0UHCIRtdak1E6BrP7BJKXty7WdB20 | 495 |
opuscleaner/filters/src_trg_ratio.py | sha256=rsRWlgptlcbXae0uEqAnW3JomfD0i2iqKwl5C3l6z-I | 2298 |
opuscleaner/filters/strip_suffix.json | sha256=KY7B97tYIb_wKTJbxyGaigiRct2VjY2H9hmNIfazKv4 | 774 |
opuscleaner/filters/strip_suffix.py | sha256=qHLn344hsO4OLbga_86d77Yam71T9CaQ_u5muGQrMX8 | 2271 |
opuscleaner/filters/test_num_mismatch.py | sha256=6bU03Wf3HBMz7q2y1EhyRoENZ8A9Q1le_CguEFgu5rQ | 2186 |
opuscleaner/filters/traditionalise_chinese.json | sha256=uTNwESPeljvtSB1jRrqGIo4BvyVbP-A_vErNpU_E0rM | 152 |
opuscleaner/filters/opusfilter/AlphabetRatioFilter.json | sha256=hZTweQFPfRltkuT9q568bUYLq_JwVKwl5n0G--35ffI | 469 |
opuscleaner/filters/opusfilter/AverageWordLengthFilter.json | sha256=z3cnYV_vg-3LEBFZfFW5tjQg8N_UOMKrBAeZB_sNvXw | 633 |
opuscleaner/filters/opusfilter/CharacterScoreFilter.json | sha256=rvrDYrS8y0KUg787-XzJQBS0L6BoQSjN6vYJwd5f2Do | 4296 |
opuscleaner/filters/opusfilter/Detokenizer.json | sha256=ahxhKkLh872Zk7_Pc_LXuMfe4OstmqqWSGsx1oZMfhg | 1127 |
opuscleaner/filters/opusfilter/HtmlTagFilter.json | sha256=e8qbYc4TDOUI1j3nq5l3tV67OZDdTf1OOCDje5ZMxsk | 228 |
opuscleaner/filters/opusfilter/LengthFilter.json | sha256=SevJVFdM53LFcvZJPQyUv6gCfu9bDDtXFwEKWQqvuFo | 673 |
opuscleaner/filters/opusfilter/LengthRatioFilter.json | sha256=S-ngrz5860Gk9KZV1vL4amsoJEKjeaHxRl_awxRG00g | 471 |
opuscleaner/filters/opusfilter/LongWordFilter.json | sha256=TDh_jEZAJncLxnn3NRwhwOmTcfdrfkyZoFF-4ahuzbI | 323 |
opuscleaner/filters/opusfilter/RegExpFilter.json | sha256=2ifcQteGJ310kqZ3MrXggCd65uY6sSOOE8MZR94S_ec | 1049 |
opuscleaner/filters/opusfilter/RegExpSub.json | sha256=53N3qGVQ4vfNRz0XoNpTN5p2zlN6ej22Se18SAKTt2E | 1039 |
opuscleaner/filters/opusfilter/Tokenizer.json | sha256=uZwMASJHWS6mI0coZozIpiSz0nEWY4qQuFxQqPj0_DM | 1121 |
opuscleaner/filters/opusfilter/WhitespaceNormalizer.json | sha256=bx7knXB0j95ezZ-EU370D8Jo2g7rOuGIYzm18hc3gjA | 375 |
opuscleaner/filters/opusfilter/opusfilter-ersatz.py | sha256=SbbhREz6iPInezvHquC-PzwmA4pVDXt_JQKuwjwXN8I | 1808 |
opuscleaner/frontend/index.html | sha256=jgpRHwj8hDvtaGzSlT2k8DeZLfq0aT6L2PpO70s0A24 | 402 |
opuscleaner/frontend/assets/AddDatasetView.2e3a667a.css | sha256=LjpmetBcZP7hYSCjby6_cek_XCqQDhQoLaDZX3KfIxY | 3663 |
opuscleaner/frontend/assets/AddDatasetView.7bfaf9df.js | sha256=rXPW3ZjI8uGrs984ojRPfTSIiZEGEWRxDMp4MnNitNg | 8937 |
opuscleaner/frontend/assets/EditFiltersView.a241f96f.css | sha256=okH5b4ZNFEWedWsWS6J13n-jCv7401ppAN358a3aYss | 8179 |
opuscleaner/frontend/assets/EditFiltersView.cf6459ba.js | sha256=CqrbaMu45ukfYze5ZAsDI-GW-FYvLqrfhL0bJs6e5qk | 121718 |
opuscleaner/frontend/assets/EditFiltersYamlView.2d7cd2d8.js | sha256=SXWEsIQNqtLfH054jryz-odzywCOGFh96GhOQ8DZPFs | 444 |
opuscleaner/frontend/assets/ListDatasetsView.08ea4530.css | sha256=COpFMNGWKt-aLnmQQBVv_wFTiutaOGMR_wHZ5lFPI3w | 1220 |
opuscleaner/frontend/assets/ListDatasetsView.213a13e8.js | sha256=ju53zqNPVg8FBdT-Q7WXphHAXBN8UPivVoo_lbdrwTA | 2330 |
opuscleaner/frontend/assets/TagsEditor.7106ed08.js | sha256=0c0iet9H2bg72uSOck4KwzE3RLgeHioAvCAnEgoNfMY | 6270 |
opuscleaner/frontend/assets/TagsEditor.ccc03a15.css | sha256=zMA6FSeOsxjbHkqc4EuHjICX3hCc3G_imTHiI-rZVRI | 996 |
opuscleaner/frontend/assets/data-cuate.84693c76.svg | sha256=hGk8dsFT9ol4qwhR8QA-I_HbCSpVlujM2BBhih7dP7k | 38943 |
opuscleaner/frontend/assets/datailor-logo.fbfa6008.svg | sha256=-_pgCN60AlJI95UA8O3hCMuJ8CqBRNOvqjtfN1F_3zI | 5490 |
opuscleaner/frontend/assets/eu.24cff2c1.png | sha256=JM_yweLbcVsFzdYhtwJQkB6EH4afWdsB6bswGiLeHzk | 22166 |
opuscleaner/frontend/assets/hacks.cf860d09.js | sha256=CWFmSiYCKhDfLOqg4Ch39Nc-3E42QjNLsMTrY-qdsxU | 298 |
opuscleaner/frontend/assets/horizon-europe.80625b0c.png | sha256=gGJbDBNHpwv59Y7YqZkxFsgifPeBK7BQYwJ7UuCX80U | 101750 |
opuscleaner/frontend/assets/index.6cae667b.css | sha256=bK5me3hoFvjcCvZZeNyq-GbWpQnn4lmKpj8hmpcs5Vg | 2148 |
opuscleaner/frontend/assets/index.83bb0165.js | sha256=TQxhF1z-zjiIilifn1nlQi2Mu8XcHqC6fWQyUFXeUeU | 129136 |
opuscleaner/frontend/assets/vue-select.b0fac2a1.css | sha256=sPrCoW4W5qxSFUFyi24DqF4F_2ivs71AiB06KZJ6uCY | 7480 |
opuscleaner/frontend/assets/vue-select.f68229d7.js | sha256=TGih1Mt50Xy0lvJILZSxZj7TLraM4cfcaKzAMT7rWUs | 17440 |
opuscleaner-0.4.1.dist-info/METADATA | sha256=6Hl5bdN4Si8QBZCZCO4i00L6dzsitC3HwvcpNUS0swc | 6929 |
opuscleaner-0.4.1.dist-info/WHEEL | sha256=TJPnKdtrSue7xZ_AVGkp9YXcvDrobsjBds1du3Nx6dc | 87 |
opuscleaner-0.4.1.dist-info/entry_points.txt | sha256=62PZEB67IYuokSQn3Ss0clKiDJJgPkLUXArEjuSYcfA | 290 |
opuscleaner-0.4.1.dist-info/RECORD | — | — |
entry_points.txt
opuscleaner-clean = opuscleaner.clean:main
opuscleaner-col = opuscleaner.col:main
opuscleaner-download = opuscleaner.download:main
opuscleaner-sample = opuscleaner.sample:main
opuscleaner-server = opuscleaner.server:main
opuscleaner-threshold = opuscleaner.threshold:main