Metadata-Version: 2.0
Name: retexto
Version: 1.0.1
Summary: Compact interface for Assisted Machine Learning
Home-page: UNKNOWN
Author: Edgar Castañeda
Author-email: edaniel15@gmail.com
License: Apache Software License
Platform: linux
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2.6
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Environment :: Console
Classifier: Operating System :: OS Independent
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Topic :: Text Editors :: Text Processing
Classifier: Topic :: Text Editors :: Word Processors
Requires-Dist: unidecode

# reTexto
Fast text processing for Python

### Run

    cd /[project_path]
    docker build -t retext .
    docker run -v $(pwd):/retext:rw -it retext bash

### Test

    invoke test

### Work in the container

    docker run -v $(pwd):/retext:rw -it retext bash

## Basic Usage

    if __name__ == '__main__':
        s = '@Edux87, i need this www.google.com | https://github.com <br> \
            <strong>UserName: çarlos </strong> \
            i\'m from Perú 😛 \
            #Friends #Text jajajajaja so fffunny  \
            loooveee thiiis 😌😎 \
            @florenciaflor19 Si!!! sé vo… 🐷JUANA🐷 \
            smile! haha jejeje jojojo jujuju jijijijajaja 😂'

        text = ReTexto(s)
        s = text.remove_html() \
                .remove_mentions() \
                .remove_tags() \
                .remove_smiles(by='SMILING') \
                .convert_specials() \
                .convert_emoji() \
                .remove_nochars(preserve_tilde=True) \
                .remove_url() \
                .remove_duplicate(r='a-jp-z') \
                .remove_duplicate_vowels() \
                .remove_duplicate_consonants() \
                .remove_punctuation() \
                .remove_multispaces() \
                .lower() \
                .split_words(uniques=True)
        print(s)
        # Output:
        # ['username', 'from', 'love', 'i', 'ned', 'funy', 'juana', 'vo', 'this', 'si', 'im', 'se', 'peru', 'smile', 'so', 'smiling', 'carlos']


