Metadata-Version: 2.4
Name: minimind
Version: 0.1.2
Summary: MiniMind: Lightweight and flexible AI generation library
Home-page: https://github.com/INSECT5386/MiniMind
Author: 신유찬
Author-email: your.email@example.com
Classifier: Programming Language :: Python :: 3
Classifier: Operating System :: OS Independent
Classifier: License :: OSI Approved :: MIT License
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.8
Description-Content-Type: text/markdown
Requires-Dist: numpy>=1.21
Requires-Dist: scikit-learn>=1.0
Requires-Dist: autograd>=1.3
Requires-Dist: joblib>=1.2
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary

MiniMind Example Code

```GPM
from minimind import GPMGenerator, Sampler, SimpleTokenizer

def main():
    """Fit a GPMGenerator on a few sample pairs and print one chat reply.

    A top-k ``Sampler`` (k=3) and a ``SimpleTokenizer`` are handed to
    ``GPMGenerator``; the generator is fitted on three (prompt, response)
    pairs and then asked to ``chat`` on a fixed prompt.
    """
    print("MiniMind GPMGenerator 테스트 시작!")

    # Sample (prompt, response) training pairs.
    training_pairs = [
        ("안녕하세요", "안녕하세요 반갑습니다"),
        ("오늘 날씨 어때?", "오늘은 맑고 따뜻해요"),
        ("뭐 먹을래?", "저는 김치찌개 좋아해요"),
    ]

    # The sampler and tokenizer are wired in when the generator is built.
    generator = GPMGenerator(
        sampler=Sampler(method='top_k', k=3),
        tokenizer=SimpleTokenizer(),
    )
    generator.fit(training_pairs)

    # Generation test.
    user_prompt = "안녕하세요"
    reply = generator.chat(user_prompt, max_tokens=10)

    print("입력 프롬프트:", user_prompt)
    print("생성된 텍스트:", reply)


if __name__ == "__main__":
    main()
```

```SAP


from minimind import SAPGenerator
from minimind import SimpleTokenizer
def main():
    """Fit a SAPGenerator on five sample pairs and print one chat reply."""
    print("MiniMind SAPGenerator 테스트 시작!")

    # Small set of (input, output) example pairs.
    examples = [
        ("안녕하세요", "안녕하세요"),
        ("오늘 날씨 어때?", "날씨가 좋아요"),
        ("밥 먹었어?", "네, 잘 먹었어요"),
        ("영화 볼래?", "좋아요 같이 보자"),
        ("잘 자요", "안녕히 주무세요"),
    ]

    # Build the generator around a SimpleTokenizer and train it.
    generator = SAPGenerator(tokenizer=SimpleTokenizer())
    generator.fit(examples)

    # Generation test: echo the prompt first, then the generated text.
    prompt = "오늘"
    print(f"입력: {prompt}")
    generated = generator.chat(prompt, max_tokens=10)
    print(f"생성 결과: {generated}")


if __name__ == "__main__":
    main()
```

```Neural


def main():
    """Train a NeuralGenerator on random dummy data and print a generated sequence.

    Random token-id sequences and random one-hot targets are created with
    NumPy, a ``NeuralGenerator`` is fitted on them for 3 epochs using a
    temperature ``Sampler``, and the result of ``generate`` for a short
    prompt is printed.
    """
    print("MiniMind 패키지 실행 - 테스트 시작!")

    # Imports stay local to the demo function, as in the original example.
    import numpy as np

    from minimind import NeuralGenerator
    from minimind import Sampler

    sampler = Sampler(method='temperature', temperature=0.8)

    # Dummy data: token-id arrays.
    vocab_size = 100
    n_samples, seq_len = 50, 10

    # np.random.randint excludes the upper bound, so pass vocab_size itself to
    # cover every token id in [0, vocab_size - 1] (the same range the targets use).
    X_dummy = np.random.randint(0, vocab_size, size=(n_samples, seq_len))

    # One random one-hot target row per sample.
    y_dummy = np.zeros((n_samples, vocab_size))
    target_ids = np.random.randint(0, vocab_size, size=n_samples)
    y_dummy[np.arange(n_samples), target_ids] = 1.0

    ng = NeuralGenerator(vocab_size=vocab_size, epochs=3, verbose=True, sampler=sampler)
    ng.fit(X_dummy, y_dummy)

    prompt = np.array([1, 2, 3])  # example start-token sequence
    generated_seq = ng.generate(prompt, max_tokens=10)
    print("생성된 시퀀스:", generated_seq)


if __name__ == "__main__":
    main()
```

```Text_sampling
# test_sampling.py

import numpy as np
from minimind import top_k_sampling, top_p_sampling, temperature_sampling, Sampler

def dummy_probs(size=100):
    """Return a random probability vector of length ``size`` that sums to 1."""
    raw = np.random.rand(size)
    total = raw.sum()
    return raw / total

def test_sampling_functions():
    """Call each stand-alone sampling function once on a random distribution and print the result."""
    dist = dummy_probs()

    top_k_pick = top_k_sampling(dist, k=5)
    print("top_k_sampling:", top_k_pick)

    top_p_pick = top_p_sampling(dist, p=0.8)
    print("top_p_sampling:", top_p_pick)

    # Temperature sampling is tried at a low and a high temperature.
    cool_pick = temperature_sampling(dist, temperature=0.5)
    print("temperature_sampling (temp=0.5):", cool_pick)

    warm_pick = temperature_sampling(dist, temperature=2.0)
    print("temperature_sampling (temp=2.0):", warm_pick)

def test_sampler_class():
    """Exercise one ``Sampler`` instance in top_p, top_k and temperature modes."""
    dist = dummy_probs()
    sampler = Sampler(method='top_p', p=0.9)
    print("Sampler top_p:", sampler.sample(dist))

    # Reconfigure the same sampler in place: switch the method, set the
    # attribute that method uses, then sample again.
    for method, attr, value, label in (
        ('top_k', 'k', 3, "Sampler top_k:"),
        ('temperature', 'temperature', 0.7, "Sampler temperature:"),
    ):
        sampler.method = method
        setattr(sampler, attr, value)
        print(label, sampler.sample(dist))

# Run both sampling demos when this file is executed as a script.
if __name__ == "__main__":
    test_sampling_functions()
    test_sampler_class()
```

```Tokenizer
from minimind import SimpleTokenizer

# Build the tokenizer with no arguments.
tokenizer = SimpleTokenizer()

# Sample sentence mixing English, Korean, punctuation and digits.
text = "Hello, 안녕하세요! Let's test the tokenizer 123."
tokens = tokenizer.tokenize(text)
print("토큰:", tokens)

# Pass the tokens back through detokenize and print what comes out.
reconstructed = tokenizer.detokenize(tokens)
print("복원된 문장:", reconstructed)
```

```Utils
import os
import numpy as np
from minimind import set_seed, save_json, load_json, save_model_weights, load_model_weights, simple_logger


if __name__ == "__main__":
    # Self-checks for the utility helpers. They are defined and run only when
    # this file is executed as a script.

    def test_set_seed():
        """Check that re-seeding with the same value reproduces NumPy's random draw."""
        set_seed(123)
        a = np.random.rand(3)
        set_seed(123)
        b = np.random.rand(3)
        assert np.allclose(a, b), "set_seed 실패!"
        print("set_seed 테스트 통과!")

    def test_save_load_json():
        """Round-trip a small dict through save_json / load_json."""
        data = {'name': 'MiniMind', 'version': 1.0}
        filepath = 'test.json'
        try:
            save_json(data, filepath)
            loaded = load_json(filepath)
            assert data == loaded, "JSON 저장/로드 실패!"
        finally:
            # Remove the scratch file even when saving, loading or the
            # comparison fails, so a failed run leaves nothing behind.
            if os.path.exists(filepath):
                os.remove(filepath)
        print("save_json & load_json 테스트 통과!")

    def test_save_load_weights_multi_format():
        """Round-trip a weight dict through every format passed to save/load_model_weights."""
        weights = {
            'W1': np.array([1, 2, 3]),
            'b1': np.array([0.1, 0.2, 0.3])
        }
        for fmt in ['npz', 'joblib', 'json']:
            filepath = f"weights_test.{fmt}"
            try:
                save_model_weights(weights, filepath, format=fmt)
                loaded = load_model_weights(filepath, format=fmt)
                for k in weights:
                    assert np.allclose(weights[k], loaded[k]), f"{fmt} {k} 가중치 저장/로드 실패!"
            finally:
                # Clean up this format's scratch file regardless of outcome.
                if os.path.exists(filepath):
                    os.remove(filepath)
        print("멀티 포맷 가중치 저장/로드 테스트 통과!")

    def test_logger():
        """Emit one message through simple_logger."""
        simple_logger("테스트 로그 메시지")

    # Run all checks in order.
    test_set_seed()
    test_save_load_json()
    test_save_load_weights_multi_format()
    test_logger()
```

```Radec
import numpy as np
from minimind import Radec # 네가 만든 클래스 파일명에 맞게 바꿔!
from minimind import Sampler

# 간단한 샘플용 토크나이저 (공백 기준)
def simple_tokenizer(text):
    """Split ``text`` into whitespace-separated tokens, ignoring outer whitespace."""
    trimmed = text.strip()
    return trimmed.split()

# Sampling is delegated to minimind's Sampler (imported above); no custom sampler is defined here.

def main():
    """Train a Radec generator on pairs read from a CSV file and print one generation.

    Reads the ``input_text`` / ``output_text`` columns of ``MLdata.csv`` in the
    current working directory, fits ``Radec`` on the first 200 pairs, and
    prints the tokens returned by ``generate`` joined with spaces.
    """
    import csv

    csv_path = "MLdata.csv"

    # newline='' is what the csv module documents for files handed to a
    # reader, so that newlines embedded in quoted fields are parsed correctly.
    with open(csv_path, encoding='utf-8', newline='') as f:
        pairs = [
            (row['input_text'].strip(), row['output_text'].strip())
            for row in csv.DictReader(f)
        ]

    # Build the generator.
    generator = Radec(n_models=2, sampler=Sampler(), tokenizer=simple_tokenizer)

    # Training (only the first 200 pairs are used).
    print("학습 시작...")
    generator.fit(pairs[:200])
    print("학습 완료!")

    # Generation test.
    prompt = "오늘 날씨 어때?"
    print(f"'{prompt}'에 대한 생성 결과:")
    generated_tokens = generator.generate(prompt, max_tokens=10)
    print(" ".join(generated_tokens))


if __name__ == "__main__":
    main()
```

```SeProD
# minimind/seprod.py

import csv
import re
import numpy as np
import autograd.numpy as anp
from autograd import grad
from minimind import SeProD

# --- Tokenizer (lowercased word tokens extracted with a regex) ---
def simple_tokenizer(text):
    """Lowercase ``text`` and return its word tokens (runs of ``\\w`` characters)."""
    lowered = text.lower()
    words = re.findall(r'\b\w+\b', lowered)
    return words

# --- vocab 빌드 ---
def build_vocab(tokens, min_freq=2):
    """Build token<->id lookup tables from a flat token list.

    Only tokens occurring at least ``min_freq`` times are kept; ids are
    assigned in sorted token order starting at 0.

    Returns:
        ``(stoi, itos)``: token -> id and id -> token dictionaries.
    """
    from collections import Counter

    frequencies = Counter(tokens)
    kept = sorted(
        word for word, count in frequencies.items() if count >= min_freq
    )

    stoi = {}
    itos = {}
    for index, word in enumerate(kept):
        stoi[word] = index
        itos[index] = word
    return stoi, itos

# --- 인코딩 ---
def encode(tokens, stoi):
    """Map tokens to ids via ``stoi``, skipping any token that is not a key of ``stoi``."""
    ids = []
    for token in tokens:
        if token in stoi:
            ids.append(stoi[token])
    return ids

# --- 데이터셋 생성 (패딩 + max_len) ---
def pad_seq(seq, max_len, pad_idx):
    """Return ``seq`` truncated to ``max_len`` items, or right-padded with ``pad_idx`` up to it."""
    clipped = seq[:max_len]
    # A non-positive count yields an empty filler, so long inputs are only clipped.
    filler = [pad_idx] * (max_len - len(seq))
    return clipped + filler

def load_dataset(csv_path, max_len=20, min_freq=2, max_samples=1000):
    """Load (input_text, output_text) rows from a CSV and build padded id arrays.

    Args:
        csv_path: Path of a UTF-8 CSV file with ``input_text`` and
            ``output_text`` columns.
        max_len: Length every encoded sequence is truncated or padded to.
        min_freq: Minimum token count for a token to enter the vocabulary.
        max_samples: Maximum number of CSV rows to use.

    Returns:
        Tuple ``(X_enc, X_dec, Y, stoi, itos, pad_idx, vocab_size)`` where the
        first three are NumPy arrays built from rows of length ``max_len``
        (encoder input, decoder input, target), ``stoi`` / ``itos`` are the
        vocabulary lookups, ``pad_idx`` is the padding id (``len(stoi)``) and
        ``vocab_size`` is ``len(stoi) + 1``.
    """
    inputs = []
    outputs = []
    all_tokens = []

    # newline='' is what the csv module documents for files handed to a
    # reader, so that newlines embedded in quoted fields are parsed correctly.
    with open(csv_path, encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if i >= max_samples:
                break
            inp_tokens = simple_tokenizer(row['input_text'])
            out_tokens = simple_tokenizer(row['output_text']) + ["<EOS>"]
            all_tokens.extend(inp_tokens)
            all_tokens.extend(out_tokens)
            inputs.append(inp_tokens)
            outputs.append(out_tokens)

    stoi, itos = build_vocab(all_tokens, min_freq)
    pad_idx = len(stoi)  # the padding id is appended after the vocabulary

    X_enc = []
    X_dec = []
    Y = []

    for inp_tokens, out_tokens in zip(inputs, outputs):
        enc_encoded = encode(inp_tokens, stoi)

        # Encode the output once and slice afterwards. encode() drops
        # out-of-vocabulary tokens, so encoding out_tokens[:-1] and
        # out_tokens[1:] separately breaks the one-step shift between decoder
        # input and target whenever the first or last token is dropped.
        out_encoded = encode(out_tokens, stoi)
        dec_encoded = out_encoded[:-1]  # decoder input: everything but the last id
        y_encoded = out_encoded[1:]     # target: the same ids shifted left by one

        X_enc.append(pad_seq(enc_encoded, max_len, pad_idx))
        X_dec.append(pad_seq(dec_encoded, max_len, pad_idx))
        Y.append(pad_seq(y_encoded, max_len, pad_idx))

    vocab_size = len(stoi) + 1  # vocabulary plus the padding id
    return (np.array(X_enc), np.array(X_dec), np.array(Y), stoi, itos, pad_idx, vocab_size)



# --- 텍스트 생성 (단순 greedy) ---
def generate_text(model, stoi, itos, prompt, max_len=20, pad_idx=None):
    """Greedily decode up to ``max_len`` tokens from ``model`` for ``prompt``.

    Args:
        model: Object whose ``predict(enc_input, dec_input)`` result is indexed
            as ``[0][position]`` to obtain per-token scores.
        stoi: Token -> id mapping used to encode the prompt.
        itos: Id -> token mapping used to decode the output.
        prompt: Input text.
        max_len: Encoder/decoder length and maximum number of decoding steps.
        pad_idx: Padding id; 0 is used when it is ``None``.

    Returns:
        The generated tokens joined with single spaces; ids missing from
        ``itos`` are rendered as ``"<UNK>"``.
    """
    pad = 0 if pad_idx is None else pad_idx

    # Encoder input: encoded prompt, clipped and right-padded to max_len.
    prompt_ids = encode(simple_tokenizer(prompt), stoi)[:max_len]
    prompt_ids = prompt_ids + [pad] * (max_len - len(prompt_ids))
    enc_input = np.array([prompt_ids])

    # There is no <BOS> token, so the decoder input starts as all padding.
    dec_input = np.array([[pad] * max_len])

    generated = []
    for step in range(max_len):
        probs = model.predict(enc_input, dec_input)[0]  # (seq_len, vocab_size)
        next_token = np.argmax(probs[step])
        # Predicting the padding id ends generation.
        if next_token == pad:
            break
        generated.append(next_token)
        dec_input[0, step] = next_token

    words = [itos.get(tok, "<UNK>") for tok in generated]
    return " ".join(words)

# --- 메인 실행 ---
if __name__ == "__main__":
    # Adjust this path to wherever the CSV lives on your machine.
    csv_path = "C:\\Users\\yuchan\\Code\\MLdata.csv"
    max_len = 20

    # Load the data, then unpack the arrays and vocabulary information.
    dataset = load_dataset(csv_path, max_len=max_len)
    X_enc, X_dec, Y, stoi, itos, pad_idx, vocab_size = dataset

    model = SeProD(
        vocab_size=vocab_size,
        embed_dim=64,
        hidden_dim=128,
        max_len=max_len,
        pad_idx=pad_idx,
    )
    model.fit(X_enc, X_dec, Y, epochs=10, batch_size=64, lr=0.001)

    # Greedy generation for a single prompt.
    prompt = "안녕하세요"
    generated_text = generate_text(
        model, stoi, itos, prompt, max_len=max_len, pad_idx=pad_idx
    )

    print("Generated Text:")
    print(generated_text)
```

```other_sub_action
from minimind import NeuralGenerator

# Build a NeuralGenerator with explicit vocabulary, embedding and hidden-layer
# sizes, then call its summary() method.
model = NeuralGenerator(vocab_size=100, embed_dim=32, hidden_layer_sizes=(64, 32))
model.summary()  
```
