import numpy as np
def vectorize_terms(terms):
    """Convert each term into a vector of its lower-cased character codes.

    Args:
        terms: Iterable of strings.

    Returns:
        A list holding one 1-D integer NumPy array per term. Each array
        contains ``ord()`` of every character of the lower-cased term, in
        the order the characters appear.
    """
    # Build every vector in a single pass instead of first materialising an
    # intermediate array of one-character strings. ``dtype=int`` keeps the
    # empty-string case integer-typed; without it NumPy would fall back to
    # float64 for an empty list.
    return [
        np.array([ord(char) for char in term.lower()], dtype=int)
        for term in terms
    ]
# Sample vocabulary: one reference word followed by three words to compare
# against it.
root, term1, term2, term3 = 'Believe', 'beleive', 'bargain', 'Elephant'
terms = [root, term1, term2, term3]
# Notebook-style echo of the list; evaluating the bare name does nothing when
# the file is run as a plain script.
terms
import pandas as pd
# Character vectorization: one vector of character codes per term.
term_vectors = vectorize_terms(terms)

# Show the vector representations as a table labelled by the original terms.
vec_df = pd.DataFrame(term_vectors, index=terms)
print(vec_df)


def _row_vector(frame, label):
    """Return the first row of ``frame`` whose index equals ``label``.

    Columns that hold NaN in the selected rows are dropped before the row is
    extracted as a NumPy array.
    """
    selected = frame[frame.index == label]
    return selected.dropna(axis=1).values[0]


# Split the table into the reference vector and the vectors compared to it.
root_term = root
other_terms = [term1, term2, term3]
root_term_vec = _row_vector(vec_df, root_term)
other_term_vecs = [_row_vector(vec_df, term) for term in other_terms]
def hamming_distance(u, v, norm=False):
    """Count the positions at which two equal-shaped vectors differ.

    Args:
        u: First vector; a NumPy array or any array-like such as a list.
        v: Second vector, with the same shape as ``u``.
        norm: When true, return the fraction of differing positions instead
            of the raw count.

    Returns:
        The number of mismatching positions, or their mean when ``norm`` is
        set.

    Raises:
        ValueError: If ``u`` and ``v`` do not have the same shape.
    """
    # Coerce the inputs so plain sequences work as well; existing arrays are
    # passed through unchanged.
    u, v = np.asarray(u), np.asarray(v)
    if u.shape != v.shape:
        raise ValueError('The vectors must have equal lengths.')
    mismatches = u != v
    return mismatches.mean() if norm else mismatches.sum()
def manhattan_distance(u, v, norm=False):
    """Sum the absolute element-wise differences of two equal-shaped vectors.

    Args:
        u: First vector; a NumPy array or any array-like such as a list.
        v: Second vector, with the same shape as ``u``.
        norm: When true, return the mean absolute difference instead of the
            sum.

    Returns:
        The sum of ``|u - v|`` over all elements, or its mean when ``norm``
        is set.

    Raises:
        ValueError: If ``u`` and ``v`` do not have the same shape.
    """
    # Coerce the inputs so plain sequences work as well as arrays.
    u, v = np.asarray(u), np.asarray(v)
    if u.shape != v.shape:
        raise ValueError('The vectors must have equal lengths.')
    # Widen to at least int64 before subtracting: on unsigned or narrow
    # integer dtypes the difference would otherwise wrap around silently
    # (uint8: 1 - 3 evaluates to 254).
    wide = np.result_type(u.dtype, v.dtype, np.int64)
    gaps = np.abs(u.astype(wide) - v.astype(wide))
    return gaps.mean() if norm else gaps.sum()
def euclidean_distance(u, v):
    """Return the Euclidean (L2) distance between two equal-shaped vectors.

    Args:
        u: First vector; a NumPy array or any array-like such as a list.
        v: Second vector, with the same shape as ``u``.

    Returns:
        The square root of the sum of squared element-wise differences.

    Raises:
        ValueError: If ``u`` and ``v`` do not have the same shape.
    """
    # Coerce the inputs so plain sequences work as well as arrays.
    u, v = np.asarray(u), np.asarray(v)
    if u.shape != v.shape:
        raise ValueError('The vectors must have equal lengths.')
    # Promote to at least float64 first: with narrow or unsigned integer
    # dtypes both the subtraction and the squaring can wrap around silently
    # (uint8: 200 ** 2 evaluates to 64).
    wide = np.result_type(u.dtype, v.dtype, np.float64)
    diff = u.astype(wide) - v.astype(wide)
    return np.sqrt(np.sum(np.square(diff)))
def cosine_distance(u, v):
    """Return the cosine distance (1 - cosine similarity) of two 1-D vectors.

    Args:
        u: First vector; a NumPy array or any array-like such as a list.
        v: Second vector, with the same shape as ``u``.

    Returns:
        ``1.0 - dot(u, v) / (|u| * |v|)``. If either vector has zero
        magnitude the division is 0/0 and the result is ``nan``.

    Raises:
        ValueError: If ``u`` and ``v`` do not have the same shape.
    """
    # Coerce the inputs so plain sequences work as well as arrays.
    u, v = np.asarray(u), np.asarray(v)
    # Same explicit shape check as the other distance functions in this file.
    if u.shape != v.shape:
        raise ValueError('The vectors must have equal lengths.')
    # Promote to at least float64 so integer products and squares cannot
    # wrap around on narrow or unsigned dtypes.
    wide = np.result_type(u.dtype, v.dtype, np.float64)
    u, v = u.astype(wide), v.astype(wide)
    magnitude_product = np.sqrt(np.sum(np.square(u))) * np.sqrt(np.sum(np.square(v)))
    return 1.0 - np.dot(u, v) / magnitude_product

