import matplotlib.pyplot as plt
import numpy as np
from sentence_transformers import SentenceTransformer
# Sentence-embedding model shared by every encode() call in this script.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Reference vocabulary: 50 animal names whose embeddings are averaged below.
animals = [
    "tiger", "lion", "elephant", "giraffe", "zebra",
    "rhinoceros", "hippopotamus", "crocodile", "monkey", "panda",
    "koala", "kangaroo", "whale", "dolphin", "seal",
    "penguin", "shark", "snake", "lizard", "turtle",
    "frog", "butterfly", "bee", "ant", "eagle",
    "sparrow", "pigeon", "parrot", "owl", "duck",
    "chicken", "dog", "cat", "pig", "cow",
    "sheep", "horse", "donkey", "rabbit", "squirrel",
    "fox", "wolf", "bear", "deer", "hedgehog",
    "bat", "mouse", "chameleon", "snail", "jellyfish",
]

# Encode the whole list in one call, then average along the first axis
# (presumably one row per word) to get a single "animal" direction vector.
animals_embeddings = model.encode(animals)
avg_animals_embeddings = animals_embeddings.mean(axis=0)

# Probe groups: animal names, non-animal nouns, and a few arbitrary words.
animals_words = ["Camel", "Gorilla", "Cheetah"]
un_animals_words = ["Dream", "Chair", "Mathematics"]
random_words = ["phone", "eye", "delicious"]

# Maps each probe word to its score against the averaged animal embedding.
results = {}

for word_list in (animals_words, un_animals_words, random_words):
    # Encode one group at a time and take the dot product of every embedding
    # with the averaged animal vector; no normalization is applied here.
    word_embeddings = model.encode(word_list)
    projection_scores = np.dot(word_embeddings, avg_animals_embeddings)
    results.update(zip(word_list, projection_scores))

# Report every score to four decimal places, in insertion order.
for word, score in results.items():
    print(f"'{word}': {score:.4f}")

# Show the first ten components of the averaged vector as a quick sanity check.
print(avg_animals_embeddings[:10].round(4))


# Draw every score as a point on a single horizontal axis (y fixed at 0).
# Requires `matplotlib.pyplot` to be imported as `plt` at the top of the file.
scores = list(results.values())

plt.figure(figsize=(12, 2))
plt.scatter(scores, np.zeros(len(scores)), marker='o', zorder=2)

# Labels drawn slightly higher than the rest so they do not overlap a
# neighbouring label.
raised_labels = {"Cheetah", "Chair"}

for word, score in results.items():
    label_y = 0.02 if word in raised_labels else 0.01
    plt.text(score, label_y, word, ha='center', va='bottom', fontsize=8)

# Baseline sits underneath the markers (zorder 1 < 2); the y axis carries no
# information, so its ticks are hidden.
plt.axhline(0, color='black', linewidth=0.5, zorder=1)
plt.yticks([])
plt.xlabel("Projection Score")
plt.title("Word Projection Scores on the 'Animal' Axis")
plt.grid(True, which='both', axis='x', linestyle='--', linewidth=0.5)
plt.show()