Machine learning¶
Prof. Dr. Fabian Woebbeking
Assistant Professor of Financial Economics
IWH - Leibniz Institute for Economic Research, MLU - Martin Luther University Halle-Wittenberg
Resources¶
Books:
- Müller and Guido. Introduction to machine learning with Python: a guide for data scientists.
- Hastie, Tibshirani and Friedman. The elements of statistical learning: data mining, inference, and prediction.
- Kuhn and Johnson. Applied predictive modeling.
- Hal Daumé III. A Course in Machine Learning (Free full text: http://ciml.info/)
Papers:
- Athey, S. "Beyond prediction: Using big data for policy problems." Science 355.6324 (2017): 483-485.
- Athey, S. The Impact of Machine Learning on Economics. The economics of artificial intelligence. University of Chicago Press, 2019. 507-552.
- Athey, S., and G. W. Imbens. Machine learning methods that economists should know about. Annual Review of Economics 11 (2019): 685-725.
- Mullainathan, S., and J. Spiess. Machine learning: an applied econometric approach. Journal of Economic Perspectives 31.2 (2017): 87-106.
ML in economic research¶
- Bajari, P., Nekipelov, D., Ryan, S. P., and M. Yang. Machine learning methods for demand estimation. American Economic Review 105.5 (2015): 481-85.
- Burgess, R., Hansen, M., Olken, B. A., and P. Potapov. The Political Economy of Deforestation in the Tropics. Quarterly Journal of Economics 127.4 (2012): 1707-54.
- Engl, F., Riedl, A., and R. Weber. Spillover Effects of Institutions on Cooperative Behavior, Preferences, and Beliefs. American Economic Journal: Microeconomics 13.4 (2021): 261-99.
- Barth, A., Mansouri, S., and F. Woebbeking. "Let Me Get Back to You" - A Machine Learning Approach to Measuring Nonanswers. Management Science 69.10 (2023): 6333-6348.
Universal approximation theorem¶
The universal approximation theorem states that a neural network with at least one hidden layer and a suitable activation function can approximate any continuous function on a compact domain, given sufficiently many neurons.
Approximating a simple function such as f(x) = x² is therefore not our problem:
show_xsquared()
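As a minimal sketch of the theorem in action (illustrative only, and not the course's show_xsquared() helper), a single-hidden-layer network can be fit to f(x) = x² with scikit-learn:

import numpy as np
from sklearn.neural_network import MLPRegressor

# Illustrative data: f(x) = x^2 on a compact interval
x = np.linspace(-2, 2, 200).reshape(-1, 1)
y = x.ravel() ** 2

# One hidden layer with a suitable (here: tanh) activation
mlp = MLPRegressor(hidden_layer_sizes=(50,), activation='tanh',
                   max_iter=5000, random_state=0).fit(x, y)
print(f"Max absolute error on the grid: {np.abs(mlp.predict(x) - y).max():.3f}")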
Selling your black box¶
Athey, S. "Beyond prediction: Using big data for policy problems." Science 355.6324 (2017): 483-485.
Explainable AI (XAI)¶
Various techniques aiming at
- Model interpretability (vs black box)
- Feature importance:
- SHAP (SHapley Additive exPlanations)
- LIME (Local Interpretable Model-agnostic Explanations)
- Visualization
Zoo of methods, e.g. LIME, Anchors, GraphLIME, LRP, DTD, PDA, TCAV, XGNN, SHAP, ASV, Break-Down, Shapley Flow, Textual Explanations of Visual Models, Integrated Gradients, Causal Models, Meaningful Perturbations, and X-NeSyL ... see:
- Holzinger, Andreas, et al. "Explainable AI methods-a brief overview." International Workshop on Extending Explainable AI Beyond Deep Models and Classifiers. Cham: Springer International Publishing, 2020.
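As an illustration of the feature importance idea, here is a minimal SHAP sketch (assuming the shap package is installed; model and data are made up):

import numpy as np
import shap
from sklearn.ensemble import RandomForestRegressor

# Illustrative data where feature 0 matters most and feature 2 not at all
X = np.random.rand(500, 3)
y = 2 * X[:, 0] + 0.5 * X[:, 1] + np.random.normal(0, 0.05, 500)

model = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, y)
shap_values = shap.TreeExplainer(model).shap_values(X)

# Mean absolute SHAP value per feature as a global importance measure
print(np.abs(shap_values).mean(axis=0))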
Git and GitHub¶
Git (local repository)¶
Git is a free and open source distributed version control system designed to handle everything from small to very large projects with speed and efficiency. (see Git, 2023)
Some source-code editors come with built-in Git (and even GitHub) capabilities or can be extended accordingly (e.g. Microsoft's Visual Studio Code, which I use during this course).
Your local repository is essentially a folder on your local file system. Changes made in that folder can be committed to the (local) git repository.
First, "stage" your changes - this is sth. like a pre-commit:
# The '*' adds all changes made in your local folder
git add *
Second, commit your staged changes to the local repository:
git commit -m 'Commit message'
In the broadest sense, you could see Git as a blockchain of commits (changes) made to your repository. You can thus
- observe a complete (almost immutable) history,
- git checkout the state at any commit to the repository.
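For example (the commit hash below is purely illustrative):
# List the commit history, then restore the working tree to the state at one commit
git log --oneline
git checkout 1a2b3c4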
More on Git:
- About Git itself: https://git-scm.com/about
- Getting started (videos, tutorials): https://git-scm.com/doc
GitHub (remote repository)¶
You can clone the course repository to your local system:
git clone https://github.com/cafawo/MachineLearning.git
Your local Git repository remembers its origin. This enables you to pull updates from the remote (Git does not synchronize automatically).
git pull
If you have write access to the remote, you can also push changes to it.
git push
Careful: Git tries its best to merge the remote with the local repository; however, it might fail if the two repositories have diverged too much. This should not concern you too much as a single user, but it becomes very relevant when collaborating on a remote.
This is all we need for this course; however, it is only the tip of the iceberg. More on GitHub:
- Working with GitHub (remotes): https://skills.github.com/
Coding style¶
Without being too pedantic, we follow the PEP 8 – Style Guide for Python Code. When in doubt, return to this source for guidance.
Naming convention¶
Here are some best practices to follow when naming stuff.
- Use all lowercase. Ex: name instead of Name.
  - One exception: class names should start with a capital letter, followed by lowercase letters.
- Use the snake_case convention (i.e., separate words by underscores, so the name looks like a snake). Ex: gross_profit instead of grossProfit or GrossProfit.
- Names should be meaningful and easy to remember. Ex: interest_rate instead of r or ir.
- Names should have a reasonable length. Ex: sales_apr instead of sales_data_for_april.
- Avoid names of popular functions and modules. Ex: avoid print, math, or collections.
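A minimal sketch of these conventions (names and values are made up):

interest_rate = 0.05        # lowercase, snake_case, meaningful
gross_profit = 1_250_000    # instead of grossProfit or GrossProfit

class PortfolioReport:      # exception: class names start with a capital letter
    pass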
Comments¶
Comments should help to understand how your code works and your intentions behind it!
Comments that contradict the code are worse than no comments. Always make a priority of keeping the comments up-to-date when the code changes! Comments should be complete sentences. The first word should be capitalized, unless it is an identifier that begins with a lower case letter (never alter the case of identifiers!). (PEP 8)
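A short illustration of such a comment (the calculation is made up):

# Convert a nominal annual rate, compounded monthly, into an effective annual rate.
nominal_rate = 0.05
effective_rate = (1 + nominal_rate / 12) ** 12 - 1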
Natural language processing (NLP)¶
How to squeeze textual data into machine learning methods?
Bag of words¶
Taddy, Matt. "Multinomial inverse regression for text analysis." Journal of the American Statistical Association 108.503 (2013): 755-770.
import numpy as np
import pandas as pd

# Text input
malory = ["Do you want ants?",
          "Because that’s how you get ants."]

# All unique tokens from the text input (here words, could be n-grams)
feature_names = ['ants', 'because', 'do', 'get', 'how', 'that', 'want', 'you']
feature_matrix = np.array([[1, 0, 1, 0, 0, 0, 1, 1],
                           [1, 1, 0, 1, 1, 1, 0, 1]])

display(pd.DataFrame(feature_matrix, columns=feature_names, index=malory))
| | ants | because | do | get | how | that | want | you |
|---|---|---|---|---|---|---|---|---|
| Do you want ants? | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 1 |
| Because that’s how you get ants. | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 |
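In practice, such a matrix is rarely built by hand. A minimal sketch with scikit-learn's CountVectorizer (its default tokenizer lowercases and drops punctuation and single-character tokens, so the exact vocabulary can differ slightly from a hand-built one):

from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer()
bow = vectorizer.fit_transform(malory)  # reuses the 'malory' list from above
print(vectorizer.get_feature_names_out())
print(bow.toarray())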
Word embedding¶
Mikolov, Tomas, et al. "Distributed representations of words and phrases and their compositionality." Advances in neural information processing systems 26 (2013).
Example: t-distributed Stochastic Neighbor Embedding (TSNE)
# Simplified word embeddings for cities and countries
word_embeddings = {
    "Paris": np.array([0.8, 0.2, 0.1]),
    "France": np.array([0.7, 0.2, 0.2]),
    "Berlin": np.array([0.6, 0.4, 0.2]),
    "Germany": np.array([0.6, 0.3, 0.3]),
    "Rome": np.array([0.5, 0.6, 0.2]),
    "Italy": np.array([0.5, 0.5, 0.3])
}

# Function to calculate cosine similarity between two vectors
def cosine_similarity(vec_a, vec_b):
    dot_product = np.dot(vec_a, vec_b)
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)
    return dot_product / (norm_a * norm_b)

print(f"Paris|France: {cosine_similarity(word_embeddings['Paris'], word_embeddings['France']):.2f}")
print(f"Paris|Berlin: {cosine_similarity(word_embeddings['Paris'], word_embeddings['Berlin']):.2f}")
print(f"Paris|Italy: {cosine_similarity(word_embeddings['Paris'], word_embeddings['Italy']):.2f}")
Paris|France: 0.99
Paris|Berlin: 0.93
Paris|Italy: 0.83
# Vector arithmetic: Berlin - Germany + France
result_vector = word_embeddings["Berlin"] - word_embeddings["Germany"] + word_embeddings["France"]

# Find the closest word to the resulting vector
closest_word = None
max_similarity = -1
for word in ["Paris", "Rome", "Italy"]:
    similarity = cosine_similarity(result_vector, word_embeddings[word])
    if similarity > max_similarity:
        max_similarity = similarity
        closest_word = word

print(f"'Berlin' - 'Germany' + 'France' = '{closest_word}' ({max_similarity:.2f})")
'Berlin' - 'Germany' + 'France' = 'Paris' (0.99)
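The TSNE visualization mentioned above can also be sketched for the toy embeddings with scikit-learn (with only six vectors, perplexity must be smaller than the number of samples):

from sklearn.manifold import TSNE

words = list(word_embeddings.keys())
vectors = np.array([word_embeddings[w] for w in words])
coords = TSNE(n_components=2, perplexity=2, random_state=0).fit_transform(vectors)

for word, (x, y) in zip(words, coords):
    print(f"{word}: ({x:.2f}, {y:.2f})")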
Transformer architecture¶
Vaswani, Ashish, et al. "Attention is all you need." Advances in neural information processing systems 30 (2017).
word_embeddings = {
    "Paris": np.array([0.8, 0.2, 0.1]),
    "France": np.array([0.7, 0.3, 0.2]),
    "Berlin": np.array([0.6, 0.1, 0.2]),
    "Germany": np.array([0.6, 0.4, 0.3]),
    "Rome": np.array([0.5, 0.2, 0.2]),
    "Italy": np.array([0.5, 0.5, 0.3])
}
# This code is just illustrative ... look at the steps, not the code!
def transformer_encoder(word_embeddings):
    # Step 1: Input Embedding
    word_embeddings = word_embeddings
    # Step 2: Positional Encoding
    positional_embeddings = {word: vec + 0.1 for word, vec in word_embeddings.items()}
    # Step 3: Attention
    attention_sum = sum(positional_embeddings.values())
    attention_output = {word: vec * attention_sum for word, vec in positional_embeddings.items()}
    # Step 4: Feed-Forward Network
    feed_forward_output = {word: vec + np.array([0.3, 0.3, 0.3]) for word, vec in attention_output.items()}
    return feed_forward_output

def classify_city_country(transformed_embeddings):
    classification = {}
    for word, vec in transformed_embeddings.items():
        # Classification rule: if the second element is greater than 1.2, classify as Country, else as City
        classification[word] = "Country" if vec[1] > 1.2 else "City"
    return classification

# Process the embeddings through the transformer encoder
transformed_embeddings = transformer_encoder(word_embeddings)

# Classify each word as City or Country
classification = classify_city_country(transformed_embeddings)

# Display the classification results
for word, category in classification.items():
    print(f"{word} is a {category}")
Paris is a City
France is a Country
Berlin is a City
Germany is a Country
Rome is a City
Italy is a Country
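The attention step above only mimics the idea. For reference, the building block in Vaswani et al. (2017) is scaled dot-product attention, softmax(Q K^T / sqrt(d_k)) V. A minimal sketch, where Q, K and V are simply the toy embeddings rather than learned projections:

def scaled_dot_product_attention(Q, K, V):
    d_k = Q.shape[-1]
    scores = Q @ K.T / np.sqrt(d_k)  # query-key similarity
    weights = np.exp(scores) / np.exp(scores).sum(axis=-1, keepdims=True)  # row-wise softmax
    return weights @ V  # attention-weighted sum of the values

Q = K = V = np.vstack(list(word_embeddings.values()))  # shape (6, 3)
print(scaled_dot_product_attention(Q, K, V).round(2))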
Fine tuning¶
- LLMs can be very large; training one from scratch is therefore inefficient
- Instead, fine-tune a pretrained LLM for a specific task
- Supervised learning process
- Updates the weights of the (pretrained) LLM
- Check out https://huggingface.co/ (a minimal loading sketch follows below)
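A minimal sketch of that starting point, assuming the Hugging Face transformers package is installed (model name and number of labels are illustrative):

from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

# From here, a supervised training loop updates the pretrained weights on task-specific labels,
# e.g. via the transformers Trainer API.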
Catastrophic interference/forgetting¶
- Tendency of neural networks to forget previously learned information upon learning new information
- This creates a problem when updating a model on new data without retraining from scratch (e.g. when fine-tuning a fine-tuned model)
- Common solutions:
- Regularization methods that penalize changes to important parameters (see the sketch below)
- Rehearsal methods that store or generate examples from previous tasks
- Dynamic architectures that grow or reconfigure network components
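A schematic sketch of the first idea, an EWC-style penalty that keeps new weights close to the previously learned ones (theta, theta_old and fisher are illustrative arrays):

import numpy as np

def ewc_penalty(theta, theta_old, fisher, lam=1.0):
    # Penalize deviations from the old parameters, weighted by their estimated importance
    return lam * np.sum(fisher * (theta - theta_old) ** 2)

# Added to the new task's loss, this discourages overwriting parameters that mattered before.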
ChatGPT API¶
import openai

# Use your password (API key) to log in here ...
with open('gptpassword.txt', 'r') as file:
    openai.api_key = file.read().strip()

# Returns a list of all OpenAI models
models = openai.models.list()
print(f"OpenAI currently offers {len(models.data)} models, e.g.:")
display(models.data[0:3])
OpenAI currently offers 48 models, e.g.:
[Model(id='gpt-4o-audio-preview-2024-10-01', created=1727389042, object='model', owned_by='system'), Model(id='gpt-4o-mini-audio-preview', created=1734387424, object='model', owned_by='system'), Model(id='gpt-4o-realtime-preview', created=1727659998, object='model', owned_by='system')]
Prompt engineering¶
messages = [{"role": "system", "content":
             "You are a helpful assistant."}]

messages.append({"role": "user", "content":
                 "Classify into two categories, namely, 'City' and 'Country'"})

messages.append({"role": "user", "content":
                 "Classify this: Germany, Paris, France, Berlin, Rome, Italy"})

messages.append({"role": "user", "content":
                 "The output should be in JSON format."})
API call¶
# Send prompt to API and retrieve results
completion = openai.chat.completions.create(
    model="gpt-4", temperature=0.0, seed=2024, messages=messages
)

print(completion.choices[0].message.content)
{ "City": ["Paris", "Berlin", "Rome"], "Country": ["Germany", "France", "Italy"] }
API parameters https://platform.openai.com/docs/api-reference
- temperature: "What sampling temperature to use, between 0 and 2. Lower values will make it more focused and deterministic."
- seed: "This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result."