LLM Basic Tutorials

1. Using an LLM on a MacBook

(1) install LM Studio;
(2) load any preferred open-source LLM from Hugging Face;
(3) in a conda environment, install the openai package and switch on LM Studio's local API server (screenshot omitted);
(4) write a Python script that uses the openai client, as in the example below (following the quick connectivity check), to run a multi-round dialogue:
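
Before running the full script, a minimal sketch such as the one below can confirm that the local server is reachable. The port 1234 and the placeholder API key "lm-studio" are LM Studio defaults and may differ in your setup:

from openai import OpenAI

# Point to the LM Studio local server (adjust the port if you changed it).
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

# List the models currently loaded in LM Studio; if this prints at least one
# model id, the API server is up and the dialogue script below should work.
for model in client.models.list():
    print(model.id)

The full multi-round dialogue script for step (4):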

from openai import OpenAI
import json

# Point to the local server
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
# Use the model identifier exactly as it appears in LM Studio.
Model = "deepseek/deepseek-r1-0528-qwen3-8b"
# Initialize the conversation history
messages = [
    {"role": "system", "content": "You are a software engineer"}
]

def chat_with_stream():
    while True:
        # Get the user's input
        user_input = input("[User Prompt]: ")
        if user_input.lower() in ["exit", "quit", "bye"]:
            break
        
        # Append the user message to the history
        messages.append({"role": "user", "content": user_input})
        
        # Send a streaming request
        response = client.chat.completions.create(
            model=Model,
            messages=messages,
            temperature=0.7,
            stream=True
        )

        # Display the model's reply in real time
        print("Answer: ", end="")
        assistant_reply = ""
        for chunk in response:
            if chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                print(content, end="", flush=True)
                assistant_reply += content
        print()  # newline
        
        # Append the model's reply to the history
        messages.append({"role": "assistant", "content": assistant_reply})

def save_history_message(filename):
    # Dump the full message history (system/user/assistant turns) to a JSON file.
    with open(filename, "w") as json_file:
        json.dump(messages, json_file, indent=4)


if __name__ == "__main__":
    print("Start Conversion(Input exit/quit/bye to terminate the conversion)")
    chat_with_stream()
    # Save the conversation history to a JSON file.
    filename = "topic1.json"
    save_history_message(filename)
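
Once the history has been saved, one possible follow-up (a sketch, not part of the original script) is to reload topic1.json and continue the conversation with the same local model; the resume_conversation helper and its follow-up prompt are illustrative:

import json
from openai import OpenAI

client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

def resume_conversation(filename="topic1.json"):
    # Load the message list previously written by save_history_message().
    with open(filename) as json_file:
        history = json.load(json_file)
    # Add a new user turn on top of the restored history.
    history.append({"role": "user", "content": "Summarize our discussion so far."})
    # Non-streaming request this time; the full reply is in message.content.
    response = client.chat.completions.create(
        model="deepseek/deepseek-r1-0528-qwen3-8b",
        messages=history,
        temperature=0.7,
    )
    print(response.choices[0].message.content)

if __name__ == "__main__":
    resume_conversation()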

Reference

[1] Andrej Karpathy: https://karpathy.ai/zero-to-hero.html