Llama 3 is here and other models from Groq API
MaxMLang committed Apr 19, 2024
1 parent 752e9e2 commit 1c2cf67
Showing 3 changed files with 48 additions and 10 deletions.
3 changes: 2 additions & 1 deletion .env.example
@@ -1,4 +1,5 @@
OPENAI_API_KEY=YOUR-API-KEY
PINECONE_API_KEY=YOUR-API-KEY
PINECONE_INDEX_NAME=YOUR-INDEX-NAME
-PINECONE_NAME_SPACE=YOUR-NAME-SPACE
+PINECONE_NAME_SPACE=YOUR-NAME-SPACE
+GROQ_API_KEY=YOUR-API-KEY
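For reference, `src/app.py` loads these values with `dotenv.load_dotenv()`; a minimal sketch of how the new `GROQ_API_KEY` becomes available to the app (the error message is illustrative):

```python
# Minimal sketch: load .env and read the keys the app expects.
import os

import dotenv

dotenv.load_dotenv()  # copies the values from .env into the process environment

groq_key = os.getenv("GROQ_API_KEY")  # new in this commit, consumed by ChatGroq
if groq_key is None:
    raise RuntimeError("GROQ_API_KEY is missing; copy .env.example to .env first")
```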
9 changes: 9 additions & 0 deletions README.md
@@ -10,6 +10,14 @@ RAG-nificent is a state-of-the-art repository that leverages the power of Retrie
- **Direct Citation**: Every response from the system includes a direct link to the source PDF page, ensuring traceability and verification.
- **PDF Directory**: A predefined set of key PDF documents, currently including WHO recommendations on major health topics such as schistosomiasis and malaria.

## Available Models
- 📘 **GPT-3.5 Turbo**: OpenAI's fast, general-purpose chat model, a solid default for varied conversational tasks.
- 🦙 **Llama3-70B-8192**: Meta's large Llama 3 model with an 8,192-token context window, suited to complex language tasks.
- 🦙 **Llama3-8B-8192**: The smaller, faster Llama 3 variant, a good fit for a wide range of applications.
- 🌟 **Mixtral-8x7B-32768**: Mistral's sparse mixture-of-experts model with a 32,768-token context window for long inputs.
- 🦙 **Llama2-70B-4096**: The previous Llama generation, with a 4,096-token context window.
- 💎 **Gemma-7B-IT**: Google's instruction-tuned 7B Gemma model.
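A sketch of how these model ids can be routed to the right client; the `make_llm` helper is illustrative, but the branching mirrors the logic this commit adds to `src/app.py`:

```python
# Illustrative helper: send gpt-3.5-turbo to OpenAI, everything else to Groq.
import os

from langchain.chat_models import ChatOpenAI
from langchain_groq import ChatGroq


def make_llm(model: str, temperature: float = 1.0):
    if model == "gpt-3.5-turbo":
        return ChatOpenAI(model_name=model, temperature=temperature, streaming=True)
    return ChatGroq(
        model_name=model,
        temperature=temperature,
        streaming=True,
        api_key=os.getenv("GROQ_API_KEY"),  # requires the new .env entry
    )
```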

## Demo

![RAG-nificent Demo](assets/demo.gif)
@@ -37,6 +45,7 @@ The application utilizes a combination of OpenAI embeddings, Pinecone vector sea
- `PINECONE_NAME_SPACE`
- `OPENAI_API_KEY`
- `PINECONE_API_KEY`
- `GROQ_API_KEY`

4. Create a [Pinecone](https://www.pinecone.io) index with the same name as `PINECONE_INDEX_NAME`. Set it up with `dimensions=1536` and `metric=cosine` (a client sketch follows these steps).
5. Place your PDFs in the `pdf_data` directory and run `data_ingestion.py`
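A sketch of step 4 using the Pinecone Python client; this assumes the v3+ `pinecone` package, and the serverless cloud/region are placeholders to adjust for your account:

```python
# Sketch: create the index from step 4 (dimension matches OpenAI embeddings).
import os

from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
pc.create_index(
    name=os.environ["PINECONE_INDEX_NAME"],
    dimension=1536,   # size of the OpenAI embedding vectors the app stores
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),  # placeholder values
)
```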
46 changes: 37 additions & 9 deletions src/app.py
@@ -4,9 +4,11 @@
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
+from langchain_groq import ChatGroq
from langchain_pinecone import Pinecone
from langchain.docstore.document import Document
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
+from chainlit.input_widget import Select, Switch, Slider
import re
import chainlit as cl
dotenv.load_dotenv()
@@ -18,30 +20,56 @@

@cl.on_chat_start
async def on_chat_start():
+    settings = await cl.ChatSettings(
+        [
+            Select(
+                id="model",
+                label="Choose Model",
+                values=["gpt-3.5-turbo", "llama3-70b-8192", "llama3-8b-8192",
+                        "mixtral-8x7b-32768", "llama2-70b-4096", "gemma-7b-it"],
+                initial_index=1,
+            ),
+            Slider(
+                id="temperature",
+                label="Temperature",
+                initial=1,
+                min=0,
+                max=2,
+                step=0.1,
+            ),
+        ]
+    ).send()
+    await setup_agent(settings)
+
+    # Let the user know that the system is ready
+    await cl.Message(content="Hello, I am here to help you with questions on your provided PDF files.").send()



+@cl.on_settings_update
+async def setup_agent(settings):
    embeddings = OpenAIEmbeddings()
-    docsearch = Pinecone.from_existing_index(index_name=os.getenv("PINECONE_INDEX_NAME"), embedding=embeddings, namespace= os.getenv("PINECONE_NAME_SPACE"))
+    docsearch = Pinecone.from_existing_index(index_name=os.getenv("PINECONE_INDEX_NAME"), embedding=embeddings,
+                                             namespace=os.getenv("PINECONE_NAME_SPACE"))
    retriever = docsearch.as_retriever(search_type="similarity")
    message_history = ChatMessageHistory()

    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        chat_memory=message_history,
        return_messages=True,
    )

-    # Create a chain that uses the Pinecone vector store
+    # Route the selected model: gpt-3.5-turbo goes to OpenAI, all others to the Groq API
+    if settings["model"] == "gpt-3.5-turbo":
+        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=settings["temperature"], streaming=True)
+    else:
+        llm = ChatGroq(model_name=settings["model"], temperature=settings["temperature"], streaming=True, api_key=os.getenv("GROQ_API_KEY"))
+    # Create a chain that uses the Pinecone vector store
    chain = ConversationalRetrievalChain.from_llm(
-        ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, streaming=True),
+        llm,
        chain_type="stuff",
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
    )

-    # Let the user know that the system is ready
-    await cl.Message(content="Hello, I am here to help you with questions on your provided PDF files.").send()

    cl.user_session.set("chain", chain)


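The diff ends before the message handler that consumes the chain stored in the session; a hypothetical sketch of that missing piece (the handler name and citation formatting are assumptions, not part of this commit):

```python
# Hypothetical sketch: answer a user message with the session's chain.
@cl.on_message
async def on_message(message: cl.Message):
    chain = cl.user_session.get("chain")  # ConversationalRetrievalChain
    res = await chain.acall({"question": message.content})

    answer = res["answer"]
    sources = res.get("source_documents", [])
    if sources:
        # Surface the PDFs behind the answer, per the Direct Citation feature.
        refs = ", ".join(doc.metadata.get("source", "unknown") for doc in sources)
        answer += f"\n\nSources: {refs}"

    await cl.Message(content=answer).send()
```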
