Commit 28c4eea0 authored by Embruch, Gerd

outsourced Chunk-Size & -Overlap; Fixed missing vectorDB after deletion

parent 1a0d08e1
@@ -104,10 +104,15 @@ Accept: application/json
Content-Type: application/json
{
"input": "Under what path could members of the working group can find the exam git directory?",
"input": "What's the diameter of mercury",
"model": "llama3"
}
# {
# "input": "Under what path could members of the working group can find the exam git directory?",
# "model": "llama3"
# }
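For reference, the same chat request can be replayed outside the REST-client file. A minimal sketch using Node's built-in fetch; the base URL stands in for the {{host}} variable and is an assumption:

// sketch: replaying the chat request above with fetch (Node 18+);
// http://localhost:3000 is an assumed value for {{host}}
const response = await fetch('http://localhost:3000/ai/chat', {
  method: 'POST',
  headers: {
    'Accept': 'application/json',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    input: "What's the diameter of Mercury?",
    model: 'llama3'
  })
});
console.log(await response.json());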
### generate a follow up question
# @name resumeChat
POST {{host}}/ai/chat
......
@@ -5,7 +5,7 @@ import { ChromaClient } from "chromadb";
// embeddings
import { Chroma } from "@langchain/community/vectorstores/chroma";
import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { CharacterTextSplitter, RecursiveCharacterTextSplitter } from "langchain/text_splitter";
// loaders - https://js.langchain.com/v0.1/docs/modules/data_connection/document_loaders/
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
import {
@@ -18,7 +18,6 @@ import fs from 'fs';
import path from 'path';
import { PDFLoader } from '@langchain/community/document_loaders/fs/pdf';
import { MultiFileLoader } from "langchain/document_loaders/fs/multi_file";
import { ScoreThresholdRetriever } from 'langchain/retrievers/score_threshold';
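CharacterTextSplitter is now imported alongside RecursiveCharacterTextSplitter, although only the recursive variant is used in the embedder hunk further down. For comparison, a minimal sketch of the simpler splitter; the separator and size values are assumptions:

// sketch: CharacterTextSplitter cuts on a single separator, while
// RecursiveCharacterTextSplitter falls back through a list of separators
import { CharacterTextSplitter } from "langchain/text_splitter";

const simpleSplitter = new CharacterTextSplitter({
  separator: "\n\n",  // assumption: split on blank lines
  chunkSize: 500,     // assumption
  chunkOverlap: 50    // assumption
});
const someLongText = 'First paragraph.\n\nSecond paragraph.\n\nThird paragraph.';
const pieces = await simpleSplitter.splitText(someLongText);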
// PROVIDE OLLAMA CONNECTION
@@ -39,28 +38,29 @@ export const chromaVSsettings = {
"hnsw:space": "cosine"
}
};
// PROVIDE VECTOR STORE CONNECTION
// predefine vectorStoreConnection in global scope
let vectorStoreConnection;
/** *******************************************************
* PROVIDE VECTOR STORE CONNECTION
*/
export const vectorStoreConnection = async () => {
try {
// check if vectorDB is reachable
await chroma.heartbeat();
// create connection
vectorStoreConnection = await Chroma.fromExistingCollection(embeddings, chromaVSsettings);
return await Chroma.fromExistingCollection(embeddings, chromaVSsettings);
} catch (error) {
// throw error if connection can't be established
throw new Error(`Error creating VectorDB connection on ${process.env['VECTOR_API_URL']}`);
}
// export vectorStoreConnection
export default vectorStoreConnection;
// PROVIDE RETRIEVER
export const retriever = vectorStoreConnection.asRetriever();
// export const retriever = vectorStoreConnection.asRetriever(1);
// export const retriever = ScoreThresholdRetriever.fromVectorStore(vectorStoreConnection, {
// minSimilarityScore: 0.1, // Finds results with at least this similarity score
// maxK: 100, // The maximum K value to use. Use it based to your chunk size to make sure you don't run out of tokens
// kIncrement: 2, // How much to increase K by each time. It'll fetch N results, then N + kIncrement, then N + kIncrement * 2, etc.
// });
};
/** *******************************************************
* PROVIDE RETRIEVER
*/
export const retriever = async () => {
let vectorStore = await vectorStoreConnection();
return vectorStore.asRetriever();
};
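Previously the module opened the Chroma connection at import time (let vectorStoreConnection ... export default) and exposed a ready-made retriever; both are now async factory functions, so a collection that was deleted after startup no longer leaves consumers holding a dead handle. A minimal usage sketch, assuming the same exports; the query string and k value are placeholders:

// sketch: consuming the new async exports (import path as used elsewhere in the repo)
import { vectorStoreConnection, retriever } from '../controllers/Embeddings.js';

// the heartbeat check runs on every call, so a missing or deleted
// collection surfaces here as a thrown error instead of at import time
const vectorStore = await vectorStoreConnection();
const hits = await vectorStore.similaritySearch('exam git directory', 4);

// or go through the retriever() helper, which wraps asRetriever()
const baseRetriever = await retriever();
const docs = await baseRetriever.invoke('exam git directory');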
@@ -362,8 +362,19 @@ export const fileLoader = async (docs = []) => {
* EMBED GIVEN DOCS
*/
export const embedder = async (docs) => {
console.log("🚀 ~ embedder ~ process.env.CHUNK_SIZE:", process.env.CHUNK_SIZE);
console.log("🚀 ~ embedder ~ process.env.CHUNK_OVERLAP:", process.env.CHUNK_OVERLAP);
// chunk docs
const splitter = new RecursiveCharacterTextSplitter();
const splitter = new RecursiveCharacterTextSplitter({
chunkSize: Number(process.env.CHUNK_SIZE),
chunkOverlap: Number(process.env.CHUNK_OVERLAP),
separators: ['\n\n', '\n', ' ', '']
});
const chunks = await splitter.splitDocuments(docs);
// add unix timestamp to metadata of all chunks
......
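Chunk size and overlap are now read from the environment instead of relying on the splitter defaults. A minimal, self-contained sketch of the same splitter configuration; the concrete values and the fallback defaults are assumptions, not part of the commit:

// sketch: the two new variables as they might appear in .env (values are assumptions)
//   CHUNK_SIZE=1000
//   CHUNK_OVERLAP=200
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';

const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: Number(process.env.CHUNK_SIZE ?? 1000),      // fallback is an assumption
  chunkOverlap: Number(process.env.CHUNK_OVERLAP ?? 200),  // fallback is an assumption
  separators: ['\n\n', '\n', ' ', '']
});

// splitText() shows the effect without building Document objects first
const chunks = await splitter.splitText('Lorem ipsum dolor sit amet. '.repeat(200));
console.log(chunks.length, chunks[0].length);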
@@ -2,13 +2,14 @@ import urlExist from "url-exist";
import { Ollama } from 'ollama';
import { ChatOllama } from "@langchain/community/chat_models/ollama";
import { ChatPromptTemplate, PromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { isCollectionAvailable, retriever } from "../controllers/Embeddings.js";
import { isCollectionAvailable, retriever, vectorStoreConnection } from "../controllers/Embeddings.js";
import { createHistoryAwareRetriever } from "langchain/chains/history_aware_retriever";
import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
import { createRetrievalChain } from "langchain/chains/retrieval";
import { HumanMessage, AIMessage } from "@langchain/core/messages";
import { extendChat } from "./handleDB.js";
import * as path from 'path';
import { MultiQueryRetriever } from "langchain/retrievers/multi_query";
// PROVIDE OLLAMA CONNECTION TO ALL ROUTES
@@ -103,7 +104,31 @@ export const summarizeText = async (model, input) => {
/** *******************************************************
* GENERATE CHAT
*/
export const chat = async (req, res, next) => {
// create chat model
const llm = new ChatOllama({
baseUrl: process.env['AI_API_URL'],
model: req.body.model,
temperature: Number(process.env['AI_TEMPERATURE'])
});
let vectorStore = await vectorStoreConnection();
const retriever = MultiQueryRetriever.fromLLM({
llm: llm,
retriever: vectorStore.asRetriever(),
verbose: true,
});
const query = req.body.input;
const retrievedDocs = await retriever.invoke(query);
res.json({ message: 'hi', retrievedDocs });
};
export const chat_OLD = async (req, res, next) => {
// FEATURE apply citations or sources
// sources: https://js.langchain.com/v0.1/docs/use_cases/question_answering/sources/#adding-sources
// citations: https://js.langchain.com/v0.1/docs/use_cases/question_answering/citations/
......
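The new chat handler stops after retrieval and returns the raw documents together with a placeholder message; the previous implementation lives on as chat_OLD. A sketch of how the multi-query retriever could feed the answer-generation chains already imported above (createStuffDocumentsChain, createRetrievalChain); the handler name, prompt wording, and response shape are assumptions:

// sketch: continuing from retrieval to answer generation; names marked as
// assumptions are not part of the commit
import { ChatOllama } from "@langchain/community/chat_models/ollama";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { MultiQueryRetriever } from "langchain/retrievers/multi_query";
import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
import { createRetrievalChain } from "langchain/chains/retrieval";
import { vectorStoreConnection } from "../controllers/Embeddings.js";

// hypothetical handler name; mirrors the setup of chat() above
export const chatWithAnswer = async (req, res, next) => {
  const llm = new ChatOllama({
    baseUrl: process.env['AI_API_URL'],
    model: req.body.model,
    temperature: Number(process.env['AI_TEMPERATURE'])
  });

  // same multi-query retriever as in chat()
  const vectorStore = await vectorStoreConnection();
  const retriever = MultiQueryRetriever.fromLLM({
    llm,
    retriever: vectorStore.asRetriever()
  });

  // stuff the retrieved chunks into one prompt and let the model answer;
  // prompt wording is an assumption
  const prompt = ChatPromptTemplate.fromMessages([
    ['system', 'Answer the question using only the following context:\n\n{context}'],
    ['human', '{input}']
  ]);
  const combineDocsChain = await createStuffDocumentsChain({ llm, prompt });
  const ragChain = await createRetrievalChain({ retriever, combineDocsChain });

  const result = await ragChain.invoke({ input: req.body.input });
  // result.answer holds the generated reply, result.context the retrieved documents
  res.json({ answer: result.answer, sources: result.context });
};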