Gaia x RaidGuild

RAG API Pipeline

Demo Day

Input API manifest

# Manifest header: version string, API pipeline name, and the values used for
# the `cname` and `protocol` parameters (both set to aave here).
version: '3.0.0'

api_name: 'boardroom_api'

api_parameters:
  protocol: 'aave'
  cname: 'aave'

# Connector specification: a JSON-Schema (draft-07) object describing the
# configuration the source requires. All three properties are mandatory.
spec:
  type: Spec
  documentation_url: https://docs.airbyte.com/integrations/sources/boardroom
  connection_specification:
    $schema: http://json-schema.org/draft-07/schema#
    title: Boardroom API Spec
    type: object
    required:
      - api_key
      - cname
      - protocol
    additionalProperties: true
    properties:
      api_key:
        type: string
        description: >-
          Boardroom API Key. See <a href="https://docs.boardroom.io/docs/api/05c1fb6d88a07-governance-api">here</a>
          for details.
        # The keyword Airbyte recognizes is `airbyte_secret` (underscore).
        # The hyphenated spelling is an unknown keyword, so the key would not
        # be flagged as a secret.
        airbyte_secret: true
      cname:
        type: string
        description: >-
          Protocol ID a.k.a cname
        examples:
          - aave
          - hopprotocol
      protocol:
        type: string
        description: >-
          Alias for cname. Needed as there are a few endpoints that use `protocol` as parameter id
        examples:
          - aave
          - metacartel

# Shared components; `retriever_base` pulls in `selector` through `$ref`.
definitions:
  # Record selector whose extractor uses an empty field path.
  selector:
    type: RecordSelector
    extractor:
      type: DpathExtractor
      field_path: []

  # Base requester: GET requests against the Boardroom v1 base URL. The API
  # key from the connector config is sent as the `key` request parameter.
  requester_base:
    type: HttpRequester
    http_method: GET
    url_base: https://api.boardroom.info/v1
    authenticator:
      type: ApiKeyAuthenticator
      api_token: '{{ config[''api_key''] }}'
      inject_into:
        type: RequestOption
        inject_into: request_parameter
        field_name: key

  # Base retriever: only wires in the shared record selector.
  retriever_base:
    type: SimpleRetriever
    record_selector:
      $ref: '#/definitions/selector'

  # Cursor pagination: the next cursor is read from `nextCursor` in the
  # response and sent back as the `cursor` request parameter. Paging stops
  # when the response carries no `nextCursor` key.
  paginator:
    type: DefaultPaginator
    page_token_option:
      type: RequestOption
      inject_into: request_parameter
      field_name: cursor
    pagination_strategy:
      type: CursorPagination
      cursor_value: '{{ response.get(''nextCursor'', '''') }}'
      stop_condition: '{{ ''nextCursor'' not in response }}'

# Base response schemas referenced by the endpoint definitions.
schemas:
  # Envelope with an array `data` and a string-or-null `nextCursor`.
  boardroom_api:
    $schema: http://json-schema.org/draft-07/schema#
    type: object
    properties:
      data:
        type: array
      nextCursor:
        type: [string, 'null']

  # Envelope with an object `data`; `nextCursor` is only ever null.
  boardroom_api_protocol:
    $schema: http://json-schema.org/draft-07/schema#
    type: object
    properties:
      data:
        type: object
      nextCursor:
        type: ['null']

# API-wide settings: request method, content type, and the response fields
# named as entrypoint (`data`) and primary key (`nextCursor`).
api_config:
  request_method: 'get'
  content_type: 'application/json'
  response_entrypoint_field: 'data'
  response_primary_key: 'nextCursor'

# Endpoint streams: each API path maps to a stream id, the response schema it
# uses, and a `$ref` to the text schema for that stream.
endpoints:
  '/discourseTopics':
    id: 'discourseTopics'
    responseSchema: '#/schemas/boardroom_api'
    textSchema:
      $ref: '#/textSchemas/DiscourseTopic'

  '/discourseCategories':
    id: 'discourseCategories'
    responseSchema: '#/schemas/boardroom_api'
    textSchema:
      $ref: '#/textSchemas/DiscourseCategory'

  '/discourseTopicPosts':
    id: 'discourseTopicPosts'
    responseSchema: '#/schemas/boardroom_api'
    textSchema:
      $ref: '#/textSchemas/DiscourseTopicPost'

  # Single-protocol lookup; uses the object-shaped response schema.
  '/protocols/{cname}':
    id: 'protocol'
    responseSchema: '#/schemas/boardroom_api_protocol'
    textSchema:
      $ref: '#/textSchemas/Protocol'

  '/protocols/{cname}/proposals':
    id: 'proposals'
    responseSchema: '#/schemas/boardroom_api'
    textSchema:
      $ref: '#/textSchemas/Proposal'

# Text schemas referenced by the endpoints. Each one lists the fields of a
# record by name and type; `properties` is a sequence of single-key mappings.
textSchemas:
  Protocol:
    type: object
    properties:
      - cname: {type: string}
      - name: {type: string}
      - categories:
          type: array
          items: {type: string}

  Proposal:
    type: object
    properties:
      - title: {type: string}
      - content: {type: string}
      - summary: {type: string}

  DiscourseTopic:
    type: object
    properties:
      - title: {type: string}

  DiscourseCategory:
    type: object
    properties:
      - name: {type: string}
      - description: {type: string}

  DiscourseTopicPost:
    type: object
    properties:
      - body: {type: string}

# Chunking parameters: strategy and mode first, then the size limits, then
# the overlap and section switches.
chunking_param:
  chunking_strategy: 'by_title'
  mode: 'elements'
  max_characters: 1500
  new_after_n_chars: 1024
  combine_text_under_n_chars: 0
  overlap: 0
  overlap_all: false
  multipage_sections: true
  include_orig_elements: true

Generated Source Connector

# Connection check: a CheckStream listing all five generated streams.
check:
  type: CheckStream
  stream_names:
  - 'discourseTopics'
  - 'discourseCategories'
  - 'discourseTopicPosts'
  - 'protocol'
  - 'proposals'
# Stream and shared component definitions of the generated connector. Every
# stream is a DeclarativeStream built on `retriever_base` and `requester_base`
# with an inline schema.
# NOTE(review): each `path` value keeps literal double quotes inside the
# single-quoted scalar; confirm the consumer strips them before building URLs.
definitions:
  # Paginated stream; sends the configured `protocol` as a request parameter.
  discourseCategories_stream:
    $parameters:
      name: discourseCategories
      path: '"/discourseCategories"'
      primary_key: nextCursor
    retriever:
      $ref: '#/definitions/retriever_base'
      paginator:
        $ref: '#/definitions/paginator'
      requester:
        $ref: '#/definitions/requester_base'
        request_parameters:
          protocol: '{{ config[''protocol''] }}'
    schema_loader:
      schema:
        $ref: '#/schemas/boardroom_api'
      type: InlineSchemaLoader
    type: DeclarativeStream
  # Paginated stream; sends the configured `protocol` as a request parameter.
  discourseTopicPosts_stream:
    $parameters:
      name: discourseTopicPosts
      path: '"/discourseTopicPosts"'
      primary_key: nextCursor
    retriever:
      $ref: '#/definitions/retriever_base'
      paginator:
        $ref: '#/definitions/paginator'
      requester:
        $ref: '#/definitions/requester_base'
        request_parameters:
          protocol: '{{ config[''protocol''] }}'
    schema_loader:
      schema:
        $ref: '#/schemas/boardroom_api'
      type: InlineSchemaLoader
    type: DeclarativeStream
  # Paginated stream; sends the configured `protocol` as a request parameter.
  discourseTopics_stream:
    $parameters:
      name: discourseTopics
      path: '"/discourseTopics"'
      primary_key: nextCursor
    retriever:
      $ref: '#/definitions/retriever_base'
      paginator:
        $ref: '#/definitions/paginator'
      requester:
        $ref: '#/definitions/requester_base'
        request_parameters:
          protocol: '{{ config[''protocol''] }}'
    schema_loader:
      schema:
        $ref: '#/schemas/boardroom_api'
      type: InlineSchemaLoader
    type: DeclarativeStream
  # Shared cursor paginator: reads `nextCursor` from the response, sends it
  # back as the `cursor` request parameter, and stops when the key is absent.
  paginator:
    page_token_option:
      field_name: cursor
      inject_into: request_parameter
      type: RequestOption
    pagination_strategy:
      cursor_value: '{{ response.get(''nextCursor'', '''') }}'
      stop_condition: '{{ ''nextCursor'' not in response }}'
      type: CursorPagination
    type: DefaultPaginator
  # Paginated stream; the configured `cname` is interpolated into the path and
  # no extra request parameters are set.
  proposals_stream:
    $parameters:
      name: proposals
      path: '"/protocols/{{ config[''cname''] }}/proposals"'
      primary_key: nextCursor
    retriever:
      $ref: '#/definitions/retriever_base'
      paginator:
        $ref: '#/definitions/paginator'
      requester:
        $ref: '#/definitions/requester_base'
    schema_loader:
      schema:
        $ref: '#/schemas/boardroom_api'
      type: InlineSchemaLoader
    type: DeclarativeStream
  # Unpaginated stream (NoPagination) using the object-shaped
  # `boardroom_api_protocol` schema.
  protocol_stream:
    $parameters:
      name: protocol
      path: '"/protocols/{{ config[''cname''] }}"'
      primary_key: nextCursor
    retriever:
      $ref: '#/definitions/retriever_base'
      paginator:
        type: NoPagination
      requester:
        $ref: '#/definitions/requester_base'
    schema_loader:
      schema:
        $ref: '#/schemas/boardroom_api_protocol'
      type: InlineSchemaLoader
    type: DeclarativeStream
  # Base requester: GET against the Boardroom v1 base URL; the configured API
  # key is injected as the `key` request parameter.
  requester_base:
    authenticator:
      api_token: '{{ config[''api_key''] }}'
      inject_into:
        field_name: key
        inject_into: request_parameter
        type: RequestOption
      type: ApiKeyAuthenticator
    http_method: GET
    type: HttpRequester
    url_base: https://api.boardroom.info/v1
  # Base retriever: only wires in the shared record selector.
  retriever_base:
    record_selector:
      $ref: '#/definitions/selector'
    type: SimpleRetriever
  # Record selector whose extractor uses an empty field path.
  selector:
    extractor:
      field_path: []
      type: DpathExtractor
    type: RecordSelector
# Response schemas loaded inline by the generated streams.
schemas:
  # Envelope with an array `data` and a string-or-null `nextCursor`.
  boardroom_api:
    $schema: http://json-schema.org/draft-07/schema#
    type: object
    properties:
      data:
        type: array
      nextCursor:
        type: [string, 'null']
  # Envelope with an object `data`; `nextCursor` is only ever null.
  boardroom_api_protocol:
    $schema: http://json-schema.org/draft-07/schema#
    type: object
    properties:
      data:
        type: object
      nextCursor:
        type: ['null']
# Connector specification of the generated source: a JSON-Schema (draft-07)
# object with three required string properties.
spec:
  connection_specification:
    $schema: http://json-schema.org/draft-07/schema#
    additionalProperties: true
    properties:
      api_key:
        # The keyword Airbyte recognizes is `airbyte_secret` (underscore).
        # The hyphenated spelling is an unknown keyword, so the key would not
        # be flagged as a secret.
        airbyte_secret: true
        description: >-
          Boardroom API Key. See <a href="https://docs.boardroom.io/docs/api/05c1fb6d88a07-governance-api">here</a>
          for details.
        type: string
      cname:
        description: Protocol ID a.k.a cname
        examples:
        - aave
        - hopprotocol
        type: string
      protocol:
        description: >-
          Alias for cname. Needed as there are a few endpoints that use
          `protocol` as parameter id
        examples:
        - aave
        - metacartel
        type: string
    required:
    - api_key
    - cname
    - protocol
    title: Boardroom API Spec
    type: object
  documentation_url: https://docs.airbyte.com/integrations/sources/boardroom
  type: Spec
# Streams exposed by the connector, as references into `definitions`, followed
# by the version string (quoted; it is a string either way).
streams:
- "#/definitions/discourseTopics_stream"
- "#/definitions/discourseCategories_stream"
- "#/definitions/discourseTopicPosts_stream"
- "#/definitions/protocol_stream"
- "#/definitions/proposals_stream"
version: '3.0.0'

Gaia node config

{
  "address": "0x198d8dec810883422c5cb417fb70db55ee3a4001",
  "chat": "https://huggingface.co/gaianet/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q5_K_M.gguf",
  "chat_batch_size": "16",
  "chat_ctx_size": "4096",
  "chat_name": "Phi-3-mini-4k-instruct",
  "description": "GaiaNet node config with a Phi-3-mini-4k model and a Boardroom API knowledge base.",
  "domain": "us.gaianet.network",
  "embedding": "https://huggingface.co/gaianet/Nomic-embed-text-v1.5-Embedding-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf",
  "embedding_batch_size": "2048",
  "embedding_collection_name": "boardroom_api_collection",
  "embedding_ctx_size": "2048",
  "embedding_name": "Nomic-embed-text-v1.5",
  "llamaedge_port": "8080",
  "prompt_template": "phi-3-chat",
  "qdrant_limit": "1",
  "qdrant_score_threshold": "0.5",
  "rag_policy": "system-message",
  "rag_prompt": "Use the following pieces of context to answer the user's question. Respond directly to the user with your answer, do not say 'this is the answer' or similar language. Never mention your knowledge base or say 'according to the context' or 'hypothetical' or other similar language. Use the JSON metadata included in the knowledge base whenever possible to enrich your answers. The term aave refers to the DAO protocol where discussions and proposals are posted. If you don't know the answer, don't try to make up an answer. \n----------------\n",
  "reverse_prompt": "",
  "snapshot": "boardroom_api_collection-3656816618511259-2024-09-03-04-54-33.snapshot.tar.gz",
  "system_prompt": "You are an AI assistant designed to provide clear, concise, and accurate answers to user queries. Your primary functions include retrieving relevant information from the provided RAG (Retrieval-Augmented Generation) data and utilizing your pre-training data when necessary. Use the JSON metadata included in the RAG data whenever possible to enrich your answers. The term aave refers to the DAO protocol where discussions and proposals are posted. If no relevant information is found, you will inform the user that you are not familiar with the knowledge."
}

# Launch rag-api-server.wasm under wasmedge with the chat and embedding models
# preloaded. One option per line; the argument list is unchanged.
wasmedge \
  --dir .:./dashboard \
  --env NODE_VERSION=0.3.2 \
  --nn-preload default:GGML:AUTO:Phi-3-mini-4k-instruct-Q5_K_M.gguf \
  --nn-preload embedding:GGML:AUTO:nomic-embed-text-v1.5.f16.gguf \
  rag-api-server.wasm \
  --model-name Phi-3-mini-4k-instruct,Nomic-embed-text-v1.5 \
  --ctx-size 4096,2048 \
  --batch-size 16,2048 \
  --prompt-template phi-3-chat,embedding \
  --rag-policy system-message \
  --qdrant-collection-name boardroom_api_collection \
  --qdrant-limit 1 \
  --qdrant-score-threshold 0.5 \
  --web-ui ./ \
  --socket-addr 0.0.0.0:8080 \
  --rag-prompt "Use the following pieces of context to answer the user's question. Respond directly to the user with your answer, do not say 'this is the answer' or 'this is the answer' or similar language. Never mention your knowledge base or say 'according to the context' or 'hypothetical' or other similar language. Use json metadata included in knowledge base whenever possible enrich your answers such as proposal voting choices and results. The term aave refers the DAO protocol where discussions and proposals are posted. If you don't know the answer, don't try to make up an answer. \n----------------\n"

Gaia x RaidGuild RAG API Pipeline Demo Day

Gaia x RaidGuild

RAG API Pipeline

Demo Day

Agenda

Architecture Overview

Tech stack

Tech stack

Tech stack

Tech stack

Defining an API Pipeline Manifest

API Spec

Base Data retriever

Base Data retriever

Base Response Schemas

Endpoint streams definition

Fields for text preprocessing

Note: other fields will be added as metadata.

Chunking parameters

rag-api-pipeline - CLI Walkthrough

Demo - Boardroom API

rag-api-pipeline run-all

Input API manifest

Generated Source Connector

rag-api-pipeline from-normalized

Question about Discourse Post

Question about Proposal

Question about Proposal Results

Results from AAVE data from Boardroom API

Issues Found and TODOs

Gaia node config

Issues Found and TODOs

Next steps