目录
关于图片检索的样例
/      

关于图片检索的样例

实现图片检索

安装 OpenSearch

启动前的配置:

sudo sysctl -w vm.max_map_count=262144

启动命令

OPENSEARCH_INITIAL_ADMIN_PASSWORD=gggMz888. docker compose up -d

启动成功后,可以访问 http://localhost:5601 查看是否启动成功
账户名:admin
密码:gggMz888.

创建索引

curl -X PUT "http://localhost:9200/image-index" -H 'Content-Type: application/json' -d'
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "image_vector": {
        "type": "knn_vector",
        "dimension": 512
      },
      "ocr_text": {
        "type": "text"
      },
      "image_path": {
        "type": "keyword"
      }
    }
  }
}'

说明:JSON 不支持注释,请求体中不能带 # 注释,否则 curl 创建索引会失败。字段含义:image_vector 为 knn_vector 类型,用于 kNN 检索,维度 512(CLIP 输出 512 维向量);ocr_text 用于存储 OCR 的文本;image_path 用于存储图片路径。

或者

PUT image-index
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "image_vector": {
        "type": "knn_vector", 
        "dimension": 512   
      },
      "ocr_text": {
        "type": "text"   
      },
      "image_path": {
        "type": "keyword"  
      }
    }
  }
}

图片批量采集

来源:https://www.pexels.com/zh-cn/search/%E6%97%A5%E5%B8%B8%E7%89%A9%E5%93%81/

手工下载几十张

搭建一个临时的文件服务器,可以通过URL访问到图片

图片信息入库(图片特征提取、图片OCR)

图片批量入库

import os

# Directory that holds the images to be indexed.
pic_path = '/home/gitsilence/Pictures/test'

from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image
import numpy as np
from paddleocr import PaddleOCR
from opensearchpy import OpenSearch

ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
# Load the CLIP model and its processor (produces 512-dim embeddings,
# matching the "dimension": 512 mapping of the image-index).
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Initialize the OpenSearch client.
client = OpenSearch(
    hosts=[{'host': '127.0.0.1', 'port': 9200}],
    # Default credentials; replace if changed.
    # NOTE(review): this password ('gggMz88.') differs from the one used when
    # starting the container above ('gggMz888.') — confirm which is correct.
    http_auth=('admin', 'gggMz88.'),
    use_ssl=True,
    # Demo setup with a self-signed certificate; do not disable
    # certificate verification in production.
    verify_certs=False,
    ssl_show_warn=False
)

def get_img_list(directory=None) -> list:
    """List image file names (jpg/png) found in a directory.

    :param directory: directory to scan; defaults to the module-level
        ``pic_path`` when None (backward compatible with the old no-arg call)
    :return: list of matching file names (names only, not full paths)

    Each match is printed as a progress hint, as before. Note the suffix
    check is a plain ``endswith`` (no leading dot), matching the original
    behavior exactly.
    """
    target = pic_path if directory is None else directory
    img_list = []
    for file in os.listdir(target):
        # str.endswith accepts a tuple of suffixes — one call, no `or` chain.
        if file.endswith(("jpg", "png")):
            print(file)
            img_list.append(file)
    return img_list

def extract_vec_text(img: str):
    """Extract a normalized CLIP image embedding and the OCR text of one image.

    :param img: image file name, relative to the module-level ``pic_path``
    :return: ``(vector, ocr_text)`` — ``vector`` is a list of plain floats
        (L2-normalized, 512-dim per the index mapping); ``ocr_text`` is the
        concatenated recognized text, ``""`` when nothing was recognized
    """
    img_file = os.path.join(pic_path, img)  # hoisted: used by both CLIP and OCR

    # Load and preprocess the image for CLIP.
    image = Image.open(img_file)
    inputs = processor(images=image, return_tensors="pt", padding=True)

    # Pure inference — no gradients needed.
    with torch.no_grad():
        vector = model.get_image_features(**inputs).squeeze().numpy()

    vector /= np.linalg.norm(vector)        # L2-normalize (cosine ≈ dot product)
    vector = vector.astype(float).tolist()  # plain floats for the JSON document

    # OCR text extraction; PaddleOCR yields None / [None] when nothing is found.
    ocr_result = ocr.ocr(img_file, cls=True)
    if ocr_result is None or ocr_result[0] is None:
        return vector, ""
    # Each line is (box, (text, confidence)); concatenate all recognized texts.
    # (The old `result is None` check was dead code: a list comprehension can
    # never be None.)
    ocr_text = "".join(line[1][0] for line in ocr_result[0])
    return vector, ocr_text

def upload_to_opensearch(image_path, vector, ocr_text):
    """Index one image document (embedding + OCR text + URL) into OpenSearch.

    :param image_path: image file name; stored as a URL served by the
        temporary file server from the setup section (port 5500)
    :param vector: L2-normalized embedding as a list of floats
    :param ocr_text: recognized text for the image (may be "")
    """
    doc = {
        "image_vector": vector,
        "ocr_text": ocr_text,
        "image_path": "http://127.0.0.1:5500/" + image_path
    }
    response = client.index(index="image-index", body=doc)
    print("Document indexed:", response["_id"])
    # Removed leftover debug output: print(client, doc)

if __name__ == '__main__':
    # Ingest every image in pic_path: embed, OCR, then index the document.
    for image_name in get_img_list():
        embedding, recognized_text = extract_vec_text(image_name)
        print(recognized_text)
        upload_to_opensearch(image_name, embedding, recognized_text)
        print("--------------------------")

检索测试

图片检索

def search_by_image(image_path, top_k=5):
    """Image-to-image search: return the top_k nearest indexed images."""
    query_vector = extract_image_features(image_path)
    knn_body = {
        "query": {
            "knn": {
                "image_vector": {
                    "vector": query_vector,
                    "k": top_k,
                },
            },
        },
    }
    response = client.search(index=index_name, body=knn_body)
    return response['hits']['hits']

def extract_image_features(image_path):
    """提取图片特征向量

    Returns an L2-normalized image embedding as a list of plain floats,
    suitable as the query vector for a kNN search against ``image_vector``.

    NOTE(review): this uses the OpenAI ``clip`` package API (``preprocess``,
    ``model.encode_image``, ``device``), not the Hugging Face transformers
    API used in the ingestion script above — confirm the same CLIP weights
    are loaded here, otherwise query and index embeddings live in different
    spaces and the kNN results will be meaningless.
    """
    image = Image.open(image_path).convert("RGB")
    inputs = preprocess(image).unsqueeze(0).to(device)
    image_features = model.encode_image(inputs)
    image_features /= image_features.norm(dim=-1, keepdim=True)  # L2-normalize
    return image_features.detach().squeeze().cpu().numpy().astype(float).tolist()



# 以图搜图
    image_path = "../ocr/img.png"  # 替换为你的图片路径
    print("以图搜图结果:")
    image_results = search_by_image(image_path)
    for hit in image_results:
        print(f"Score: {hit['_score']}, Image Path: {hit['_source']['image_path']}")

图片内容检索

def search_by_text(query_text, top_k=5):
    """Text-to-image search over the OCR'd text of indexed images.

    :param query_text: keyword(s) matched against the ``ocr_text`` field
    :param top_k: maximum number of hits to return
    :return: list of OpenSearch hit dicts
    """
    query = {
        "query": {
            "match": {
                "ocr_text": {
                    "query": query_text,
                    "fuzziness": "AUTO"  # tolerate small typos in the query
                }
            }
        },
        "size": top_k
    }

    response = client.search(index=index_name, body=query)
    return response['hits']['hits']

# Example usage (previously unreachable code placed after the return above):
#     query_text = "计算机"  # replace with your own keyword
#     print("\n以文搜图结果:")
#     for hit in search_by_text(query_text):
#         print(f"Score: {hit['_score']}, OCR Text: {hit['_source']['ocr_text']}, "
#               f"Image Path: {hit['_source']['image_path']}")

图片文字转向量检索

def search_by_text_vertor(query_text, top_k=5):
    """kNN search using a CLIP *text* embedding (semantic text-to-image).

    The misspelled name ("vertor") is kept so existing callers keep working.

    :param query_text: comma-separated phrase(s), tokenized together for CLIP
    :param top_k: number of nearest neighbours to return
    :return: list of OpenSearch hit dicts
    """
    # Encode the query with the OpenAI clip package and L2-normalize so it
    # lives in the same space as the indexed image vectors.
    tokens = clip.tokenize(query_text.split(",")).to(device)
    text_features = model.encode_text(tokens)
    text_features /= text_features.norm(dim=-1, keepdim=True)
    vector = text_features.detach().squeeze().cpu().numpy().astype(float).tolist()
    query = {
        "query": {
            "knn": {
                "image_vector": {
                    "vector": vector,
                    "k": top_k
                }
            }
        }
    }
    response = client.search(index=index_name, body=query)
    return response['hits']['hits']

# Example usage (previously unreachable code placed after the return above):
#     query_text = "手机"  # replace with your own keyword
#     print("\n以特征描述搜图结果:")
#     for hit in search_by_text_vertor(query_text):
#         print(f"Score: {hit['_score']}, OCR Text: {hit['_source']['ocr_text']}, "
#               f"Image Path: {hit['_source']['image_path']}")

总结

总共测试两个模型向量化,检索的前两个结果还是很明显的,后面的就不太匹配了。
还需要多找些样本测试。

https://github.com/OFA-Sys/Chinese-CLIP

代码:https://github.com/MrNiebit/langchain/tree/master/picture_search


标题:关于图片检索的样例
作者:gitsilence
地址:https://blog.lacknb.cn/articles/2024/11/24/1732434631753.html