Configuration before startup (OpenSearch needs a larger mmap count):
sudo sysctl -w vm.max_map_count=262144
Startup command (the initial admin password is passed via an environment variable):
OPENSEARCH_INITIAL_ADMIN_PASSWORD=gggMz888. docker compose up -d
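This assumes a docker-compose.yml in the current directory. The actual file is not shown in this post; a minimal single-node sketch (hypothetical — adjust images and settings to your environment) might look like:

services:
  opensearch:
    image: opensearchproject/opensearch:latest
    environment:
      - discovery.type=single-node
      - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
    ports:
      - "9200:9200"
  opensearch-dashboards:
    image: opensearchproject/opensearch-dashboards:latest
    environment:
      - OPENSEARCH_HOSTS=["https://opensearch:9200"]
    ports:
      - "5601:5601"
    depends_on:
      - opensearch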
Once the containers are up, open http://localhost:5601 to verify that OpenSearch Dashboards is running.
Username: admin
Password: gggMz888.
Create the index. Since the security plugin is enabled, the REST API is served over HTTPS with basic auth (-k skips verification of the self-signed demo certificate). image_vector is a knn_vector field used for kNN retrieval; its dimension must match the embedding model (CLIP ViT-B/32 outputs 512-dimensional vectors). ocr_text stores the OCR-extracted text, and image_path stores the image URL.
curl -ku admin:gggMz888. -X PUT "https://localhost:9200/image-index" -H 'Content-Type: application/json' -d'
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "image_vector": {
        "type": "knn_vector",
        "dimension": 512
      },
      "ocr_text": {
        "type": "text"
      },
      "image_path": {
        "type": "keyword"
      }
    }
  }
}'
Or, equivalently, in the OpenSearch Dashboards Dev Tools console:
PUT image-index
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "index": {
      "knn": true
    }
  },
  "mappings": {
    "properties": {
      "image_vector": {
        "type": "knn_vector",
        "dimension": 512
      },
      "ocr_text": {
        "type": "text"
      },
      "image_path": {
        "type": "keyword"
      }
    }
  }
}
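To confirm the index was created with the expected mapping, you can query it back (same certificate caveat as above):

curl -ku admin:gggMz888. "https://localhost:9200/image-index/_mapping?pretty"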
Source: https://www.pexels.com/zh-cn/search/%E6%97%A5%E5%B8%B8%E7%89%A9%E5%93%81/
Manually download a few dozen images from there.
Then set up a temporary file server so that each image can be reached via a URL, as sketched below.
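One simple option (an assumption — any static file server works; port 5500 matches the URLs used in the indexing code below) is Python's built-in HTTP server, started from the picture directory:

cd /home/gitsilence/Pictures/test
python3 -m http.server 5500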
import os

import numpy as np
import torch
from PIL import Image
from opensearchpy import OpenSearch
from paddleocr import PaddleOCR
from transformers import CLIPProcessor, CLIPModel

pic_path = '/home/gitsilence/Pictures/test'

# Needs to run only once to download and load the model into memory
ocr = PaddleOCR(use_angle_cls=True, lang="ch")

# Load the CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Initialize the OpenSearch client
client = OpenSearch(
    hosts=[{'host': '127.0.0.1', 'port': 9200}],
    http_auth=('admin', 'gggMz888.'),  # admin credentials set at startup; replace if you changed them
    use_ssl=True,
    verify_certs=False,
    ssl_show_warn=False
)
def get_img_list() -> list:
    img_list = []
    files = os.listdir(pic_path)
    for file in files:
        if file.endswith("jpg") or file.endswith("png"):
            print(file)
            img_list.append(file)
    return img_list
def extract_vec_text(img: str):
    # Load and preprocess the image
    image = Image.open(os.path.join(pic_path, img))
    inputs = processor(images=image, return_tensors="pt", padding=True)
    # Compute the image feature vector
    with torch.no_grad():
        vector = model.get_image_features(**inputs).squeeze().numpy()
    vector /= np.linalg.norm(vector)  # L2-normalize
    vector = vector.astype(float).tolist()  # convert to a plain list of floats
    # OCR text extraction
    ocr_result = ocr.ocr(os.path.join(pic_path, img), cls=True)
    if ocr_result is None or ocr_result[0] is None:
        return vector, ""
    lines = [line[1][0] for line in ocr_result[0]]  # each line is (box, (text, confidence))
    ocr_text = "".join(lines)
    return vector, ocr_text
def upload_to_opensearch(image_path, vector, ocr_text):
    """Write the feature vector and OCR text to OpenSearch."""
    doc = {
        "image_vector": vector,  # stored as a list of floats
        "ocr_text": ocr_text,
        "image_path": "http://127.0.0.1:5500/" + image_path  # URL served by the temporary file server
    }
    response = client.index(index="image-index", body=doc)
    print("Document indexed:", response["_id"])
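Indexing one document per request is fine for a few dozen images. For larger batches, a hedged alternative is the bulk helper from opensearch-py (a sketch, reusing extract_vec_text above; upload_bulk is a hypothetical helper):

from opensearchpy.helpers import bulk

def upload_bulk(imgs):
    """Index all images in a single bulk request."""
    actions = []
    for img in imgs:
        vector, ocr_text = extract_vec_text(img)
        actions.append({
            "_index": "image-index",
            "_source": {
                "image_vector": vector,
                "ocr_text": ocr_text,
                "image_path": "http://127.0.0.1:5500/" + img,
            },
        })
    bulk(client, actions)  # returns (success_count, errors)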
if __name__ == '__main__':
    imgs = get_img_list()
    for img in imgs:
        vec, text = extract_vec_text(img)
        print(text)
        upload_to_opensearch(img, vec, text)
        print("--------------------------")
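The retrieval code below lives in a separate script and uses the second model that was tested: the original openai/CLIP package rather than transformers (inferred from the clip.tokenize / encode_image / preprocess calls). A minimal setup sketch, assuming the same index and credentials as above:

import clip  # pip install git+https://github.com/openai/CLIP.git
import torch
from PIL import Image
from opensearchpy import OpenSearch

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)  # also produces 512-dim vectors, matching the index

index_name = "image-index"
client = OpenSearch(
    hosts=[{'host': '127.0.0.1', 'port': 9200}],
    http_auth=('admin', 'gggMz888.'),
    use_ssl=True,
    verify_certs=False,
    ssl_show_warn=False
)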
def search_by_image(image_path, top_k=5):
    """Search by image (image-to-image retrieval)."""
    vector = extract_image_features(image_path)
    query = {
        "query": {
            "knn": {
                "image_vector": {
                    "vector": vector,
                    "k": top_k
                }
            }
        }
    }
    response = client.search(index=index_name, body=query)
    return response['hits']['hits']
def extract_image_features(image_path):
    """Extract the feature vector of an image."""
    image = Image.open(image_path).convert("RGB")
    inputs = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        image_features = model.encode_image(inputs)
    image_features /= image_features.norm(dim=-1, keepdim=True)  # L2-normalize
    return image_features.squeeze().cpu().numpy().astype(float).tolist()
# Search by image
image_path = "../ocr/img.png"  # replace with your own image path
print("Search-by-image results:")
image_results = search_by_image(image_path)
for hit in image_results:
    print(f"Score: {hit['_score']}, Image Path: {hit['_source']['image_path']}")
def search_by_text(query_text, top_k=5):
    """Search images by their OCR text."""
    query = {
        "query": {
            "match": {
                "ocr_text": {
                    "query": query_text,
                    "fuzziness": "AUTO"  # enable fuzzy matching
                }
            }
        },
        "size": top_k
    }
    response = client.search(index=index_name, body=query)
    return response['hits']['hits']
# Search by OCR text
query_text = "计算机"  # replace with your own keyword ("computer")
print("\nSearch-by-text results:")
text_results = search_by_text(query_text)
for hit in text_results:
    print(
        f"Score: {hit['_score']}, OCR Text: {hit['_source']['ocr_text']}, Image Path: {hit['_source']['image_path']}")
def search_by_text_vector(query_text, top_k=5):
    """Embed the query text with CLIP and run a kNN search against the image vectors."""
    # Note: assumes a single query phrase; multiple comma-separated phrases
    # would leave a 2-D tensor after squeeze()
    text = clip.tokenize(query_text.split(",")).to(device)
    with torch.no_grad():
        text_features = model.encode_text(text)
    text_features /= text_features.norm(dim=-1, keepdim=True)  # L2-normalize
    vector = text_features.squeeze().cpu().numpy().astype(float).tolist()
    query = {
        "query": {
            "knn": {
                "image_vector": {
                    "vector": vector,
                    "k": top_k
                }
            }
        }
    }
    response = client.search(index=index_name, body=query)
    return response['hits']['hits']
# Search by text embedding (text-to-image)
query_text = "手机"  # replace with your own keyword ("mobile phone")
print("\nSearch-by-text-embedding results:")
text_results = search_by_text_vector(query_text)
for hit in text_results:
    print(
        f"Score: {hit['_score']}, OCR Text: {hit['_source']['ocr_text']}, Image Path: {hit['_source']['image_path']}")
Two models were tested for vectorization in total. For each query the top two retrieval results are clearly relevant, but the ones after that don't match well.
More sample images are needed for further testing.
Chinese-CLIP (a CLIP variant trained on Chinese data): https://github.com/OFA-Sys/Chinese-CLIP
Code: https://github.com/MrNiebit/langchain/tree/master/picture_search