import glob
import os
import pickle
import re

import numpy as np
import cv2
from ultralytics import YOLO
import matplotlib.pyplot as plt

# --- Load calibration ---
data = np.load("calibration_3cams_1.npz")

K1, D1 = data["K1"], data["D1"]
K2, D2 = data["K2"], data["D2"]
K3, D3 = data["K3"], data["D3"]
R12, T12 = data["R12"], data["T12"]
R13, T13 = data["R13"], data["T13"]

# Fix the shape of the translation vectors
T12 = T12.reshape(3, 1)
T13 = T13.reshape(3, 1)

# Camera 1 is the reference frame
P1 = K1 @ np.hstack((np.eye(3), np.zeros((3, 1))))
P2 = K2 @ np.hstack((R12, T12))
P3 = K3 @ np.hstack((R13, T13))

# --- 3D triangulation from three cameras ---
def triangulate_three_views(p1, p2, p3):
    """
    Triangulate a 3D point from three cameras with OpenCV, averaging the two
    camera-pair results for stability.
    p1, p2, p3: pixel coordinates, shape (2,)
    """
    # Reshape to the (2, 1) layout expected by cv2.triangulatePoints
    p1 = p1.reshape(2, 1)
    p2 = p2.reshape(2, 1)
    p3 = p3.reshape(2, 1)

    # Triangulate camera pair 1-2
    X12_h = cv2.triangulatePoints(P1, P2, p1, p2)
    X12 = (X12_h / X12_h[3])[:3].reshape(3)

    # Triangulate camera pair 1-3
    X13_h = cv2.triangulatePoints(P1, P3, p1, p3)
    X13 = (X13_h / X13_h[3])[:3].reshape(3)

    # Average the two estimates for stability
    X_avg = (X12 + X13) / 2
    return X_avg
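
# Alternative (illustrative sketch, not used by the pipeline above): instead of
# averaging two pairwise triangulations, all three views could be combined in a
# single linear (DLT) least-squares solve. This helper is not called anywhere
# in this script.
def triangulate_dlt(points_2d, proj_mats):
    """
    points_2d: list of pixel coordinates, one (2,) array per camera
    proj_mats: list of 3x4 projection matrices, one per camera
    """
    rows = []
    for (u, v), P in zip(points_2d, proj_mats):
        # Each view contributes two linear constraints on the homogeneous point
        rows.append(u * P[2] - P[0])
        rows.append(v * P[2] - P[1])
    A = np.stack(rows)
    # The solution is the right singular vector for the smallest singular value
    _, _, Vt = np.linalg.svd(A)
    X_h = Vt[-1]
    return X_h[:3] / X_h[3]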

# --- Load the YOLOv11 pose model ---
model = YOLO("yolo11x-pose.pt")

# Bone connections between COCO-17 keypoint indices, used to draw the skeleton
skeleton = [
    [0, 1], [0, 2], [1, 3], [2, 4], [0, 5], [0, 6],
    [5, 7], [7, 9], [6, 8], [8, 10], [5, 6],
    [11, 12], [12, 14], [14, 16], [11, 13], [13, 15]
]

plt.ion()  # enable interactive plotting
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Create the initial point and bone artists
points_plot = ax.scatter([], [], [], c='r', marker='o', s=50)
lines_plot = [ax.plot([0, 0], [0, 0], [0, 0], c='b')[0] for _ in skeleton]

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.view_init(elev=20, azim=-60)

# Triangulated 3D joints per frame, keyed by the frame's file name
points3DList = {}

# Frames are matched across cameras by file name and processed in numeric order
frame_paths = sorted(
    glob.glob("video/camA/*.jpg"),
    key=lambda f: int(re.search(r"\d+", os.path.basename(f)).group()),
)

for path in frame_paths:
    # The same frame is assumed to exist for all three cameras
    fname = os.path.basename(path)
    img1 = cv2.imread(f"video/camA/{fname}")
    img2 = cv2.imread(f"video/camB/{fname}")
    img3 = cv2.imread(f"video/camC/{fname}")

    # Keypoint prediction for each view
    results1 = model(img1, verbose=False)[0]
    results2 = model(img2, verbose=False)[0]
    results3 = model(img3, verbose=False)[0]

    # Assume a single person in the scene; skip the frame if any view has no detection
    if len(results1.keypoints.xy) == 0: continue
    if len(results2.keypoints.xy) == 0: continue
    if len(results3.keypoints.xy) == 0: continue

    yolo_cam1 = results1.keypoints.xy[0].cpu().numpy()  # shape (17, 2)
    yolo_cam2 = results2.keypoints.xy[0].cpu().numpy()
    yolo_cam3 = results3.keypoints.xy[0].cpu().numpy()
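
    # Optional sketch (assumption, not part of the original pipeline):
    # Ultralytics results typically expose per-keypoint confidences via
    # results.keypoints.conf; joints below a threshold in any view could be
    # flagged and their triangulated positions discarded (e.g. set to NaN).
    # conf_thresh = 0.5  # hypothetical threshold
    # conf1 = results1.keypoints.conf[0].cpu().numpy()  # shape (17,)
    # conf2 = results2.keypoints.conf[0].cpu().numpy()
    # conf3 = results3.keypoints.conf[0].cpu().numpy()
    # reliable = (conf1 > conf_thresh) & (conf2 > conf_thresh) & (conf3 > conf_thresh)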

    # --- Triangulate all keypoints ---
    points3D = []

    for i in range(yolo_cam1.shape[0]):  # 17 COCO keypoints
        p1 = yolo_cam1[i]
        p2 = yolo_cam2[i]
        p3 = yolo_cam3[i]

        X = triangulate_three_views(p1, p2, p3)
        points3D.append(X)

    points3D = np.array(points3D)
    print(points3D)
    points3DList[fname] = points3D
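
    # Optional (sketch): refresh the interactive 3D preview created above.
    # Updating the scatter relies on matplotlib's private but widely used
    # `_offsets3d` attribute of 3D scatter artists.
    points_plot._offsets3d = (points3D[:, 0], points3D[:, 1], points3D[:, 2])
    for line, (a, b) in zip(lines_plot, skeleton):
        line.set_data([points3D[a, 0], points3D[b, 0]],
                      [points3D[a, 1], points3D[b, 1]])
        line.set_3d_properties([points3D[a, 2], points3D[b, 2]])
    ax.set_xlim(points3D[:, 0].min(), points3D[:, 0].max())
    ax.set_ylim(points3D[:, 1].min(), points3D[:, 1].max())
    ax.set_zlim(points3D[:, 2].min(), points3D[:, 2].max())
    plt.pause(0.001)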

# Save the triangulated sequence for later replay
with open("replay_tpose.pkl", "wb") as f:
    pickle.dump(points3DList, f)
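
# Usage sketch (assumption, not part of the original script): the pickle maps
# each frame's file name to a (17, 3) array of joint positions and could be
# read back for offline replay, e.g.:
#
#   with open("replay_tpose.pkl", "rb") as f:
#       replay = pickle.load(f)
#   for frame_name, joints in replay.items():
#       print(frame_name, joints.shape)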