v2 working
moves_videopose3d.py (new file)

import cv2
import torch
import numpy as np
from common.model import TemporalModel
from common.camera import *
# from common.utils import evaluate
from ultralytics import YOLO
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # not strictly needed in newer matplotlib

# --- 1. Initialize the VideoPose3D 3D lifting model ---
# The published 243-frame checkpoint below was trained with filter_widths
# [3, 3, 3, 3, 3] (receptive field 3^5 = 243); a [3, 3, 3, 3] stack would not
# match its layer shapes.
model_3d = TemporalModel(
    num_joints_in=17,
    in_features=2,
    num_joints_out=17,
    filter_widths=[3, 3, 3, 3, 3],
    causal=False
)

# VideoPose3D checkpoints keep the weights under the 'model_pos' key; loading
# the raw dict with strict=False would silently match nothing.
chk = torch.load("checkpoint/pretrained_h36m_detectron_coco.bin", map_location='cpu')
model_3d.load_state_dict(chk['model_pos'])
model_3d.eval()
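
# Optional sanity check (illustrative; receptive_field() is a method that
# TemporalModel provides in the VideoPose3D repo):
# assert model_3d.receptive_field() == 243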

# --- 2. Initialize the YOLO pose (keypoint) model ---
yolo = YOLO('yolo11s-pose.pt')  # the small variant, chosen for speed
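
# Assumption: a CUDA GPU may be available; if so, moving the detector onto it
# speeds up per-frame inference (ultralytics models support .to()):
# if torch.cuda.is_available():
#     yolo.to('cuda')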

# --- 3. Open the input video ---
cap = cv2.VideoCapture("input.mp4")
frame_buffer = []
BUFFER_SIZE = 243  # VideoPose3D needs a full temporal window of 2D poses
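
# Defensive check (suggested addition): VideoCapture fails silently on a
# missing file, and the loop below would then exit immediately.
if not cap.isOpened():
    raise FileNotFoundError("input.mp4 could not be opened")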

fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(111, projection='3d')

# initialize the scatter plot and the skeleton line artists
scatter = ax.scatter([], [], [], c='r')

skeleton = [
    (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6),
    (0, 7), (7, 8), (8, 9), (7, 12), (12, 13), (13, 14),
    (7, 10), (10, 11), (11, 12)
]
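
# Note (assumption about joint order): pretrained_h36m_detectron_coco.bin
# predicts joints in the Human3.6M 17-joint order (0 hip, 1-3 right leg,
# 4-6 left leg, 7 spine, 8 thorax, 9 neck/nose, 10 head, 11-13 left arm,
# 14-16 right arm). Under that ordering the conventional edge list would be:
# skeleton = [(0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6), (0, 7), (7, 8),
#             (8, 9), (9, 10), (8, 11), (11, 12), (12, 13), (8, 14), (14, 15),
#             (15, 16)]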

skeleton_lines = []
for _ in skeleton:
    line, = ax.plot([], [], [], c='b')
    skeleton_lines.append(line)

ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(0, 2)
ax.view_init(elev=20, azim=-70)
plt.ion()
plt.show()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # --- 4. Detect 2D keypoints with YOLO ---
    results = yolo(frame)
    if len(results) == 0 or len(results[0].keypoints.xy) == 0:
        continue
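
    # Assumption (illustrative): keypoints.xy[0] below takes the first
    # detection, not necessarily the best one. With several people in frame,
    # selecting by box confidence is the safer sketch:
    # best = int(results[0].boxes.conf.argmax())
    # keypoints = results[0].keypoints.xy[best].cpu().numpy()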

    # We assume one person per frame (for simplicity)
    keypoints = results[0].keypoints.xy[0].cpu().numpy()  # shape [17, 2]

    # Normalize to [0, 1] (note: this differs from VideoPose3D's own
    # preprocessing; see the comment below)
    keypoints[:, 0] /= frame.shape[1]
    keypoints[:, 1] /= frame.shape[0]
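
    # Assumption: the reference VideoPose3D pipeline instead normalizes with
    # normalize_screen_coordinates() (imported above via common.camera), which
    # maps pixels into roughly [-1, 1] while preserving aspect ratio. If the
    # 3D output looks distorted, this drop-in replacement for the two
    # divisions above is worth trying:
    # keypoints = normalize_screen_coordinates(keypoints, w=frame.shape[1], h=frame.shape[0])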

    frame_buffer.append(keypoints)

    # --- 5. Once the buffer holds a full sequence, run the 3D prediction ---
    if len(frame_buffer) == BUFFER_SIZE:
        seq_2d = torch.tensor(np.array(frame_buffer)).unsqueeze(0).float()  # [1, 243, 17, 2]
        with torch.no_grad():
            pred_3d = model_3d(seq_2d)

        # With a 243-frame receptive field the model returns a single pose
        # for the window; [0, -1] selects it
        pose_3d = pred_3d[0, -1].numpy()  # [17, 3]

        # --- Optional 2D overlay (disabled) ---
        # for kp in frame_buffer[-1]:
        #     x, y = int(kp[0] * frame.shape[1]), int(kp[1] * frame.shape[0])
        #     cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)
        # cv2.imshow("2D Pose", frame)
        # cv2.waitKey(1)

        # Reorder axes for plotting (X, Z, Y) and flip the new vertical axis
        pose_3d = pose_3d[:, [0, 2, 1]]
        pose_3d[:, 2] *= -1

        # --- 3D plot update ---
        xs, ys, zs = pose_3d[:, 0], pose_3d[:, 1], pose_3d[:, 2]

        # Update the scatter points; _offsets3d is a private matplotlib
        # attribute, but it is the usual trick for moving a 3D scatter in place
        scatter._offsets3d = (xs, ys, zs)

        # Update the skeleton line segments
        for idx, (a, b) in enumerate(skeleton):
            skeleton_lines[idx].set_data([xs[a], xs[b]], [ys[a], ys[b]])
            skeleton_lines[idx].set_3d_properties([zs[a], zs[b]])

        plt.draw()
        plt.pause(0.001)
        print(pose_3d.tolist())

        # Slide the window by one frame
        frame_buffer.pop(0)
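
        # Note (performance): popping a single frame re-runs the full
        # 243-frame model for every subsequent video frame. A hypothetical
        # stride could trade temporal resolution for speed, e.g.:
        # del frame_buffer[:8]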

cap.release()
cv2.destroyAllWindows()