diff --git a/.idea/JustTwerk.iml b/.idea/JustTwerk.iml
index 022092a..beae115 100644
--- a/.idea/JustTwerk.iml
+++ b/.idea/JustTwerk.iml
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 8506274..ac1a72c 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index d23dc60..191ffd8 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -41,7 +58,11 @@
"keyToString": {
"ModuleVcsDetector.initialDetectionPerformed": "true",
"Python.02_whole_body_from_image.executor": "Run",
+ "Python.3cams_3D_v2.executor": "Run",
+ "Python.3cams_3d.executor": "Run",
"Python.3d.executor": "Run",
+ "Python.3ddisplay_replay.executor": "Run",
+ "Python.3ddisplay_replay_smoothed.executor": "Run",
"Python.body3d.executor": "Run",
"Python.body3d_pose_lifter_demo.executor": "Run",
"Python.calculate.executor": "Run",
@@ -50,11 +71,18 @@
"Python.filter.executor": "Run",
"Python.is_torch.executor": "Run",
"Python.local_visualizer_3d.executor": "Run",
+ "Python.mac.executor": "Run",
"Python.main.executor": "Run",
+ "Python.moves_3d_mp4.executor": "Run",
"Python.moves_dump.executor": "Run",
+ "Python.moves_videopose3d.executor": "Run",
"Python.openpose.executor": "Run",
"Python.receive_images.executor": "Run",
"Python.receiver.executor": "Run",
+ "Python.record.executor": "Run",
+ "Python.record_one_pose.executor": "Run",
+ "Python.record_video_pose.executor": "Run",
+ "Python.rotate.executor": "Run",
"Python.sender.executor": "Run",
"Python.test.executor": "Run",
"Python.ultralytics-test.executor": "Run",
@@ -69,13 +97,17 @@
"node.js.selected.package.eslint": "(autodetect)",
"node.js.selected.package.tslint": "(autodetect)",
"nodejs_package_manager_path": "npm",
- "settings.editor.selected.configurable": "configurable.group.editor",
+ "settings.editor.selected.configurable": "preferences.pluginManager",
"vue.rearranger.settings.migration": "true"
}
}]]>
diff --git a/3cams_3D_v2.py b/3cams_3D_v2.py
new file mode 100644
index 0000000..434c672
--- /dev/null
+++ b/3cams_3D_v2.py
@@ -0,0 +1,85 @@
+import glob
+import os
+import re
+import numpy as np
+import cv2
+import tqdm
+from ultralytics import YOLO
+import matplotlib.pyplot as plt
+
+# --- Load calibration ---
+data = np.load("calib_relative_to_A_3cams.npz")
+
+K_A, D_A = data["K_A"], data["D_A"]
+K_B, D_B = data["K_B"], data["D_B"]
+K_C, D_C = data["K_C"], data["D_C"]
+
+R_AA, T_AA = data["R_AA"], data["T_AA"]
+R_BA, T_BA = data["R_BA"], data["T_BA"]
+R_CA, T_CA = data["R_CA"], data["T_CA"]
+
+# reshape translation vectors to (3,1)
+T_AA = T_AA.reshape(3,1)
+T_BA = T_BA.reshape(3,1)
+T_CA = T_CA.reshape(3,1)
+
+# Camera A = reference frame (world origin)
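+# Each projection matrix is P = K @ [R | t]; it maps 3D points expressed in
+# camera A's coordinate frame to pixel coordinates in the respective camera.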
+P_A = K_A @ np.hstack((np.eye(3), np.zeros((3,1))))
+P_B = K_B @ np.hstack((R_BA, T_BA))
+P_C = K_C @ np.hstack((R_CA, T_CA))
+
+
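+# Each keypoint is triangulated twice with cv2.triangulatePoints (DLT on the
+# A-B and A-C camera pairs) and the two estimates are averaged to reduce noise.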
+def triangulate_three_views(pA, pB, pC):
+ pA = pA.reshape(2,1)
+ pB = pB.reshape(2,1)
+ pC = pC.reshape(2,1)
+
+ XAB_h = cv2.triangulatePoints(P_A, P_B, pA, pB)
+ XAB = (XAB_h / XAB_h[3])[:3].reshape(3)
+
+ XAC_h = cv2.triangulatePoints(P_A, P_C, pA, pC)
+ XAC = (XAC_h / XAC_h[3])[:3].reshape(3)
+
+ return (XAB + XAC)/2
+
+
+# --- YOLO Pose ---
+model = YOLO("yolo11x-pose.pt")
+
+skeleton = [
+ [0,1],[0,2],[1,3],[2,4],[0,5],[0,6],
+ [5,7],[7,9],[6,8],[8,10],[5,6],
+ [11,12],[12,14],[14,16],[11,13],[13,15]
+]
+
+points3DList = {}
+
+# Sort frames numerically by the frame number embedded in the filename
+frames = sorted(glob.glob("video/camA/*.jpg"), key=lambda f: int(re.search(r"\d+", f).group()))
+
+for frame in tqdm.tqdm(frames):
+    name = os.path.basename(frame)  # strip the directory, portable across OSes
+ imgA = cv2.imread(f"video/camA/{name}")
+ imgB = cv2.imread(f"video/camB/{name}")
+ imgC = cv2.imread(f"video/camC/{name}")
+
+ rA = model(imgA, verbose=False)[0]
+ rB = model(imgB, verbose=False)[0]
+ rC = model(imgC, verbose=False)[0]
+
+ if len(rA.keypoints.xy)==0: continue
+ if len(rB.keypoints.xy)==0: continue
+ if len(rC.keypoints.xy)==0: continue
+
+ kpA = rA.keypoints.xy[0].cpu().numpy()
+ kpB = rB.keypoints.xy[0].cpu().numpy()
+ kpC = rC.keypoints.xy[0].cpu().numpy()
+
+ pts = []
+ for i in range(kpA.shape[0]):
+ X = triangulate_three_views(kpA[i], kpB[i], kpC[i])
+ pts.append(X)
+
+ pts = np.array(pts)
+ points3DList[name] = pts
+
+import pickle
+with open("replay_tpose.pkl", "wb") as f:
+ pickle.dump(points3DList, f)
diff --git a/3cams_3d.py b/3cams_3d.py
new file mode 100644
index 0000000..72985d3
--- /dev/null
+++ b/3cams_3d.py
@@ -0,0 +1,112 @@
+import glob
+import os
+import re
+
+import numpy as np
+import cv2
+from ultralytics import YOLO
+import matplotlib.pyplot as plt
+
+# --- Load calibration ---
+data = np.load("calibration_3cams_1.npz")
+
+K1, D1 = data["K1"], data["D1"]
+K2, D2 = data["K2"], data["D2"]
+K3, D3 = data["K3"], data["D3"]
+R12, T12 = data["R12"], data["T12"]
+R13, T13 = data["R13"], data["T13"]
+
+# Fix the shape of the translation vectors
+T12 = T12.reshape(3,1)
+T13 = T13.reshape(3,1)
+
+# Camera 1 = reference frame (world origin)
+P1 = K1 @ np.hstack((np.eye(3), np.zeros((3,1))))
+P2 = K2 @ np.hstack((R12, T12))
+P3 = K3 @ np.hstack((R13, T13))
+
+# --- 3D triangulation from three cameras ---
+def triangulate_three_views(p1, p2, p3):
+    """
+    3D triangulation from three cameras using OpenCV, averaged for stability.
+    p1, p2, p3: pixel coordinates, shape (2,)
+    """
+    # Reshape to (2,1) as expected by cv2.triangulatePoints
+    p1 = p1.reshape(2,1)
+    p2 = p2.reshape(2,1)
+    p3 = p3.reshape(2,1)
+
+    # Triangulate camera pair 1-2
+ X12_h = cv2.triangulatePoints(P1, P2, p1, p2)
+ X12 = (X12_h / X12_h[3])[:3].reshape(3)
+
+    # Triangulate camera pair 1-3
+ X13_h = cv2.triangulatePoints(P1, P3, p1, p3)
+ X13 = (X13_h / X13_h[3])[:3].reshape(3)
+
+    # Average the two estimates for stability
+ X_avg = (X12 + X13) / 2
+ return X_avg
+
+# --- Load the YOLO pose model ---
+model = YOLO("yolo11x-pose.pt")
+
+skeleton = [
+ [0,1],[0,2],[1,3],[2,4],[0,5],[0,6],
+ [5,7],[7,9],[6,8],[8,10],[5,6],
+ [11,12],[12,14],[14,16],[11,13],[13,15]
+]
+
+plt.ion()  # enable interactive mode
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+
+# Create the initial scatter plot
+points_plot = ax.scatter([], [], [], c='r', marker='o', s=50)
+lines_plot = [ax.plot([0,0],[0,0],[0,0], c='b')[0] for _ in skeleton]
+
+ax.set_xlabel('X')
+ax.set_ylabel('Y')
+ax.set_zlabel('Z')
+ax.view_init(elev=20, azim=-60)
+
+points3DList = {}
+
+# Iterate over camera A frames sorted numerically by the number in the filename
+for frame_path in sorted(glob.glob("video/camA/*.jpg"), key=lambda f: int(re.search(r"\d+", f).group())):
+    # Assume a matching image exists for each of the 3 cameras
+    data = os.path.basename(frame_path)
+    img1 = cv2.imread(f"video/camA/{data}")
+    img2 = cv2.imread(f"video/camB/{data}")
+    img3 = cv2.imread(f"video/camC/{data}")
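+    # Frames from the three cameras are matched purely by filename, so the
+    # three captures are assumed to be synchronized frame-for-frame.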
+
+    # Keypoint prediction
+ results1 = model(img1, verbose=False)[0]
+ results2 = model(img2, verbose=False)[0]
+ results3 = model(img3, verbose=False)[0]
+
+    # Assume a single person in the scene
+ if len(results1.keypoints.xy) == 0: continue
+ if len(results2.keypoints.xy) == 0: continue
+ if len(results3.keypoints.xy) == 0: continue
+
+ yolo_cam1 = results1.keypoints.xy[0].cpu().numpy() # shape (17,2)
+ yolo_cam2 = results2.keypoints.xy[0].cpu().numpy()
+ yolo_cam3 = results3.keypoints.xy[0].cpu().numpy()
+
+    # --- Triangulate all keypoints ---
+ points3D = []
+
+    for i in range(yolo_cam1.shape[0]):  # 17 COCO keypoints
+ p1 = yolo_cam1[i]
+ p2 = yolo_cam2[i]
+ p3 = yolo_cam3[i]
+
+ X = triangulate_three_views(p1, p2, p3)
+ points3D.append(X)
+
+ points3D = np.array(points3D)
+ print(points3D)
+ points3DList[data] = points3D
+
+import pickle
+
+with open("replay_tpose.pkl", "wb") as f:
+ pickle.dump(points3DList, f)
\ No newline at end of file
diff --git a/3ddisplay_replay.py b/3ddisplay_replay.py
new file mode 100644
index 0000000..5594ed3
--- /dev/null
+++ b/3ddisplay_replay.py
@@ -0,0 +1,57 @@
+import pickle
+
+from matplotlib import pyplot as plt
+
+with open("replay_xyz.pkl", "rb") as f:
+ points3DList = pickle.load(f)
+
+skeleton = [
+ [0, 1], [0, 2], # nose -> eyes
+ [1, 3], [2, 4], # eyes -> ears
+ # [0, 5], [0, 6], # nose -> shoulders
+ [5, 7], [7, 9], # left arm
+ [6, 8], [8, 10], # right arm
+ [5, 6], # shoulders
+ [5, 11], [6, 12], # shoulders -> hips
+ [11, 12], # hips
+ [11, 13], [13, 15], # left leg
+ [12, 14], [14, 16] # right leg
+]
+
+plt.ion()  # enable interactive mode
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+
+# Create the initial scatter plot
+points_plot = ax.scatter([], [], [], c='r', marker='o', s=50)
+lines_plot = [ax.plot([0,0],[0,0],[0,0], c='b')[0] for _ in skeleton]
+
+ax.set_xlabel('X')
+ax.set_ylabel('Y')
+ax.set_zlabel('Z')
+ax.set_xlim(-0.6, 0.4)
+ax.set_ylim(1.2, 2.2)
+ax.set_zlim(-0.5, 1.1)
+ax.view_init(elev=20, azim=-60)
+
+i = 0
+
+for points3Dkey in points3DList:
+ points3D = points3DList[points3Dkey]
+ print("3D points:\n", points3D)
+
+    # --- 3D visualization (remap axes for display) ---
+ X = points3D[:,0] - 0.25
+ Z = -points3D[:,1] + 0.5
+ Y = points3D[:,2]
+
+ points_plot._offsets3d = (X, Y, Z)
+
+    # Update the skeleton lines
+ for idx, (i, j) in enumerate(skeleton):
+ lines_plot[idx].set_data([X[i], X[j]], [Y[i], Y[j]])
+ lines_plot[idx].set_3d_properties([Z[i], Z[j]])
+
+ fig.canvas.draw()
+ fig.canvas.flush_events()
+ plt.pause(0.01)
diff --git a/3ddisplay_replay_smoothed.py b/3ddisplay_replay_smoothed.py
new file mode 100644
index 0000000..3223a93
--- /dev/null
+++ b/3ddisplay_replay_smoothed.py
@@ -0,0 +1,58 @@
+from scipy.signal import savgol_filter
+import numpy as np
+import pickle
+from matplotlib import pyplot as plt
+
+with open("replay_xyz.pkl", "rb") as f:
+ points3DList = pickle.load(f)
+
+skeleton = [
+ [0, 1], [0, 2],
+ [1, 3], [2, 4],
+ [5, 7], [7, 9],
+ [6, 8], [8, 10],
+ [5, 6],
+ [5, 11], [6, 12],
+ [11, 12],
+ [11, 13], [13, 15],
+ [12, 14], [14, 16]
+]
+
+keys_sorted = sorted(points3DList.keys())
+points_sequence = np.array([points3DList[k] for k in keys_sorted]) # (frames, points, 3)
+
+# --- Savitzky-Golay smoothing filter ---
+window_length = 7   # must be odd
+polyorder = 2
+smoothed_sequence = savgol_filter(points_sequence, window_length=window_length,
+ polyorder=polyorder, axis=0, mode='nearest')
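+# axis=0 smooths each joint coordinate independently over time: every value is
+# replaced by a quadratic fit over a 7-frame window, with mode='nearest'
+# padding the sequence at both ends.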
+
+plt.ion()
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+
+points_plot = ax.scatter([], [], [], c='r', marker='o', s=50)
+lines_plot = [ax.plot([0,0],[0,0],[0,0], c='b')[0] for _ in skeleton]
+
+ax.set_xlabel('X')
+ax.set_ylabel('Y')
+ax.set_zlabel('Z')
+ax.set_xlim(-0.6, 0.4)
+ax.set_ylim(1.2, 2.2)
+ax.set_zlim(-0.5, 1.1)
+ax.view_init(elev=20, azim=-60)
+
+for frame_points in smoothed_sequence:
+ X = frame_points[:,0] - 0.25
+ Z = -frame_points[:,1] + 0.5
+ Y = frame_points[:,2]
+
+ points_plot._offsets3d = (X, Y, Z)
+
+ for idx, (i, j) in enumerate(skeleton):
+ lines_plot[idx].set_data([X[i], X[j]], [Y[i], Y[j]])
+ lines_plot[idx].set_3d_properties([Z[i], Z[j]])
+
+ fig.canvas.draw()
+ fig.canvas.flush_events()
+ plt.pause(0.001)
diff --git a/calculate.py b/calculate.py
index c272cf2..db52ce5 100644
--- a/calculate.py
+++ b/calculate.py
@@ -52,7 +52,7 @@ def compare_poses(f1, f2):
def compare_poses_boolean(f1, f2):
l2, cos_sim = compare_poses(f1, f2)
- return l2 < 0.7 and cos_sim > 0.90
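+    # looser thresholds: accept a match when the L2 distance between the two
+    # normalized poses is below 1.2 and their cosine similarity is above 0.85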
+ return l2 < 1.2 and cos_sim > 0.85
def center(keypoints):
mid_hip = (keypoints[11] + keypoints[12]) / 2 # left_hip=11, right_hip=12
diff --git a/checkpoint/pretrained_h36m_detectron_coco.bin b/checkpoint/pretrained_h36m_detectron_coco.bin
new file mode 100644
index 0000000..9fa1c30
Binary files /dev/null and b/checkpoint/pretrained_h36m_detectron_coco.bin differ
diff --git a/main.py b/main.py
index 84c9689..3df657a 100644
--- a/main.py
+++ b/main.py
@@ -1,10 +1,15 @@
+import json
+import os
import pickle
+import socket
import sys
+import torch
from ultralytics import YOLO
import cv2
import time
+import poses
import utils
from calculate import normalize_pose, compare_poses_boolean
from draw import draw_new
@@ -12,6 +17,7 @@ from utils import find_closest
from video_methods import initialize_method
model = YOLO("yolo11s-pose.pt")
+model.to(torch.device('cuda:0'))
if len(sys.argv) == 2:
method_type = sys.argv[1]
@@ -40,123 +46,174 @@ def main():
mehCount = 0
goodCount = 0
failCount = 0
- failRate = 2
+ failRate = 10
+
+ server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ server_socket.bind(("0.0.0.0", 13425))
+ server_socket.listen()
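+    # Plain TCP text protocol used below: the client first sends "id_<map>";
+    # the server replies "not_exists" if the recording is missing, "start" once
+    # tracking begins, "streak_<n>" every 30 frames, and "finish_<json stats>"
+    # when the routine ends.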
+
+    print("waiting for a client...")
+
+ client_socket, _ = server_socket.accept()
+    print("got a client!")
+
+ data = client_socket.recv(1024).decode()
+ if not data.startswith("id_"):
+ client_socket.close()
+ server_socket.close()
+ return
+
+ map = data.replace("id_", "").replace("\n", "").strip()
+ if not os.path.isfile(f'moves_{map}.pkl'):
+ print(map)
+ print("moves_" + map + ".pkl")
+ client_socket.sendall("not_exists".encode())
+ client_socket.close()
+ server_socket.close()
+ return
moves = []
- with open('moves.pkl', 'rb') as f: # 'rb' = read binary
+ with open(f'moves_{map}.pkl', 'rb') as f: # 'rb' = read binary
moves = pickle.load(f)
startValue = moves[0][0]
totalCount = len(moves)
for i, move in enumerate(moves):
- moves[i] = (move[0] - startValue, move[1], move[2])
+ moves[i] = ((move[0] - startValue) / 1000, move[1], move[2])
- print(moves)
+ currIndex = 1
+ currTimeIndex = time.time()
+ deltaTime = time.time()
+    currStatus = f"Started dancing {currIndex}"
+ currMove = moves[0]
+
+
+ doStreak = False
+ streak = 0
+ doing = 0
+ actuallyDoing = False
while True:
+ doing += 1
frame = method.receive_frame()
-
frame = cv2.flip(frame, 1)
-
results = model(frame, verbose=False)
+ if not actuallyDoing:
+ client_socket.sendall("start".encode())
+ actuallyDoing = True
+
current_time = time.time()
delta = current_time - last_time
last_time = current_time
+ if doing % 30 == 0:
+ if doStreak:
+ streak += 5
+ client_socket.sendall(f"streak_{streak}".encode())
+ else:
+ streak = 0
+ client_socket.sendall(f"streak_0".encode())
+
fps = 1 / delta if delta > 0 else float('inf')
# print(f"\rDelta: {delta:.4f}s, FPS: {fps:.2f}", end="")
- if len(results) == 0:
- continue
+ if len(results) != 0:
- result = results[0]
- kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
+ result = results[0]
+ kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
- if kpts is None:
- continue
+ if kpts is None:
+ continue
- img = frame
+ img = frame
- normalized = normalize_pose(result.keypoints.xy.cpu().numpy()[0])
+ normalized = normalize_pose(result.keypoints.xy.cpu().numpy()[0])
- draw = utils.normalize(result.keypoints.xy.cpu().numpy()[0])
- cv2.imshow('you', draw_new(draw * 100 + 100))
+ draw = utils.normalize(result.keypoints.xy.cpu().numpy()[0])
+ cv2.imshow('you', draw_new(draw * 100 + 100))
- if currTimeIndex != 0 and moves.index(find_closest(moves, time.time() - currTimeIndex)) == len(moves) - 1:
- mehCount = totalCount - failCount - goodCount
+ if currTimeIndex != 0 and moves.index(find_closest(moves, time.time() - currTimeIndex)) == len(moves) - 1:
+ mehCount = abs(totalCount - (failCount + goodCount))
- print(
- f"PODSUMOWANIE: FAIL {failCount} MEH: {mehCount} PERFECT: {goodCount} PERCENTAGE: {(goodCount + (0.95 * mehCount)) / totalCount * 100}%")
- exit(1)
+ stats = {
+ "failCount": failCount,
+ "goodCount": goodCount,
+ "mehCount": mehCount,
+ "percentage": (goodCount + (0.85 * mehCount)) / totalCount * 100
+ }
- if currMove is None:
- if compare_poses_boolean(moves[0][1], normalized):
- currIndex = 1
- currTimeIndex = time.time()
- deltaTime = time.time()
- currStatus = f"Zaczoles tanczyc {currIndex}"
- currMove = moves[0]
+ client_socket.sendall(f"finish_{json.dumps(stats)}".encode())
+ print(
+ f"PODSUMOWANIE: FAIL {failCount} MEH: {mehCount} PERFECT: {goodCount} PERCENTAGE: {(goodCount + (0.85 * mehCount)) / totalCount * 100}%")
+ cv2.destroyAllWindows()
+ break
+ # thread = Thread(target=print_animation, args=(moves, False))
+ # thread.start()
+ else:
+ changed = False
- # thread = Thread(target=print_animation, args=(moves, False))
- # thread.start()
- else:
- changed = False
+ closest = find_closest(moves, time.time() - currTimeIndex)
+ cv2.imshow('Dots', draw_new(utils.normalize(closest[2]) * 250 + 250))
- closest = find_closest(moves, time.time() - currTimeIndex)
- cv2.imshow('Dots', draw_new(closest[2]))
+ if abs((time.time() - currTimeIndex) - moves[currIndex][0]) > failRate:
+ currStatus = f"FAIL!"
+ failCount += 1
+ doStreak = False
- if abs((time.time() - currTimeIndex) - moves[currIndex][0]) > failRate:
- currStatus = f"FAIL!"
- failCount += 1
+ if compare_poses_boolean(closest[1], normalized):
+ # delays += (time.time() - deltaTime - moves[0][0]) * 1000
+ # delaysCount += 1
- if compare_poses_boolean(closest[1], normalized):
- # delays += (time.time() - deltaTime - moves[0][0]) * 1000
- # delaysCount += 1
+ # currStatus = f"SUPER! {currIndex} Zostalo {len(moves)} Delay {(time.time() - currTimeIndex - closest[0]) / 1000}ms"
+ deltaTime = time.time()
- currStatus = f"SUPER! {currIndex} Zostalo {len(moves)} Delay {(time.time() - currTimeIndex - closest[0]) / 1000}ms"
- deltaTime = time.time()
+ currIndex = moves.index(closest) + 1
+ goodCount += 1
+ changed = True
+ doStreak = True
- currIndex = moves.index(closest) + 1
- goodCount += 1
- changed = True
+ if not changed and compare_poses_boolean(moves[currIndex][1], normalized):
+ # delays += (time.time() - deltaTime - moves[0][0]) * 1000
+ # delaysCount += 1
- if not changed and compare_poses_boolean(moves[currIndex][1], normalized):
- # delays += (time.time() - deltaTime - moves[0][0]) * 1000
- # delaysCount += 1
+ # currStatus = f"SUPER! {currIndex} Zostalo {len(moves)} Delay {(time.time() - currTimeIndex - closest[0]) / 1000}ms"
+ deltaTime = time.time()
- currStatus = f"SUPER! {currIndex} Zostalo {len(moves)} Delay {(time.time() - currTimeIndex - closest[0]) / 1000}ms"
- deltaTime = time.time()
+ changed = True
- changed = True
+ currIndex += 1
+ goodCount += 1
+ doStreak = True
- currIndex += 1
- goodCount += 1
+ # if do_pose_shot:
+ # moves.append((time.time() - startTime, normalize_pose(result.keypoints.xy.cpu().numpy()[0]), result.keypoints.xy.cpu()[0]))
+ # elif len(moves) != 0:
+ # with open('moves.pkl', 'wb') as f: # 'wb' = write binary
+ # pickle.dump
+ # (moves, f)
+ #
+ # exit(1)
- # if do_pose_shot:
- # moves.append((time.time() - startTime, normalize_pose(result.keypoints.xy.cpu().numpy()[0]), result.keypoints.xy.cpu()[0]))
- # elif len(moves) != 0:
- # with open('moves.pkl', 'wb') as f: # 'wb' = write binary
- # pickle.dump
- # (moves, f)
- #
- # exit(1)
-
- cv2.putText(
- img, # obraz
- currStatus, # tekst
- (50, 100), # pozycja (x, y) lewego dolnego rogu tekstu
- cv2.FONT_HERSHEY_SIMPLEX, # czcionka
- 1, # rozmiar (skalowanie)
- (0, 0, 255), # kolor (BGR) - tutaj czerwony
- 2, # grubość linii
- cv2.LINE_AA # typ antyaliasingu
- )
+            cv2.putText(
+                img,                        # image
+                currStatus,                 # text
+                (50, 100),                  # position (x, y) of the text's bottom-left corner
+                cv2.FONT_HERSHEY_SIMPLEX,   # font
+                1,                          # font scale
+                (0, 0, 255),                # color (BGR) - red here
+                2,                          # line thickness
+                cv2.LINE_AA                 # anti-aliasing type
+            )
cv2.imshow('Klatka z kamerki', img)
cv2.setMouseCallback('Klatka z kamerki', click_event)
cv2.waitKey(1) # Czekaj na naciśnięcie klawisza
-main()
\ No newline at end of file
+try:
+ while True:
+ main()
+except KeyboardInterrupt:
+ pass
\ No newline at end of file
diff --git a/moves.pkl b/moves.pkl
index 2ed77d7..b732468 100644
Binary files a/moves.pkl and b/moves.pkl differ
diff --git a/moves_3d_mp4.py b/moves_3d_mp4.py
index ffba887..d753a7f 100644
--- a/moves_3d_mp4.py
+++ b/moves_3d_mp4.py
@@ -1,33 +1,27 @@
-import cv2
-import mediapipe as mp
-import matplotlib
-matplotlib.use("Agg") # <-- ważne: wyłącza GUI
-import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
-import numpy as np
+import pickle
+import time
-# ---------------------
-# Wideo wejściowe
-# ---------------------
+import cv2
+from ultralytics import YOLO
+
+from calculate import normalize_pose
+from utils import normalize
+
+# Load the input video
cap = cv2.VideoCapture("input.mp4")
fps = cap.get(cv2.CAP_PROP_FPS)
-width = 640
-height = 640
-
-# ---------------------
-# Wideo wyjściowe
-# ---------------------
-fourcc = cv2.VideoWriter_fourcc(*"MJPG")
-
+width = 1920
+height = 1080
+# Video writer settings
+fourcc = cv2.VideoWriter_fourcc(*"avc1")
out = cv2.VideoWriter("output.mp4", fourcc, fps, (width, height))
-# ---------------------
-# MediaPipe Pose
-# ---------------------
-mp_pose = mp.solutions.pose
-pose = mp_pose.Pose(static_image_mode=False, model_complexity=1)
+# Load the YOLO pose model
+model = YOLO("yolo11x-pose.pt", verbose=False)  # your pose model
+moves = []
+started = False
frame_id = 0
while True:
@@ -35,58 +29,32 @@ while True:
if not ok:
break
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
- results = pose.process(rgb)
+    # Resize to the output resolution
+ frame_resized = cv2.resize(frame, (width, height))
- # -----------------------------------------
- # 3D landmarki: pose_world_landmarks
- # -----------------------------------------
- if results.pose_world_landmarks:
- lm = results.pose_world_landmarks.landmark
+    # Pose detection
+ results = model.predict(frame_resized, verbose=False)
- xs = np.array([p.x for p in lm])
- ys = np.array([p.y for p in lm])
- zs = np.array([p.z for p in lm])
+    # Draw the 2D keypoints
+ for result in results:
+ if result.keypoints is not None and len(result.keypoints.xy) > 0:
+            for keypoint in result.keypoints.xy[0]:  # [0] = first (single) person
+ if not started:
+ frame_id = 0
+ started = True
+                x, y = keypoint  # pixel coordinates
+ x = int(x)
+ y = int(y)
+                cv2.circle(frame_resized, (x, y), 5, (0, 255, 0), -1)  # green dots
- # -----------------------------
- # RYSOWANIE 3D w Matplotlib
- # -----------------------------
- fig = plt.figure(figsize=(6.4, 6.4), dpi=100)
- ax = fig.add_subplot(111, projection="3d")
-
- ax.scatter(xs, zs, ys, s=20)
-
- ax.set_xlim([-1, 1])
- ax.set_ylim([-1, 1])
- ax.set_zlim([-1, 1])
-
- ax.set_xlabel("X")
- ax.set_ylabel("Y")
- ax.set_zlabel("Z")
- ax.invert_zaxis()
-
- # -----------------------------------------
- # Konwersja wykresu Matplotlib → klatka do MP4
- # -----------------------------------------
- fig.canvas.draw()
- renderer = fig.canvas.get_renderer()
-
- w, h = fig.canvas.get_width_height()
-
- buf = renderer.buffer_rgba()
- plot_img = np.frombuffer(buf, dtype=np.uint8).reshape((h, w, 4))[:, :, :3]
-
-
- plt.close(fig)
-
- # Dopasowanie rozmiaru do wideo
- plot_img = cv2.resize(plot_img, (width, height))
- plot_img = cv2.cvtColor(plot_img, cv2.COLOR_RGB2BGR)
-
- out.write(plot_img)
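+            # Each entry: (timestamp in seconds = frame_id / fps, normalized
+            # pose used for comparison, scaled 2D points used for drawing)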
+ moves.append((frame_id * 1 / fps, normalize_pose(result.keypoints.xy.cpu().numpy()[0]), normalize(result.keypoints.xy.cpu()[0]) * 160 + 250))
+ out.write(frame_resized)
frame_id += 1
+with open('moves2.pkl', 'wb') as f: # 'wb' = write binary
+ pickle.dump(moves, f)
+
cap.release()
out.release()
print("Zapisano: output.mp4")
diff --git a/moves_dump.py b/moves_dump.py
index 238c241..9961205 100644
--- a/moves_dump.py
+++ b/moves_dump.py
@@ -8,11 +8,26 @@ import numpy as np
import utils
from draw import draw_new
-moves = []
+# moves = {}
better_moves = {}
-with open('moves.pkl', 'rb') as f: # 'rb' = read binary
+with open('replay_tpose.pkl', 'rb') as f: # 'rb' = read binary
moves = pickle.load(f)
+ movesCopy = {}
+
+ for move in moves:
+ # listx = moves[move].tolist()
+ # print(type(listx))
+
+ # if type(listx) != list:
+ # listx = listx.tolist()
+
+ movesCopy[move.replace(".jpg", "")] = moves[move].tolist()
+
+ with open("plikv10.json", "w", encoding="utf-8") as f:
+ json.dump(movesCopy, f)
+
+exit(1)
startValue = moves[0][0]
totalCount = len(moves)
@@ -35,8 +50,13 @@ for i, move in enumerate(moves):
# Do rysowania (np. przesunięcie na ekran)
- draw = utils.normalize(move[2]) * 200 + 250
+ draw = utils.normalize(move[2])
+ better_moves[round((move[0] - startValue) * 1000)] = draw.tolist()
- cv2.imshow('you', draw_new(draw))
- cv2.waitKey(1)
- time.sleep(0.1)
+ # cv2.imshow('you', draw_new(draw))
+ # cv2.waitKey(1)
+ # time.sleep(0.1)
+
+
+with open("plik-234.json", "w", encoding="utf-8") as f:
+ json.dump(better_moves, f)
diff --git a/moves_dump_2.py b/moves_dump_2.py
new file mode 100644
index 0000000..63a5f30
--- /dev/null
+++ b/moves_dump_2.py
@@ -0,0 +1,47 @@
+import json
+import pickle
+import time
+
+import cv2
+import numpy as np
+
+import utils
+from draw import draw_new
+
+moves = []
+better_moves = {}
+
+with open('moves.pkl', 'rb') as f: # 'rb' = read binary
+ moves = pickle.load(f)
+
+startValue = moves[0][0]
+totalCount = len(moves)
+
+for i, move in enumerate(moves):
+ moves[i] = (move[0] - startValue, move[1], move[2])
+
+ # left_hip = move[2][11] # Left Hip
+ # right_hip = move[2][12] # Right Hip
+ # center = (left_hip + right_hip) / 2
+ #
+    # # Normalization relative to the body center
+ # normalized_keypoints = move[2] - center
+ #
+ # better_moves[round((move[0] - startValue) * 1000)] = normalized_keypoints.tolist()
+ #
+ # # scale = utils.distance(move[2][11], move[2][12])
+ # # print(scale)
+ # draw = normalized_keypoints + 200
+
+
+    # For drawing (e.g., offset onto the screen)
+ draw = utils.normalize(move[2])
+ better_moves[round((move[0] - startValue) * 1000)] = draw.tolist()
+
+ # cv2.imshow('you', draw_new(draw))
+ # cv2.waitKey(1)
+ # time.sleep(0.1)
+
+
+with open("plik.json", "w", encoding="utf-8") as f:
+ json.dump(better_moves, f)
diff --git a/moves_videopose3d.py b/moves_videopose3d.py
new file mode 100644
index 0000000..1f19685
--- /dev/null
+++ b/moves_videopose3d.py
@@ -0,0 +1,116 @@
+import cv2
+import torch
+import numpy as np
+from common.model import TemporalModel
+from common.camera import *
+# from common.utils import evaluate
+from ultralytics import YOLO
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D # not strictly needed in newer matplotlib
+import time
+
+# --- 1. Initialize the VideoPose3D temporal model ---
+model_3d = TemporalModel(
+    num_joints_in=17,
+    in_features=2,
+    num_joints_out=17,
+    filter_widths=[3,3,3,3,3],  # five temporal blocks of width 3 -> 243-frame receptive field
+    causal=False
+)
+
+chk = torch.load("checkpoint/pretrained_h36m_detectron_coco.bin", map_location='cpu')
+# the checkpoint stores the pose network weights under the 'model_pos' key
+model_3d.load_state_dict(chk['model_pos'], strict=False)
+model_3d.eval()
+
+# --- 2. Initialize the YOLO pose model ---
+yolo = YOLO('yolo11s-pose.pt')  # small variant for speed
+
+# --- 3. Load the video ---
+cap = cv2.VideoCapture("input.mp4")
+frame_buffer = []
+BUFFER_SIZE = 243  # VideoPose3D needs a full temporal window of frames
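+# 243 matches the receptive field of the pretrained 243-frame model
+# (3**5 = 243); the buffer acts as a sliding window that drops its oldest
+# frame after each prediction.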
+
+fig = plt.figure(figsize=(5, 5))
+ax = fig.add_subplot(111, projection='3d')
+
+# initialize the scatter plot and skeleton lines
+scatter = ax.scatter([], [], [], c='r')
+
+skeleton = [ (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6), (0, 7), (7, 8), (8, 9), (7, 12), (12, 13), (13, 14), (7, 10), (10, 11), (11, 12) ]
+
+skeleton_lines = []
+for _ in skeleton:
+ line, = ax.plot([], [], [], c='b')
+ skeleton_lines.append(line)
+
+ax.set_xlim3d(-1, 1)
+ax.set_ylim3d(-1, 1)
+ax.set_zlim3d(0, 2)
+ax.view_init(elev=20, azim=-70)
+plt.ion()
+plt.show()
+
+while True:
+ ret, frame = cap.read()
+ if not ret:
+ break
+
+    # --- 4. Keypoint detection with YOLO ---
+ results = yolo(frame)
+ if len(results) == 0 or len(results[0].keypoints.xy) == 0:
+ continue
+
+    # Assume one person per frame (for simplicity)
+ keypoints = results[0].keypoints.xy[0] # shape [17, 2]
+ keypoints = np.array(keypoints)
+
+    # Normalize to [0,1] (optional, depending on the VideoPose3D preprocessing)
+ keypoints[:, 0] /= frame.shape[1]
+ keypoints[:, 1] /= frame.shape[0]
+
+ frame_buffer.append(keypoints)
+
+    # --- 5. Once a full sequence is buffered, run the 3D prediction ---
+ skeleton = [
+ (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6),
+ (0, 7), (7, 8), (8, 9), (7, 12), (12, 13), (13, 14),
+ (7, 10), (10, 11), (11, 12)
+ ]
+
+ # --- after getting pred_3d ---
+ if len(frame_buffer) == BUFFER_SIZE:
+ seq_2d = torch.tensor(np.array(frame_buffer)).unsqueeze(0).float()
+ with torch.no_grad():
+ pred_3d = model_3d(seq_2d)
+
+ pose_3d = pred_3d[0, -1].numpy() # [17,3]
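+        # the model predicts root-relative joint positions; [0, -1] takes the
+        # last frame of the predicted sequence for the current window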
+
+ # --- 2D overlay ---
+ # for i, kp in enumerate(frame_buffer[-1]):
+ # x, y = int(kp[0] * frame.shape[1]), int(kp[1] * frame.shape[0])
+ # cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)
+ # cv2.imshow("2D Pose", frame)
+ # cv2.waitKey(1)
+
+ pose_3d = pose_3d[:, [0, 2, 1]] # X, Z, Y
+ pose_3d[:, 2] *= -1
+
+ # --- 3D update ---
+ xs, ys, zs = pose_3d[:, 0], pose_3d[:, 1], pose_3d[:, 2]
+
+ # update scatter
+ scatter._offsets3d = (xs, ys, zs)
+
+ # update skeleton lines
+ for idx, (a, b) in enumerate(skeleton):
+ skeleton_lines[idx].set_data([xs[a], xs[b]], [ys[a], ys[b]])
+ skeleton_lines[idx].set_3d_properties([zs[a], zs[b]])
+
+ plt.draw()
+ plt.pause(0.001)
+ print(pose_3d.tolist())
+
+ frame_buffer.pop(0)
+
+cap.release()
+cv2.destroyAllWindows()
diff --git a/poses.py b/poses.py
new file mode 100644
index 0000000..c745237
--- /dev/null
+++ b/poses.py
@@ -0,0 +1 @@
+t_pose = [ 0.079178 , 0.1963 , 0.098774 , 0.033032 , 0.99646 , 0.08401 , 0.99999, 0.0049546, -0.99134 , 0.13132, -0.99791, -0.064541, 0.063086, 0.99801, -0.03562, 0.99936, -0.012939, 0.99992, 0.02004, 0.9998]
\ No newline at end of file
diff --git a/record.py b/record.py
new file mode 100644
index 0000000..ba3ea2e
--- /dev/null
+++ b/record.py
@@ -0,0 +1,68 @@
+import pickle
+import sys
+
+import torch
+from ultralytics import YOLO
+import cv2
+import time
+
+import utils
+from calculate import normalize_pose, compare_poses_boolean
+from draw import draw_new
+from utils import find_closest
+from video_methods import initialize_method
+
+model = YOLO("yolo11x-pose.pt")
+model.to(torch.device('cuda:0'))
+
+if len(sys.argv) == 2:
+ method_type = sys.argv[1]
+else:
+    print("Provide the argument 'cam' or 'net'.")
+ exit(1)
+
+method = initialize_method(method_type)
+
+do_pose_shot = False
+startTime = 0
+
+def click_event(event, x, y, flags, param):
+ global do_pose_shot, startTime
+
+    if event == cv2.EVENT_LBUTTONDOWN:  # left mouse button
+ do_pose_shot = not do_pose_shot
+ if do_pose_shot:
+ startTime = time.time()
+
+def main():
+ moves = []
+
+ while True:
+ frame = method.receive_frame()
+
+ frame = cv2.flip(frame, 1)
+ results = model(frame, verbose=False)
+
+ if len(results) != 0:
+
+ result = results[0]
+ kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
+
+ if kpts is None:
+ continue
+
+ img = frame
+
+ if do_pose_shot:
+ moves.append((time.time() - startTime, normalize_pose(result.keypoints.xy.cpu().numpy()[0]), result.keypoints.xy.cpu()[0]))
+ elif len(moves) != 0:
+ with open('moves.pkl', 'wb') as f: # 'wb' = write binary
+ pickle.dump(moves, f)
+ exit(1)
+
+
+ cv2.imshow('Klatka z kamerki', img)
+ cv2.setMouseCallback('Klatka z kamerki', click_event)
+        cv2.waitKey(1)  # pump the window event loop (1 ms wait)
+
+main()
\ No newline at end of file
diff --git a/record_one_pose.py b/record_one_pose.py
new file mode 100644
index 0000000..e83d59c
--- /dev/null
+++ b/record_one_pose.py
@@ -0,0 +1,58 @@
+import pickle
+import sys
+
+import torch
+from ultralytics import YOLO
+import cv2
+import time
+
+import utils
+from calculate import normalize_pose, compare_poses_boolean
+from draw import draw_new
+from utils import find_closest
+from video_methods import initialize_method
+
+model = YOLO("yolo11x-pose.pt")
+model.to(torch.device('cuda:0'))
+
+method = initialize_method("cam")
+
+do_pose_shot = False
+startTime = 0
+
+def click_event(event, x, y, flags, param):
+ global do_pose_shot, startTime
+
+    if event == cv2.EVENT_LBUTTONDOWN:  # left mouse button
+ do_pose_shot = not do_pose_shot
+ if do_pose_shot:
+ startTime = time.time()
+
+def main():
+ moves = []
+
+ while True:
+ frame = method.receive_frame()
+
+ frame = cv2.flip(frame, 1)
+ results = model(frame, verbose=False)
+
+ if len(results) != 0:
+
+ result = results[0]
+ kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
+
+ if kpts is None:
+ continue
+
+ img = frame
+
+ if do_pose_shot:
+ print(normalize_pose(result.keypoints.xy.cpu().numpy()[0]))
+ exit(0)
+
+ cv2.imshow('Klatka z kamerki', img)
+ cv2.setMouseCallback('Klatka z kamerki', click_event)
+        cv2.waitKey(1)  # pump the window event loop (1 ms wait)
+
+main()
\ No newline at end of file
diff --git a/record_video_pose.py b/record_video_pose.py
new file mode 100644
index 0000000..d3638b7
--- /dev/null
+++ b/record_video_pose.py
@@ -0,0 +1,40 @@
+import glob
+import os
+import pickle
+import re
+from tqdm import tqdm
+
+import torch
+from ultralytics import YOLO
+import cv2
+import time
+
+import utils
+from calculate import normalize_pose, compare_poses_boolean
+from draw import draw_new
+from utils import find_closest
+from video_methods import initialize_method
+
+model = YOLO("yolo11x-pose.pt")
+model.to(torch.device('cuda:0'))
+
+startTime = 0
+def main():
+ moves = []
+
+    # Iterate over frames sorted numerically by the number in the filename
+    for frame_path in tqdm(sorted(glob.glob("video/camA/*.jpg"), key=lambda f: int(re.search(r"\d+", f).group()))):
+        data = os.path.basename(frame_path)
+        frame = cv2.imread(f"video/camA/{data}")
+ results = model(frame, verbose=False)
+
+ if len(results) != 0:
+ result = results[0]
+ kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
+
+ if kpts is None:
+ continue
+
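+            # Each entry: (frame number parsed from the filename, normalized
+            # pose for comparison, raw 2D keypoints for drawing)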
+ moves.append((int(data.replace(".jpg", "")), normalize_pose(result.keypoints.xy.cpu().numpy()[0]), result.keypoints.xy.cpu()[0]))
+
+ with open('moves_makarena_best.pkl', 'wb') as f: # 'wb' = write binary
+ pickle.dump(moves, f)
+
+main()
\ No newline at end of file
diff --git a/rotate.py b/rotate.py
new file mode 100644
index 0000000..94aef05
--- /dev/null
+++ b/rotate.py
@@ -0,0 +1,19 @@
+import cv2
+import os
+
+folder = r"video/camC"  # ← set your folder path here
+
+# file extensions to rotate
+ext = (".jpg", ".jpeg", ".png")
+
+for filename in os.listdir(folder):
+ if filename.lower().endswith(ext):
+ path = os.path.join(folder, filename)
+
+ img = cv2.imread(path)
+ rotated = cv2.rotate(img, cv2.ROTATE_180)
+
+        cv2.imwrite(path, rotated)  # overwrite the file in place
+        print(f"Rotated: {filename}")
+
+print("Done ✔️")