diff --git a/.idea/JustTwerk.iml b/.idea/JustTwerk.iml
index 022092a..beae115 100644
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 8506274..ac1a72c 100644
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index d23dc60..191ffd8 100644
diff --git a/3cams_3D_v2.py b/3cams_3D_v2.py
new file mode 100644
index 0000000..434c672
--- /dev/null
+++ b/3cams_3D_v2.py
@@ -0,0 +1,85 @@
+import glob
+import numpy as np
+import cv2
+import tqdm
+from ultralytics import YOLO
+import matplotlib.pyplot as plt
+
+# --- Load calibration ---
+data = np.load("calib_relative_to_A_3cams.npz")
+
+K_A, D_A = data["K_A"], data["D_A"]
+K_B, D_B = data["K_B"], data["D_B"]
+K_C, D_C = data["K_C"], data["D_C"]
+
+R_AA, T_AA = data["R_AA"], data["T_AA"]
+R_BA, T_BA = data["R_BA"], data["T_BA"]
+R_CA, T_CA = data["R_CA"], data["T_CA"]
+
+# reshape the translation vectors
+T_AA = T_AA.reshape(3,1)
+T_BA = T_BA.reshape(3,1)
+T_CA = T_CA.reshape(3,1)
+
+# Camera A = reference frame
+P_A = K_A @ np.hstack((np.eye(3), np.zeros((3,1))))
+P_B = K_B @ np.hstack((R_BA, T_BA))
+P_C = K_C @ np.hstack((R_CA, T_CA))
+
+
+def triangulate_three_views(pA, pB, pC):
+    pA = pA.reshape(2,1)
+    pB = pB.reshape(2,1)
+    pC = pC.reshape(2,1)
+
+    XAB_h = cv2.triangulatePoints(P_A, P_B, pA, pB)
+    XAB = (XAB_h / XAB_h[3])[:3].reshape(3)
+
+    XAC_h = cv2.triangulatePoints(P_A, P_C, pA, pC)
+    XAC = (XAC_h / XAC_h[3])[:3].reshape(3)
+
+    return (XAB + XAC)/2
+
+
+# --- YOLO Pose ---
+model = YOLO("yolo11x-pose.pt")
+
+skeleton = [
+    [0,1],[0,2],[1,3],[2,4],[0,5],[0,6],
+    [5,7],[7,9],[6,8],[8,10],[5,6],
+    [11,12],[12,14],[14,16],[11,13],[13,15]
+]
+
+points3DList = {}
+
+frames = sorted(glob.glob("video/camA/*.jpg"), key=lambda f: int(__import__("re").search(r"\d+", f).group()))
+
+for frame in tqdm.tqdm(frames):
+    name = frame.replace('video/camA\\', "")
+    imgA = cv2.imread(f"video/camA/{name}")
+    imgB = cv2.imread(f"video/camB/{name}")
+    imgC = cv2.imread(f"video/camC/{name}")
+
+    rA = model(imgA, verbose=False)[0]
+    rB = model(imgB, verbose=False)[0]
+    rC = model(imgC, verbose=False)[0]
+
+    if len(rA.keypoints.xy) == 0: continue
+    if len(rB.keypoints.xy) == 0: continue
+    if len(rC.keypoints.xy) == 0: continue
+
+    kpA = rA.keypoints.xy[0].cpu().numpy()
+    kpB = rB.keypoints.xy[0].cpu().numpy()
+    kpC = rC.keypoints.xy[0].cpu().numpy()
+
+    pts = []
+    for i in range(kpA.shape[0]):
+        X = triangulate_three_views(kpA[i], kpB[i], kpC[i])
+        pts.append(X)
+
+    pts = np.array(pts)
+    points3DList[name] = pts
+
+import pickle
+with open("replay_tpose.pkl", "wb") as f:
+    pickle.dump(points3DList, f)
diff --git a/3cams_3d.py b/3cams_3d.py
new file mode 100644
index 0000000..72985d3
--- /dev/null
+++ b/3cams_3d.py
@@ -0,0 +1,112 @@
+import glob
+
+import numpy as np
+import cv2
+from ultralytics import YOLO
+import matplotlib.pyplot as plt
+
+# --- Load calibration ---
+data = np.load("calibration_3cams_1.npz")
+
+K1, D1 = data["K1"], data["D1"]
+K2, D2 = data["K2"], data["D2"]
+K3, D3 = data["K3"], data["D3"]
+R12, T12 = data["R12"], data["T12"]
+R13, T13 = data["R13"], data["T13"]
+
+# Fix the translation vector shapes
+T12 = T12.reshape(3,1)
+T13 = T13.reshape(3,1)
+
+# Camera 1 = reference frame
+P1 = K1 @ np.hstack((np.eye(3), np.zeros((3,1))))
+P2 = K2 @ np.hstack((R12, T12))
+P3 = K3 @ np.hstack((R13, T13))
+
+# --- 3D triangulation from three cameras ---
+def triangulate_three_views(p1, p2, p3):
+    """
+    3D triangulation from three cameras with OpenCV, averaged for stability
+    p1, p2, p3: points in pixels, shape (2,)
+    """
+    # Reshape to (2,1)
+    p1 = p1.reshape(2,1)
+    p2 = p2.reshape(2,1)
+    p3 = p3.reshape(2,1)
+
+    # Triangulate camera pair 1-2
+    X12_h = cv2.triangulatePoints(P1, P2, p1, p2)
+    X12 = (X12_h / X12_h[3])[:3].reshape(3)
+
+    # Triangulate camera pair 1-3
+    X13_h = cv2.triangulatePoints(P1, P3, p1, p3)
+    X13 = (X13_h / X13_h[3])[:3].reshape(3)
+
+    # Average for better stability
+    X_avg = (X12 + X13) / 2
+    return X_avg
+
+# --- Load YOLOv11 Pose ---
+model = YOLO("yolo11x-pose.pt")
+
+skeleton = [
+    [0,1],[0,2],[1,3],[2,4],[0,5],[0,6],
+    [5,7],[7,9],[6,8],[8,10],[5,6],
+    [11,12],[12,14],[14,16],[11,13],[13,15]
+]
+
+plt.ion()  # enable interactive mode
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+
+# Create the initial scatter plot
+points_plot = ax.scatter([], [], [], c='r', marker='o', s=50)
+lines_plot = [ax.plot([0,0],[0,0],[0,0], c='b')[0] for _ in skeleton]
+
+ax.set_xlabel('X')
+ax.set_ylabel('Y')
+ax.set_zlabel('Z')
+ax.view_init(elev=20, azim=-60)
+
+points3DList = {}
+
+for i in sorted(glob.glob("video/camA/*.jpg"), key=lambda f: int(__import__("re").search(r"\d+", f).group())):
+    # Assume we have 3 images from 3 cameras
+    data = i.replace(f'video/camA\\', "")
+    img1 = cv2.imread(f"video/camA/{data}")
+    img2 = cv2.imread(f"video/camB/{data}")
+    img3 = cv2.imread(f"video/camC/{data}")
+
+    # Keypoint prediction
+    results1 = model(img1, verbose=False)[0]
+    results2 = model(img2, verbose=False)[0]
+    results3 = model(img3, verbose=False)[0]
+
+    # Assume a single person in the scene
+    if len(results1.keypoints.xy) == 0: continue
+    if len(results2.keypoints.xy) == 0: continue
+    if len(results3.keypoints.xy) == 0: continue
+
+    yolo_cam1 = results1.keypoints.xy[0].cpu().numpy()  # shape (17,2)
+    yolo_cam2 = results2.keypoints.xy[0].cpu().numpy()
+    yolo_cam3 = results3.keypoints.xy[0].cpu().numpy()
+
+    # --- Triangulate all keypoints ---
+    points3D = []
+
+    for i in range(yolo_cam1.shape[0]):  # 17 COCO keypoints
+        p1 = yolo_cam1[i]
+        p2 = yolo_cam2[i]
+        p3 = yolo_cam3[i]
+
+        X = triangulate_three_views(p1, p2, p3)
+        points3D.append(X)
+
+    points3D = np.array(points3D)
+    print(points3D)
+    points3DList[data] = points3D
+
+import pickle
+
+with open("replay_tpose.pkl", "wb") as f:
+    pickle.dump(points3DList, f)
\ No newline at end of file
diff --git a/3ddisplay_replay.py b/3ddisplay_replay.py
new file mode 100644
index 0000000..5594ed3
--- /dev/null
+++ b/3ddisplay_replay.py
@@ -0,0 +1,57 @@
+import pickle
+
+from matplotlib import pyplot as plt
+
+with open("replay_xyz.pkl", "rb") as f:
+    points3DList = pickle.load(f)
+
+skeleton = [
+    [0, 1], [0, 2],      # nose -> eyes
+    [1, 3], [2, 4],      # eyes -> ears
+    # [0, 5], [0, 6],    # nose -> shoulders
+    [5, 7], [7, 9],      # left arm
+    [6, 8], [8, 10],     # right arm
+    [5, 6],              # shoulders
+    [5, 11], [6, 12],    # shoulders -> hips
+    [11, 12],            # hips
+    [11, 13], [13, 15],  # left leg
+    [12, 14], [14, 16]   # right leg
+]
+
+plt.ion()  # enable interactive mode
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+
+# Create the initial scatter plot
+points_plot = ax.scatter([], [], [], c='r', marker='o', s=50)
+lines_plot = [ax.plot([0,0],[0,0],[0,0], c='b')[0] for _ in skeleton]
+
+ax.set_xlabel('X')
+ax.set_ylabel('Y')
+ax.set_zlabel('Z')
+ax.set_xlim(-0.6, 0.4)
+ax.set_ylim(1.2, 2.2)
+ax.set_zlim(-0.5, 1.1)
+ax.view_init(elev=20, azim=-60)
+
+i = 0
+
+for points3Dkey in points3DList:
+    points3D = points3DList[points3Dkey]
+    print("3D points:\n", points3D)
+
+    # --- 3D visualization ---
+    X = points3D[:,0] - 0.25
+    Z = -points3D[:,1] + 0.5
+    Y = points3D[:,2]
+
+    points_plot._offsets3d = (X, Y, Z)
+
+    # Update the skeleton lines
+    for idx, (i, j) in enumerate(skeleton):
+        lines_plot[idx].set_data([X[i], X[j]], [Y[i], Y[j]])
+        lines_plot[idx].set_3d_properties([Z[i], Z[j]])
+
+    fig.canvas.draw()
+    fig.canvas.flush_events()
+    plt.pause(0.01)
diff --git a/3ddisplay_replay_smoothed.py b/3ddisplay_replay_smoothed.py
new file mode 100644
index 0000000..3223a93
--- /dev/null
+++ b/3ddisplay_replay_smoothed.py
@@ -0,0 +1,58 @@
+from scipy.signal import savgol_filter
+import numpy as np
+import pickle
+from matplotlib import pyplot as plt
+
+with open("replay_xyz.pkl", "rb") as f:
+    points3DList = pickle.load(f)
+
+skeleton = [
+    [0, 1], [0, 2],
+    [1, 3], [2, 4],
+    [5, 7], [7, 9],
+    [6, 8], [8, 10],
+    [5, 6],
+    [5, 11], [6, 12],
+    [11, 12],
+    [11, 13], [13, 15],
+    [12, 14], [14, 16]
+]
+
+keys_sorted = sorted(points3DList.keys())
+points_sequence = np.array([points3DList[k] for k in keys_sorted])  # (frames, points, 3)
+
+# --- Savitzky-Golay filter ---
+window_length = 7  # must be odd
+polyorder = 2
+smoothed_sequence = savgol_filter(points_sequence, window_length=window_length,
+                                  polyorder=polyorder, axis=0, mode='nearest')
+
+plt.ion()
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+
+points_plot = ax.scatter([], [], [], c='r', marker='o', s=50)
+lines_plot = [ax.plot([0,0],[0,0],[0,0], c='b')[0] for _ in skeleton]
+
+ax.set_xlabel('X')
+ax.set_ylabel('Y')
+ax.set_zlabel('Z')
+ax.set_xlim(-0.6, 0.4)
+ax.set_ylim(1.2, 2.2)
+ax.set_zlim(-0.5, 1.1)
+ax.view_init(elev=20, azim=-60)
+
+for frame_points in smoothed_sequence:
+    X = frame_points[:,0] - 0.25
+    Z = -frame_points[:,1] + 0.5
+    Y = frame_points[:,2]
+
+    points_plot._offsets3d = (X, Y, Z)
+
+    for idx, (i, j) in enumerate(skeleton):
+        lines_plot[idx].set_data([X[i], X[j]], [Y[i], Y[j]])
+        lines_plot[idx].set_3d_properties([Z[i], Z[j]])
+
+    fig.canvas.draw()
+    fig.canvas.flush_events()
+    plt.pause(0.001)
diff --git a/calculate.py b/calculate.py
index c272cf2..db52ce5 100644
--- a/calculate.py
+++ b/calculate.py
@@ -52,7 +52,7 @@ def compare_poses(f1, f2):
 
 def compare_poses_boolean(f1, f2):
     l2, cos_sim = compare_poses(f1, f2)
-    return l2 < 0.7 and cos_sim > 0.90
+    return l2 < 1.2 and cos_sim > 0.85
 
 def center(keypoints):
     mid_hip = (keypoints[11] + keypoints[12]) / 2  # left_hip=11, right_hip=12
diff --git a/checkpoint/pretrained_h36m_detectron_coco.bin b/checkpoint/pretrained_h36m_detectron_coco.bin
new file mode 100644
index 0000000..9fa1c30
Binary files /dev/null and b/checkpoint/pretrained_h36m_detectron_coco.bin differ
diff --git a/main.py b/main.py
index 84c9689..3df657a 100644
--- a/main.py
+++ b/main.py
@@ -1,10 +1,15 @@
+import json
+import os
 import pickle
+import socket
 import sys
 
+import torch
 from ultralytics import YOLO
 import cv2
 import time
 
+import poses
 import utils
 from calculate import normalize_pose, compare_poses_boolean
 from draw import draw_new
@@ -12,6 +17,7 @@ from utils import find_closest
 from video_methods import initialize_method
 
 model = YOLO("yolo11s-pose.pt")
+model.to(torch.device('cuda:0'))
 
 if len(sys.argv) == 2:
     method_type = sys.argv[1]
@@ -40,123 +46,174 @@ def main():
     mehCount = 0
     goodCount = 0
     failCount = 0
-    failRate = 2
+    failRate = 10
+
+    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    server_socket.bind(("0.0.0.0", 13425))
+    server_socket.listen()
+
+    print("czekam na clienta...")
+
+    client_socket, _ = server_socket.accept()
+    print("mam clienta!")
+
+    data = client_socket.recv(1024).decode()
+    if not data.startswith("id_"):
+        client_socket.close()
+        server_socket.close()
+        return
+
+    map = data.replace("id_", "").replace("\n", "").strip()
+    if not os.path.isfile(f'moves_{map}.pkl'):
+        print(map)
+        print("moves_" + map + ".pkl")
+        client_socket.sendall("not_exists".encode())
+        client_socket.close()
+        server_socket.close()
+        return
 
     moves = []
-    with open('moves.pkl', 'rb') as f:  # 'rb' = read binary
+    with open(f'moves_{map}.pkl', 'rb') as f:  # 'rb' = read binary
         moves = pickle.load(f)
 
     startValue = moves[0][0]
     totalCount = len(moves)
 
     for i, move in enumerate(moves):
-        moves[i] = (move[0] - startValue, move[1], move[2])
+        moves[i] = ((move[0] - startValue) / 1000, move[1], move[2])
 
-    print(moves)
+    currIndex = 1
+    currTimeIndex = time.time()
+    deltaTime = time.time()
+    currStatus = f"Zaczoles tanczyc {currIndex}"
+    currMove = moves[0]
+
+
+    doStreak = False
+    streak = 0
+    doing = 0
+    actuallyDoing = False
 
     while True:
+        doing += 1
         frame = method.receive_frame()
-
         frame = cv2.flip(frame, 1)
-
         results = model(frame, verbose=False)
 
+        if not actuallyDoing:
+            client_socket.sendall("start".encode())
+            actuallyDoing = True
+
         current_time = time.time()
         delta = current_time - last_time
         last_time = current_time
 
+        if doing % 30 == 0:
+            if doStreak:
+                streak += 5
+                client_socket.sendall(f"streak_{streak}".encode())
+            else:
+                streak = 0
+                client_socket.sendall(f"streak_0".encode())
+
         fps = 1 / delta if delta > 0 else float('inf')
 
         # print(f"\rDelta: {delta:.4f}s, FPS: {fps:.2f}", end="")
 
-        if len(results) == 0:
-            continue
+        if len(results) != 0:
 
-        result = results[0]
-        kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
+            result = results[0]
+            kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
 
-        if kpts is None:
-            continue
+            if kpts is None:
+                continue
 
-        img = frame
+            img = frame
 
-        normalized = normalize_pose(result.keypoints.xy.cpu().numpy()[0])
+            normalized = normalize_pose(result.keypoints.xy.cpu().numpy()[0])
 
-        draw = utils.normalize(result.keypoints.xy.cpu().numpy()[0])
-        cv2.imshow('you', draw_new(draw * 100 + 100))
+            draw = utils.normalize(result.keypoints.xy.cpu().numpy()[0])
+            cv2.imshow('you', draw_new(draw * 100 + 100))
 
-        if currTimeIndex != 0 and moves.index(find_closest(moves, time.time() - currTimeIndex)) == len(moves) - 1:
-            mehCount = totalCount - failCount - goodCount
+            if currTimeIndex != 0 and moves.index(find_closest(moves, time.time() - currTimeIndex)) == len(moves) - 1:
+                mehCount = abs(totalCount - (failCount + goodCount))
 
-            print(
-                f"PODSUMOWANIE: FAIL {failCount} MEH: {mehCount} PERFECT: {goodCount} PERCENTAGE: {(goodCount + (0.95 * mehCount)) / totalCount * 100}%")
-            exit(1)
+                stats = {
+                    "failCount": failCount,
+                    "goodCount": goodCount,
+                    "mehCount": mehCount,
+                    "percentage": (goodCount + (0.85 * mehCount)) / totalCount * 100
+                }
 
-        if currMove is None:
-            if compare_poses_boolean(moves[0][1], normalized):
-                currIndex = 1
-                currTimeIndex = time.time()
-                deltaTime = time.time()
-                currStatus = f"Zaczoles tanczyc {currIndex}"
-                currMove = moves[0]
+                client_socket.sendall(f"finish_{json.dumps(stats)}".encode())
+                print(
+                    f"PODSUMOWANIE: FAIL {failCount} MEH: {mehCount} PERFECT: {goodCount} PERCENTAGE: {(goodCount + (0.85 * mehCount)) / totalCount * 100}%")
+                cv2.destroyAllWindows()
+                break
 
-                # thread = Thread(target=print_animation, args=(moves, False))
-                # thread.start()
-        else:
-            changed = False
+                # thread = Thread(target=print_animation, args=(moves, False))
+                # thread.start()
+            else:
+                changed = False
 
-            closest = find_closest(moves, time.time() - currTimeIndex)
-            cv2.imshow('Dots', draw_new(closest[2]))
+                closest = find_closest(moves, time.time() - currTimeIndex)
+                cv2.imshow('Dots', draw_new(utils.normalize(closest[2]) * 250 + 250))
 
-            if abs((time.time() - currTimeIndex) - moves[currIndex][0]) > failRate:
-                currStatus = f"FAIL!"
-                failCount += 1
+                if abs((time.time() - currTimeIndex) - moves[currIndex][0]) > failRate:
+                    currStatus = f"FAIL!"
+                    failCount += 1
+                    doStreak = False
 
-            if compare_poses_boolean(closest[1], normalized):
-                # delays += (time.time() - deltaTime - moves[0][0]) * 1000
-                # delaysCount += 1
+                if compare_poses_boolean(closest[1], normalized):
+                    # delays += (time.time() - deltaTime - moves[0][0]) * 1000
+                    # delaysCount += 1
 
-                currStatus = f"SUPER! {currIndex} Zostalo {len(moves)} Delay {(time.time() - currTimeIndex - closest[0]) / 1000}ms"
-                deltaTime = time.time()
+                    # currStatus = f"SUPER! {currIndex} Zostalo {len(moves)} Delay {(time.time() - currTimeIndex - closest[0]) / 1000}ms"
+                    deltaTime = time.time()
 
-                currIndex = moves.index(closest) + 1
-                goodCount += 1
-                changed = True
+                    currIndex = moves.index(closest) + 1
+                    goodCount += 1
+                    changed = True
+                    doStreak = True
 
-            if not changed and compare_poses_boolean(moves[currIndex][1], normalized):
-                # delays += (time.time() - deltaTime - moves[0][0]) * 1000
-                # delaysCount += 1
+                if not changed and compare_poses_boolean(moves[currIndex][1], normalized):
+                    # delays += (time.time() - deltaTime - moves[0][0]) * 1000
+                    # delaysCount += 1
 
-                currStatus = f"SUPER! {currIndex} Zostalo {len(moves)} Delay {(time.time() - currTimeIndex - closest[0]) / 1000}ms"
-                deltaTime = time.time()
+                    # currStatus = f"SUPER! {currIndex} Zostalo {len(moves)} Delay {(time.time() - currTimeIndex - closest[0]) / 1000}ms"
+                    deltaTime = time.time()
 
-                changed = True
+                    changed = True
 
-                currIndex += 1
-                goodCount += 1
+                    currIndex += 1
+                    goodCount += 1
+                    doStreak = True
 
-            # if do_pose_shot:
-            #     moves.append((time.time() - startTime, normalize_pose(result.keypoints.xy.cpu().numpy()[0]), result.keypoints.xy.cpu()[0]))
-            # elif len(moves) != 0:
-            #     with open('moves.pkl', 'wb') as f:  # 'wb' = write binary
-            #         pickle.dump
-            #         (moves, f)
-            #
-            #     exit(1)
+            # if do_pose_shot:
+            #     moves.append((time.time() - startTime, normalize_pose(result.keypoints.xy.cpu().numpy()[0]), result.keypoints.xy.cpu()[0]))
+            # elif len(moves) != 0:
+            #     with open('moves.pkl', 'wb') as f:  # 'wb' = write binary
+            #         pickle.dump
+            #         (moves, f)
+            #
+            #     exit(1)
 
-        cv2.putText(
-            img,  # image
-            currStatus,  # text
-            (50, 100),  # (x, y) position of the bottom-left corner of the text
-            cv2.FONT_HERSHEY_SIMPLEX,  # font
-            1,  # size (scale)
-            (0, 0, 255),  # color (BGR), red here
-            2,  # line thickness
-            cv2.LINE_AA  # anti-aliasing type
-        )
+            cv2.putText(
+                img,  # image
+                currStatus,  # text
+                (50, 100),  # (x, y) position of the bottom-left corner of the text
+                cv2.FONT_HERSHEY_SIMPLEX,  # font
+                1,  # size (scale)
+                (0, 0, 255),  # color (BGR), red here
+                2,  # line thickness
+                cv2.LINE_AA  # anti-aliasing type
+            )
 
         cv2.imshow('Klatka z kamerki', img)
         cv2.setMouseCallback('Klatka z kamerki', click_event)
         cv2.waitKey(1)  # wait for a key press
 
-main()
\ No newline at end of file
+try:
+    while True:
+        main()
+except KeyboardInterrupt:
+    pass
\ No newline at end of file
diff --git a/moves.pkl b/moves.pkl
index 2ed77d7..b732468 100644
Binary files a/moves.pkl and b/moves.pkl differ
diff --git a/moves_3d_mp4.py b/moves_3d_mp4.py
index ffba887..d753a7f 100644
--- a/moves_3d_mp4.py
+++ b/moves_3d_mp4.py
@@ -1,33 +1,27 @@
-import cv2
-import mediapipe as mp
-import matplotlib
-matplotlib.use("Agg")  # <-- important: disables the GUI
-import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
-import numpy as np
+import pickle
+import time
 
-# ---------------------
-# Input video
-# ---------------------
+import cv2
+from ultralytics import YOLO
+
+from calculate import normalize_pose
+from utils import normalize
+
+# Load the video
 cap = cv2.VideoCapture("input.mp4")
 fps = cap.get(cv2.CAP_PROP_FPS)
-width = 640
-height = 640
-
-# ---------------------
-# Output video
-# ---------------------
-fourcc = cv2.VideoWriter_fourcc(*"MJPG")
-
+width = 1920
+height = 1080
+# Video writer settings
+fourcc = cv2.VideoWriter_fourcc(*"avc1")
 out = cv2.VideoWriter("output.mp4", fourcc, fps, (width, height))
 
-# ---------------------
-# MediaPipe Pose
-# ---------------------
-mp_pose = mp.solutions.pose
-pose = mp_pose.Pose(static_image_mode=False, model_complexity=1)
+# Load the YOLO pose model
+model = YOLO("yolo11x-pose.pt", verbose=False)  # your pose model
 
+moves = []
+started = False
 frame_id = 0
 while True:
@@ -35,58 +29,32 @@ while True:
     if not ok:
         break
 
-    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    results = pose.process(rgb)
+    # Scale to the output resolution
+    frame_resized = cv2.resize(frame, (width, height))
 
-    # -----------------------------------------
-    # 3D landmarks: pose_world_landmarks
-    # -----------------------------------------
-    if results.pose_world_landmarks:
-        lm = results.pose_world_landmarks.landmark
+    # Pose detection
+    results = model.predict(frame_resized, verbose=False)
 
-        xs = np.array([p.x for p in lm])
-        ys = np.array([p.y for p in lm])
-        zs = np.array([p.z for p in lm])
+    # Draw the 2D keypoints
+    for result in results:
+        if result.keypoints is not None and len(result.keypoints.xy) > 0:
+            for keypoint in result.keypoints.xy[0]:  # keypoints[0] because there is a single person
+                if not started:
+                    frame_id = 0
+                    started = True
+                x, y = keypoint  # coordinates
+                x = int(x)
+                y = int(y)
+                cv2.circle(frame_resized, (x, y), 5, (0, 255, 0), -1)  # green dots
 
-        # -----------------------------
-        # 3D rendering with Matplotlib
-        # -----------------------------
-        fig = plt.figure(figsize=(6.4, 6.4), dpi=100)
-        ax = fig.add_subplot(111, projection="3d")
-
-        ax.scatter(xs, zs, ys, s=20)
-
-        ax.set_xlim([-1, 1])
-        ax.set_ylim([-1, 1])
-        ax.set_zlim([-1, 1])
-
-        ax.set_xlabel("X")
-        ax.set_ylabel("Y")
-        ax.set_zlabel("Z")
-        ax.invert_zaxis()
-
-        # -----------------------------------------
-        # Convert the Matplotlib figure to an MP4 frame
-        # -----------------------------------------
-        fig.canvas.draw()
-        renderer = fig.canvas.get_renderer()
-
-        w, h = fig.canvas.get_width_height()
-
-        buf = renderer.buffer_rgba()
-        plot_img = np.frombuffer(buf, dtype=np.uint8).reshape((h, w, 4))[:, :, :3]
-
-
-        plt.close(fig)
-
-        # Match the video size
-        plot_img = cv2.resize(plot_img, (width, height))
-        plot_img = cv2.cvtColor(plot_img, cv2.COLOR_RGB2BGR)
-
-        out.write(plot_img)
+            moves.append((frame_id * 1 / fps, normalize_pose(result.keypoints.xy.cpu().numpy()[0]), normalize(result.keypoints.xy.cpu()[0]) * 160 + 250))
 
+    out.write(frame_resized)
     frame_id += 1
 
+with open('moves2.pkl', 'wb') as f:  # 'wb' = write binary
+    pickle.dump(moves, f)
+
 cap.release()
 out.release()
 print("Zapisano: output.mp4")
diff --git a/moves_dump.py b/moves_dump.py
index 238c241..9961205 100644
--- a/moves_dump.py
+++ b/moves_dump.py
@@ -8,11 +8,26 @@ import numpy as np
 import utils
 from draw import draw_new
 
-moves = []
+# moves = {}
 better_moves = {}
 
-with open('moves.pkl', 'rb') as f:  # 'rb' = read binary
+with open('replay_tpose.pkl', 'rb') as f:  # 'rb' = read binary
     moves = pickle.load(f)
+    movesCopy = {}
+
+    for move in moves:
+        # listx = moves[move].tolist()
+        # print(type(listx))
+
+        # if type(listx) != list:
+        #     listx = listx.tolist()
+
+        movesCopy[move.replace(".jpg", "")] = moves[move].tolist()
+
+    with open("plikv10.json", "w", encoding="utf-8") as f:
+        json.dump(movesCopy, f)
+
+exit(1)
 
 startValue = moves[0][0]
 totalCount = len(moves)
@@ -35,8 +50,13 @@ for i, move in enumerate(moves):
 
     # For drawing (e.g. offset onto the screen)
-    draw = utils.normalize(move[2]) * 200 + 250
+    draw = utils.normalize(move[2])
+    better_moves[round((move[0] - startValue) * 1000)] = draw.tolist()
 
-    cv2.imshow('you', draw_new(draw))
-    cv2.waitKey(1)
-    time.sleep(0.1)
+    # cv2.imshow('you', draw_new(draw))
+    # cv2.waitKey(1)
+    # time.sleep(0.1)
+
+
+with open("plik-234.json", "w", encoding="utf-8") as f:
+    json.dump(better_moves, f)
diff --git a/moves_dump_2.py b/moves_dump_2.py
new file mode 100644
index 0000000..63a5f30
--- /dev/null
+++ b/moves_dump_2.py
@@ -0,0 +1,47 @@
+import json
+import pickle
+import time
+
+import cv2
+import numpy as np
+
+import utils
+from draw import draw_new
+
+moves = []
+better_moves = {}
+
+with open('moves.pkl', 'rb') as f:  # 'rb' = read binary
+    moves = pickle.load(f)
+
+startValue = moves[0][0]
+totalCount = len(moves)
+
+for i, move in enumerate(moves):
+    moves[i] = (move[0] - startValue, move[1], move[2])
+
+    # left_hip = move[2][11]   # Left Hip
+    # right_hip = move[2][12]  # Right Hip
+    # center = (left_hip + right_hip) / 2
+    #
+    # # Normalize relative to the body center
+    # normalized_keypoints = move[2] - center
+    #
+    # better_moves[round((move[0] - startValue) * 1000)] = normalized_keypoints.tolist()
+    #
+    # # scale = utils.distance(move[2][11], move[2][12])
+    # # print(scale)
+    # draw = normalized_keypoints + 200
+
+
+    # For drawing (e.g. offset onto the screen)
+    draw = utils.normalize(move[2])
+    better_moves[round((move[0] - startValue) * 1000)] = draw.tolist()
+
+    # cv2.imshow('you', draw_new(draw))
+    # cv2.waitKey(1)
+    # time.sleep(0.1)
+
+
+with open("plik.json", "w", encoding="utf-8") as f:
+    json.dump(better_moves, f)
diff --git a/moves_videopose3d.py b/moves_videopose3d.py
new file mode 100644
index 0000000..1f19685
--- /dev/null
+++ b/moves_videopose3d.py
@@ -0,0 +1,116 @@
+import cv2
+import torch
+import numpy as np
+from common.model import TemporalModel
+from common.camera import *
+# from common.utils import evaluate
+from ultralytics import YOLO
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D  # not strictly needed in newer matplotlib
+import time
+
+# --- 1. Initialize the VideoPose3D model ---
+model_3d = TemporalModel(
+    num_joints_in=17,
+    in_features=2,
+    num_joints_out=17,
+    filter_widths=[3,3,3,3],
+    causal=False
+)
+
+chk = torch.load("checkpoint/pretrained_h36m_detectron_coco.bin", map_location='cpu')
+model_3d.load_state_dict(chk, strict=False)
+model_3d.eval()
+
+# --- 2. Initialize the YOLO model (pose keypoints) ---
+yolo = YOLO('yolo11s-pose.pt')  # use the smallest variant for speed
+
+# --- 3. Load the video ---
+cap = cv2.VideoCapture("input.mp4")
+frame_buffer = []
+BUFFER_SIZE = 243  # VideoPose3D needs a sequence
+
+fig = plt.figure(figsize=(5, 5))
+ax = fig.add_subplot(111, projection='3d')
+
+# initialize the scatter plot and skeleton lines
+scatter = ax.scatter([], [], [], c='r')
+
+skeleton = [ (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6), (0, 7), (7, 8), (8, 9), (7, 12), (12, 13), (13, 14), (7, 10), (10, 11), (11, 12) ]
+
+skeleton_lines = []
+for _ in skeleton:
+    line, = ax.plot([], [], [], c='b')
+    skeleton_lines.append(line)
+
+ax.set_xlim3d(-1, 1)
+ax.set_ylim3d(-1, 1)
+ax.set_zlim3d(0, 2)
+ax.view_init(elev=20, azim=-70)
+plt.ion()
+plt.show()
+
+while True:
+    ret, frame = cap.read()
+    if not ret:
+        break
+
+    # --- 4. Keypoint detection with YOLO ---
+    results = yolo(frame)
+    if len(results) == 0 or len(results[0].keypoints.xy) == 0:
+        continue
+
+    # Assume 1 person per frame (for simplicity)
+    keypoints = results[0].keypoints.xy[0]  # shape [17, 2]
+    keypoints = np.array(keypoints)
+
+    # Normalize to [0,1] (optional, depending on VideoPose3D)
+    keypoints[:, 0] /= frame.shape[1]
+    keypoints[:, 1] /= frame.shape[0]
+
+    frame_buffer.append(keypoints)
+
+    # --- 5. Once we have a full sequence, run the 3D prediction ---
+    skeleton = [
+        (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6),
+        (0, 7), (7, 8), (8, 9), (7, 12), (12, 13), (13, 14),
+        (7, 10), (10, 11), (11, 12)
+    ]
+
+    # --- after getting pred_3d ---
+    if len(frame_buffer) == BUFFER_SIZE:
+        seq_2d = torch.tensor(np.array(frame_buffer)).unsqueeze(0).float()
+        with torch.no_grad():
+            pred_3d = model_3d(seq_2d)
+
+        pose_3d = pred_3d[0, -1].numpy()  # [17,3]
+
+        # --- 2D overlay ---
+        # for i, kp in enumerate(frame_buffer[-1]):
+        #     x, y = int(kp[0] * frame.shape[1]), int(kp[1] * frame.shape[0])
+        #     cv2.circle(frame, (x, y), 5, (0, 255, 0), -1)
+        # cv2.imshow("2D Pose", frame)
+        # cv2.waitKey(1)
+
+        pose_3d = pose_3d[:, [0, 2, 1]]  # X, Z, Y
+        pose_3d[:, 2] *= -1
+
+        # --- 3D update ---
+        xs, ys, zs = pose_3d[:, 0], pose_3d[:, 1], pose_3d[:, 2]
+
+        # update scatter
+        scatter._offsets3d = (xs, ys, zs)
+
+        # update skeleton lines
+        for idx, (a, b) in enumerate(skeleton):
+            skeleton_lines[idx].set_data([xs[a], xs[b]], [ys[a], ys[b]])
+            skeleton_lines[idx].set_3d_properties([zs[a], zs[b]])
+
+        plt.draw()
+        plt.pause(0.001)
+        print(pose_3d.tolist())
+
+        frame_buffer.pop(0)
+
+cap.release()
+cv2.destroyAllWindows()
diff --git a/poses.py b/poses.py
new file mode 100644
index 0000000..c745237
--- /dev/null
+++ b/poses.py
@@ -0,0 +1 @@
+t_pose = [ 0.079178 , 0.1963 , 0.098774 , 0.033032 , 0.99646 , 0.08401 , 0.99999, 0.0049546, -0.99134 , 0.13132, -0.99791, -0.064541, 0.063086, 0.99801, -0.03562, 0.99936, -0.012939, 0.99992, 0.02004, 0.9998]
\ No newline at end of file
diff --git a/record.py b/record.py
new file mode 100644
index 0000000..ba3ea2e
--- /dev/null
+++ b/record.py
@@ -0,0 +1,68 @@
+import pickle
+import sys
+
+import torch
+from ultralytics import YOLO
+import cv2
+import time
+
+import utils
+from calculate import normalize_pose, compare_poses_boolean
+from draw import draw_new
+from utils import find_closest
+from video_methods import initialize_method
+
+model = YOLO("yolo11x-pose.pt")
+model.to(torch.device('cuda:0'))
+
+if len(sys.argv) == 2:
+    method_type = sys.argv[1]
+else:
+    print("Podaj argument 'cam', albo 'net'.")
+    exit(1)
+
+method = initialize_method(method_type)
+
+do_pose_shot = False
+startTime = 0
+
+def click_event(event, x, y, flags, param):
+    global do_pose_shot, startTime
+
+    if event == cv2.EVENT_LBUTTONDOWN:  # left mouse button
+        do_pose_shot = not do_pose_shot
+        if do_pose_shot:
+            startTime = time.time()
+
+def main():
+    moves = []
+
+    while True:
+        frame = method.receive_frame()
+
+        frame = cv2.flip(frame, 1)
+        results = model(frame, verbose=False)
+
+        if len(results) != 0:
+
+            result = results[0]
+            kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
+
+            if kpts is None:
+                continue
+
+            img = frame
+
+            if do_pose_shot:
+                moves.append((time.time() - startTime, normalize_pose(result.keypoints.xy.cpu().numpy()[0]), result.keypoints.xy.cpu()[0]))
+            elif len(moves) != 0:
+                with open('moves.pkl', 'wb') as f:  # 'wb' = write binary
+                    pickle.dump(moves, f)
+                exit(1)
+
+
+        cv2.imshow('Klatka z kamerki', img)
+        cv2.setMouseCallback('Klatka z kamerki', click_event)
+        cv2.waitKey(1)  # wait for a key press
+
+main()
\ No newline at end of file
diff --git a/record_one_pose.py b/record_one_pose.py
new file mode 100644
index 0000000..e83d59c
--- /dev/null
+++ b/record_one_pose.py
@@ -0,0 +1,58 @@
+import pickle
+import sys
+
+import torch
+from ultralytics import YOLO
+import cv2
+import time
+
+import utils
+from calculate import normalize_pose, compare_poses_boolean
+from draw import draw_new
+from utils import find_closest
+from video_methods import initialize_method
+
+model = YOLO("yolo11x-pose.pt")
+model.to(torch.device('cuda:0'))
+
+method = initialize_method("cam")
+
+do_pose_shot = False
+startTime = 0
+
+def click_event(event, x, y, flags, param):
+    global do_pose_shot, startTime
+
+    if event == cv2.EVENT_LBUTTONDOWN:  # left mouse button
+        do_pose_shot = not do_pose_shot
+        if do_pose_shot:
+            startTime = time.time()
+
+def main():
+    moves = []
+
+    while True:
+        frame = method.receive_frame()
+
+        frame = cv2.flip(frame, 1)
+        results = model(frame, verbose=False)
+
+        if len(results) != 0:
+
+            result = results[0]
+            kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
+
+            if kpts is None:
+                continue
+
+            img = frame
+
+            if do_pose_shot:
+                print(normalize_pose(result.keypoints.xy.cpu().numpy()[0]))
+                exit(0)
+
+        cv2.imshow('Klatka z kamerki', img)
+        cv2.setMouseCallback('Klatka z kamerki', click_event)
+        cv2.waitKey(1)  # wait for a key press
+
+main()
\ No newline at end of file
diff --git a/record_video_pose.py b/record_video_pose.py
new file mode 100644
index 0000000..d3638b7
--- /dev/null
+++ b/record_video_pose.py
@@ -0,0 +1,40 @@
+import glob
+import pickle
+from tqdm import tqdm
+
+import torch
+from ultralytics import YOLO
+import cv2
+import time
+
+import utils
+from calculate import normalize_pose, compare_poses_boolean
+from draw import draw_new
+from utils import find_closest
+from video_methods import initialize_method
+
+model = YOLO("yolo11x-pose.pt")
+model.to(torch.device('cuda:0'))
+
+startTime = 0
+def main():
+    moves = []
+
+    for i in tqdm(sorted(glob.glob("video/camA/*.jpg"), key=lambda f: int(__import__("re").search(r"\d+", f).group()))):
+        data = i.replace(f'video/camA\\', "")
+        frame = cv2.imread(f"video/camA/{data}")
+        results = model(frame, verbose=False)
+
+        if len(results) != 0:
+            result = results[0]
+            kpts = result.keypoints.data[0] if len(result.keypoints.data) else None
+
+            if kpts is None:
+                continue
+
+            moves.append((int(data.replace(".jpg", "")), normalize_pose(result.keypoints.xy.cpu().numpy()[0]), result.keypoints.xy.cpu()[0]))
+
+    with open('moves_makarena_best.pkl', 'wb') as f:  # 'wb' = write binary
+        pickle.dump(moves, f)
+
+main()
\ No newline at end of file
diff --git a/rotate.py b/rotate.py
new file mode 100644
index 0000000..94aef05
--- /dev/null
+++ b/rotate.py
@@ -0,0 +1,19 @@
+import cv2
+import os
+
+folder = r"video/camC"  # ← set your own path
+
+# extensions you want to rotate
+ext = (".jpg", ".jpeg", ".png")
+
+for filename in os.listdir(folder):
+    if filename.lower().endswith(ext):
+        path = os.path.join(folder, filename)
+
+        img = cv2.imread(path)
+        rotated = cv2.rotate(img, cv2.ROTATE_180)
+
+        cv2.imwrite(path, rotated)  # overwrite the file
+        print(f"Obrócono: {filename}")
+
+print("Gotowe ✔️")
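
Side note on the triangulation added in 3cams_3d.py and 3cams_3D_v2.py: triangulate_three_views averages two pairwise cv2.triangulatePoints results (A-B and A-C). A minimal alternative sketch, not part of this diff and assuming the same 3x4 projection matrices (P_A, P_B, P_C) and per-view pixel coordinates of shape (2,), solves all three views jointly as one linear (DLT) system instead of averaging:

import numpy as np

def triangulate_dlt(proj_mats, pixels):
    """Triangulate one 3D point from N >= 2 views via a linear (DLT) least-squares solve."""
    rows = []
    for P, (u, v) in zip(proj_mats, pixels):
        rows.append(u * P[2] - P[0])  # x-constraint contributed by this view
        rows.append(v * P[2] - P[1])  # y-constraint contributed by this view
    A = np.stack(rows)                # shape (2N, 4)
    _, _, vt = np.linalg.svd(A)
    X_h = vt[-1]                      # right null vector = homogeneous solution
    return X_h[:3] / X_h[3]

# hypothetical usage with the names from 3cams_3D_v2.py:
# X = triangulate_dlt([P_A, P_B, P_C], [kpA[i], kpB[i], kpC[i]])

The joint solve uses all six image constraints at once, so a noisy detection in one camera degrades the estimate more gracefully than averaging two pairwise triangulations; whether that matters in practice here would need to be checked against the recorded replays.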