commit 46ba2b54cca1c57b65763eb3462525525016c8af Author: zhanglijie Date: Thu Jun 26 09:37:46 2025 +0800 特征值计算 diff --git a/cel_index.py b/cel_index.py new file mode 100644 index 0000000..473f067 --- /dev/null +++ b/cel_index.py @@ -0,0 +1,166 @@ +import numpy as np +from collections import defaultdict +def cel_index(freq): + #####聚类######## + points = np.array(list(freq.keys())) + weights = np.array([freq[tuple(p)] for p in points]) + # 初始化聚类中心:选择权重最大的两个点 + top_indices = np.argsort(weights)[-2:] + centroids = points[top_indices].astype(float) + # K-means聚类 + max_iters = 100 + tolerance = 1e-6 + prev_centroids = None + + for _ in range(max_iters): + # 计算每个点到各中心的距离 + dists_temp = np.minimum(np.abs(points[:, np.newaxis, :] - centroids), + 360 - np.abs(points[:, np.newaxis, :] - centroids)) + dists = np.sqrt((dists_temp ** 2).sum(axis=2)) + + # 分配点到最近的中心 + cluster_labels = np.argmin(dists, axis=1) + + # 更新中心位置 + new_centroids = [] + for i in range(2): + mask = (cluster_labels == i) + if np.any(mask): + weighted_sum = (points[mask].T * weights[mask]).sum(axis=1) + total_weight = weights[mask].sum() + new_center = weighted_sum / total_weight + new_centroids.append(new_center) + else: + new_centroids.append(centroids[i]) # 防止空簇 + + new_centroids = np.array(new_centroids) + + # 检查收敛 + if prev_centroids is not None and np.linalg.norm(new_centroids - prev_centroids) < tolerance: + break + + centroids = new_centroids + prev_centroids = centroids.copy() + + # 最终分配 + dists_temp = np.minimum(np.abs(points[:, np.newaxis, :] - centroids), + 360 - np.abs(points[:, np.newaxis, :] - centroids)) + dists = np.sqrt((dists_temp ** 2).sum(axis=2)) + cluster_labels = np.argmin(dists, axis=1) + + # 三倍标准差过滤离群点并计算统计信息 + result_list = [] # 存储最终12个值的列表 + filtered_data = [] # 存储过滤后的点信息用于可视化 + + for cluster_idx in range(2): + mask = (cluster_labels == cluster_idx) + cluster_pts = points[mask] + cluster_wts = weights[mask] + + # 直接使用原始数据计算中心 + weighted_sum = (cluster_pts.T * cluster_wts).sum(axis=1) + total_weight = cluster_wts.sum() + center = weighted_sum / total_weight # 原始中心(不再需要过滤后的center_filtered) + + # 计算统计信息(直接使用原始数据) + total_points = total_weight # 总权重即为有效点数 + + # 计算坐标绝对偏差(加权平均) + if total_weight > 0 and len(cluster_pts) > 0: + x_abs_diff = np.abs(cluster_pts[:, 0] - center[0]) + avg_x_distance = np.sum(x_abs_diff * cluster_wts) / total_weight + + y_abs_diff = np.abs(cluster_pts[:, 1] - center[1]) + avg_y_distance = np.sum(y_abs_diff * cluster_wts) / total_weight + # 新增:计算最大/最小点距离中心的距离 + # 找到横坐标最大和最小的点 + max_x_point = cluster_pts[np.argmax(cluster_pts[:, 0])] + min_x_point = cluster_pts[np.argmin(cluster_pts[:, 0])] + max_y_point = cluster_pts[np.argmax(cluster_pts[:, 1])] + min_y_point = cluster_pts[np.argmin(cluster_pts[:, 1])] + + # 计算这些点到中心的距离(欧氏距离) + max_x_dist = np.linalg.norm(max_x_point - center) + min_x_dist = np.linalg.norm(min_x_point - center) + max_y_dist = np.linalg.norm(max_y_point - center) + min_y_dist = np.linalg.norm(min_y_point - center) + else: + avg_x_distance = 0 + avg_y_distance = 0 + max_x_dist = min_x_dist = max_y_dist = min_y_dist = 0 + + result_list.extend([ + center[0], # 中心横坐标(原始计算) + center[1], # 中心纵坐标(原始计算) + avg_x_distance, # 横坐标到中心的平均距离 + avg_y_distance, # 纵坐标到中心的平均距离 + max_x_dist, # 横坐标最大点到中心的距离 + min_x_dist, # 横坐标最小点到中心的距离 + max_y_dist, # 纵坐标最大点到中心的距离 + min_y_dist # 纵坐标最小点到中心的距离 + ]) + # 添加两个聚类中心的距离特征 + if len(centroids) == 2: + # 计算横向距离(x方向) + dist_x = abs(centroids[0][0] - centroids[1][0]) + + # 计算纵向距离(y方向) + dist_y = abs(centroids[0][1] - centroids[1][1]) + + else: + # 如果聚类中心不足两个,添加默认值 + result_list.extend([0, 0]) + + # 全局幅值统计 + if len(points) > 0: + # 提取所有幅值(考虑权重) + amps = points[:, 1] # 幅值在points的第二列 + weighted_amps = np.repeat(amps, weights) # 根据权重重复幅值 + + # 幅值偏斜度/陡峭度(使用无偏估计) + n = len(weighted_amps) + mean_amp = np.mean(weighted_amps) + std_amp = np.std(weighted_amps, ddof=1) # 样本标准差 + + if std_amp > 1e-8: # 避免除零 + amp_skew = np.sum((weighted_amps - mean_amp) ** 3) / (n * std_amp ** 3) + amp_kurt = np.sum((weighted_amps - mean_amp) ** 4) / (n * std_amp ** 4) - 3 + else: + amp_skew = amp_kurt = 0 + else: + amp_skew = amp_kurt = 0 + + # 计算低幅值比例(直接从points和weights计算) + total_points = weights.sum() # 总非零点数(带权重) + if total_points > 0: + low_amp_mask = (points[:, 1] < 32) # 幅值小于32的点 + low_amp_count = weights[low_amp_mask].sum() + low_amp_ratio = low_amp_count / total_points + else: + low_amp_ratio = 0 + + x_bins = 36 + x_edges = np.linspace(0, 360, x_bins + 1) + + # 统计当前簇在每个区间的权重总和 + x_hist, _ = np.histogram(points[:, 0], bins=x_edges, + range=(0, 360)) + variance = np.var(x_hist) + zero_count = np.sum(x_hist == 0) + total = len(x_hist) + a = zero_count / total + + result_list.extend([ + dist_x, # 聚类中心X距离 + # dist_y, # 聚类中心Y距离 + + low_amp_ratio, # 原有低幅值比例 + + amp_skew, # 全局幅值偏斜度 + amp_kurt, # 全局幅值陡峭度 + a, + + + ]) + + return result_list