commit 46ba2b54cca1c57b65763eb3462525525016c8af
Author: zhanglijie <zhanglijie@landgroup.com.cn>
Date:   Thu Jun 26 09:37:46 2025 +0800

    特征值计算

diff --git a/cel_index.py b/cel_index.py
new file mode 100644
index 0000000..473f067
--- /dev/null
+++ b/cel_index.py
@@ -0,0 +1,166 @@
+import numpy as np
+from collections import defaultdict
+def cel_index(freq):
+    #####聚类########
+    points = np.array(list(freq.keys()))
+    weights = np.array([freq[tuple(p)] for p in points])
+    # 初始化聚类中心：选择权重最大的两个点
+    top_indices = np.argsort(weights)[-2:]
+    centroids = points[top_indices].astype(float)
+    # K-means聚类
+    max_iters = 100
+    tolerance = 1e-6
+    prev_centroids = None
+
+    for _ in range(max_iters):
+        # 计算每个点到各中心的距离
+        dists_temp = np.minimum(np.abs(points[:, np.newaxis, :] - centroids),
+                                360 - np.abs(points[:, np.newaxis, :] - centroids))
+        dists = np.sqrt((dists_temp ** 2).sum(axis=2))
+
+        # 分配点到最近的中心
+        cluster_labels = np.argmin(dists, axis=1)
+
+        # 更新中心位置
+        new_centroids = []
+        for i in range(2):
+            mask = (cluster_labels == i)
+            if np.any(mask):
+                weighted_sum = (points[mask].T * weights[mask]).sum(axis=1)
+                total_weight = weights[mask].sum()
+                new_center = weighted_sum / total_weight
+                new_centroids.append(new_center)
+            else:
+                new_centroids.append(centroids[i])  # 防止空簇
+
+        new_centroids = np.array(new_centroids)
+
+        # 检查收敛
+        if prev_centroids is not None and np.linalg.norm(new_centroids - prev_centroids) < tolerance:
+            break
+
+        centroids = new_centroids
+        prev_centroids = centroids.copy()
+
+    # 最终分配
+    dists_temp = np.minimum(np.abs(points[:, np.newaxis, :] - centroids),
+                            360 - np.abs(points[:, np.newaxis, :] - centroids))
+    dists = np.sqrt((dists_temp ** 2).sum(axis=2))
+    cluster_labels = np.argmin(dists, axis=1)
+
+    # 三倍标准差过滤离群点并计算统计信息
+    result_list = []  # 存储最终12个值的列表
+    filtered_data = []  # 存储过滤后的点信息用于可视化
+
+    for cluster_idx in range(2):
+        mask = (cluster_labels == cluster_idx)
+        cluster_pts = points[mask]
+        cluster_wts = weights[mask]
+
+        # 直接使用原始数据计算中心
+        weighted_sum = (cluster_pts.T * cluster_wts).sum(axis=1)
+        total_weight = cluster_wts.sum()
+        center = weighted_sum / total_weight  # 原始中心（不再需要过滤后的center_filtered）
+
+        # 计算统计信息（直接使用原始数据）
+        total_points = total_weight  # 总权重即为有效点数
+
+        # 计算坐标绝对偏差（加权平均）
+        if total_weight > 0 and len(cluster_pts) > 0:
+            x_abs_diff = np.abs(cluster_pts[:, 0] - center[0])
+            avg_x_distance = np.sum(x_abs_diff * cluster_wts) / total_weight
+
+            y_abs_diff = np.abs(cluster_pts[:, 1] - center[1])
+            avg_y_distance = np.sum(y_abs_diff * cluster_wts) / total_weight
+            # 新增：计算最大/最小点距离中心的距离
+            # 找到横坐标最大和最小的点
+            max_x_point = cluster_pts[np.argmax(cluster_pts[:, 0])]
+            min_x_point = cluster_pts[np.argmin(cluster_pts[:, 0])]
+            max_y_point = cluster_pts[np.argmax(cluster_pts[:, 1])]
+            min_y_point = cluster_pts[np.argmin(cluster_pts[:, 1])]
+
+            # 计算这些点到中心的距离（欧氏距离）
+            max_x_dist = np.linalg.norm(max_x_point - center)
+            min_x_dist = np.linalg.norm(min_x_point - center)
+            max_y_dist = np.linalg.norm(max_y_point - center)
+            min_y_dist = np.linalg.norm(min_y_point - center)
+        else:
+            avg_x_distance = 0
+            avg_y_distance = 0
+            max_x_dist = min_x_dist = max_y_dist = min_y_dist = 0
+
+        result_list.extend([
+            center[0],  # 中心横坐标（原始计算）
+            center[1],  # 中心纵坐标（原始计算）
+            avg_x_distance,  # 横坐标到中心的平均距离
+            avg_y_distance,  # 纵坐标到中心的平均距离
+            max_x_dist,  # 横坐标最大点到中心的距离
+            min_x_dist,  # 横坐标最小点到中心的距离
+            max_y_dist,  # 纵坐标最大点到中心的距离
+            min_y_dist  # 纵坐标最小点到中心的距离
+        ])
+    # 添加两个聚类中心的距离特征
+    if len(centroids) == 2:
+        # 计算横向距离（x方向）
+        dist_x = abs(centroids[0][0] - centroids[1][0])
+
+        # 计算纵向距离（y方向）
+        dist_y = abs(centroids[0][1] - centroids[1][1])
+
+    else:
+        # 如果聚类中心不足两个，添加默认值
+        result_list.extend([0, 0])
+
+    # 全局幅值统计
+    if len(points) > 0:
+        # 提取所有幅值（考虑权重）
+        amps = points[:, 1]  # 幅值在points的第二列
+        weighted_amps = np.repeat(amps, weights)  # 根据权重重复幅值
+
+        # 幅值偏斜度/陡峭度（使用无偏估计）
+        n = len(weighted_amps)
+        mean_amp = np.mean(weighted_amps)
+        std_amp = np.std(weighted_amps, ddof=1)  # 样本标准差
+
+        if std_amp > 1e-8:  # 避免除零
+            amp_skew = np.sum((weighted_amps - mean_amp) ** 3) / (n * std_amp ** 3)
+            amp_kurt = np.sum((weighted_amps - mean_amp) ** 4) / (n * std_amp ** 4) - 3
+        else:
+            amp_skew = amp_kurt = 0
+    else:
+        amp_skew = amp_kurt = 0
+
+    # 计算低幅值比例（直接从points和weights计算）
+    total_points = weights.sum()  # 总非零点数（带权重）
+    if total_points > 0:
+        low_amp_mask = (points[:, 1] < 32)  # 幅值小于32的点
+        low_amp_count = weights[low_amp_mask].sum()
+        low_amp_ratio = low_amp_count / total_points
+    else:
+        low_amp_ratio = 0
+
+    x_bins = 36
+    x_edges = np.linspace(0, 360, x_bins + 1)
+
+    # 统计当前簇在每个区间的权重总和
+    x_hist, _ = np.histogram(points[:, 0], bins=x_edges,
+                             range=(0, 360))
+    variance = np.var(x_hist)
+    zero_count = np.sum(x_hist == 0)
+    total = len(x_hist)
+    a = zero_count / total
+
+    result_list.extend([
+        dist_x,  # 聚类中心X距离
+        # dist_y,  # 聚类中心Y距离
+
+        low_amp_ratio,  # 原有低幅值比例
+
+        amp_skew,  # 全局幅值偏斜度
+        amp_kurt,  # 全局幅值陡峭度
+        a,
+
+
+    ])
+
+    return  result_list