|
|
import numpy as np
|
|
|
from collections import defaultdict
|
|
|
def cel_index(freq):
|
|
|
#####聚类########
|
|
|
points = np.array(list(freq.keys()))
|
|
|
weights = np.array([freq[tuple(p)] for p in points])
|
|
|
# 初始化聚类中心:选择权重最大的两个点
|
|
|
top_indices = np.argsort(weights)[-2:]
|
|
|
centroids = points[top_indices].astype(float)
|
|
|
# K-means聚类
|
|
|
max_iters = 100
|
|
|
tolerance = 1e-6
|
|
|
prev_centroids = None
|
|
|
|
|
|
for _ in range(max_iters):
|
|
|
# 计算每个点到各中心的距离
|
|
|
dists_temp = np.minimum(np.abs(points[:, np.newaxis, :] - centroids),
|
|
|
360 - np.abs(points[:, np.newaxis, :] - centroids))
|
|
|
dists = np.sqrt((dists_temp ** 2).sum(axis=2))
|
|
|
|
|
|
# 分配点到最近的中心
|
|
|
cluster_labels = np.argmin(dists, axis=1)
|
|
|
|
|
|
# 更新中心位置
|
|
|
new_centroids = []
|
|
|
for i in range(2):
|
|
|
mask = (cluster_labels == i)
|
|
|
if np.any(mask):
|
|
|
weighted_sum = (points[mask].T * weights[mask]).sum(axis=1)
|
|
|
total_weight = weights[mask].sum()
|
|
|
new_center = weighted_sum / total_weight
|
|
|
new_centroids.append(new_center)
|
|
|
else:
|
|
|
new_centroids.append(centroids[i]) # 防止空簇
|
|
|
|
|
|
new_centroids = np.array(new_centroids)
|
|
|
|
|
|
# 检查收敛
|
|
|
if prev_centroids is not None and np.linalg.norm(new_centroids - prev_centroids) < tolerance:
|
|
|
break
|
|
|
|
|
|
centroids = new_centroids
|
|
|
prev_centroids = centroids.copy()
|
|
|
|
|
|
# 最终分配
|
|
|
dists_temp = np.minimum(np.abs(points[:, np.newaxis, :] - centroids),
|
|
|
360 - np.abs(points[:, np.newaxis, :] - centroids))
|
|
|
dists = np.sqrt((dists_temp ** 2).sum(axis=2))
|
|
|
cluster_labels = np.argmin(dists, axis=1)
|
|
|
|
|
|
# 三倍标准差过滤离群点并计算统计信息
|
|
|
result_list = [] # 存储最终12个值的列表
|
|
|
filtered_data = [] # 存储过滤后的点信息用于可视化
|
|
|
|
|
|
for cluster_idx in range(2):
|
|
|
mask = (cluster_labels == cluster_idx)
|
|
|
cluster_pts = points[mask]
|
|
|
cluster_wts = weights[mask]
|
|
|
|
|
|
# 直接使用原始数据计算中心
|
|
|
weighted_sum = (cluster_pts.T * cluster_wts).sum(axis=1)
|
|
|
total_weight = cluster_wts.sum()
|
|
|
center = weighted_sum / total_weight # 原始中心(不再需要过滤后的center_filtered)
|
|
|
|
|
|
# 计算统计信息(直接使用原始数据)
|
|
|
total_points = total_weight # 总权重即为有效点数
|
|
|
|
|
|
# 计算坐标绝对偏差(加权平均)
|
|
|
if total_weight > 0 and len(cluster_pts) > 0:
|
|
|
x_abs_diff = np.abs(cluster_pts[:, 0] - center[0])
|
|
|
avg_x_distance = np.sum(x_abs_diff * cluster_wts) / total_weight
|
|
|
|
|
|
y_abs_diff = np.abs(cluster_pts[:, 1] - center[1])
|
|
|
avg_y_distance = np.sum(y_abs_diff * cluster_wts) / total_weight
|
|
|
# 新增:计算最大/最小点距离中心的距离
|
|
|
# 找到横坐标最大和最小的点
|
|
|
max_x_point = cluster_pts[np.argmax(cluster_pts[:, 0])]
|
|
|
min_x_point = cluster_pts[np.argmin(cluster_pts[:, 0])]
|
|
|
max_y_point = cluster_pts[np.argmax(cluster_pts[:, 1])]
|
|
|
min_y_point = cluster_pts[np.argmin(cluster_pts[:, 1])]
|
|
|
|
|
|
# 计算这些点到中心的距离(欧氏距离)
|
|
|
max_x_dist = np.linalg.norm(max_x_point - center)
|
|
|
min_x_dist = np.linalg.norm(min_x_point - center)
|
|
|
max_y_dist = np.linalg.norm(max_y_point - center)
|
|
|
min_y_dist = np.linalg.norm(min_y_point - center)
|
|
|
else:
|
|
|
avg_x_distance = 0
|
|
|
avg_y_distance = 0
|
|
|
max_x_dist = min_x_dist = max_y_dist = min_y_dist = 0
|
|
|
|
|
|
result_list.extend([
|
|
|
center[0], # 中心横坐标(原始计算)
|
|
|
center[1], # 中心纵坐标(原始计算)
|
|
|
avg_x_distance, # 横坐标到中心的平均距离
|
|
|
avg_y_distance, # 纵坐标到中心的平均距离
|
|
|
max_x_dist, # 横坐标最大点到中心的距离
|
|
|
min_x_dist, # 横坐标最小点到中心的距离
|
|
|
max_y_dist, # 纵坐标最大点到中心的距离
|
|
|
min_y_dist # 纵坐标最小点到中心的距离
|
|
|
])
|
|
|
# 添加两个聚类中心的距离特征
|
|
|
if len(centroids) == 2:
|
|
|
# 计算横向距离(x方向)
|
|
|
dist_x = abs(centroids[0][0] - centroids[1][0])
|
|
|
|
|
|
# 计算纵向距离(y方向)
|
|
|
dist_y = abs(centroids[0][1] - centroids[1][1])
|
|
|
|
|
|
else:
|
|
|
# 如果聚类中心不足两个,添加默认值
|
|
|
result_list.extend([0, 0])
|
|
|
|
|
|
# 全局幅值统计
|
|
|
if len(points) > 0:
|
|
|
# 提取所有幅值(考虑权重)
|
|
|
amps = points[:, 1] # 幅值在points的第二列
|
|
|
weighted_amps = np.repeat(amps, weights) # 根据权重重复幅值
|
|
|
|
|
|
# 幅值偏斜度/陡峭度(使用无偏估计)
|
|
|
n = len(weighted_amps)
|
|
|
mean_amp = np.mean(weighted_amps)
|
|
|
std_amp = np.std(weighted_amps, ddof=1) # 样本标准差
|
|
|
|
|
|
if std_amp > 1e-8: # 避免除零
|
|
|
amp_skew = np.sum((weighted_amps - mean_amp) ** 3) / (n * std_amp ** 3)
|
|
|
amp_kurt = np.sum((weighted_amps - mean_amp) ** 4) / (n * std_amp ** 4) - 3
|
|
|
else:
|
|
|
amp_skew = amp_kurt = 0
|
|
|
else:
|
|
|
amp_skew = amp_kurt = 0
|
|
|
|
|
|
# 计算低幅值比例(直接从points和weights计算)
|
|
|
total_points = weights.sum() # 总非零点数(带权重)
|
|
|
if total_points > 0:
|
|
|
low_amp_mask = (points[:, 1] < 32) # 幅值小于32的点
|
|
|
low_amp_count = weights[low_amp_mask].sum()
|
|
|
low_amp_ratio = low_amp_count / total_points
|
|
|
else:
|
|
|
low_amp_ratio = 0
|
|
|
|
|
|
x_bins = 36
|
|
|
x_edges = np.linspace(0, 360, x_bins + 1)
|
|
|
|
|
|
# 统计当前簇在每个区间的权重总和
|
|
|
x_hist, _ = np.histogram(points[:, 0], bins=x_edges,
|
|
|
range=(0, 360))
|
|
|
variance = np.var(x_hist)
|
|
|
zero_count = np.sum(x_hist == 0)
|
|
|
total = len(x_hist)
|
|
|
a = zero_count / total
|
|
|
|
|
|
result_list.extend([
|
|
|
dist_x, # 聚类中心X距离
|
|
|
# dist_y, # 聚类中心Y距离
|
|
|
|
|
|
low_amp_ratio, # 原有低幅值比例
|
|
|
|
|
|
amp_skew, # 全局幅值偏斜度
|
|
|
amp_kurt, # 全局幅值陡峭度
|
|
|
a,
|
|
|
|
|
|
|
|
|
])
|
|
|
|
|
|
return result_list
|