|
|
# tools/excel_merge.py
|
|
|
from common.logger import tool_logger, error_logger
|
|
|
import pandas as pd
|
|
|
|
|
|
def read_file(file_path):
    """Load an xlsx/xls/csv file into a DataFrame of whitespace-trimmed text.

    Every cell is read as text (``dtype=str``). Column labels and string
    cells have leading/trailing whitespace removed. Missing cells stay
    missing instead of being converted to the literal text ``"nan"``.

    CSV files are decoded by trying UTF-8, then GBK, then GB18030, and the
    first encoding that decodes the file is used.

    Args:
        file_path: Path string. The text after its last ``"."`` (lowercased)
            selects the reader.

    Returns:
        The loaded ``pandas.DataFrame``.

    Raises:
        Exception: If the extension is not xlsx/xls/csv, or if none of the
            candidate encodings can decode a CSV file.
    """

    def strip_text(value):
        # Only real strings are trimmed; NaN/None pass through untouched.
        return value.strip() if isinstance(value, str) else value

    ext = file_path.lower().split(".")[-1]

    if ext in ("xlsx", "xls"):
        df = pd.read_excel(file_path, dtype=str)
    elif ext == "csv":
        df = None
        decode_error = None
        for enc in ("utf-8", "gbk", "gb18030"):
            try:
                df = pd.read_csv(file_path, encoding=enc, dtype=str)
            except UnicodeError as err:
                # Wrong encoding guess: remember the error, try the next one.
                # Any other failure (missing file, malformed CSV) propagates
                # as-is instead of being misreported as an encoding problem.
                decode_error = err
            else:
                break
        if df is None:
            # Not inside an except handler, so log with error() rather than
            # exception() (which would attach an empty traceback).
            tool_logger.error("CSV编码无法识别")
            raise Exception("CSV编码无法识别") from decode_error
    else:
        tool_logger.error("仅支持 xlsx/xls/csv")
        raise Exception("仅支持 xlsx/xls/csv")

    df.columns = df.columns.astype(str).str.strip()
    # Column-wise apply works positionally, so it also copes with labels that
    # became duplicates after stripping.
    return df.apply(lambda column: column.map(strip_text))
|
|
|
|
|
|
|
|
|
def merge_df(df1, df2, key1, key2, join_type):
    """Join two DataFrames on one key column from each side.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame.
        key1: Column label in ``df1`` used as the left join key.
        key2: Column label in ``df2`` used as the right join key.
        join_type: Passed to ``pandas.merge`` as ``how``
            (e.g. ``"inner"``, ``"left"``, ``"right"``, ``"outer"``).

    Returns:
        The merged ``pandas.DataFrame``.

    Raises:
        Exception: If ``key1`` is not a column of ``df1`` or ``key2`` is not
            a column of ``df2``.
    """
    # These checks run outside any except handler, so they log with error()
    # rather than exception() (which would attach an empty traceback).
    if key1 not in df1.columns:
        message = f"文件1不存在字段:{key1}"
        tool_logger.error(message)
        raise Exception(message)

    if key2 not in df2.columns:
        message = f"文件2不存在字段:{key2}"
        tool_logger.error(message)
        raise Exception(message)

    return pd.merge(
        df1,
        df2,
        left_on=key1,
        right_on=key2,
        how=join_type,
    )
|
|
|
|
|
|
|
|
|
def export_excel(df, path):
    """Write ``df`` to an Excel file at ``path`` without the index column.

    Args:
        df: DataFrame to export.
        path: Destination handed to ``DataFrame.to_excel``.
    """
    df.to_excel(excel_writer=path, index=False)
|
|
|
|
|
|
def build_preview(df, max_rows=100):
    """Render the first rows of ``df`` as an HTML table for previewing.

    Missing values are shown as empty cells. The table carries the id
    ``previewTable`` and the CSS classes
    ``table table-bordered table-hover preview-table``; the index is omitted.

    Args:
        df: DataFrame to preview.
        max_rows: Number of leading rows to include, passed to
            ``DataFrame.head``. Defaults to 100.

    Returns:
        The HTML markup as a string.
    """
    preview = df.head(max_rows).fillna("")
    return preview.to_html(
        classes="table table-bordered table-hover preview-table",
        table_id="previewTable",
        index=False,
        border=0,
    )