我尝试用一种简单的去标识化(de-identification)算法对数据进行匿名化,但代码无法正常运行。我希望通过略微改动字符串和整数的值来实现数据匿名化。数据样本可在此处获得。
这是我尝试过的。
import pandas as pd
import uuid as u
import datetime as dt
# Generate a sequence of pseudo-identifiers using Python's uuid library.
def uudi_generator(length):
    """Return a list of ``length`` freshly generated random UUIDs.

    Args:
        length: Number of identifiers to generate. Zero or a negative
            value yields an empty list.

    Returns:
        A list of ``uuid.UUID`` objects, each produced by ``uuid.uuid4()``.
    """
    # A comprehension replaces the manual counter/append loop.
    return [u.uuid4() for _ in range(length)]
# Import the original dataset.
dataset = pd.read_csv('bankcredit-data.csv')

# Pseudo-identifier: one random UUID per row, inserted as the first column.
# The row count comes from the frame itself rather than from one particular
# column, so this no longer depends on a 'housing' column being present.
sLength = len(dataset)
dataset.insert(0, 'uuid', pd.Series(uudi_generator(sLength), index=dataset.index))

# Transaction record attached to the original data.
# datetime.now must be *called*; the bare attribute would store the function
# object itself in every row instead of a timestamp.
transaction_time = dt.datetime.now()
dataset.insert(
    0,
    'transaction_date',
    pd.Series([transaction_time] * sLength, index=dataset.index),
)

# Write the augmented records back to the original data file.
# index=False keeps the row index from being written as an extra unnamed
# column that would come back as data on the next read.
# NOTE(review): re-running this script against the rewritten file will fail
# on insert() because 'uuid' / 'transaction_date' already exist.
dataset.to_csv('bankcredit-data.csv', index=False)

# Delete the identifiable columns from the dataset.
# NOTE(review): the original code used 'firstnamme', which looks like a typo
# for 'firstname' -- confirm against the actual CSV header. drop() raises
# KeyError if either column is missing, so a misnamed column cannot silently
# leave identifying data in the output.
dataset = dataset.drop(columns=['firstname', 'lastname'])

# Export the de-identified dataset as CSV to be shared with the user.
dataset.to_csv('deidentified-data.csv', index=False)