我不太确定这个问题在这里是否切题,如果不切题,请告诉我。我在阅读机器学习文献时不断看到 YAML 文件这个概念。我的问题是,YAML 文件到底是什么,它与机器学习和数据科学项目有什么关系?
什么是 YAML 文件,它在机器学习环境中的用途是什么?
数据挖掘
机器学习
神经网络
深度学习
序列化
2021-10-05 02:59:09
2个回答
有很多方法可以序列化数据。例如,JSON、XML 和 CSV 都是可能的序列化方式,YAML 是另一种。它是人类可读的,也就是说,文件内容是用常见单词写成的可读文本。这与二进制序列化相对:如果文件是以二进制格式而不是人类可读的格式序列化的,那么打开文件时您将无法直接看懂其中写入的内容。您可以在这里找到更多相关信息;YAML 主要用于配置文件。在机器学习的场景中,根据任务的不同,您可能会遇到不同的文件格式。例如,有监督机器学习任务中广泛使用的格式之一是 CSV,不过您也可能发现其他人使用他们自己偏好的编码方式。您可能还想使用自己的序列化方式,这在公司内部操作中很少见。
除了用于存储数据之外,YAML 文件还常被用作人类可读的配置文件。下面是一个 Docker Compose YAML 文件的示例:
# Example Docker Compose file with its nesting restored (2-space indentation).
# It defines five services: nodejs-admin, nodejs-enduser, loudmongo,
# loud-devops and loudmail.
# $VAR / ${VAR} references are left as-is; Compose substitutes them from the
# environment when it loads the file.
---
# version: "3.5"
version: '3'
# version: '2'
services:
  nodejs-admin:
    image: ${GKE_APP_IMAGE_ADMIN}
    hostname: admin
    container_name: loud_admin
    restart: always
    depends_on:
      - loudmongo
      # - loudmail
      - loudmail
    volumes:
      - /cryptdata4/var/log/loudlog-admin:/loudlog-admin
      - /cryptdata5/var/log/blobs:/blobs
      - /cryptdata5/var:/cryptdata5/var
      - /cryptdata5/var/tools:/tools
      - /cryptdata6/var/log/loudlog-enduser:/loudlog-enduser
      # - ${TMPDIR_GRAND_PARENT}/curr/loud-build/${PROJECT_ID}/webapp/admin/bundle:/tmp
      - ${TMPDIR_GRAND_PARENT}/${bundleNormal}/loud-build/${PROJECT_ID}/webapp/admin/bundle:/tmp
      # - $SOURCE_REPO_DIR/tests:/tmp/tests
    environment:
      - MONGO_SERVICE_HOST=loudmongo
      - MONGO_SERVICE_PORT=$GKE_MONGO_PORT
      - MONGO_URL=mongodb://loudmongo:$GKE_MONGO_PORT/admin
      - METEOR_SETTINGS=${METEOR_SETTINGS}
      # when sending to port 587 will NOT get routed through email content filter however port 25 will get routed
      - MAIL_URL=smtp://support@${GKE_DOMAIN_NAME}:ignore_this@loudmail:587/
      # - MAIL_URL=smtp://support@${GKE_DOMAIN_NAME}:ignore_this@loudmail:25/
      - GKE_NOTIF_TASK_OVERDUE=$GKE_NOTIF_TASK_OVERDUE
      - GKE_NOTIF_PLANS_RECUR=$GKE_NOTIF_PLANS_RECUR
      - GKE_NOTIF_SHIFTS_RUN=$GKE_NOTIF_SHIFTS_RUN
      - GKE_NOTIF_LOAD_RUN=$GKE_NOTIF_LOAD_RUN
      - GKE_NOTIF_ORGS_HEALTH_RUN=$GKE_NOTIF_ORGS_HEALTH_RUN
    links:
      - loudmongo
      # - loudmail
      - loudmail
    ports:
      # HOST_IP:HOST_PORT:CONTAINER_PORT; quoted so the mapping is always read
      # as a string, matching the quoted mappings used for loudmail below.
      - "127.0.0.1:${PORT_ADMIN}:3001"
    # networks:
    #   - loudthink-network
    working_dir: /tmp
    command: /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf

  nodejs-enduser:
    # image: ${GKE_APP_REPO_PREFIX}/${PROJECT_ID}/loudweb-enduser
    image: ${GKE_APP_IMAGE_ENDUSER}
    hostname: enduser
    container_name: loud_enduser
    restart: always
    depends_on:
      - nodejs-admin
      - loudmongo
      # - loudmail
      - loudmail
    volumes:
      - /cryptdata6/var/log/loudlog-enduser:/loudlog-enduser
      - /cryptdata5/var/log/blobs:/blobs
      # - ${TMPDIR_GRAND_PARENT}/curr/loud-build/${PROJECT_ID}/webapp/enduser/bundle:/tmp
      # - ${TMPDIR_GRAND_PARENT}/curr/loud-build/${PROJECT_ID}/webapp/admin/bundle/programs/server/assets/app/config/apn-cert.pem:/private/config/apn-cert.pem
      # - ${TMPDIR_GRAND_PARENT}/curr/loud-build/${PROJECT_ID}/webapp/admin/bundle/programs/server/assets/app/config/apn-key.pem:/private/config/apn-key.pem
      - ${TMPDIR_GRAND_PARENT}/${bundleNormal}/loud-build/${PROJECT_ID}/webapp/enduser/bundle:/tmp
      - ${TMPDIR_GRAND_PARENT}/${bundleNormal}/loud-build/${PROJECT_ID}/webapp/admin/bundle/programs/server/assets/app/config/apn-cert.pem:/private/config/apn-cert.pem
      - ${TMPDIR_GRAND_PARENT}/${bundleNormal}/loud-build/${PROJECT_ID}/webapp/admin/bundle/programs/server/assets/app/config/apn-key.pem:/private/config/apn-key.pem
    environment:
      - MONGO_SERVICE_HOST=loudmongo
      - MONGO_SERVICE_PORT=$GKE_MONGO_PORT
      - MONGO_URL=mongodb://loudmongo:$GKE_MONGO_PORT/admin
      - METEOR_SETTINGS=${METEOR_SETTINGS}
      # when sending to port 587 will NOT get routed through email content filter however port 25 will get routed
      - MAIL_URL=smtp://support@${GKE_DOMAIN_NAME}:ignore@loudmail:587/
      # - MAIL_URL=smtp://support@${GKE_DOMAIN_NAME}:ignore@loudmail:25/
    links:
      - loudmongo
      # - loudmail
      - loudmail
    ports:
      - "127.0.0.1:${PORT_ENDUSER}:3000"
    # networks:
    #   - loudthink-network
    working_dir: /tmp
    command: /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf

  loudmongo:
    # image: mongo
    # image: mongo:3.6
    image: $GKE_MONGO_IMAGE
    hostname: mongo
    container_name: loud_mongo
    restart: always
    ports:
      - "127.0.0.1:$GKE_MONGO_PORT:$GKE_MONGO_PORT"
    # logpath:
    #   - /data/logs/mongo.log
    volumes:
      - /cryptdata7/var/data/db:/data/db
      # - /cryptdata7/var/data/logs:/data/logs

  loud-devops:
    # image: dind
    image: localhost:5000/hygge/loudweb-dind/1904082202
    hostname: devops
    container_name: loud_devops
    restart: always
    ports:
      - "127.0.0.1:9000:9000"
    # expose:
    #   - 9000
    # networks:
    #   - loudthink-network
    # environment:
    #   - SSH_AUTH_SOCK=$SSH_AUTH_SOCK
    volumes:
      # The host's Docker socket and docker binary are bind-mounted into the
      # container at the same paths.
      - /var/run/docker.sock:/var/run/docker.sock
      - /usr/bin/docker:/usr/bin/docker
      # - $SSH_AUTH_SOCK:/ssh-agent
      - /usr/lib/x86_64-linux-gnu/libltdl.so.7:/usr/lib/x86_64-linux-gnu/libltdl.so.7
      - /usr/lib/x86_64-linux-gnu/libgpm.so.2:/usr/lib/x86_64-linux-gnu/libgpm.so.2
      - /home/khepri/src/github.com/loudthink:/inner_home/khepri/src/github.com/loudthink
      - /home/khepri/.docker:/inner_home/khepri/.docker
      - /cryptdata5/var/tools/inner_home/khepri:/inner_home/khepri
      - $GKE_DIND_SUPERVISOR_LOG_DIR:/var/log/supervisor
      - /cryptdata5/var/tools/usr/local/go:/usr/local/go
      - /cryptdata6/var/log/tmp/khepri01:/cryptdata6/var/log/tmp/khepri01
      - /cryptdata6/var/log/tmp/shared:/cryptdata6/var/log/tmp/shared
      - /cryptdata5/var/tools/usr/local/bin:/usr/local/bin
      - /cryptdata:/cryptdata
      - /cryptdata2:/cryptdata2
      - /cryptdata4:/cryptdata4
      - /cryptdata5:/cryptdata5
      - /cryptdata6:/cryptdata6
      - /cryptdata7:/cryptdata7
      # - /etc/letsencrypt/live:/etc/letsencrypt/live
      # - /etc/letsencrypt/archive/medssenger-dev.medstack.net:/etc/letsencrypt/archive/medssenger-dev.medstack.net
      - /usr/local/ssl:/usr/local/ssl
      # following line HOME is only used during install as referenced in loudspeed/build/bin/setup_devops_sudo_cmds.sh to copy over .ssh files
      # - ${HOME}:${HOME}
      # - /var/lib/docker:/var/lib/docker
    command: /usr/bin/supervisord -c /etc/supervisor/supervisord.conf

  loudmail:
    image: tvial/docker-mailserver:latest
    hostname: mail
    domainname: ${GKE_DOMAIN_NAME}
    container_name: loud_mail
    restart: always
    environment:
      - PERMIT_DOCKER=network
      - SSL_TYPE=letsencrypt
      - ONE_DIR=1
      - DMS_DEBUG=1
      - SPOOF_PROTECTION=0
      - REPORT_RECIPIENT=1
      - ENABLE_SPAMASSASSIN=0
      - ENABLE_CLAMAV=0
      - ENABLE_FAIL2BAN=1
      - ENABLE_POSTGREY=0
    cap_add:
      - NET_ADMIN
      - SYS_PTRACE
    ports:
      # Unlike the other services, these are published without a host IP.
      - "25:25"
      - "587:587"
      - "465:465"
    volumes:
      - ${GKE_MAIL_EHOOK_DIR}/data/:/var/mail/
      # - ${GKE_MAIL_EHOOK_DIR}/state/:/var/mail-state/
      - ${GKE_MAIL_EHOOK_DIR}/state/:${GKE_MAIL_EHOOK_INNER_STATE_DIR}/
      - ${GKE_MAIL_EHOOK_DIR}/config/:/tmp/docker-mailserver/
      - ${GKE_MAIL_EHOOK_OUTPUT_DIR}/:${GKE_MAIL_EHOOK_INNER_OUTPUT_DIR}/
      - ${GKE_MAIL_EHOOK_HOST_DIR}/:/local_ehook/
      # - /etc/letsencrypt/:/etc/letsencrypt/
      - ${GKE_LETSENCRYPT_DIR}/:/etc/letsencrypt/
      - ${GKE_MAIL_EHOOK_DIR}/log/:/var/log/mail/
与任何文本配置文件一样,模板的概念很有用。例如,假设变量 GKE_COMPOSE_YAML 所指向的 YAML 文件包含如下内容:
# some_token is a literal placeholder meant to be replaced before the file is used
foobar: some_token
您可以在预处理阶段通过字符串搜索和替换来更新硬编码的 some_token:
# Replace every occurrence of the placeholder some_token in the file, in place,
# with the current value of my_current_token.
# The file path is quoted so a path containing spaces or glob characters is
# passed to sed as a single argument.
sed -i -- "s|some_token|${my_current_token}|g" "$GKE_COMPOSE_YAML"
取决于您的程序使用什么工具来解析 YAML 文件,该工具可能允许直接在 YAML 文件中嵌入环境变量,如下面的片段所示:
# The value is an environment-variable reference; it is only expanded if the
# tool that reads this file supports variable substitution.
foobar: $my_current_token
前提是您的环境中已经定义了变量 my_current_token,例如通过以下方式定义:
# Define my_current_token and export it to the environment of child processes.
export my_current_token=something.cool.here
其它你可能感兴趣的问题