{"$schema":"https://domains.younndai.com/schemas/domain.json","domain":"yon.devops","version":"1.0","status":"active","state":"active","tier":"official","verified":true,"score":1,"notice":null,"description":"Deployment pipelines, infrastructure observability, incident management, and SRE workflows with SLO tracking","defaultMode":"struct","defaultProfile":"core","defaultFormat":"canon","records":[{"tag":"DEPLOY","fields":[{"name":"rid","type":"string","example":"dep:api-gw-v2.4.1","required":true,"description":"Record ID"},{"name":"service","type":"string","example":"api-gateway","required":false,"description":"Service Name"},{"name":"version","type":"string","example":"v2.4.1","required":false,"description":"Version"},{"name":"commit","type":"string","example":"a1b2c3d","required":false,"description":"Commit SHA"},{"enum":["dev","staging","production","canary"],"name":"environment","type":"string","example":"production","required":false,"description":"Target Environment: dev, staging, production, canary"},{"enum":["pending","running","success","failed","rollback"],"name":"status","type":"string","example":"success","required":false,"description":"Deploy Status: pending, running, success, failed, rollback"}],"description":"Deployment event — tracks version rollout with commit and status state machine"},{"tag":"PIPELINE","fields":[{"name":"rid","type":"string","example":"pipe:main-4521","required":true,"description":"Record ID"},{"name":"name","type":"string","example":"deploy-production","required":false,"description":"Pipeline Name"},{"name":"stage","type":"string","example":"integration-test","required":false,"description":"Current Stage"},{"enum":["queued","running","passed","failed","skipped"],"name":"status","type":"string","example":"passed","required":false,"description":"Stage Status: queued, running, passed, failed, skipped"},{"name":"duration_sec","type":"int","unit":"sec","example":"342","required":false,"description":"Duration in 
sec"},{"enum":["push","merge","schedule","manual","api"],"name":"trigger","type":"string","example":"merge","required":false,"description":"Trigger Type: push, merge, schedule, manual, api"}],"description":"CI/CD pipeline execution — stage-level tracking with duration and trigger source"},{"tag":"METRIC","fields":[{"name":"name","type":"string","example":"http_request_latency_p99","required":false,"description":"Metric Name"},{"name":"value","type":"float","example":"142.5","required":false,"description":"Metric Value"},{"name":"unit","type":"string","example":"ms","required":false,"description":"Unit"},{"name":"ts","type":"ts","example":"2026-03-01T14:00:00Z","required":false,"description":"Observation Timestamp (ISO 8601)"},{"enum":["prometheus","datadog","cloudwatch","grafana","custom","opentelemetry"],"name":"source","type":"string","example":"prometheus","required":false,"description":"Source (6 allowed values)"},{"name":"threshold","type":"float","example":"200.0","required":false,"description":"Alert Threshold"}],"description":"Infrastructure metric — time-series observation with unit and threshold alerting"},{"tag":"INCIDENT","fields":[{"name":"rid","type":"string","example":"inc:2026-0042","required":true,"description":"Record ID"},{"enum":["sev1","sev2","sev3","sev4"],"name":"severity","type":"string","example":"sev1","required":false,"description":"Severity: sev1, sev2, sev3, sev4"},{"name":"title","type":"string","example":"API latency spike >500ms","required":false,"description":"Incident Title"},{"enum":["detected","investigating","mitigated","resolved","postmortem"],"name":"status","type":"string","example":"investigating","required":false,"description":"Incident Status: detected, investigating, mitigated, resolved, postmortem"},{"name":"commander","type":"string","example":"on-call:platform","required":false,"description":"Incident Commander"},{"name":"ttd_min","type":"int","unit":"min","example":"3","required":false,"description":"Time 
to Detect in min"},{"name":"ttr_min","type":"int","unit":"min","example":"47","required":false,"description":"Time to Resolve in min"}],"description":"Operational incident — severity-classified event with SLO impact tracking"},{"tag":"SLO","fields":[{"name":"service","type":"string","example":"api-gateway","required":false,"description":"Service Name"},{"name":"objective","type":"string","example":"availability","required":false,"description":"Objective"},{"name":"target_pct","type":"float","unit":"%","range":[0,100],"example":"99.95","required":false,"description":"Target in % (0–100)"},{"name":"current_pct","type":"float","unit":"%","range":[0,100],"example":"99.97","required":false,"description":"Current in % (0–100)"},{"enum":["1h","6h","24h","7d","28d","30d"],"name":"window","type":"string","example":"30d","required":false,"description":"Measurement Window (6 allowed values)"},{"name":"budget_remaining","type":"float","unit":"%","example":"0.62","required":false,"description":"Budget Remaining in %"}],"description":"Service level objective — error budget tracking with burn-rate alerting"},{"tag":"CHANGE","fields":[{"name":"rid","type":"string","example":"chg:CHG-2026-0087","required":true,"description":"Record ID"},{"enum":["standard","normal","emergency"],"name":"type","type":"string","example":"normal","required":false,"description":"Change Type: standard, normal, emergency"},{"name":"description","type":"string","example":"Database schema migration v42","required":false,"description":"Description"},{"enum":["low","medium","high"],"name":"risk_level","type":"string","example":"medium","required":false,"description":"Risk Level: low, medium, high"},{"name":"approver","type":"string","example":"VP Engineering","required":false,"description":"Approver"},{"name":"rollback_plan","type":"string","example":"Revert migration via flyway undo","required":false,"description":"Rollback Plan"}],"description":"Change management record — ITIL-aligned change tracking with 
risk assessment"},{"tag":"RUNBOOK","fields":[{"name":"step","type":"int","example":"1","required":false,"description":"Step Number"},{"name":"action","type":"string","example":"Check API gateway error rate","required":false,"description":"Action Description"},{"name":"command","type":"string","example":"kubectl get pods -n api-gateway","required":false,"description":"Command"},{"name":"expected","type":"string","example":"All pods Running, 0 restarts","required":false,"description":"Expected Output"},{"name":"automated","type":"bool","example":"false","required":false,"description":"Automated indicator"}],"description":"Runbook step — executable operational procedure for incident response or maintenance"}],"schemaHash":null,"recordCount":7,"totalFieldCount":42,"meta":{"links":[{"url":"https://dora.dev/","type":"standard","label":"DORA Metrics"},{"url":"https://openslo.com/","type":"standard","label":"OpenSLO"},{"url":"https://sre.google/sre-book/table-of-contents/","type":"reference","label":"Google SRE Book"},{"url":"https://opentelemetry.io/","type":"standard","label":"OpenTelemetry"}],"related":[{"domain":"yon.infrastructure","reason":"Infrastructure metrics and topology","relationship":"often paired"},{"domain":"yon.security","reason":"Security incident response shares lifecycle","relationship":"data overlap"},{"domain":"yon.compliance","reason":"Change management audit requirements","relationship":"regulatory overlap"}],"tagline":"Deployment pipelines and SRE observability for infrastructure AI","use_cases":[{"id":"cicd-pipeline","tags":["DEPLOY","INCIDENT","METRIC","CHANGE"],"steps":["Trigger @BUILD from commit with artifact versioning","Execute @PIPELINE stages (lint, test, security scan)","Promote @DEPLOY to staging with canary release strategy","Monitor @METRIC (latency, error rate, saturation) post-deploy","Roll back @DEPLOY if @METRIC SLO breach detected"],"title":"CI/CD Pipeline Observability","example":"@DEPLOY rid=dep:1 | service=\"api-gateway\" | 
version=\"v2.4.1\" | environment=\"production\" | strategy=\"canary\" | status=\"success\"","tags_used":["BUILD","PIPELINE","DEPLOY","METRIC"],"description":"Correlate deployments, incidents, and metric anomalies to measure DORA metrics (lead time, deployment frequency, MTTR, change failure rate) and identify bottlenecks."},{"id":"incident-response","tags":["INCIDENT","METRIC","DEPLOY","CHANGE","CONFIG"],"steps":["Detect anomaly from @METRIC threshold or ML model","Create @INCIDENT with severity, affected services, and timeline","Correlate @LOG entries across services for root cause","Review recent @DEPLOY and @CONFIG changes in blast radius","Resolve @INCIDENT and publish blameless postmortem"],"title":"Automated Incident Response","example":"@INCIDENT rid=inc:1 | severity=\"sev1\" | title=\"API latency spike >500ms\" | status=\"investigating\" | affected_services=\"api-gateway,auth\"","tags_used":["METRIC","INCIDENT","LOG","DEPLOY","CONFIG"],"description":"Detect incidents from metric anomalies and alerts, auto-page on-call responders, and track resolution timelines. Link incidents to deployments for root cause analysis."}],"highlights":["DEPLOY","INCIDENT","SLO"],"tag_context":{"SLO":{"purpose":"Service level objective with target percentage, current measurement, and error budget tracking","when_to_use":"SLO definition, burn-rate alerting, reliability review meetings, error budget policy enforcement","related_standards":["Google SRE Book (Ch. 
4)","OpenSLO"]},"CHANGE":{"purpose":"ITIL-aligned change management record with risk level, approval, and rollback plan","when_to_use":"Change advisory board submissions, emergency change logging, risk assessment, audit trails","related_standards":["ITIL Change Enablement","ISO 20000-1","IEEE 828 SCM"]},"DEPLOY":{"purpose":"Deployment event tracking version rollout across environments with commit lineage","when_to_use":"Release recording, rollback documentation, canary promotion, deployment frequency (DORA) measurement","related_standards":["OCI Image Spec","Kubernetes API","ArgoCD GitOps"]},"METRIC":{"purpose":"Infrastructure time-series metric with unit, source, and threshold for alert evaluation","when_to_use":"Observability dashboards, threshold alerting, capacity planning, SLI measurement","related_standards":["OpenTelemetry Metrics","Prometheus Exposition","StatsD"]},"RUNBOOK":{"purpose":"Executable operational procedure step for incident response, maintenance, or toil automation","when_to_use":"Incident response playbooks, operational procedure documentation, toil automation candidates, knowledge capture","related_standards":["ITIL Knowledge Mgmt","Ansible Playbook","PagerDuty Runbook"]},"INCIDENT":{"purpose":"Operational incident with severity, lifecycle status, commander assignment, and time-to-resolve","when_to_use":"Incident declaration, on-call response, postmortem documentation, MTTR tracking, SLO budget burn","related_standards":["PagerDuty Events API v2","ITIL Incident Mgmt","ISO 27035"]},"PIPELINE":{"purpose":"CI/CD pipeline execution with stage-level tracking, duration metrics, and trigger source","when_to_use":"Build observability, stage failure analysis, pipeline optimization, lead time measurement","related_standards":["Tekton Pipeline CRD","GitHub Actions Workflow","Jenkins Pipeline"]}}}}