Splitting Flows Data Flow Spec into main and flow files

A data flow spec can be broken up into a main (ending with _main.json|yaml) and flow (ending with _flow.json|yaml) spec file.

The main spec file will contain the main pipeline configuration and the flow spec file will contain the flow configuration and are joined by having the same dataFlowId.

To achieve this, the main spec file will have the structure described in the Creating a Flows Data Flow Spec Reference schema without the _flow-group-configuration property and this will instead be moved to the flow spec file The flow spec file will have the structure described in the _flow-group-configuration schema but the dataFlowID is now required as it will serve as the link to the main spec.

Below is a sample of how a Data Flow Spec can be split into main and flow spec files:

Original Data Flow Spec file (single unsplit file):

{
    "dataFlowId": "etp5stg",
    "dataFlowGroup": "etp5",
    "dataFlowType": "flow",
    "targetFormat": "delta",
    "targetDetails": {
        "table": "staging_table_mrg_p5",
        "schemaPath": "",
        "tableProperties": {
            "delta.enableChangeDataFeed": "true"
        },
        "partitionColumns": []
    },
    "cdcSettings": {
        "keys": [
            "CONTRACT_ID"
        ],
        "sequence_by": "EXTRACT_DTTM",
        "where": "",
        "ignore_null_updates": true,
        "except_column_list": [
            "__START_AT",
            "__END_AT"
        ],
        "scd_type": "2",
        "track_history_column_list": [],
        "track_history_except_column_list": []
    },
    "dataQualityExpectationsEnabled": false,
    "quarantineMode": "off",
    "quarantineTargetDetails": {},
    "flowGroups": [

        {
            "flowGroupId": "et1",
            "stagingTables": {
                "staging_table_apnd_p5": {
                    "type": "ST",
                    "schemaPath": ""
                }
            },
            "flows": {
                "f_contract": {
                    "flowType": "append_view",
                    "flowDetails": {
                        "targetTable": "staging_table_apnd_p5",
                        "sourceView": "v_brz_contract"
                    },
                    "views": {
                        "v_brz_contract": {
                            "mode": "stream",
                            "sourceType": "delta",
                            "sourceDetails": {
                                "database": "main.bronze_test_4",
                                "table": "contract",
                                "cdfEnabled": true,
                                "selectExp": [
                                    "*"
                                ],
                                "whereClause": []
                            }
                        }
                    }
                },
                "f_loan": {
                    "flowType": "append_view",
                    "flowDetails": {
                        "targetTable": "staging_table_apnd_p5",
                        "sourceView": "v_brz_loan"
                    },
                    "views": {
                        "v_brz_loan": {
                            "mode": "stream",
                            "sourceType": "delta",
                            "sourceDetails": {
                                "database": "main.bronze_test_4",
                                "table": "loan",
                                "cdfEnabled": true,
                                "selectExp": [
                                    "*"
                                ],
                                "whereClause": []
                            }
                        }
                    }
                },
                "f_merge": {
                    "flowType": "merge",
                    "flowDetails": {
                        "targetTable": "staging_table_mrg_p5",
                        "sourceView": "staging_table_apnd_p5"
                    }
                }
            }
        }
    ]
}

Split Data Flow Spec into main and flow files:

Main file:

{
    "dataFlowId": "etp5stg",
    "dataFlowGroup": "etp5",
    "dataFlowType": "flow",
    "targetFormat": "delta",
    "targetDetails": {
        "table": "staging_table_mrg_p5",
        "schemaPath": "",
        "tableProperties": {
            "delta.enableChangeDataFeed": "true"
        },
        "partitionColumns": []
    },
    "cdcSettings": {
        "keys": [
            "CONTRACT_ID"
        ],
        "sequence_by": "EXTRACT_DTTM",
        "where": "",
        "ignore_null_updates": true,
        "except_column_list": [
            "__START_AT",
            "__END_AT"
        ],
        "scd_type": "2",
        "track_history_column_list": [],
        "track_history_except_column_list": []
    },
    "dataQualityExpectationsEnabled": false,
    "quarantineMode": "off",
    "quarantineTargetDetails": {}
}

Flow file:

{
    "dataFlowId": "etp5stg",
    "flowGroupId": "et1",
    "stagingTables": {
        "staging_table_apnd_p5": {
            "type": "ST",
            "schemaPath": ""
        }
    },
    "flows": {
        "f_contract": {
            "flowType": "append_view",
            "flowDetails": {
                "targetTable": "staging_table_apnd_p5",
                "sourceView": "v_brz_contract"
            },
            "views": {
                "v_brz_contract": {
                    "mode": "stream",
                    "sourceType": "delta",
                    "sourceDetails": {
                        "database": "main.bronze_test_4",
                        "table": "contract",
                        "cdfEnabled": true,
                        "selectExp": [
                            "*"
                        ],
                        "whereClause": []
                    }
                }
            }
        },
        "f_loan": {
            "flowType": "append_view",
            "flowDetails": {
                "targetTable": "staging_table_apnd_p5",
                "sourceView": "v_brz_loan"
            },
            "views": {
                "v_brz_loan": {
                    "mode": "stream",
                    "sourceType": "delta",
                    "sourceDetails": {
                        "database": "main.bronze_test_4",
                        "table": "loan",
                        "cdfEnabled": true,
                        "selectExp": [
                            "*"
                        ],
                        "whereClause": []
                    }
                }
            }
        },
        "f_merge": {
            "flowType": "merge",
            "flowDetails": {
                "targetTable": "staging_table_mrg_p5",
                "sourceView": "staging_table_apnd_p5"
            }
        }
    }
}