- table: "product_category"  # this defines the table to be created
  dest-zone: "02-silver"  # zone where the delta table will be generated
  source-storage-mount: "/mnt/datalake"  # mount point where the source data is located
  # Location in the Data Lakehouse where the source files are stored.
  # NOTE(review): value is empty (parses as null) — confirm this is intentional; an explicit
  # path or "" would be unambiguous.
  source-storage-path:
  source-storage-path-depth: "*/*/*"  # depth under the path where the system will find files
  source-data-type: "hive"  # type of data being queried; supported: hive, delta, parquet, json, csv, text
  dest-database: "silver"  # destination zone in the Data Lakehouse
  dest-area: "master"  # area within the Data Lakehouse zone where the data will be stored
  dest-table: "product_category"  # name of the table to be created within the Data Lakehouse
  expectations-behaviour: "warn"  # how to handle expectation failures: warn or fail
  merge-type: "overwrite"
  # The type of merge to apply to the destination table in the Data Lakehouse.
  # Supported merge-type values:
  #   overwrite        : default : overwrites the destination with the source table
  #   append           : appends the data from the source table to the destination table
  #   overwrite-by-key : deletes all rows from the destination where they exist in the source,
  #                      then appends the source to the destination
  #   type-1           : inserts new rows into destination where key(s) in source and destination
  #                      do not match; updates rows in destination where key(s) in source and
  #                      destination match
  #   type-1-identity  : inserts new rows into destination where identity-key in source and
  #                      destination do not match; updates rows in destination where identity-key
  #                      in source and destination match; excludes identity-key from the update
  #                      fields array
  #   type-2           : inserts new rows into destination where identity-key in source and
  #                      destination do not match; inserts new rows into destination where
  #                      identity-key in source and destination match and type-2-keys do not
  #                      match; invalidates and expires the prior valid version of the record;
  #                      updates rows in destination where identity-key in source and destination
  #                      match and type-2-keys match
  # Select expression used to query and transform the source delta tables into the new entity.
  # NOTE(review): "silver.categorys" looks like a misspelling of "categories" — confirm the
  # actual source table name before changing this runtime SQL string.
  select-expression: >-
    select
    product_category_name
    , category_group
    , category_parent_group
    from silver.categorys
  merge-columns:  # array of columns used as merge keys when the merge strategy requires columns for merging or deleting
  qualify-order:  # used in type-2 transforms when you need a custom order-by for the qualify statement
  where:  # array of predicate values used to filter the data
  exclude-cols:  # array of columns to exclude from the final dataframe definition
  aggregation:  # allows you to express aggregations for your dataframe
    group-by:  # columns to group by
    aggregates:  # aggregations to apply
  pivot:  # allows you to apply pivot functionality to the final dataframe
    group-by:  # array of columns to group by in the pivot
    pivot-val:  # row value to convert to a column
    pivot-agg-val:  # row value converted to the value for the new column header — current functionality pivots on strings and uses first aggregation
  final-select-def:  # final select definition to impose after all transforms complete
  final-select-where:  # array of select predicates to impose on the final select statement
  table-def:
    columns:
      - "product_category_name string comment 'Product category name'"
      - "category_group string comment 'Product Group'"
      - "category_parent_group string comment 'Product Parent Group'"
    comment: Common Product Category Table
    tbl-properties:
      # NOTE(review): 'zone' = 'gold' disagrees with dest-zone "02-silver" / dest-database
      # "silver" above — confirm which zone this table really belongs to.
      - "'zone' = 'gold'"
      - "'type' = 'master'"
      - "'quality' = 'curated'"
  expectations:  # great_expectations testing suite definition
    - expectation_type: expect_column_values_to_not_be_null
      kwargs:
        column: "product_category_name"
      meta:  # optional expectation metadata; intentionally left empty