Skip to main content

PrivaceraCloud Documentation

Sample CloudFormation template

:

The following CloudFormation template can be used to create an EMR cluster. You can modify the template to suit your requirements.

Common variables from the previous setup steps should be kept the same.

Sample CloudFormation template:

{
"AWSTemplateFormatVersion": "2010-09-09",
"Description": "Create EMR Cluster - Native Ranger Integration with Privacera",
"Parameters": {
    "ClusterName": {
    "Description": "Name of the emr cluster",
    "Type": "String",
    "Default": "Privacera-EMR-Native-Ranger"
    },
    "EMRVersion": {
    "Description": "EMR Native Ranger integration is supported from 5.32 onwards. e.g. emr-5.32.0, emr-5.33.0, etc.",
    "Type": "String",
    "Default": "emr-5.32.0"
    },
    "MasterSecurityGroup": {
    "Description": "Security Group ID for EMR Master Node Group. e.g. sg-xxxxxxx",
    "Type": "String",
    "Default": ""
    },
    "SlaveSecurityGroup": {
    "Description": "Security Group ID for EMR Slave Node Group. e.g. sg-xxxxxxx",
    "Type": "String",
    "Default": ""
    },
    "ServiceAccessSecurityGroup": {
    "Description": "Security Group ID for EMR ServiceAccessSecurity. Fill this property only if you are creating EMR in a Private Network. e.g. sg-xxxxxxx",
    "Type": "String",
    "Default": ""
    },
    "NodeSubnetId": {
    "Description": "Subnet id for the cluster nodes. e.g. subnet-xxxx",
    "Type": "String",
    "Default": ""
    },
    "SecurityConfig": {
    "Description": "SecurityConfiguration name that will be attached to the EMR Cluster. e.g emr-native-privacera-sec-conf",
    "Type": "String",
    "Default": "emr-native-privacera-sec-conf"
    },
    "HiveMetaStoreWarehouseS3Path": {
    "Description": "Hive metastore warehouse s3 path. e.g. s3://hive-warehouse/data",
    "Type": "String",
    "Default": ""
    },
    "NodeKeyPair": {
    "Description": "An existing EC2 key pair to SSH into the node of cluster. e.g. privacera-test-pair",
    "Type": "String",
    "Default": ""
    },
    "NodeMarketType": {
    "Description": "Node Instance market type. e.g. SPOT, ON_DEMAND",
    "Type": "String",
    "Default": ""
    },
    "KdcAdminPassword": {
    "Description": "The password used within the cluster for the kadmin service.",
    "Type": "String",
    "NoEcho": true,
    "Default": ""
    },
    "CrossRealmTrustPrincipalPassword": {
    "Description": "The cross-realm trust principal password, which must be identical across realms.",
    "Type": "String",
    "NoEcho": true,
    "Default": ""
    },
    "RangerAuditsSetupScriptUrl": {
    "Description": "Get from--> PCloud Portal >> Access Manager >> Settings >> ApiKey >> Click Info Icon >> AWS EMR Native Ranger Plugin Section >> Ranger Audit Setup Script >> Copy URL",
    "Type": "String",
    "Default": ""
    },
    "EmrMasterNodeCount": {
    "Description": "Node count for Master. e.g. 1",
    "Type": "Number",
    "Default": 1
    },
    "EmrCoreNodeCount": {
    "Description": "Node count for Core. e.g. 1",
    "Type": "Number",
    "Default": 1
    },
    "EmrNodeInstanceType": {
    "Description": "e.g. m5.large, m5.2xlarge, r5.xlarge, etc.",
    "Type": "String",
    "Default": ""
    },
    "EmrTerminationProtection": {
    "Description": "To enable termination protection. Can be true/false",
    "Type": "String",
    "Default": "true"
    },
    "EmrLogsPath": {
    "Description": "S3 location for emr logs storage. e.g. s3://privacera-emr/logs",
    "Type": "String",
    "Default": ""
    },
    "EmrNativePrivaceraInstanceRole": {
    "Description": "IAM Role which will be attached to all Instances in the Cluster. Should have minimal permissions. e.g. emr_native_privacera_restricted_instance_role",
    "Type": "String",
    "Default": "emr_native_privacera_restricted_instance_role"
    },
    "EmrDefaultRole": {
    "Description": "Default role attached to EMR Cluster for performing cluster related activities. This should be a pre-created one. e.g. EMR_DefaultRole",
    "Type": "String",
    "Default": "EMR_DefaultRole"
    },
    "EmrHiveMetastoreConnectionUrl": {
    "Description": "JDBC Connection URL for connecting to hive. e.g. jdbc:mysql://<jdbc-host>:3306/<hive-db-name>?createDatabaseIfNotExist=true",
    "Type": "String",
    "Default": ""
    },
    "EmrHiveMetastoreConnectionDriver": {
    "Description": "JDBC Driver Name. e.g. org.mariadb.jdbc.Driver",
    "Type": "String",
    "Default": ""
    },
    "EmrHiveMetastoreConnectionUsername": {
    "Description": "JDBC UserName",
    "Type": "String",
    "Default": ""
    },
    "EmrHiveMetastoreConnectionPassword": {
    "Description": "JDBC Password",
    "Type": "String",
    "NoEcho": true,
    "Default": ""
    }
},
"Resources": {
    "EMRCLUSTER": {
    "Type": "AWS::EMR::Cluster",
    "Properties": {
        "Name": {
        "Ref": "ClusterName"
        },
        "KerberosAttributes": {
        "Realm": "EC2.INTERNAL",
        "KdcAdminPassword": {
            "Ref": "KdcAdminPassword"
        },
        "CrossRealmTrustPrincipalPassword": {
            "Ref": "CrossRealmTrustPrincipalPassword"
        }
        },
        "SecurityConfiguration": {
        "Ref": "SecurityConfig"
        },
        "VisibleToAllUsers": true,
        "EbsRootVolumeSize": 15,
        "Instances": {
        "MasterInstanceGroup": {
            "InstanceCount": {
            "Ref": "EmrMasterNodeCount"
            },
            "InstanceType": {
            "Fn::Sub": "${EmrNodeInstanceType}"
            },
            "Market": {
            "Fn::Sub": "${NodeMarketType}"
            },
            "Name": "Master Instance Group"
        },
        "CoreInstanceGroup": {
            "InstanceCount": {
            "Ref": "EmrCoreNodeCount"
            },
            "InstanceType": {
            "Fn::Sub": "${EmrNodeInstanceType}"
            },
            "Market": {
            "Fn::Sub": "${NodeMarketType}"
            },
            "Name": "Core Instance Group"
        },
        "Ec2KeyName": {
            "Ref": "NodeKeyPair"
        },
        "EmrManagedSlaveSecurityGroup": {
            "Fn::Sub": "${SlaveSecurityGroup}"
        },
        "EmrManagedMasterSecurityGroup": {
            "Fn::Sub": "${MasterSecurityGroup}"
        },
        "ServiceAccessSecurityGroup": {
            "Fn::Sub": "${ServiceAccessSecurityGroup}"
        },
        "Ec2SubnetId": {
            "Fn::Sub": "${NodeSubnetId}"
        },
        "TerminationProtected": {
            "Fn::Sub": "${EmrTerminationProtection}"
        }
        },
        "BootstrapActions": [
        {
            "Name": "Configure Ranger Audits for Master Node",
            "ScriptBootstrapAction": {
            "Path": "s3://elasticmapreduce/bootstrap-actions/run-if",
            "Args": [
                {
                "Fn::Sub": "instance.isMaster=true"
                },
                {
                "Fn::Sub": "wget ${RangerAuditsSetupScriptUrl}; chmod +x ./privacera_emr_native.sh ; sudo ./privacera_emr_native.sh"
                }
            ]
            }
        },
        {
            "Name": "Configure Ranger Audits for Worker Nodes",
            "ScriptBootstrapAction": {
            "Path": "s3://elasticmapreduce/bootstrap-actions/run-if",
            "Args": [
                {
                "Fn::Sub": "instance.isMaster=false"
                },
                {
                "Fn::Sub": "wget ${RangerAuditsSetupScriptUrl}; chmod +x ./privacera_emr_native.sh ; sudo ./privacera_emr_native.sh"
                }
            ]
            }
        }
        ],
        "Applications": [
        {
            "Name": "Hive"
        },
        {
            "Name": "Spark"
        },
        {
            "Name": "Zeppelin"
        },
        {
            "Name": "Livy"
        },
        {
            "Name": "Hue"
        }
        ],
        "Configurations": [
        {
            "Classification": "spark",
            "ConfigurationProperties": {
            "maximizeResourceAllocation": "true"
            },
            "Configurations": []
        },
        {
            "Classification": "spark-hive-site",
            "ConfigurationProperties": {
            "hive.metastore.warehouse.dir": {
                "Ref": "HiveMetaStoreWarehouseS3Path"
            }
            }
        },
        {
            "Classification": "hive-site",
            "ConfigurationProperties": {
            "javax.jdo.option.ConnectionURL": {
                "Fn::Sub": "${EmrHiveMetastoreConnectionUrl}"
            },
            "javax.jdo.option.ConnectionDriverName": {
                "Fn::Sub": "${EmrHiveMetastoreConnectionDriver}"
            },
            "javax.jdo.option.ConnectionUserName": {
                "Fn::Sub": "${EmrHiveMetastoreConnectionUsername}"
            },
            "javax.jdo.option.ConnectionPassword": {
                "Fn::Sub": "${EmrHiveMetastoreConnectionPassword}"
            },
            "hive.metastore.warehouse.dir": {
                "Ref": "HiveMetaStoreWarehouseS3Path"
            }
            }
        }
        ],
        "LogUri": {
        "Fn::Sub": "${EmrLogsPath}"
        },
        "JobFlowRole": {
        "Fn::Sub": "${EmrNativePrivaceraInstanceRole}"
        },
        "ServiceRole": {
        "Fn::Sub": "${EmrDefaultRole}"
        },
        "ReleaseLabel": {
        "Fn::Sub": "${EMRVersion}"
        }
    }
    }
}
}

To learn how to create a stack using a CloudFormation template, refer to the Create CloudFormation stack topic.