# Code exchange

## Flume: netcat source to HDFS sink (ejemplo_flume.conf)

```properties
# Declare the agent components
agente.sources = sr1
agente.channels = chn1
agente.sinks = snk1

# Configure the source
agente.sources.sr1.type = netcat
agente.sources.sr1.bind = localhost
agente.sources.sr1.port = 55555
agente.sources.sr1.channels = chn1

# Configure the channel
agente.channels.chn1.type = memory
# Maximum number of events stored in the channel
agente.channels.chn1.capacity = 1000
# Maximum number of events the channel takes from the source per transaction
agente.channels.chn1.transactionCapacity = 100

# Configure the sink
agente.sinks.snk1.type = hdfs
agente.sinks.snk1.hdfs.path = hdfs://node1:8020/user/alumno/flume-puerto
# DataStream leaves the output file uncompressed
agente.sinks.snk1.hdfs.fileType = DataStream
agente.sinks.snk1.channel = chn1
```

Start the agent:

```bash
sudo flume-ng agent --conf /etc/flume-ng/conf --conf-file ejemplo_flume.conf --name agente -Dflume.root.logger=INFO,console
```

## HDFS exercise: sample files and block deletion

Sample file download: https://file.io/OEHdVoQrwmHb

HDFS listing of the sample file (replication factor 2):

```
-rw-r--r-- 2 alumno supergroup 2893226 2022-02-21 00:39 filmoteca.csv
```

Delete one block replica on the DataNode to simulate corruption (a quick fsck check is sketched further below):

```bash
sudo rm /containers/dfs/dn/current/BP-1483051556-172.18.0.2-1598871810253/current/finalized/subdir0/subdir25/blk_1073748451
```

Additional sample data files:

- https://www.udrop.com/6ogd/distribuidores.parquet
- https://www.udrop.com/6oge/paises.avro
- https://www.udrop.com/6ogi/demo.parquet

## Hive: articulos table

Sample data (articulo and precio; Componente3 has no price):

```
Componente1 35
Componente2 22
Componente3
Componente4 129
Componente5 -1
Componente6 -999
```

```sql
CREATE TABLE IF NOT EXISTS articulos (articulo String, precio Int)
COMMENT 'Detalles tabla articulos'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
```

## Sqoop imports from MySQL (movielens)

```bash
# Import the movie table as Parquet
sqoop import --connect jdbc:mysql://node1/movielens --username root --password hadoop123 \
  --target-dir /user/alumno/movies --table movie -m 1 --driver com.mysql.jdbc.Driver --as-parquetfile

# Import the movie table straight into a Hive table
sqoop import --connect jdbc:mysql://node1/movielens --username root --password hadoop123 \
  --target-dir /user/alumno/peliculas --table movie --hive-import --hive-table movies -m 2 \
  --driver com.mysql.jdbc.Driver

# Import selected columns and rows, compressed with Snappy
sqoop import --connect jdbc:mysql://node1:3306/movielens --username root --password hadoop123 \
  --table movie --columns "name, year" --where "year > 1998" --fields-terminated-by ',' \
  --target-dir /user/root/peliculas --compression-codec snappy -m 1

# Plain import as comma-separated text
sqoop import --connect jdbc:mysql://node1/movielens --username root \
  --password hadoop123 --target-dir /user/root/peliculas --table movie \
  --fields-terminated-by ',' -m 1 --driver com.mysql.jdbc.Driver
```

## Sqoop round trip: import, create target table, export

```bash
# 1. Import the movies released after 1998 as ';'-separated text
sqoop import --connect jdbc:mysql://node1/movielens --username root --password hadoop123 \
  --target-dir /user/root/pelisdel98 --table movie --where "year > 1998" \
  --fields-terminated-by ';' -m 2 --driver com.mysql.jdbc.Driver
```

```sql
-- 2. Create the target table in MySQL
mysql> create table movie98 (id INT NOT NULL PRIMARY KEY, nombre VARCHAR(75), ANIO INT);
```

```bash
# 3. Export the imported data back into MySQL
sqoop export --connect jdbc:mysql://node1/movielens --username root --password hadoop123 \
  --table movie98 --input-fields-terminated-by ';' --export-dir /user/root/pelisdel98 \
  --update-mode allowinsert -m 1
```

## Cluster restart script

```bash
#!/bin/bash
echo "Comprobando si el clusterCDH6 esta inicializado..."
sudo docker stop node1.essentials &
process_id=$!
wait $process_id
echo "Iniciando clusterCDH6..."
sudo docker start node1.essentials &
process_id=$!
wait $process_id
sleep 6
ssh root@node1 'echo -e "172.18.0.1 pasarela.essentials pasarela" >> /etc/hosts'
echo "Proceso finalizado. El cluster estara disponible en unos minutos..."
```
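After removing the block replica in the HDFS exercise above, one quick way to see how HDFS reports the damage is to run fsck against the affected file. A minimal sketch, assuming filmoteca.csv was uploaded to /user/alumno (that path is an assumption, adjust it to the real location):

```bash
# Check block health, block IDs and replica locations for the damaged file
# (HDFS path is an assumption; replace with the actual location of filmoteca.csv)
hdfs fsck /user/alumno/filmoteca.csv -files -blocks -locations
```

With a replication factor of 2, the NameNode should eventually re-replicate the missing block from the surviving copy once it detects the lost replica.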
## Flume two-agent pipeline: netcat to Avro to HDFS

Sample data:

- https://www.udrop.com/6pPb/paises.avro

### netcat-avro.conf

```properties
# Name the agent components
NetcatAvroAgent.sources = Netcat
NetcatAvroAgent.channels = FileChannel
NetcatAvroAgent.sinks = AvroSink

# Describe the netcat source on localhost:44444
NetcatAvroAgent.sources.Netcat.type = netcat
NetcatAvroAgent.sources.Netcat.bind = localhost
NetcatAvroAgent.sources.Netcat.port = 44444

# Describe the sink as Avro on localhost:10003
NetcatAvroAgent.sinks.AvroSink.type = avro
NetcatAvroAgent.sinks.AvroSink.hostname = localhost
NetcatAvroAgent.sinks.AvroSink.port = 10003

# Bind source and sink through the file channel
NetcatAvroAgent.sources.Netcat.channels = FileChannel
NetcatAvroAgent.sinks.AvroSink.channel = FileChannel
NetcatAvroAgent.channels.FileChannel.type = file
NetcatAvroAgent.channels.FileChannel.dataDirs = /home/alumno/practicas_curso/flume/data
NetcatAvroAgent.channels.FileChannel.checkpointDir = /home/alumno/practicas_curso/flume/checkpoint
```

### avro-hdfs.conf

```properties
# Name the agent components
AvroHdfsAgent.sources = AvroSource
AvroHdfsAgent.channels = MemChannel
AvroHdfsAgent.sinks = HdfsSink

# Describe the source as Avro on localhost:10003
AvroHdfsAgent.sources.AvroSource.type = avro
AvroHdfsAgent.sources.AvroSource.bind = localhost
AvroHdfsAgent.sources.AvroSource.port = 10003

# Describe the HDFS sink
AvroHdfsAgent.sinks.HdfsSink.type = hdfs
AvroHdfsAgent.sinks.HdfsSink.hdfs.path = /user/alumno/flume/avro_data/
AvroHdfsAgent.sinks.HdfsSink.hdfs.fileType = DataStream
AvroHdfsAgent.sinks.HdfsSink.hdfs.writeFormat = Text

# Bind source and sink
AvroHdfsAgent.sources.AvroSource.channels = MemChannel
AvroHdfsAgent.sinks.HdfsSink.channel = MemChannel
AvroHdfsAgent.channels.MemChannel.type = memory
```

Start the downstream agent first, then the upstream one:

```bash
flume-ng agent --conf /etc/flume-ng/conf --conf-file avro-hdfs.conf --name AvroHdfsAgent -Dflume.root.logger=INFO,console
flume-ng agent --conf /etc/flume-ng/conf --conf-file netcat-avro.conf --name NetcatAvroAgent -Dflume.root.logger=INFO,console
```

## HBase practice

```
create 'articulos', {NAME=>'caracteristicas', VERSIONS=>3}, {NAME=>'datoseconomicos', VERSIONS=>2}

hbase(main):005:0> put 'articulos', '001', 'caracteristicas:descripcion', 'Frigorifico'
Took 0.0433 seconds
hbase(main):006:0> put 'articulos', '001', 'caracteristicas:gama', 'Electrodomesticos'
Took 0.0125 seconds
hbase(main):007:0> put 'articulos', '002', 'caracteristicas:gama', 'Electrodomesticos'
Took 0.0413 seconds
hbase(main):008:0> put 'articulos', '002', 'caracteristicas:descripcion', 'Lavadora'
Took 0.0268 seconds
hbase(main):009:0> put 'articulos', '001', 'datoseconomicos:preciocoste', '280'
Took 0.0120 seconds
hbase(main):010:0> put 'articulos', '001', 'datoseconomicos:precioventa', '450'
Took 0.0122 seconds
hbase(main):011:0> put 'articulos', '002', 'datoseconomicos:preciocoste', '320'
Took 0.0115 seconds
hbase(main):012:0> put 'articulos', '002', 'datoseconomicos:precioventa', '510'
Took 0.0182 seconds
```

## Practice 3: Sqoop import into HBase

```bash
sqoop import --connect jdbc:mysql://localhost/movielens --username root --password hadoop123 \
  --table genre --hbase-create-table --hbase-table generos --column-family datosfilms --hbase-row-key id
```
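To confirm that the Sqoop job above actually populated the HBase table before exposing it in Hive, one option is to pipe a scan into the HBase shell. A minimal sketch (the LIMIT value is arbitrary):

```bash
# List a few rows of the 'generos' table written by Sqoop
echo "scan 'generos', {LIMIT => 5}" | hbase shell
```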
Expose the HBase table in Hive through the HBase storage handler:

```sql
create external table hbase_generos (key string, ident string, name string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ("hbase.columns.mapping" = ":key, datosfilms:ident, datosfilms:name")
tblproperties ("hbase.table.name" = "generos");
```

## Kudu and Impala

Check the health of the Kudu cluster:

```bash
sudo -u kudu kudu cluster ksck node1
```

```sql
CREATE TABLE clientes (
  idcliente BIGINT,
  nombre STRING,
  apellido STRING,
  telefono INT,
  email STRING,
  PRIMARY KEY (idcliente)
)
PARTITION BY HASH PARTITIONS 8
STORED AS KUDU
TBLPROPERTIES ('kudu.num_tablet_replicas' = '1');
```

Kudu metrics endpoint:

http://node1:8050/metrics?include_schema=1&metrics=connections_accepted

```sql
INSERT INTO clientes VALUES (1, "Pedro", "Picapiedra", 654321987, "p.picapiedra@flinstone.com");
INSERT INTO clientes VALUES (2, "Pablo", "Marmol", 654987654, "p.marmol@flinstone.com");
INSERT INTO clientes VALUES (3, "Vilma", "Picapiedra", 651234567, "v.picapiedra@flinstone.com");
INSERT INTO clientes VALUES (4, "Betty", "Marmol", 659012345, "b.marmol@flinstone.com");

DELETE FROM clientes WHERE idcliente = 4;
ALTER TABLE clientes RENAME TO newcustomers;
```

## Flume agent launch commands

```bash
flume-ng agent --conf /etc/flume-ng/conf --conf-file seqgen.conf --name SeqGenAgent \
  -Dflume.root.logger=INFO,console
flume-ng agent --conf /etc/flume-ng/conf --conf-file avro-hdfs.conf --name AvroHdfsAgent -Dflume.root.logger=INFO,console
```

Log output once the Avro source is listening:

```
[INFO - org.apache.flume.source.AvroSource.start(AvroSource.java:223)] Avro source AvroSource started.
```

```bash
flume-ng agent --conf /etc/flume-ng/conf --conf-file netcat-avro.conf --name NetcatAvroAgent -Dflume.root.logger=INFO,console
```

## Flume multi-source agent (multiagent-avro.conf)

Sample data:

- https://www.udrop.com/6rvx/constitucion.txt
- https://www.udrop.com/6rvy/filmoteca.csv

```properties
# Name the three sources, three channels and three sinks
MultiAgent.sources = Netcat Spooldir Exec
MultiAgent.channels = FileChannel MemChannel1 MemChannel2
MultiAgent.sinks = AvroSink1 AvroSink2 AvroSink3

# Describe the first source: netcat
MultiAgent.sources.Netcat.type = netcat
MultiAgent.sources.Netcat.bind = localhost
MultiAgent.sources.Netcat.port = 10004

# Describe the second source: spooldir
MultiAgent.sources.Spooldir.type = spooldir
MultiAgent.sources.Spooldir.spoolDir = /home/alumno/practicas_curso/flume/spoolDir
MultiAgent.sources.Spooldir.deletePolicy = immediate

# Describe the third source: exec
MultiAgent.sources.Exec.type = exec
MultiAgent.sources.Exec.command = cat /home/alumno/practicas_curso/datos/constitucion.txt

# Describe the three sinks as Avro on localhost:10003
MultiAgent.sinks.AvroSink1.type = avro
MultiAgent.sinks.AvroSink1.hostname = localhost
MultiAgent.sinks.AvroSink1.port = 10003
MultiAgent.sinks.AvroSink2.type = avro
MultiAgent.sinks.AvroSink2.hostname = localhost
MultiAgent.sinks.AvroSink2.port = 10003
MultiAgent.sinks.AvroSink3.type = avro
MultiAgent.sinks.AvroSink3.hostname = localhost
MultiAgent.sinks.AvroSink3.port = 10003

# Describe the channels
MultiAgent.channels.FileChannel.type = file
MultiAgent.channels.FileChannel.dataDirs = /home/alumno/practicas_curso/data
MultiAgent.channels.FileChannel.checkpointDir = /home/alumno/practicas_curso/flume/checkpoint
MultiAgent.channels.MemChannel1.type = memory
MultiAgent.channels.MemChannel2.type = memory

# Bind sources and sinks to their channels
MultiAgent.sources.Netcat.channels = FileChannel
MultiAgent.sources.Spooldir.channels = MemChannel1
MultiAgent.sources.Exec.channels = MemChannel2
MultiAgent.sinks.AvroSink1.channel = FileChannel
MultiAgent.sinks.AvroSink2.channel = MemChannel1
MultiAgent.sinks.AvroSink3.channel = MemChannel2
```
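Once the two agents below are running, the three sources can be exercised by hand. A minimal sketch, assuming filmoteca.csv was downloaded to the datos directory (that local path is an assumption):

```bash
# Drop a file into the directory watched by the Spooldir source
# (with deletePolicy = immediate the file is removed after ingestion)
cp /home/alumno/practicas_curso/datos/filmoteca.csv /home/alumno/practicas_curso/flume/spoolDir/

# Send one event to the Netcat source on port 10004
# (depending on the nc build you may need to Ctrl+C afterwards)
echo "netcat test event" | nc localhost 10004

# The Exec source replays constitucion.txt on its own each time the agent starts.
```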
## Running the multi-source pipeline

```bash
flume-ng agent --conf /etc/flume-ng/conf --conf-file avro-hdfs.conf \
  --name AvroHdfsAgent -Dflume.root.logger=INFO,console
flume-ng agent --conf /etc/flume-ng/conf --conf-file multiagent-avro.conf --name MultiAgent -Dflume.root.logger=INFO,console
```

## Example script for working with ps and kill

```bash
#!/bin/bash
# Example program for practising with ps and kill
fec=`date +%d-%m`
mkdir -p monitor1/$fec
nfic=1
while [ $nfic -lt 20 ]
do
  min=`date +%M`
  echo "Rellenando el fichero $min.$nfic"
  cont=0
  while [ $cont -lt 200 ]
  do
    echo monitor1: `date +%H:%M` linea $cont >> monitor1/$fec/monitor1.$min.$nfic
    sleep 1
    cont=`expr $cont + 1`
  done
  sleep 5
  nfic=`expr $nfic + 1`
done
echo "Fin del monitor1"
```

## Kafka topics, producers and consumers

```bash
# Create a topic and attach a console producer to it
kafka-topics --create --zookeeper node1:2181 --replication-factor 1 --partitions 2 --topic cola1 --if-not-exists
kafka-console-producer --broker-list node1:9092 --topic cola1

# Prepare a file of messages
echo -e "Mensaje1\nMensaje2\nMensaje3\nMensaje4" >> mensajes.txt

# Create a replicated topic, consume it with a consumer group and feed it from the file
kafka-topics --create --zookeeper node1:2181 --replication-factor 2 --partitions 2 --topic topic03 --if-not-exists
kafka-console-consumer --bootstrap-server node1:9092 --topic topic03 --group gconsumer01
kafka-console-producer --broker-list node1:9092 --topic topic03 < mensajes.txt
kafka-consumer-groups --bootstrap-server node1:9092 --list
```

## Message-generation script

```bash
#!/bin/bash
# Script that writes generated messages into a file
fec=`date +%d-%m`
min=`date +%M`
echo "Rellenando el fichero kafka_$fec/kafka$fec.$min.out"
cont=0
while [ $cont -lt 200 ]
do
  echo "mensaje $cont: `date +%H:%M`" >> mensajes/kafka_$fec/kafka$fec.$min.out
  sleep 1
  cont=`expr $cont + 1`
done
echo "Fin de la generacion de mensajes"
```

## Flume and Kafka pipeline

### SpoolKafka.conf

```properties
# Name the components on this agent
agent1.sources = src
agent1.sinks = snk
agent1.channels = chn

# Configure the source
agent1.sources.src.type = spooldir
# Folder on the local filesystem
agent1.sources.src.spoolDir = /home/alumno/weblogs
agent1.sources.src.channels = chn

# Use a channel which buffers events in memory
agent1.channels.chn.type = memory
# The maximum number of events stored in the channel
agent1.channels.chn.capacity = 100000
# The maximum number of events the channel will take from a source or give to a sink per transaction
agent1.channels.chn.transactionCapacity = 1000

# Configure the Kafka sink
agent1.sinks.snk.type = org.apache.flume.sink.kafka.KafkaSink
agent1.sinks.snk.topic = weblogs
agent1.sinks.snk.brokerList = node1:9092
agent1.sinks.snk.batchSize = 20
agent1.sinks.snk.channel = chn
```

### KafkaHdfs.conf

```properties
# Name the components on this agent
agent2.sources = kafka-source
agent2.channels = memory-channel
agent2.sinks = hdfs-sink

# Kafka source consuming the weblogs topic
agent2.sources.kafka-source.type = org.apache.flume.source.kafka.KafkaSource
agent2.sources.kafka-source.zookeeperConnect = node1:2181
agent2.sources.kafka-source.topic = weblogs
agent2.sources.kafka-source.groupId = flume
agent2.sources.kafka-source.channels = memory-channel
agent2.sources.kafka-source.kafka.consumer.timeout.ms = 100

agent2.channels.memory-channel.type = memory
agent2.channels.memory-channel.capacity = 10000
agent2.channels.memory-channel.transactionCapacity = 1000

# HDFS sink writing into one directory per year-month
agent2.sinks.hdfs-sink.type = hdfs
agent2.sinks.hdfs-sink.hdfs.path = hdfs://node1:8020/user/alumno/logs/%y-%m
agent2.sinks.hdfs-sink.hdfs.rollInterval = 5
agent2.sinks.hdfs-sink.hdfs.rollSize = 0
agent2.sinks.hdfs-sink.hdfs.rollCount = 0
agent2.sinks.hdfs-sink.hdfs.fileType = DataStream
agent2.sinks.hdfs-sink.channel = memory-channel
```
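A possible way to run the spooldir to Kafka to HDFS flow defined by the two configurations above, mirroring the earlier flume-ng invocations (each agent in its own terminal; the paths come from the configs):

```bash
# Terminal 1: Kafka -> HDFS agent
flume-ng agent --conf /etc/flume-ng/conf --conf-file KafkaHdfs.conf --name agent2 -Dflume.root.logger=INFO,console

# Terminal 2: spooldir -> Kafka agent
flume-ng agent --conf /etc/flume-ng/conf --conf-file SpoolKafka.conf --name agent1 -Dflume.root.logger=INFO,console

# Copy some log files into /home/alumno/weblogs, then check the result in HDFS
hdfs dfs -ls /user/alumno/logs
```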