# Code exchange
===== ejemplo_flume.conf ====================================
# Declare the agent components
agente.sources = sr1
agente.channels = chn1
agente.sinks = snk1
# Source configuration
agente.sources.sr1.type = netcat
agente.sources.sr1.bind = localhost
agente.sources.sr1.port = 55555
agente.sources.sr1.channels = chn1
# Channel configuration
agente.channels.chn1.type = memory
# Maximum number of events stored in the channel
agente.channels.chn1.capacity = 1000
# Maximum number of events the channel will take from the source per transaction
agente.channels.chn1.transactionCapacity = 100
# Sink configuration
agente.sinks.snk1.type = hdfs
agente.sinks.snk1.hdfs.path = hdfs://node1:8020/user/alumno/flume-puerto
# DataStream: the output file will not be compressed
agente.sinks.snk1.hdfs.fileType = DataStream
agente.sinks.snk1.channel = chn1
sudo flume-ng agent --conf /etc/flume-ng/conf --conf-file ejemplo_flume.conf --name agente -Dflume.root.logger=INFO,console
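Once the agent is running, the pipeline can be tested by sending a few lines to the netcat source and listing the sink's output directory in HDFS (a minimal check; the generated file names will vary):
# Send two test events to the netcat source on port 55555
echo -e "test event 1\ntest event 2" | nc localhost 55555
# Verify that the HDFS sink is writing files under the configured path
hdfs dfs -ls /user/alumno/flume-puerto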
https://file.io/OEHdVoQrwmHb
-rw-r--r-- 2 alumno supergroup 2893226 2022-02-21 00:39 filmoteca.csv
sudo rm /containers/dfs/dn/current/BP-1483051556-172.18.0.2-1598871810253/current/finalized/subdir0/subdir25/blk_1073748451
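Before and after removing the replica, hdfs fsck shows which blocks back the file and where they live; a sketch, assuming filmoteca.csv sits in the user's HDFS home directory:
# List the blocks and datanode locations of the file (run again after deleting the replica)
hdfs fsck /user/alumno/filmoteca.csv -files -blocks -locations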
https://www.udrop.com/6ogd/distribuidores.parquet
https://www.udrop.com/6oge/paises.avro
https://www.udrop.com/6ogi/demo.parquet
Componente1 35
Componente2 22
Componente3
Componente4 129
Componente5 -1
Componente6 -999
CREATE TABLE IF NOT EXISTS articulos (articulo String, precio Int)
COMMENT 'Detalles tabla articulos'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
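The Componente lines above can be used as sample rows for this table (tab-separated, one missing price and two sentinel values); a sketch for loading them, assuming they are saved locally as /home/alumno/articulos.txt:
-- Load the sample file and check how the missing price comes back (it should be NULL)
LOAD DATA LOCAL INPATH '/home/alumno/articulos.txt' OVERWRITE INTO TABLE articulos;
SELECT * FROM articulos;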
sqoop import --connect jdbc:mysql://node1/movielens --username root --password hadoop123 \
--target-dir /user/alumno/movies --table movie -m 1 --driver com.mysql.jdbc.Driver --as-parquetfile
sqoop import --connect jdbc:mysql://node1/movielens --username root --password hadoop123 \
--target-dir /user/alumno/peliculas --table movie --hive-import --hive-table movies -m 2 \
--driver com.mysql.jdbc.Driver
sqoop import --connect jdbc:mysql://node1:3306/movielens --username root --password hadoop123 \
--table movie --columns "name, year" --where "year > 1998" --fields-terminated-by ',' \
--target-dir /user/root/peliculas --compression-codec snappy -m 1
sqoop import --connect jdbc:mysql://node1/movielens --username root \
--password hadoop123 --target-dir /user/root/peliculas --table movie \
--fields-terminated-by ',' -m 1 --driver com.mysql.jdbc.Driver
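After any of these imports the result can be checked straight from HDFS; for example, for the last one (part file names may differ):
# List the generated part files and peek at the comma-delimited records
hdfs dfs -ls /user/root/peliculas
hdfs dfs -cat /user/root/peliculas/part-m-00000 | head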
## 1
sqoop import --connect jdbc:mysql://node1/movielens --username root --password hadoop123 --target-dir /user/root/pelisdel98 --table movie --where "year > 1998" --fields-terminated-by ';' -m 2 --driver com.mysql.jdbc.Driver
## 2
mysql> create table movie98 (id INT NOT NULL PRIMARY KEY, nombre VARCHAR(75), ANIO INT);
## 3
sqoop export --connect jdbc:mysql://node1/movielens --username root --password hadoop123 --table movie98 --input-fields-terminated-by ';' --export-dir /user/root/pelisdel98 --update-mode allowinsert -m 1
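To confirm the export, the row count in MySQL can be compared with the records under /user/root/pelisdel98 (a quick check):
mysql> select count(*) from movie98;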
#!/bin/bash
echo "Comprobando si el clusterCDH6 esta inicializado..."
sudo docker stop node1.essentials & process_id=$!
wait $process_id
echo "Iniciando clusterCDH6..."
sudo docker start node1.essentials & process_id=$!
wait $process_id
sleep 6
ssh root@node1 'echo -e "172.18.0.1 pasarela.essentials pasarela" >> /etc/hosts'
echo "Proceso finalizado. El cluster estara disponible en unos minutos..."
========================================
https://www.udrop.com/6pPb/paises.avro
==========================================
===== netcat-avro.conf ====================================
# Name the agent components
NetcatAvroAgent.sources = Netcat
NetcatAvroAgent.channels = FileChannel
NetcatAvroAgent.sinks = AvroSink
# Describe the netcat source on localhost:44444
NetcatAvroAgent.sources.Netcat.type = netcat
NetcatAvroAgent.sources.Netcat.bind = localhost
NetcatAvroAgent.sources.Netcat.port = 44444
# Describe the sink as Avro on localhost:10003
NetcatAvroAgent.sinks.AvroSink.type = avro
NetcatAvroAgent.sinks.AvroSink.hostname = localhost
NetcatAvroAgent.sinks.AvroSink.port = 10003
# Wire the source and the sink through the file channel
NetcatAvroAgent.sources.Netcat.channels = FileChannel
NetcatAvroAgent.sinks.AvroSink.channel = FileChannel
NetcatAvroAgent.channels.FileChannel.type = file
NetcatAvroAgent.channels.FileChannel.dataDir = /home/alumno/practicas_curso/flume/data
NetcatAvroAgent.channels.FileChannel.checkpointDir = /home/alumno/practicas_curso/flume/checkpoint
===== avro-hdfs.conf ====================================
# Name the agent components
AvroHdfsAgent.sources = AvroSource
AvroHdfsAgent.channels = MemChannel
AvroHdfsAgent.sinks = HdfsSink
# Describe the source as Avro on localhost:10003
AvroHdfsAgent.sources.AvroSource.type = avro
AvroHdfsAgent.sources.AvroSource.bind = localhost
AvroHdfsAgent.sources.AvroSource.port = 10003
# Describe the HDFS sink
AvroHdfsAgent.sinks.HdfsSink.type = hdfs
AvroHdfsAgent.sinks.HdfsSink.hdfs.path = /user/alumno/flume/avro_data/
AvroHdfsAgent.sinks.HdfsSink.hdfs.fileType = DataStream
AvroHdfsAgent.sinks.HdfsSink.hdfs.writeFormat = Text
# Wire the source and the sink
AvroHdfsAgent.sources.AvroSource.channels = MemChannel
AvroHdfsAgent.sinks.HdfsSink.channel = MemChannel
AvroHdfsAgent.channels.MemChannel.type = memory
===============================================
flume-ng agent --conf /etc/flume-ng/conf --conf-file avro-hdfs.conf --name AvroHdfsAgent -Dflume.root.logger=INFO,console
flume-ng agent --conf /etc/flume-ng/conf --conf-file netcat-avro.conf --name NetcatAvroAgent -Dflume.root.logger=INFO,console
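With both agents up (avro-hdfs first, then netcat-avro), the chain can be exercised end to end; a sketch, with file names under avro_data varying:
# Send test events to the netcat source on port 44444
echo -e "hello flume\nsecond event" | nc localhost 44444
# The Avro source forwards them to the HDFS sink
hdfs dfs -ls /user/alumno/flume/avro_data/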
===================================================
HBase practice
create 'articulos', {NAME=>'caracteristicas', VERSIONS =>3}, {NAME=>'datoseconomicos', VERSIONS=>2}
hbase(main):005:0> put 'articulos', '001', 'caracteristicas:descripcion', 'Frigorifico'
Took 0.0433 seconds
hbase(main):006:0> put 'articulos', '001', 'caracteristicas:gama', 'Electrodomesticos'
Took 0.0125 seconds
hbase(main):007:0> put 'articulos', '002', 'caracteristicas:gama', 'Electrodomesticos'
Took 0.0413 seconds
hbase(main):008:0> put 'articulos', '002', 'caracteristicas:descripcion', 'Lavadora'
Took 0.0268 seconds
hbase(main):009:0> put 'articulos', '001', 'datoseconomicos:preciocoste', '280'
Took 0.0120 seconds
hbase(main):010:0> put 'articulos', '001', 'datoseconomicos:precioventa', '450'
Took 0.0122 seconds
hbase(main):011:0> put 'articulos', '002', 'datoseconomicos:preciocoste', '320'
Took 0.0115 seconds
hbase(main):012:0> put 'articulos', '002', 'datoseconomicos:precioventa', '510'
Took 0.0182 seconds
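The inserted cells can be read back from the HBase shell, either one row at a time or with a full scan:
# Read a single row, then scan the whole table
get 'articulos', '001'
scan 'articulos'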
===========================================
Practice 3
sqoop import --connect jdbc:mysql://localhost/movielens --username root --password hadoop123 --table genre --hbase-create-table --hbase-table generos --column-family datosfilms --hbase-row-key id
create external table hbase_generos
(key string, ident string, name string)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties ("hbase.columns.mapping" = ":key,datosfilms:ident,datosfilms:name")
tblproperties("hbase.table.name" = "generos");
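Once the external table exists, the genres imported into HBase can be queried from Hive, for example:
select * from hbase_generos limit 5;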
sudo -u kudu kudu cluster ksck node1
CREATE TABLE clientes
(
idcliente BIGINT,
nombre STRING,
apellido STRING,
telefono INT,
email STRING,
PRIMARY KEY(idcliente)
)
PARTITION BY HASH PARTITIONS 8
STORED AS KUDU
TBLPROPERTIES ('kudu.num_tablet_replicas' = '1');
http://node1:8050/metrics?include_schema=1&metrics=connections_accepted
INSERT INTO clientes VALUES (1, "Pedro", "Picapiedra", 654321987, "p.picapiedra@flinstone.com");
INSERT INTO clientes VALUES (2, "Pablo", "Marmol", 654987654, "p.marmol@flinstone.com");
INSERT INTO clientes VALUES (3, "Vilma", "Picapiedra", 651234567, "v.picapiedra@flinstone.com");
INSERT INTO clientes VALUES (4, "Betty", "Marmol", 659012345, "b.marmol@flinstone.com");
delete from clientes where idcliente = 4;
alter table clientes rename to newcustomers;
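A quick check from impala-shell that the delete and the rename took effect (a sketch):
SHOW TABLES;
-- Clients 1 to 3 should still be there after the delete
SELECT * FROM newcustomers;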
======================================================================
flume-ng agent --conf /etc/flume-ng/conf --conf-file seqgen.conf --name SeqGenAgent \
-Dflume.root.logger=INFO,console
flume-ng agent --conf /etc/flume-ng/conf --conf-file avro-hdfs.conf --name AvroHdfsAgent -Dflume.root.logger=INFO,console
[INFO - org.apache.flume.source.AvroSource.start(AvroSource.java:223)] Avro source AvroSource started.
flume-ng agent --conf /etc/flume-ng/conf --conf-file netcat-avro.conf --name NetcatAvroAgent -Dflume.root.logger=INFO,console
===============================================
https://www.udrop.com/6rvx/constitucion.txt
https://www.udrop.com/6rvy/filmoteca.csv
================================================
===== multiagent-avro.conf ====================================
# Name the three sources, the three channels and the three sinks
MultiAgent.sources = Netcat Spooldir Exec
MultiAgent.channels = FileChannel MemChannel1 MemChannel2
MultiAgent.sinks = AvroSink1 AvroSink2 AvroSink3
# Describe the first source, Netcat
MultiAgent.sources.Netcat.type = netcat
MultiAgent.sources.Netcat.bind = localhost
MultiAgent.sources.Netcat.port = 10004
# Describe the second source, Spooldir
MultiAgent.sources.Spooldir.type = spooldir
MultiAgent.sources.Spooldir.spoolDir = /home/alumno/practicas_curso/flume/spoolDir
MultiAgent.sources.Spooldir.deletePolicy = immediate
# Describe the third source, Exec
MultiAgent.sources.Exec.type = exec
MultiAgent.sources.Exec.command = cat /home/alumno/practicas_curso/datos/constitucion.txt
# Describe the three sinks as Avro on localhost:10003
MultiAgent.sinks.AvroSink1.type = avro
MultiAgent.sinks.AvroSink1.hostname = localhost
MultiAgent.sinks.AvroSink1.port = 10003
MultiAgent.sinks.AvroSink2.type = avro
MultiAgent.sinks.AvroSink2.hostname = localhost
MultiAgent.sinks.AvroSink2.port = 10003
MultiAgent.sinks.AvroSink3.type = avro
MultiAgent.sinks.AvroSink3.hostname = localhost
MultiAgent.sinks.AvroSink3.port = 10003
# Describe the channels
MultiAgent.channels.FileChannel.type = file
MultiAgent.channels.FileChannel.dataDir = /home/alumno/practicas_curso/data
MultiAgent.channels.FileChannel.checkpointDir = /home/alumno/practicas_curso/flume/checkpoint
MultiAgent.channels.MemChannel1.type = memory
MultiAgent.channels.MemChannel2.type = memory
# Wire the sources and sinks to their channels
MultiAgent.sources.Netcat.channels = FileChannel
MultiAgent.sources.Spooldir.channels = MemChannel1
MultiAgent.sources.Exec.channels = MemChannel2
MultiAgent.sinks.AvroSink1.channel = FileChannel
MultiAgent.sinks.AvroSink2.channel = MemChannel1
MultiAgent.sinks.AvroSink3.channel = MemChannel2
===================================
flume-ng agent --conf /etc/flume-ng/conf --conf-file avro-hdfs.conf \
--name AvroHdfsAgent -Dflume.root.logger=INFO,console
flume-ng agent --conf /etc/flume-ng/conf --conf-file multiagent-avro.conf --name MultiAgent -Dflume.root.logger=INFO,console
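With AvroHdfsAgent already listening on port 10003, each of the three sources can be fed separately; a sketch, assuming filmoteca.csv was downloaded next to constitucion.txt:
# Feed the Netcat source
echo "event via netcat" | nc localhost 10004
# Feed the Spooldir source by dropping a file into the spooled directory
cp /home/alumno/practicas_curso/datos/filmoteca.csv /home/alumno/practicas_curso/flume/spoolDir/
# All three flows end up in the same HDFS directory
hdfs dfs -ls /user/alumno/flume/avro_data/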
======================================
#!/bin/bash
# Example program for practising with ps and kill
fec=`date +%d-%m`
mkdir -p monitor1/$fec
nfic=1
while [ $nfic -lt 20 ]
do
    min=`date +%M`
    echo "Filling file $min.$nfic"
    cont=0
    while [ $cont -lt 200 ]
    do
        echo monitor1: `date +%H:%M` line $cont >> monitor1/$fec/monitor1.$min.$nfic
        sleep 1
        cont=`expr $cont + 1`
    done
    sleep 5
    nfic=`expr $nfic + 1`
done
echo "monitor1 finished"
kafka-topics --create --zookeeper node1:2181 --replication-factor 1 --partitions 2 --topic cola1 --if-not-exists
kafka-console-producer --broker-list node1:9092 --topic cola1
echo -e "Mensaje1\nMensaje2\nMensaje3\nMensaje4" >> mensajes.txt
kafka-topics --create --zookeeper node1:2181 --replication-factor 2 --partitions 2 --topic topic03 --if-not-exists
kafka-console-consumer --bootstrap-server node1:9092 --topic topic03 --group gconsumer01
kafka-console-producer --broker-list node1:9092 --topic topic03 < mensajes.txt
kafka-consumer-groups --bootstrap-server node1:9092 --list
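The distribution of messages over the two partitions and the group's read offsets can be inspected with the describe subcommands:
kafka-topics --describe --zookeeper node1:2181 --topic topic03
kafka-consumer-groups --bootstrap-server node1:9092 --describe --group gconsumer01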
===============================================
#!/bin/bash
# Message generation script: writes timestamped messages into a file
fec=`date +%d-%m`
min=`date +%M`
# Make sure the output directory exists before appending to the file
mkdir -p mensajes/kafka_$fec
echo "Filling file kafka_$fec/kafka$fec.$min.out"
cont=0
while [ $cont -lt 200 ]
do
    echo "mensaje $cont: `date +%H:%M`" >> mensajes/kafka_$fec/kafka$fec.$min.out
    sleep 1
    cont=`expr $cont + 1`
done
echo "Message generation finished"
===============================================
# SpoolKafka.conf agent
# Name the components on this agent
agent1.sources = src
agent1.sinks = snk
agent1.channels = chn
# Configure the source
agent1.sources.src.type = spooldir
#Folder in Linux
agent1.sources.src.spoolDir = /home/alumno/weblogs
agent1.sources.src.channels = chn
# Use a channel which buffers events in memory
agent1.channels.chn.type = memory
#The maximum number of events stored in the channel
agent1.channels.chn.capacity = 100000
#The maximum number of events the channel will take from a source or give to a sink per transaction
agent1.channels.chn.transactionCapacity = 1000
# Configure the sink
agent1.sinks.snk.type = org.apache.flume.sink.kafka.KafkaSink
agent1.sinks.snk.topic = weblogs
agent1.sinks.snk.brokerList = node1:9092
agent1.sinks.snk.batchSize = 20
agent1.sinks.snk.channel = chn
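Assuming the configuration above is saved as SpoolKafka.conf, agent1 is started like the earlier examples and the topic can be watched with a console consumer:
flume-ng agent --conf /etc/flume-ng/conf --conf-file SpoolKafka.conf --name agent1 -Dflume.root.logger=INFO,console
kafka-console-consumer --bootstrap-server node1:9092 --topic weblogs --from-beginning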
==============================================
# KafkaHdfs.conf agent
# Name the components on this agent
agent2.sources = kafka-source
agent2.channels = memory-channel
agent2.sinks = hdfs-sink
# Configure the Kafka source
agent2.sources.kafka-source.type = org.apache.flume.source.kafka.KafkaSource
agent2.sources.kafka-source.zookeeperConnect = node1:2181
agent2.sources.kafka-source.topic = weblogs
agent2.sources.kafka-source.groupId = flume
agent2.sources.kafka-source.channels = memory-channel
agent2.sources.kafka-source.kafka.consumer.timeout.ms = 100
# Memory channel buffering events between Kafka and HDFS
agent2.channels.memory-channel.type = memory
agent2.channels.memory-channel.capacity = 10000
agent2.channels.memory-channel.transactionCapacity = 1000
# Configure the HDFS sink (rolls a new file every 5 seconds)
agent2.sinks.hdfs-sink.type = hdfs
agent2.sinks.hdfs-sink.hdfs.path = hdfs://node1:8020/user/alumno/logs/%y-%m
agent2.sinks.hdfs-sink.hdfs.rollInterval = 5
agent2.sinks.hdfs-sink.hdfs.rollSize = 0
agent2.sinks.hdfs-sink.hdfs.rollCount = 0
agent2.sinks.hdfs-sink.hdfs.fileType = DataStream
agent2.sinks.hdfs-sink.channel = memory-channel
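Likewise, assuming this configuration is saved as KafkaHdfs.conf, agent2 reads the weblogs topic and its output can be checked in HDFS:
flume-ng agent --conf /etc/flume-ng/conf --conf-file KafkaHdfs.conf --name agent2 -Dflume.root.logger=INFO,console
# Files roll every 5 seconds into a year-month directory
hdfs dfs -ls /user/alumno/logs/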