---
title: CODATA - Unix Shell
tags: CODATA
lang: pt-br
---
# Init git
```bash=
# Create a working directory for the exercises and move into it.
mkdir shell-hands-on
cd shell-hands-on
# Tell git which name and e-mail to record on commits (--global: for every repository of this user).
git config --global user.name 'Raphael Cobe'
git config --global user.email 'raphaelmcobe@gmail.com'
# Turn the current directory into a git repository.
git init
# -a also lists hidden entries, so the new .git directory shows up;
# -F additionally appends a type marker (e.g. "/" after directories).
ls -a
ls -a -F
```
# Download File
```bash=
# Two ways to read wget's documentation: the built-in summary and the manual page.
wget --help
man wget
# Download the jan2017articles.csv data set used in the following sections.
wget 'https://gist.githubusercontent.com/raphaelmcobe/9c26d6484bc4e72ef4d9b71173ef6acd/raw/b4db858dcfd094d530f10c3b978db2d8248be5a9/jan2017articles.csv'
```
# Inspect the file's contents
```bash=
# Show the first two lines of the file ...
head -n 2 jan2017articles.csv
# ... and the last five.
tail -n 5 jan2017articles.csv
# Count how many lines the file has.
wc -l jan2017articles.csv
```
# Select only the Title
```bash=
# Look at the first lines again to locate the title column.
head -n 2 jan2017articles.csv
# Keep only the 4th comma-separated field of every line and save it to titles.txt.
# NOTE(review): cut splits on every comma, so a quoted field that itself contains
# a comma would shift the columns — confirm the data has none.
cut -d ',' -f 4 jan2017articles.csv > titles.txt
# Check the repository state before staging, after staging and after committing.
git status
git add titles.txt
git status
git commit -m 'Adding the List of titles'
git status
```
# Order the articles by the number of words
```bash=
man sort
head -n 2 jan2017articles.csv
# -t sets the field separator; -k 8,8 limits the sort key to field 8 alone
# (a bare -k8 makes the key run from field 8 to the end of the line).
# Without -n the key is compared as text, so e.g. "100" sorts before "20".
sort -t ',' -k 8,8 jan2017articles.csv
sort -t ',' -k 8,8 jan2017articles.csv | head -n 5
# We need to use -n to sort numerically; first look at the raw values of field 8.
# NOTE(review): if the file starts with a header row, its non-numeric field 8
# counts as 0 under -n and therefore sorts first — confirm against the data.
cut -d ',' -f 8 jan2017articles.csv
sort -t ',' -k 8,8 -n jan2017articles.csv
sort -t ',' -k 8,8 -n jan2017articles.csv | head -n 5
# The sort is ascending, so the last line holds the largest value.
sort -t ',' -k 8,8 -n jan2017articles.csv | tail -n 1
```
# Selecting the articles from January 17th
```bash=
# Keep every line containing the date string and save the matches to a file.
grep '17 Jan 2017' jan2017articles.csv > jan-17-articles.txt
# Record the new file in the repository.
git add jan-17-articles.txt
git commit -m 'jan 17 articles'
# Show the history, one line per commit.
git log --oneline
# -i makes the match case-insensitive ("Linux", "linux", "LINUX", ...).
grep -i 'linux' jan2017articles.csv
```
# Order authors by the number of articles
```bash=
head -n 2 jan2017articles.csv
# Field 3 is used as the author column in this section.
cut -d ',' -f 3 jan2017articles.csv
# uniq only collapses adjacent duplicate lines, so the input has to be sorted first.
cut -d ',' -f 3 jan2017articles.csv | sort
man uniq
# -c prefixes every distinct line with the number of times it occurred.
cut -d ',' -f 3 jan2017articles.csv | sort | uniq -c
# Order by that count (numerically, ascending).
cut -d ',' -f 3 jan2017articles.csv | sort | uniq -c | sort -n
# Select top 5 authors
# (the option must be written "-n": "sort - n" would read stdin plus a file named "n").
cut -d ',' -f 3 jan2017articles.csv | sort | uniq -c | sort -n | tail -n 5 > top5-authors.txt
git add top5-authors.txt
git commit -m "Adding the list of top 5 authors"
git log
git log --oneline
git log --oneline --shortstat
# Only show authors who published more than one article
man uniq
# -d prints only the lines that occur more than once.
cut -d ',' -f 3 jan2017articles.csv | sort | uniq -c -d
# Sort by the count numerically; a plain text sort of the padded counts depends on the locale.
cut -d ',' -f 3 jan2017articles.csv | sort | uniq -c -d | sort -n
```
# Discover the words that appear most frequently in the titles
```bash=
head -n 2 jan2017articles.csv
# The pipeline is built one stage at a time.
# 1. Extract field 4 (the titles).
cut -d ',' -f 4 jan2017articles.csv
# 2. Convert upper-case letters to lower case so that case differences do not split the counts.
cut -d ',' -f 4 jan2017articles.csv | tr '[:upper:]' '[:lower:]'
# 3. Replace every space with a newline: one word per line.
cut -d ',' -f 4 jan2017articles.csv | tr '[:upper:]' '[:lower:]' | tr ' ' '\n'
# 4. Sort so that identical words end up next to each other.
cut -d ',' -f 4 jan2017articles.csv | tr '[:upper:]' '[:lower:]' | tr ' ' '\n' | sort
# 5. Collapse duplicates and prefix each word with its number of occurrences.
cut -d ',' -f 4 jan2017articles.csv | tr '[:upper:]' '[:lower:]' | tr ' ' '\n' | sort | uniq -c
# 6. Order by count and keep the last five lines, i.e. the most frequent words.
cut -d ',' -f 4 jan2017articles.csv | tr '[:upper:]' '[:lower:]' | tr ' ' '\n' | sort | uniq -c | sort -n | tail -n 5 > top5-words.txt
git add top5-words.txt
git commit -m 'Adding the file with the list of the top 5 words'
git log --oneline --shortstat
```