---
title: CODATA - Unix Shell
tags: CODATA
lang: pt-br
---

# Initialize git

```bash=
mkdir shell-hands-on
cd shell-hands-on/
git config --global user.name "Raphael Cobe"
git config --global user.email "raphaelmcobe@gmail.com"
git init
ls -a
ls -aF
```

# Download File

```bash=
wget --help
man wget
wget https://gist.githubusercontent.com/raphaelmcobe/9c26d6484bc4e72ef4d9b71173ef6acd/raw/b4db858dcfd094d530f10c3b978db2d8248be5a9/jan2017articles.csv
```

# Inspect the file's content

```bash=
head -2 jan2017articles.csv
tail -5 jan2017articles.csv
wc -l jan2017articles.csv
```

# Select only the Title

```bash=
head -2 jan2017articles.csv
cut -d"," -f4 jan2017articles.csv > titles.txt
git status
git add titles.txt
git status
git commit -m "Adding the List of titles"
git status
```

# Order the articles by the number of words

```bash=
man sort
head -2 jan2017articles.csv
sort -t"," -k8 jan2017articles.csv
sort -t"," -k8 jan2017articles.csv | head -5
# We need to use -n to sort numerically
cut -d"," -f8 jan2017articles.csv
sort -t"," -k8 -n jan2017articles.csv
sort -t"," -k8 -n jan2017articles.csv | head -5
sort -t"," -k8 -n jan2017articles.csv | tail -1
```

# Selecting the articles from January 17th

```bash=
grep "17 Jan 2017" jan2017articles.csv > jan-17-articles.txt
git add jan-17-articles.txt
git commit -m "jan 17 articles"
git log --oneline
grep -i linux jan2017articles.csv
```

# Order authors by the number of articles

```bash=
head -2 jan2017articles.csv
cut -d"," -f3 jan2017articles.csv
cut -d"," -f3 jan2017articles.csv | sort
man uniq
cut -d"," -f3 jan2017articles.csv | sort | uniq -c
cut -d"," -f3 jan2017articles.csv | sort | uniq -c | sort -n
# Select top 5 authors
cut -d"," -f3 jan2017articles.csv | sort | uniq -c | sort -n | tail -5 > top5-authors.txt
git add top5-authors.txt
git commit -m "Adding the list of top 5 authors"
git log
git log --oneline
git log --oneline --shortstat
# Only show authors who published more than one article
man uniq
cut -d"," -f3 jan2017articles.csv | sort | uniq -c -d
cut -d"," -f3 jan2017articles.csv | sort | uniq -c -d | sort
```

# Discover the words that appear most frequently in the titles

```bash=
head -2 jan2017articles.csv
cut -d"," -f4 jan2017articles.csv
cut -d"," -f4 jan2017articles.csv | tr "[:upper:]" "[:lower:]"
cut -d"," -f4 jan2017articles.csv | tr "[:upper:]" "[:lower:]" | tr " " "\n"
cut -d"," -f4 jan2017articles.csv | tr "[:upper:]" "[:lower:]" | tr " " "\n" | sort
cut -d"," -f4 jan2017articles.csv | tr "[:upper:]" "[:lower:]" | tr " " "\n" | sort | uniq -c
cut -d"," -f4 jan2017articles.csv | tr "[:upper:]" "[:lower:]" | tr " " "\n" | sort | uniq -c | sort -n | tail -5 > top5-words.txt
git add top5-words.txt
git commit -m "Adding the file with the list of the top 5 words"
git log --oneline --shortstat
```