Thursday, June 19, 2014

awk: Records and Fields

1. Record Separator
text:
 Hello world!!!  
 Amazing world  

text2: (every blank line is completely empty and contains no whitespace characters)
   
   
 Hello world!  
   
   
 Amazing world!  
   
   
   
   

script_1:
 #! /bin/bash

 # awk treats its input as a sequence of records, and every record
 # as a sequence of fields.

 # RS (the record separator) defaults to a newline character, so
 # each input line is read as one record.
 awk '{ print }' text
 # output:
 # Hello world!!!
 # Amazing world

 # With RS set to "!!!", a record runs up to the next "!!!" instead
 # of the next newline. The newline that follows "!!!" in the input
 # is then ordinary data and starts the second record.
 # (A multi-character RS is handled by gawk and mawk; POSIX leaves
 # that case unspecified.)
 awk 'BEGIN { RS = "!!!" } { print }' text
 # output:
 # Hello world
 #       (newline character)
 # Amazing world

 # An empty RS selects paragraph mode: empty lines at the start of
 # the input are skipped, and each record then extends until the
 # next run of one or more empty lines. A line that holds only
 # spaces is not empty, so it counts as record content.
 awk 'BEGIN { RS = "" } { print }' text2
 # output:
 # Hello world!
 # Amazing world!

 # RS may also be a regular expression. Here the separator is
 # "zero or more ! characters".
 awk 'BEGIN { RS = "!*" } { print }' text
 # output:
 # Hello world
 #       (newline character)
 # Amazing world

2. Field Separators
 #! /bin/bash

 # The input below has leading and trailing spaces. With the default
 # FS (a single space), fields are separated by runs of one or more
 # whitespace characters, and leading and trailing whitespace is
 # ignored when the record is split.
 printf '%s\n' '  Hello world!  ' | awk '{
   print $0
   print $1
   print $2
 }'
 # output:
 #   Hello world!      ($0 keeps its leading and trailing spaces)
 # Hello
 # world!

 # Setting FS to a single space explicitly selects that same default
 # behaviour, so the result does not change.
 printf '%s\n' '  Hello world!  ' | awk 'BEGIN { FS = " " } {
   print $0
   print $1
   print $2
 }'
 # output:
 #   Hello world!      ($0 keeps its leading and trailing spaces)
 # Hello
 # world!

 # With an empty FS every character of the record becomes its own
 # field. (This is a common extension; POSIX does not specify it.)
 printf '%s\n' ' ab' | awk 'BEGIN { FS = "" } {
   print NF
   print $0
   print $1
   print $2
   print $3
 }'
 # output:
 # 3
 #  ab
 #       (a single space)
 # a
 # b

 # "[ ]" is a regular expression that matches exactly one space, so
 # "<space>ab<space>" splits into three fields: "", "ab" and "".
 printf '%s\n' ' ab ' | awk 'BEGIN { FS = "[ ]" } {
   print NF
   print $0
   print $1
   print $2
   print $3
 }'
 # output:
 # 3
 #  ab
 #       (empty string)
 # ab
 #       (empty string)

 # FS is only interpreted as a regular expression when it is longer
 # than one character. A single-character FS such as "*" is taken
 # literally: it is a plain asterisk, not a regex operator.
 printf '%s\n' 'Hello*world!' | awk 'BEGIN { FS = "*" } {
   print $1
   print $2
 }'
 # output:
 # Hello
 # world!

 # ":+" matches one or more ":" characters, so both "::::" and "::"
 # act as a single field separator.
 printf '%s\n' 'Hello::::Amazing::world!' | awk 'BEGIN { FS = ":+" } {
   print NF
   print $1
   print $2
   print $3
 }'
 # output:
 # 3
 # Hello
 # Amazing
 # world!

3. Fields
 #! /bin/bash  
   
 echo "Hello Amazing world!" | awk '{  
   print $1, $2, $3  
   #output: Hello Amazing world!  
   
   print $1.6, $(3/2), $"1.1", $"1abc"  
   #output: Hello Hello Hello Hello  
   #This means that all real number will be truncated to  
   #integer number, string will be converted to number  
   
   $3="Chicago";  
   print $0;  
   print $1,$2,$3;  
   #output:  
   #Hello Amazing Chicago  
   #Hello Amazing Chicago  
 }'  

No comments:

Post a Comment