Saturday, June 28, 2014

awk: string functions(2)

1. String Substitution
text:
 Hello world! Amazing world!  
 Aloha world! Ni Hao world!  


script_1:
 #! /bin/bash  
   
 # sub(regex, replacement, target) rewrites only the FIRST match in
 # the target. Here the target is given explicitly as $0, the record.
 awk '{ sub("world", "New York", $0); print }' text
 # Output:
 #   Hello New York! Amazing world!
 #   Aloha New York! Ni Hao world!
 # Only one occurrence of "world" per line becomes "New York".
   
 # When the third argument is left out, sub() works on $0 (the
 # current record), so the result matches the explicit form.
 awk '{
   sub("world", "New York")
   print
 }' text
 # Output:
 #   Hello New York! Amazing world!
 #   Aloha New York! Ni Hao world!
   
 # gsub() takes the same arguments as sub(); the only difference is
 # that it replaces EVERY occurrence of "world" in the target.
 awk '{ gsub("world", "New York", $0); print }' text
 # Output:
 #   Hello New York! Amazing New York!
 #   Aloha New York! Ni Hao New York!
   
 # As with sub(), omitting the third argument makes gsub() operate
 # on $0, the record itself.
 awk '{
   gsub("world", "New York")
   print
 }' text
 # Output:
 #   Hello New York! Amazing New York!
 #   Aloha New York! Ni Hao New York!
   
 # In the replacement text an unescaped "&" stands for the text that
 # was matched, so "&&" writes every match twice.
 awk '{ gsub("world", "&&"); print }' text
 # Output:
 #   Hello worldworld! Amazing worldworld!
 #   Aloha worldworld! Ni Hao worldworld!
   
 # To insert a literal "&", gsub() must receive a backslash followed
 # by "&". Inside an awk string literal the backslash itself has to be
 # escaped, hence "\\&". A bare "\&" is an undefined escape sequence:
 # some implementations (gawk, for example, with a warning) reduce it
 # to a plain "&", which would again mean "the matched text".
 awk '{
   gsub("world", "\\&\\&");
   print $0;
 }' text
 #output:
 #Hello &&! Amazing &&!
 #Aloha &&! Ni Hao &&!
 #Use an escaped backslash to disable the "&" feature and treat
 #"&" literally

2. String splitting
text:
  Hello : world!  


script_1:
 #! /bin/bash  
   
 # With only two arguments, split() separates the string on the
 # current FS, stores the pieces in arr[1] .. arr[n] and returns n.
 # The pieces are printed with a counted loop because the order in
 # which "for (i in arr)" visits the indices is unspecified.
 awk '{
   n = split($0, arr);
   for (i = 1; i <= n; i++)
   {
     print arr[i];
   }
 }' text
 #output:
 #Hello
 #:
 #world!
   
 # "[ ]" makes a single space the separator (a plain " " would mean
 # "runs of whitespace, leading whitespace ignored"). The text starts
 # with a space, so the first piece is the empty string.
 # A counted loop is used because "for (i in arr)" has no defined
 # order.
 awk '{
   n = split($0, arr, "[ ]");
   for (i = 1; i <= n; i++)
   {
     print arr[i];
   }
 }' text
 #output:
 #
 #Hello
 #:
 #world!
   
 # The colon is the separator here, so the string is cut into the
 # two parts on either side of it; the surrounding spaces are kept.
 # A counted loop is used because "for (i in arr)" has no defined
 # order.
 awk '{
   n = split($0, arr, ":");
   for (i = 1; i <= n; i++)
   {
     print arr[i];
   }
 }' text
 #Output:
 # Hello
 # world!
   
 echo =========

 awk 'BEGIN {
   # An empty separator puts every character into its own element.
   # This is widely supported but not guaranteed by POSIX.
   len = split("Hello", arr, "");
   print len;
   # Counted loop: "for (i in arr)" has no defined order.
   for (i = 1; i <= len; i++)
   {
     print arr[i];
   }

   # Splitting the empty string empties arr and returns 0.
   len = split("", arr);
   print len;
 }'
 #Output:
 #5
 #H
 #e
 #l
 #l
 #o
 #0

 #With the empty string as the separator, each character becomes
 #one entry in the array.
 #The return value of split is the number of elements stored in the
 #array. Passing the empty string as the first parameter clears the
 #array.

No comments:

Post a Comment