Sunday, August 31, 2014

Unix Prog: Standard I/O Implementation

1. fileno

fileno is used to get the file descriptor of given file stream

definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Return the system file descriptor for STREAM. */  
 extern int fileno (FILE *__stream) __THROW __wur;  
 ......  

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  /* Open test.txt for update, creating or truncating it. */
  FILE* stream = fopen("test.txt", "w+");

  if(stream == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  /* fileno() reports the descriptor that backs the stdio stream. */
  int fd = fileno(stream);
  printf("file descriptor: %d\n", fd);

  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 file descriptor: 3  

2. FILE struct implementation
 ubuntu@ip-172-31-23-227:~$ less /usr/include/libio.h  
 ......  
 struct _IO_FILE {  
  int _flags;      /* High-order word is _IO_MAGIC; rest is flags. */  
 #define _IO_file_flags _flags  
   
  /* The following pointers correspond to the C++ streambuf protocol. */  
  /* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */  
  char* _IO_read_ptr;  /* Current read pointer */  
  char* _IO_read_end;  /* End of get area. */  
  char* _IO_read_base; /* Start of putback+get area. */  
  char* _IO_write_base; /* Start of put area. */  
  char* _IO_write_ptr; /* Current put pointer. */  
  char* _IO_write_end; /* End of put area. */  
  char* _IO_buf_base;  /* Start of reserve area. */  
  char* _IO_buf_end;  /* End of reserve area. */  
  /* The following fields are used to support backing up and undo. */  
  char *_IO_save_base; /* Pointer to start of non-current get area. */  
  char *_IO_backup_base; /* Pointer to first valid character of backup area */  
  char *_IO_save_end; /* Pointer to end of non-current get area. */  
   
  struct _IO_marker *_markers;  
   
  struct _IO_FILE *_chain;  
   
  int _fileno;  
 #if 0  
  int _blksize;  
 #else  
  int _flags2;  
 #endif  
  _IO_off_t _old_offset; /* This used to be _offset but it's too small. */  
   
 #define __HAVE_COLUMN /* temporary */  
  /* 1+column number of pbase(); 0 is unknown. */  
  unsigned short _cur_column;  
  signed char _vtable_offset;  
  char _shortbuf[1];  
   
  /* char* _save_gptr; char* _save_egptr; */  
   
  _IO_lock_t *_lock;  
 #ifdef _IO_USE_OLD_IO_FILE  
 };  
 ......  
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 typedef struct _IO_FILE FILE;  
 ......  

3. Get the buffer information of file stream
We will use _IO_file_flags and _IO_UNBUFFERED, _IO_LINE_BUF to detect the buffer type of file stream. We also will use _IO_buf_end, _IO_buf_base to detect the buffer size of the file stream.

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 /*
  * Print the buffering mode and the buffer size of a stdio stream.
  *
  * name: label printed for the stream
  * fp:   stream to inspect; its implementation-internal FILE fields
  *       (_IO_file_flags, _IO_buf_base, _IO_buf_end) are read directly,
  *       so this only builds against a libc that exposes them
  */
 void pr_stdio(const char* name, FILE* fp)
 {
  printf("stream = %s.", name);
  if(fp -> _IO_file_flags & _IO_UNBUFFERED) {
   printf("unbuffered");
  } else if(fp -> _IO_file_flags & _IO_LINE_BUF) {
   printf("line buffered");
  } else {
   printf("fully buffered");
  }

  // The difference of two pointers has type ptrdiff_t, whose matching
  // conversion is %td (not %ld)
  printf(", buffer size = %td\n", fp -> _IO_buf_end - fp -> _IO_buf_base);
 }
   
 int main(int argc, char* argv[])
 {
  // Write one line to stdout and stderr before inspecting the streams
  fputs("test line at standard output.\n", stdout);
  fputs("test line at standard error.\n", stderr);

  // Report the buffering of the three standard streams
  pr_stdio("stdin", stdin);
  pr_stdio("stdout", stdout);
  pr_stdio("stderr", stderr);

  FILE *stream = fopen("test.txt", "w+");
  if(stream == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // Report the buffering of the freshly opened file "test.txt"
  pr_stdio("test.txt", stream);

  // Install a caller-supplied, fully buffered BUFSIZ-byte buffer
  char user_buf[BUFSIZ];
  setvbuf(stream, user_buf, _IOFBF, BUFSIZ);

  printf("after changing the buffer to %d\n", BUFSIZ);
  pr_stdio("test.txt", stream);
  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 test line at standard output.  
 test line at standard error.  
 stream = stdin.fully buffered, buffer size = 0  
 stream = stdout.line buffered, buffer size = 1024  
 stream = stderr.unbuffered, buffer size = 1  
 stream = test.txt.fully buffered, buffer size = 0  
 after changing the buffer to 8192  
 stream = test.txt.fully buffered, buffer size = 8192  

Unix Prog: Formatted I/O(2)

1. Input Library Functions

scanf reads formatted input from standard input, fscanf reads formatted input from the specified file stream, and sscanf reads formatted input from the given string.

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Read formatted input from STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fscanf (FILE *__restrict __stream,  
           const char *__restrict __format, ...) __wur;  
 /* Read formatted input from stdin.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int scanf (const char *__restrict __format, ...) __wur;  
 /* Read formatted input from S. */  
 extern int sscanf (const char *__restrict __s,  
           const char *__restrict __format, ...) __THROW;  
 ......  

2. Formatted Input example:

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
 #include<string.h>  
   
 int main(int argc, char* argv[])
 {
  // scanf example, provide ti's address to populate
  // the result from standard input.
  // scanf returns the number of items converted: 0 means the input did
  // not match "%d" and EOF means no input at all, so anything other
  // than 1 leaves ti unset and must be treated as an error
  int ti;
  if(scanf("%d", &ti) != 1) {
   printf("scanf error!\n");
   exit(1);
  }

  printf("%d\n", ti);

  // fscanf example, provide the string address tc
  // to populate the string from "test.txt"
  FILE *fp;
  if((fp = fopen("test.txt", "r+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // %99s limits the conversion to the 100-byte array (99 chars + null)
  char tc[100];
  if(fscanf(fp, "%99s", tc) != 1) {
   printf("fscanf error!\n");
   fclose(fp);
   exit(1);
  }

  // The stream is not needed any more
  fclose(fp);

  printf("%s\n", tc);

  // sscanf example, tc string contains "Hello world!"
  // provide another string address tc2 to populate the
  // string from tc
  char tc2[100];
  strcpy(tc, "Hello world!");
  if(sscanf(tc, "%99s", tc2) != 1) {
   printf("sscanf error!\n");
   exit(1);
  }

  printf("%s\n", tc2);
  exit(0);
 }

shell:
Run the program. It first pauses for the user to type an integer on standard input; we enter 100 and it prints the integer. Next, it reads a string from test.txt; reading stops at the first space, so the string read is "Hello", which it prints to standard output. Lastly, it reads the string "Hello" again, this time from the tc char array (which holds "Hello world!"), saves it into the tc2 char array, and prints it to standard output.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 100  
 100  
 Hello  
 Hello  

3. Variadic Arguments Version

definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Read formatted input from S into argument list ARG.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int vfscanf (FILE *__restrict __s, const char *__restrict __format,  
           _G_va_list __arg)  
    __attribute__ ((__format__ (__scanf__, 2, 0))) __wur;  
   
 /* Read formatted input from stdin into argument list ARG.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int vscanf (const char *__restrict __format, _G_va_list __arg)  
    __attribute__ ((__format__ (__scanf__, 1, 0))) __wur;  
   
 /* Read formatted input from S into argument list ARG. */  
 extern int vsscanf (const char *__restrict __s,  
           const char *__restrict __format, _G_va_list __arg)  
    __THROW __attribute__ ((__format__ (__scanf__, 2, 0)));  
 ......  

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<stdarg.h>  
 #include<unistd.h>  
 #include<string.h>  
   
 // vscanf example  
 // vscanf example: hands the caller's variadic pointers on to vscanf
 void my_io1(const char* fm, ...)
 {
  va_list ap;

  // ap covers every argument that follows fm
  va_start(ap, fm);

  // Parse standard input according to fm and store each converted
  // value through the corresponding pointer in ap
  vscanf(fm, ap);

  va_end(ap);
 }
   
 // vfscanf example  
 // vfscanf example
 // Reads from "test.txt" according to format fm and stores the converted
 // values through the pointers passed after fm. Prints a message and
 // exits with status 1 if the file cannot be opened.
 void my_io2(const char* fm, ...)
 {
  va_list args;
  FILE *fp;

  // Open the file before va_start so the error path never abandons a
  // started va_list without its matching va_end
  if((fp = fopen("test.txt", "r+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // vfscanf read data from specified file stream, based on format
  // specified by fm, and save data into variable whose address is provided
  // by args
  va_start(args, fm);
  vfscanf(fp, fm, args);
  va_end(args);

  // Close the stream so repeated calls do not leak file handles
  fclose(fp);
 }
   
 // vsscanf example  
 // vsscanf example: parses the string sr according to format fm
 void my_io3(const char* sr, const char* fm, ...)
 {
  va_list ap;

  // ap holds the destination addresses supplied after fm
  va_start(ap, fm);
  // sr is the text to parse, fm describes the expected fields
  vsscanf(sr, fm, ap);
  va_end(ap);
 }
 int main(int argc, char* argv[])
 {
  // The my_io helpers return void and cannot report a failed conversion,
  // so every destination starts with a defined value; otherwise a failed
  // read would make the printf calls below use uninitialized memory
  int ti = 0;
  my_io1("%d", &ti);
  printf("after vscanf: %d\n", ti);

  // %99s keeps the conversion inside the 100-byte array
  char tc1[100] = "";
  my_io2("%99s", tc1);
  printf("after vfscanf: %s\n", tc1);

  char tc2[100] = "";
  strcpy(tc1, "Hello world!");
  my_io3(tc1, "%99s", tc2);
  printf("after vsscanf: %s\n", tc2);

  exit(0);
 }

shell:
1) Print out the content of test.txt
2) Run the program, it firstly ask user to input one integer, we type 999
3) It call vscanf to read integer from standard input, and save to variable whose address is provided by args.
4) It call vfscanf to read string from test.txt, and save to character array provided by args, since fscanf will stop when encountering the space, so only "Hello" is read
5) It call vsscanf to read string from provided character array, and save to character array provided by args, it will also stop when encountering the space, so only "Hello" is read.
 ubuntu@ip-172-31-23-227:~$ cat test.txt  
 Hello world!  
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 999  
 after vscanf: 999  
 after vfscanf: Hello  
 after vsscanf: Hello  

Unix Prog: Formatted I/O(1)

1. Output Function Definitions:

printf writes formatted output to standard output, and fprintf writes it to the specified file stream. sprintf writes it into the specified char array. snprintf additionally takes a maximum size; output that does not fit within that size is truncated.

 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Write formatted output to STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fprintf (FILE *__restrict __stream,  
           const char *__restrict __format, ...);  
 /* Write formatted output to stdout.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int printf (const char *__restrict __format, ...);  
 /* Write formatted output to S. */  
 extern int sprintf (char *__restrict __s,  
           const char *__restrict __format, ...) __THROWNL;  
 ......  
 /* Maximum chars of output to write in MAXLEN. */  
 extern int snprintf (char *__restrict __s, size_t __maxlen,  
            const char *__restrict __format, ...)  
 ......  

2. Output Example:
fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  // printf example
  // "%+6.3f": the minimum field width is 6 characters and exactly
  // 3 digits are printed after the decimal point. The "+" flag
  // forces a sign to be printed even for positive numbers; any
  // padding needed to reach the width is done with spaces.
  float pi = 3.14159;
  printf("Pi Value: %+6.3f\n", pi);

  // fprintf example
  FILE *fp;
  if((fp = fopen("test.txt", "w+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  fprintf(fp, "Pi Value: %+6.3f\n", pi);

  // fclose flushes the buffered text, so a failure here means the
  // data may not have reached the file
  if(fclose(fp) != 0) {
   printf("fclose error!\n");
   exit(1);
  }

  // sprintf example
  char tc[100];
  sprintf(tc, "Pi Value: %+6.3f", pi);
  puts(tc);

  // snprintf example: with size 9 at most 8 characters plus the
  // terminating null byte are stored
  snprintf(tc, 9, "Pi Value: %+6.3f\n", pi);
  puts(tc);

  exit(0);
 }

shell:
Run the program. The first line is written by printf; the program then writes the same string to the file "test.txt", and then uses sprintf to build the string that puts prints as the second line. The last call, snprintf, is given a size of 9, so it stores at most 8 characters plus the terminating null byte and the rest of the formatted string is truncated.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 Pi Value: +3.142  
 Pi Value: +3.142  
 Pi Value  
 ubuntu@ip-172-31-23-227:~$ cat test.txt  
 Pi Value: +3.142  

Note: for sprintf, it is programmer's responsibility to make sure there is enough buffer space to contain the formatted string.

3. Variable Version

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Write formatted output to S from argument list ARG.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int vfprintf (FILE *__restrict __s, const char *__restrict __format,  
            _G_va_list __arg);  
 /* Write formatted output to stdout from argument list ARG.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int vprintf (const char *__restrict __format, _G_va_list __arg);  
 /* Write formatted output to S from argument list ARG. */  
 extern int vsprintf (char *__restrict __s, const char *__restrict __format,  
            _G_va_list __arg) __THROWNL;  
 ......  
 extern int vsnprintf (char *__restrict __s, size_t __maxlen,  
            const char *__restrict __format, _G_va_list __arg)  
    __THROWNL __attribute__ ((__format__ (__printf__, 3, 0)));  
 ......  

Everything is same as above functions, but they are designed for variable arguments.

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
 #include<stdarg.h>  
   
 // vprintf example  
 // vprintf example: prints to standard output using format fm
 void myio_1(const char* fm, ...)
 {
  va_list ap;

  // After va_start, ap refers to the arguments that follow fm
  // (fm itself is not part of the list)
  va_start(ap, fm);
  vprintf(fm, ap);
  va_end(ap);
 }
   
 // vfprintf example  
 // vfprintf example: writes the formatted text into "test.txt"
 void myio_2(const char* fm, ...)
 {
  // "w+" creates the file or truncates an existing one
  FILE *out = fopen("test.txt", "w+");
  if(out == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  va_list ap;
  va_start(ap, fm);
  vfprintf(out, fm, ap);
  va_end(ap);

  fclose(out);
 }
   
 // vsprintf example  
 // vsprintf example: formats into a local array and prints it with puts.
 // vsprintf does no bounds checking, so the formatted text must fit
 // in the 100-byte array.
 void myio_3(const char* fm, ...)
 {
  char text[100];
  va_list ap;

  va_start(ap, fm);
  vsprintf(text, fm, ap);
  va_end(ap);

  puts(text);
 }
   
 // vsnprintf example  
 // vsnprintf example: formats into a local array with a size limit
 // of 9 bytes, then prints the result with puts
 void myio_4(const char* fm, ...)
 {
  char text[100];
  va_list ap;

  va_start(ap, fm);
  // Size 9 means at most 8 characters plus the terminating null byte
  vsnprintf(text, 9, fm, ap);
  va_end(ap);

  puts(text);
 }
   
 int main(int argc, char* argv[])
 {
  // The same format is used everywhere; the first two calls pass it
  // with a trailing newline, the last two without
  const char* with_newline = "Pi Value: %+6.3f\n";
  const char* bare = "Pi Value: %+6.3f";
  float pi = 3.1415927;

  myio_1(with_newline, pi);
  myio_2(with_newline, pi);
  myio_3(bare, pi);
  myio_4(bare, pi);
  exit(0);
 }

shell:
The explanation is the same as above. Note the last line, which is truncated because vsnprintf was given a size of 9, so it stores at most 8 characters plus the terminating null byte.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 Pi Value: +3.142  
 Pi Value: +3.142  
 Pi Value  
 ubuntu@ip-172-31-23-227:~$ cat test.txt  
 Pi Value: +3.142  

Unix Prog: Position Stream(2)

1. Tell and seek binary file

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
 #include<string.h>  
   
 int main(int argc, char* argv[])
 {
  FILE *fp;

  // Open the test file. The "b" flag requests binary mode; it makes no
  // difference on Unix but prevents newline translation elsewhere
  if((fp = fopen("test", "w+b")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // Write three integers into the binary file
  int ti[3]={1,2,3};

  if(fwrite(ti, sizeof(int), 3, fp) != 3) {
   printf("fwrite error!\n");
   exit(2);
  }

  // Read current position
  long pos;

  if((pos = ftell(fp)) < 0) {
   printf("ftell error!\n");
   exit(3);
  }

  printf("current binary file position: %ld\n", pos);

  // Rewind to the beginning of file
  if(fseek(fp, 0, SEEK_SET) != 0) {
   printf("fseek error!\n");
   exit(4);
  }

  // ftell can fail here as well, so check it like the first call
  if((pos = ftell(fp)) < 0) {
   printf("ftell error!\n");
   exit(3);
  }
  printf("current binary file position: %ld\n", pos);

  // Write 4 characters to the file, overwriting the bytes of the first
  // integer (this assumes sizeof(int) is 4)
  char tc[4]={'a','b','c','d'};
  if(fwrite(tc, sizeof(char), 4, fp) != 4) {
   printf("fwrite error!\n");
   exit(2);
  }

  // Read the characters and remaining 2 numbers out.
  // The rewind between the write and the reads is required: a stream
  // opened for update must be repositioned when switching direction
  rewind(fp);
  char rtc[4];
  int rti[2];
  if(fread(rtc, sizeof(char), 4, fp) != 4) {
   printf("fread error!\n");
   exit(3);
  }

  if(fread(rti, sizeof(int), 2, fp) != 2) {
   printf("fread error!\n");
   exit(3);
  }

  printf("%c%c%c%c %d %d\n", rtc[0], rtc[1], rtc[2], rtc[3], rti[0], rti[1]);

  fclose(fp);
  exit(0);
 }

shell:
Run the program, it will output the position after writing 3 integers(12 bytes), and then the position after rewind the cursor to the beginning of file. At this time, it wrote 4 characters into the file, which is 4 bytes, also replaced the first integer.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 current binary file position: 12  
 current binary file position: 0  
 abcd 2 3  

fseek position mode:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 #define SEEK_SET    0    /* Seek from beginning of file. */  
 #define SEEK_CUR    1    /* Seek from current position. */  
 #define SEEK_END    2    /* Seek from end of file. */  
 ......  

2. fgetpos, fsetpos
These two functions are introduced by ISO C standard, basically they are same as above functions.

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/_G_config.h  
 ......   
 typedef struct   
 {   
  __off_t __pos;   
  __mbstate_t __state;   
 } _G_fpos_t;   
 ......  
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h
 ......
 typedef _G_fpos_t fpos_t;  
 ......  
 /* Get STREAM's position.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fgetpos (FILE *__restrict __stream, fpos_t *__restrict __pos);  
 /* Set STREAM's position.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fsetpos (FILE *__stream, const fpos_t *__pos);  
 ......  

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  FILE *fp;

  // Open the file
  if((fp = fopen("test.txt", "w+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // fpos_t is an opaque type: its members are private to the C library
  // and it may carry extra state besides the offset. So instead of
  // filling it in by hand, move to offset 10 with fseek and let fgetpos
  // record that position.
  if(fseek(fp, 10L, SEEK_SET) != 0) {
   printf("fseek error!\n");
   exit(3);
  }

  fpos_t pos;
  if(fgetpos(fp, &pos) != 0) {
   printf("fgetpos error!\n");
   exit(2);
  }

  // Move away, then return to the recorded position with fsetpos
  rewind(fp);
  if(fsetpos(fp, &pos) != 0) {
   printf("fsetpos error!\n");
   exit(3);
  }

  // ftell is the portable way to obtain the position as a number
  long offset;
  if((offset = ftell(fp)) < 0) {
   printf("ftell error!\n");
   exit(2);
  }

  printf("current position: %ld\n", offset);

  fclose(fp);
  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 current position: 10  

Unix Prog: Position Stream(1)

1. Function Definitions
ftell, fseek, rewind are used to give current cursor position of the stream, and re-position the cursor in stream.
ftello and fseeko are almost the same as ftell and fseek; the only difference is that they use the type "off_t" for the offset, whereas ftell and fseek use "long". "off_t" is implementation dependent and can be wider than "long" on some systems.

definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Seek to a certain position on STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fseek (FILE *__stream, long int __off, int __whence);  
 /* Return the current position of STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern long int ftell (FILE *__stream) __wur;  
 /* Rewind to the beginning of STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern void rewind (FILE *__stream);  
 __END_NAMESPACE_STD  
 ......  
 /* Seek to a certain position on STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fseeko (FILE *__stream, __off_t __off, int __whence);  
 /* Return the current position of STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern __off_t ftello (FILE *__stream) __wur;  
 ......  

2. Tell and seek text file position
fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  FILE *fp = fopen("test.txt", "w+");
  long pos;

  // Without this check a failed fopen would hand a NULL stream to fputs
  if(fp == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // Write "Hello world!" into text file test.txt
  if(fputs("Hello world!", fp) < 0) {
   printf("fputs error!\n");
   exit(1);
  }

  // Get the current position: 12
  if((pos = ftell(fp)) < 0) {
   printf("ftell error!\n");
   exit(2);
  }

  printf("current text file position: %ld\n", pos);

  // Set up the current position to the beginning of test.txt file
  if(fseek(fp, 0, SEEK_SET) != 0) {
   printf("fseek error!\n");
   exit(3);
  }

  if((pos = ftell(fp)) < 0) {
   printf("ftell error!\n");
   exit(2);
  }

  printf("current text file position: %ld\n", pos);

  // Write "Amazing world!" to file test.txt from beginning
  // Original words "Hello world!" are replaced.
  if(fputs("Amazing world!\n", fp) < 0) {
   printf("fputs error!\n");
   exit(1);
  }

  // rewind command will move cursor back to the beginning of text file
  // And read the text out
  rewind(fp);
  char buff[BUFSIZ];
  if(fgets(buff, BUFSIZ, fp) != buff) {
   printf("fgets error!\n");
   exit(3);
  }

  // buff still ends with the newline read by fgets
  printf("content read out: %s\n", buff);

  fclose(fp);
  exit(0);
 }

shell:
Run the program, it output the position after writing out "Hello world!", and another position after moving cursor to the beginning of file.
Note that there is one extra empty line after "Amazing world!": fgets keeps the newline character it read from the file, and printf adds another newline at the end.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 current text file position: 12  
 current text file position: 0  
 content read out: Amazing world!  
   

Unix Prog: Binary I/O(2)

1. read and write a couple of structs
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
 #include<string.h>  
   
 // Fixed-size record holding one null-terminated string
 struct ts
 {
  char tc[100];
 };

 int main(int argc, char* argv[])
 {
  FILE *fp;
  struct ts ts1[3];

  // fwrite stores all 100 bytes of every record, so clear the array
  // first; otherwise the bytes after each string would be indeterminate
  memset(ts1, 0, sizeof ts1);
  strcpy(ts1[0].tc, "Hello world!");
  strcpy(ts1[1].tc, "Hello New York!");
  strcpy(ts1[2].tc, "Hello Boston!");

  // Write the struct ts array into binary file "test"
  if((fp = fopen("test", "w+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  if(fwrite(ts1, sizeof(struct ts), 3, fp) != 3) {
   printf("fwrite error!\n");
   exit(2);
  }

  fclose(fp);

  // Read the struct ts array from binary file "test"
  if((fp = fopen("test", "r+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  struct ts ts2[3];
  if(fread(ts2, sizeof(struct ts), 3, fp) != 3) {
   // Name the call that actually failed
   printf("fread error!\n");
   exit(3);
  }

  fclose(fp);

  // Print out the ts2 content
  printf("%s\n", ts2[0].tc);
  printf("%s\n", ts2[1].tc);
  printf("%s\n", ts2[2].tc);
  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 Hello world!  
 Hello New York!  
 Hello Boston!  

2. Read and Write inconsistent content
fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  float tf = 1.22;
  int ti;

  // Step 1: store the float 1.22 in the binary file "test"
  FILE *out = fopen("test", "w+");
  if(out == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  if(fwrite(&tf, sizeof(tf), 1, out) != 1) {
   printf("fwrite error!\n");
   exit(2);
  }

  fclose(out);

  // Step 2: read the stored bytes back, but into an int variable
  FILE *in = fopen("test", "r+");
  if(in == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  if(fread(&ti, sizeof(ti), 1, in) != 1) {
   printf("fread error!\n");
   exit(3);
  }

  // The value printed is the float's bytes interpreted as an int
  printf("%d\n", ti);
  fclose(in);
  exit(0);
 }

shell:
We can see that although the float 1.22 was written into the binary file, reading those bytes back into an integer variable yields a meaningless number: it is the float's bit pattern interpreted as an int.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 1067198710  

Note:
fread can only read binary data written in the same system and with same compiling configuration.
1) Different compiling configuration can change the representation of struct, to make it align accurately to save time, or pack tightly to save space. This indicates that different configuration will make the data struct in binary file different.
2) Different system will use different binary formats.
3) In order to fread and fwrite across different systems, we need to use high level protocol agreed by all systems.

Unix Prog: Binary I/O(1)

1. fread, fwrite Definition
We can use fread and fwrite to read and write binary structures in a file, which would be painful to do with character or line I/O.

definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Read chunks of generic data from STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern size_t fread (void *__restrict __ptr, size_t __size,  
            size_t __n, FILE *__restrict __stream) __wur;  
 /* Write chunks of generic data to STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern size_t fwrite (const void *__restrict __ptr, size_t __size,  
            size_t __n, FILE *__restrict __s);  
 ......  

2. read and write basic type

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  float tf[5] = {1.22, 2.33, 3.44, 4.55, 5.66};
  float rtf[5];

  // Store the five float values in the binary file "test"
  FILE *out = fopen("test", "w+");
  if(out == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  if(fwrite(tf, sizeof(float), 5, out) != 5) {
   printf("fwrite error!\n");
   exit(2);
  }

  fclose(out);

  // Opening the file again starts reading at the beginning,
  // so all five values can be read back
  FILE *in = fopen("test", "r+");
  if(in == NULL) {
   printf("fopen error!\n");
   exit(3);
  }

  if(fread(rtf, sizeof(float), 5, in) != 5) {
   printf("fread error!\n");
   exit(3);
  }

  // Print the values on one line, each followed by a space
  const float *cur = rtf;
  while(cur != rtf + 5) {
   printf("%f ", *cur);
   cur++;
  }
  printf("\n");

  fclose(in);

  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 1.220000 2.330000 3.440000 4.550000 5.660000  

3. read and write struct
fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
 #include<string.h>  
   
 // Record mixing a float, an int and a short null-terminated string
 struct ts {
  float tf;
  int ti;
  char tc[13];
 };

 int main(int argc, char* argv[])
 {
  FILE *fp;
  struct ts temp;

  // Write the struct temp into binary file test
  if((fp = fopen("test", "w+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // fwrite stores every byte of the struct, including any padding,
  // so clear the whole object before filling in the members
  memset(&temp, 0, sizeof temp);
  temp.tf = 1.22;
  temp.ti = 2;
  strcpy(temp.tc, "Hello world!");

  if(fwrite(&temp, sizeof(temp), 1, fp) != 1) {
   printf("fwrite error!\n");
   exit(2);
  }

  fclose(fp);

  // Re-open the file to move cursor back to beginning
  // To read the struct back
  if((fp = fopen("test", "r+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  struct ts temp_read;
  if(fread(&temp_read, sizeof(temp_read), 1, fp) != 1) {
   // Name the call that actually failed
   printf("fread error!\n");
   exit(3);
  }

  // Done reading; release the stream
  fclose(fp);

  printf("%f, %d, %s\n", temp_read.tf, temp_read.ti, temp_read.tc);

  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 1.220000, 2, Hello world!  

Unix Prog: Standard I/O Efficiency

fileio1.c: -> io1.out
This program use getc to get the input character and then output to standard output with putc.
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  // getc returns an int: either a byte value 0..255 or EOF. Storing it
  // in a char loses that distinction: where char is unsigned the
  // comparison with EOF is never true, and where it is signed an input
  // byte 0xFF is mistaken for end-of-file.
  int c;
  while((c = getc(stdin)) != EOF)
   if(putc(c, stdout) == EOF) {
    printf("output error!\n");
    exit(1);
   }

  // EOF is returned for both end-of-file and a read error
  if(ferror(stdin)) {
   printf("input error!\n");
   exit(2);
  }

  exit(0);
 }

fileio2.c: -> io2.out
This program use fgetc to get the input character and then output to standard output with fputc.
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  // fgetc returns an int: either a byte value 0..255 or EOF. Storing it
  // in a char loses that distinction: where char is unsigned the
  // comparison with EOF is never true, and where it is signed an input
  // byte 0xFF is mistaken for end-of-file.
  int c;
  while((c = fgetc(stdin)) != EOF)
   if(fputc(c, stdout) == EOF) {
    printf("output error!\n");
    exit(1);
   }

  // EOF is returned for both end-of-file and a read error
  if(ferror(stdin)) {
   printf("input error!\n");
   exit(2);
  }

  exit(0);
 }

fileio3.c: -> io3.out
This program use fgets to get the input line and then output to the standard output with fputs.
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  char line[BUFSIZ];

  // Copy standard input to standard output, one fgets chunk at a time
  for(;;) {
   if(fgets(line, BUFSIZ, stdin) == NULL)
    break;

   if(fputs(line, stdout) == EOF) {
    printf("output error!\n");
    exit(1);
   }
  }

  // fgets returns NULL on end-of-file and on error; ferror tells which
  if(ferror(stdin)) {
   printf("input error!\n");
   exit(2);
  }

  exit(0);
 }

shell:
1) List the file test.txt, which is a big file about 130MB
2) Use fileio1.c program to read input from test.txt and output to output_1.txt
3) Use fileio2.c program to read input from test.txt and output to output_2.txt
4) Use fileio3.c program to read input from test.txt and output to output_3.txt
 ubuntu@ip-172-31-23-227:~$ ls -lrt test.txt  
 -rw-rw-r-- 1 ubuntu ubuntu 130000000 Aug 31 14:30 test.txt  
 ubuntu@ip-172-31-23-227:~$ ./io1.out <test.txt >output_1.txt  
 ubuntu@ip-172-31-23-227:~$ ./io2.out <test.txt >output_2.txt  
 ubuntu@ip-172-31-23-227:~$ ./io3.out <test.txt >output_3.txt  

Summary of Time Consumption of 3 ways:
1) getc and putc use most of the time, especially on user time, which is the time spent on loop inside the program. Since it needs to do much more loop compared to line I/O, so user time is larger.
2) getc and putc spend much less time than using the "read" system call with a buffer size of 1. Underneath, getc and putc do not issue a system call every time they are called; thanks to the stream buffer, the number of "read" and "write" system calls is far smaller.
3) fputs and fgets is much faster than the way using "getc, putc". On user time, since it needs to do much fewer loops, it spends much less user time. On system time(kernel), depending on the implementation of "fputs, fgets", it may be same as the way using "getc,putc", if fgets fputs are implemented with "getc,putc". It may be spending less time if they are implemented with memccpy.
4) In summary, the system time spent by standard I/O is almost same as the way calling read/write system directly with good buffer size. But the way of programming, like more loops, maybe affecting user time(time spent on user space, normally the program developer directly writes) a lot.

Saturday, August 30, 2014

Unix Prog: Line-at-a-time I/O

1) gets: read from the standard input stream, add "null" at the end of string. It doesn't store the newline at the end of string

2) fgets: read from the given file stream, add "null" at the end of string, store the newline if there is

3) puts: write the null-terminated string to standard output, null is not written. And it writes one "new-line" after writing the given string

4) fputs: write the null-terminated string to given file stream, null is not written. It doesn't write the "new-line" after writing the given string.

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Write a string to STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fputs (const char *__restrict __s, FILE *__restrict __stream);  
   
 /* Write a string, followed by a newline, to stdout.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int puts (const char *__s);  
 ......  
 /* Get a newline-terminated string of finite length from STREAM. 

    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
 extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream)
      __wur;

 #if !defined __USE_ISOC11 \
     || (defined __cplusplus && __cplusplus <= 201103L)
 /* Get a newline-terminated string from stdin, removing the newline.
    DO NOT USE THIS FUNCTION!!  There is no limit on how much it will read.

    The function has been officially removed in ISO C11.  This opportunity
    is used to also remove it from the GNU feature list.  It is now only
    available when explicitly using an old ISO C, Unix, or POSIX standard.
    GCC defines _GNU_SOURCE when building C++ code and the function is still
    in C++11, so it is also available for C++.

    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
 extern char *gets (char *__s) __wur __attribute_deprecated__;
 ......

Note: gets should not be used, since user is allowed to put in string longer than its buffer size, which will overwrite the memory following the buffer.

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char *argv[])
 {
  char line[BUFSIZ];
  FILE *in = fopen("test.txt", "r+");

  if(in == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // fgets stores the first line of the file, keeping its trailing newline
  // (when there is one), and then appends the terminating null byte.
  // It returns NULL on error or end of file.
  if(fgets(line, BUFSIZ, in) == NULL) {
   printf("fgets error!\n");
   exit(2);
  }

  // fputs writes the string exactly as stored: the newline kept by fgets
  // goes out, the null byte does not, and nothing extra is appended.
  if(fputs(line, stdout) < 0) {
   printf("fputs error!\n");
   exit(3);
  }

  // gets reads one line from stdin and drops the newline before adding
  // the null byte.
  // WARNING: gets cannot limit how much it reads and was removed in C11;
  // it is used here only to contrast its behavior with fgets.
  if(gets(line) == NULL) {
   printf("gets error!\n");
   exit(4);
  }

  // puts writes the string (which has no newline) without the null byte,
  // and then appends a newline of its own.
  if(puts(line) < 0) {
   printf("puts error!\n");
   exit(5);
  }

  fclose(in);
  exit(0);
 }

shell:
1) Print out the content in test.txt
2) run the program, opening the test.txt, output the first line of test.txt: "Hello world!", then it paused to ask user to input one string, and put it out to standard output.
 ubuntu@ip-172-31-23-227:~$ cat test.txt  
 Hello world!  
 Amazing world!  
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 Hello world!  
 Hello New York  
 Hello New York  

Unix Prog: Character at-a-time I/O(2)

1. ungetc
ungetc is a standard I/O library function (not a system call) used to push characters back onto a file stream. Only one character of pushback is guaranteed; an implementation may allow more.

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Push a character back onto the input buffer of STREAM.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int ungetc (int __c, FILE *__stream);  
 ......  

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  int ch;
  FILE *stream = fopen("test.txt", "r+");

  // "test.txt" is expected to contain exactly one character, 'a'
  if(stream == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // First pass: the first read yields the character, the second yields
  // EOF (printed as a number)
  ch = getc(stream);
  printf("%c ", ch);
  ch = fgetc(stream);
  printf("%d\n", ch);

  // Push 'k' back onto the stream; ungetc returns the pushed character
  // on success
  ch = ungetc('k', stream);
  if(ch != 'k') {
   printf("ungetc error!\n");
   exit(1);
  }

  // Second pass: the pushed-back character is read first, then EOF again
  ch = getc(stream);
  printf("%c ", ch);
  ch = fgetc(stream);
  printf("%d\n", ch);

  fclose(stream);

  exit(0);
 }

shell:
Run the program against a "test.txt" that contains only one character. It first reads 2 characters, so the second read hits EOF. It then uses the "ungetc" library function to push the character 'k' back onto the stream; this clears the stream's EOF flag, and at this point the stream holds exactly 1 character, 'k' (in the stream buffer only, not on disk). Lastly, it reads 2 characters again: 'k', and then EOF once more.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 a -1  
 k -1  

2. Character output I/O
putc, fputc, and putchar are used to output the character I/O stream.

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Write a character to STREAM.  
   
   These functions are possible cancellation points and therefore not  
   marked with __THROW.  
   
   These functions is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fputc (int __c, FILE *__stream);  
 extern int putc (int __c, FILE *__stream);  
   
 /* Write a character to stdout.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int putchar (int __c);  
 ......  

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char *argv[])
 {
  const int first = 'a';
  const int second = 'b';
  FILE *out = fopen("test.txt", "w+");

  if(out == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // putc returns the character it wrote, so anything else means failure
  if(putc(first, out) != first) {
   printf("putc error!\n");
   exit(2);
  }

  // fputc has the same contract as putc
  if(fputc(second, out) != second) {
   printf("fputc error!\n");
   exit(3);
  }

  fclose(out);

  // putchar writes to stdout; no newline is emitted after the 'k'
  putchar('k');
  exit(0);
 }

shell:
Run the program who write 'a' 'b' to test.txt and then write 'k' to standard output.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 kubuntu@ip-172-31-23-227:~$ cat test.txt  
 ab  

Unix Prog: Character at-a-time I/O(1)

1. Read Character from stream

getc, fgetc and getchar are standard I/O library functions (not system calls) used to read one character at a time.

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Read a character from STREAM.  
   
   These functions are possible cancellation points and therefore not  
   marked with __THROW. */  
 extern int fgetc (FILE *__stream);  
 extern int getc (FILE *__stream);  
   
 /* Read a character from stdin.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int getchar (void);  
 ......  

The difference between getc and fgetc: getc can be implemented as one macro, fgetc is guaranteed to be a function, calls to fgetc probably takes longer time.
getchar is equal to getc(stdin)

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  FILE *fp;
  // getc, fgetc and getchar return int so that EOF can be told apart
  // from every valid character; storing the result in a char loses that.
  int c1, c2, typed;

  if((fp = fopen("test.txt", "r+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // Read the first two characters of the file
  c1 = getc(fp);
  c2 = fgetc(fp);
  if(c1 == EOF || c2 == EOF) {
   printf("read error or end of file!\n");
   fclose(fp);
   exit(2);
  }
  printf("c1: %c, c2: %c\n", c1, c2);

  // Read one character typed by the user and echo it
  printf("input one char: \n");
  typed = getchar();
  if(typed == EOF) {
   printf("getchar error or end of input!\n");
   fclose(fp);
   exit(3);
  }
  printf("%c\n", typed);

  fclose(fp);

  exit(0);
 }

shell:
Run the program, it will read two characters and then instruct the user to input one character, user input 'k', getchar() will catch this char from stdin and output to stdout.
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 c1: a, c2: b  
 input one char:  
 k  
 k  

2. Read error
When reading from a stream, an error and reaching the end of file both return the same value, EOF (a negative int). So we need ferror and feof to tell the two cases apart, and clearerr to reset both indicators.

definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Clear the error and EOF indicators for STREAM. */  
 extern void clearerr (FILE *__stream) __THROW;  
 /* Return the EOF indicator for STREAM. */  
 extern int feof (FILE *__stream) __THROW __wur;  
 /* Return the error indicator for STREAM. */  
 extern int ferror (FILE *__stream) __THROW __wur;  
 ......  

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  FILE *fp;

  // Open the file "test.txt" containing only one character 'a'
  if((fp = fopen("test.txt", "r+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // Read 2 characters, 2nd one is EOF
  printf("%c ", getc(fp));
  printf("%d\n", fgetc(fp));

  // Show the raw indicator values to see whether it was an error or EOF
  printf("ferror: %d feof: %d\n", ferror(fp), feof(fp));

  // ferror and feof only promise "nonzero" when the indicator is set,
  // so test against 0 rather than assuming a positive value.
  if(ferror(fp) != 0) {
   printf("error on stream!\n");
  } else {
   printf("no error!\n");
  }

  if(feof(fp) != 0) {
   printf("reached the end of file\n");
  } else {
   printf("not reaching the end\n");
  }

  // Clear both the error and the EOF indicators of the stream
  clearerr(fp);
  printf("After clearerr, ferror: %d, feof: %d\n", ferror(fp), feof(fp));

  fclose(fp);

  exit(0);
 }

shell:
Run the program it will open the "test.txt" containing only one character. It tries to get 2 characters from "test.txt", the first character is "a", the second one is -1, which is EOF, meaning end of file. At this time, FILE's feof flag is turned on. After calling clearerr, all error flags are turned off.
 ubuntu@ip-172-31-23-227:~$ ./a.out  
 a -1  
 ferror: 0 feof: 1  
 no error!  
 reached the end of file  
 After clearerr, ferror: 0, feof: 0  

Unix Prog: Standard I/O -- Open Stream(2)

1. freopen

freopen will open the existing file and associate it to one existing stream.
fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  FILE *fp;

  // Open "test.txt" for reading and writing and attach it to the
  // existing stdout stream. freopen first closes the file currently
  // associated with stdout, clears the stream orientation if there is
  // one, and then associates "test.txt" with stdout.
  if((fp = freopen("test.txt", "w+", stdout)) == NULL) {
   // stdout has already been closed by the failed freopen, so the
   // message has to go to stderr to be seen.
   fprintf(stderr, "freopen error!\n");
   exit(1);
  }

  // Output the string to standard output, which refers to test.txt now
  printf("Hello world!\n");

  // Output the string through the stream pointer returned by freopen;
  // it is written to the same test.txt
  fputs("Amazing world!\n", fp);

  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 ubuntu@ip-172-31-23-227:~$ cat test.txt  
 Hello world!  
 Amazing world!  

2. fdopen

fdopen will associate a new standard I/O stream to one existing file descriptor.

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
 #include<fcntl.h>  
   
 int main(int argc, char* argv[])
 {
  int fd;
  FILE *fp;

  // Open the file for "READ ONLY"
  if((fd = open("test.txt", O_RDONLY)) == -1) {
   printf("open error!\n");
   exit(1);
  }

  // Try to associate a write-only stream with the descriptor. That mode
  // conflicts with the descriptor's O_RDONLY access mode, so fdopen is
  // expected to return NULL here; the program deliberately continues.
  if((fp = fdopen(fd, "w")) == NULL) {
   printf("fdopen w error!\n");
  }

  // This one is ok, since the mode of fdopen is consistent with the
  // descriptor's mode. If it fails anyway there is no stream to close,
  // so release the descriptor and stop instead of calling fclose(NULL).
  if((fp = fdopen(fd, "r")) == NULL) {
   printf("fdopen r error!\n");
   close(fd);
   exit(2);
  }

  // fclose closes the stream together with its underlying descriptor
  fclose(fp);

  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 fdopen w error!  

Note:
1) "w", "w+" can't truncate the file in this system call, since the file already existed.
2) "w", "w+", "a", "a+" can't create new files since file already exists.
3) For a stream opened for both reading and writing, output cannot be directly followed by input without an intervening fflush, fseek, fsetpos, or rewind. Input cannot be directly followed by output without an intervening fseek, fsetpos, or rewind, or an input operation that encounters the end of file.

Unix Prog: Standard I/O -- Open Stream(1)

1. Open stream
fopen, freopen and fdopen are standard I/O library functions (not system calls) used to create file streams.

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Open a file and create a new stream for it.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern FILE *fopen (const char *__restrict __filename,  
           const char *__restrict __modes) __wur;  
 /* Open a file, replacing an existing stream with it.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern FILE *freopen (const char *__restrict __filename,  
            const char *__restrict __modes,  
            FILE *__restrict __stream) __wur;  
 ......  
 /* Create a new stream that refers to an existing system file descriptor. */  
 extern FILE *fdopen (int __fd, const char *__modes) __THROW __wur;  
 ......  

2. File open mode(fopen)

Following mode are allowed at unix:
"r", "rb", "w", "wb", "a", "ab", "r+", "rb+", "w+", "wb+", "a+", "ab+"

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 #define OWN_BUFFSIZE 1024  
   
 int main(int argc, char* argv[])
 {
  // One entry per fopen mode being demonstrated, tried in this order.
  // The "b" variants mark the file as binary; they behave like their
  // text counterparts apart from that.
  static const struct {
   const char *path;
   const char *mode;
  } cases[] = {
   // Open for reading; never truncates or creates the file
   { "test1.txt", "r" },
   { "test1", "rb" },
   // Open for writing only; truncate to 0 if the file exists,
   // create it otherwise
   { "test2.txt", "w" },
   { "test2", "wb" },
   // Open for appending at the end of file; create it if missing
   { "test3.txt", "a" },
   { "test3", "ab" },
   // Open for reading and writing; the file has to exist
   { "test4.txt", "r+" },
   { "test4", "rb+" },
   // Open for reading and writing; create the file if missing,
   // truncate it to 0 if it exists
   { "test5.txt", "w+" },
   { "test5", "wb+" },
   // Open for reading and appending; create the file if missing
   { "test6.txt", "a+" },
   { "test6", "ab+" },
  };
  size_t i;

  for(i = 0; i < sizeof cases / sizeof cases[0]; i++) {
   FILE *fp = fopen(cases[i].path, cases[i].mode);

   // Stop at the first failure, naming the mode that failed
   if(fp == NULL) {
    printf("fopen %s error!\n", cases[i].mode);
    exit(1);
   }

   // Close each stream before opening the next one so none is leaked
   fclose(fp);
  }

  exit(0);
 }


Unix Prog: Standard I/O -- Buffering

1. Standard I/O Buffering
The goal of the buffering is to use the minimum number of read and write calls.
1) Fully Buffered: files residing on disk are normally fully buffered.Actual I/O takes place when the buffer is fully filled. The buffer is allocated by "malloc" when the first time I/O is performed.
I/O could also take place when fflush is called on stream

2) Line Buffered: actual I/O takes place when a newline character is encountered on input or output. It is normally used for streams that refer to a terminal, such as standard input and standard output. I/O may also take place when input is requested through the standard I/O library from either: a) an unbuffered stream or b) a line-buffered stream

3) Unbuffered: standard I/O lib doesn't buffer characters. standard error stream is normally unbuffered.

ISO C standard:
Standard input/output are fully buffered, as long as they don't refer to interactive devices. Standard error is never fully buffered.

Actual Implementation:
Standard error is not buffered.
Other streams are line buffered if referring to interactive device otherwise fully buffered.

2. Change Stream Buffer
setbuf and setvbuf are standard I/O library functions (not system calls) used to change the stream buffer.

Function definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* The possibilities for the third argument to `setvbuf'. */  
 #define _IOFBF 0        /* Fully buffered. */  
 #define _IOLBF 1        /* Line buffered. */  
 #define _IONBF 2        /* No buffering. */  
 ......  
 __BEGIN_NAMESPACE_STD  
 /* If BUF is NULL, make STREAM unbuffered.  
   Else make it use buffer BUF, of size BUFSIZ. */  
 extern void setbuf (FILE *__restrict __stream, char *__restrict __buf) __THROW;  
 /* Make STREAM use buffering mode MODE.  
   If BUF is not NULL, use N bytes of it for buffering;  
   else allocate an internal buffer N bytes long. */  
 extern int setvbuf (FILE *__restrict __stream, char *__restrict __buf,  
           int __modes, size_t __n) __THROW;  
 __END_NAMESPACE_STD  
 ......  

BUFSIZ definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/_G_config.h  
 ......  
 #define _G_BUFSIZ 8192  
 ......  
 ubuntu@ip-172-31-23-227:~$ less /usr/include/libio.h  
 ......  
 #include <_G_config.h>  
 ......  
 #define _IO_BUFSIZ _G_BUFSIZ  
 ......  
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Default buffer size. */  
 #ifndef BUFSIZ  
 # define BUFSIZ _IO_BUFSIZ  
 #endif  
 ......  

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  FILE* fp;
  char buf[BUFSIZ];

  if((fp = fopen("test.txt", "w+")) == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // Install our own buffer with setbuf; the buffer handed to setbuf is
  // taken to be BUFSIZ bytes long, so buf is declared with that size.
  // The library decides whether the stream is fully or line buffered:
  // a regular file on disk is normally fully buffered, an interactive
  // device is normally line buffered.
  printf("current linux block size: %d\n", BUFSIZ);
  setbuf(fp, buf);

  // Disable the buffer: un-buffer now
  setbuf(fp, NULL);

  // setvbuf returns 0 on success and a nonzero value on failure. The
  // failure value is not guaranteed to be negative, so every check
  // below compares against 0.

  // Setup the stream to be full buffer, size is BUFSIZ
  if(setvbuf(fp, buf, _IOFBF, BUFSIZ) != 0) {
   printf("setvbuf error!\n");
   exit(2);
  }

  // Setup to full buffer, but provide buf pointer as null;
  // the library then allocates the buffer by itself
  if(setvbuf(fp, NULL, _IOFBF, 0) != 0) {
   printf("setvbuf error!\n");
   exit(2);
  }

  // Setup the stream to be line buffer, size is BUFSIZ
  if(setvbuf(fp, buf, _IOLBF, BUFSIZ) != 0) {
   printf("setvbuf error!\n");
   exit(2);
  }

  // Setup to line buffer, but provide buf pointer as null;
  // the library then allocates the buffer by itself
  if(setvbuf(fp, NULL, _IOLBF, 0) != 0) {
   printf("setvbuf error!\n");
   exit(2);
  }

  // Setup the stream to be non-buffer
  // In this case buf and BUFSIZ are ignored
  if(setvbuf(fp, buf, _IONBF, BUFSIZ) != 0) {
   printf("setvbuf error!\n");
   exit(2);
  }

  exit(0);
 }

3. Impact of customized buffer size
fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 #define OWN_BUFFSIZE 1024  
   
 int main(int argc, char* argv[])
 {
  char own_buf[OWN_BUFFSIZE];
  FILE *plain;
  FILE *tuned;

  printf("System block buffer size: %d\n", BUFSIZ);
  printf("Our own block buffer size: %d\n", OWN_BUFFSIZE);

  // First stream: left with the library's default buffering. A regular
  // file is fully buffered by default, and the buffered content is
  // written out to disk when the process terminates.
  plain = fopen("test1.txt", "w+");
  if(plain == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  fputs("Hello world!\n", plain);

  // Second stream: given a caller-supplied buffer whose size differs
  // from BUFSIZ. Some implementations may not write such a buffer out
  // when the stream goes away, so it is flushed explicitly below.
  tuned = fopen("test2.txt", "w+");
  if(tuned == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  if(setvbuf(tuned, own_buf, _IOFBF, OWN_BUFFSIZE) != 0) {
   printf("setvbuf error!\n");
   exit(2);
  }

  fputs("Hello world!\n", tuned);
  fflush(tuned);

  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 System block buffer size: 8192  
 Our own block buffer size: 1024  
 ubuntu@ip-172-31-23-227:~$ cat test1.txt  
 Hello world!  
 ubuntu@ip-172-31-23-227:~$ cat test2.txt  
 Hello world!  

Definition of "fflush":
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Flush STREAM, or all streams if STREAM is NULL.  
   
   This function is a possible cancellation point and therefore not  
   marked with __THROW. */  
 extern int fflush (FILE *__stream);  
 ......  

Unix Prog: Standard I/O -- Basic

1. File Orientation

When we create one file using standard I/O library, we say that we have associated the stream with the file.

With ASCII character, a single character occupies one byte. With international character, a single character occupies more than one byte.

Byte-oriented: stream will use ASCII character set
Wide-oriented: stream will use international character set

When a stream is created, it has no orientation, but if we use multibyte I/O function on it(<wchar.h>), it will become wide-oriented. If we use single byte I/O function on it, it will become byte-oriented.

2.fwide

The fwide function is used to set a stream's orientation. It cannot change the orientation of a stream that is already oriented.

definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/wchar.h  
 ......  
 /* Select orientation for stream. */  
 extern int fwide (__FILE *__fp, int __mode) __THROW;  
 ......  

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
 #include<wchar.h>  
 #include<errno.h>  
   
 int main(int argc, char* argv[])
 {
  int orient;
  FILE *pf;

  errno = 0;
  pf = fopen("test.txt", "w+");
  printf("errno: %d\n", errno);

  // fwide must not be handed a NULL stream, so stop if the open failed
  if(pf == NULL) {
   printf("fopen error!\n");
   exit(1);
  }

  // Mode 0 only queries the orientation without changing it
  orient = fwide(pf, 0);
  printf("current orientation: %d\n", orient);

  // A positive mode asks for wide orientation
  orient = fwide(pf, 1);
  printf("After changing to wide-orientation: %d\n", orient);

  // Following call doesn't work, fwide can't change a stream
  // orientation if it is already oriented. The result will still
  // be 1 in following case.
  orient = fwide(pf, -1);
  printf("After changing to byte-orientation: %d\n", orient);

  fclose(pf);

  exit(0);
 }

shell:
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 errno: 0  
 current orientation: 0  
 After changing to wide-orientation: 1  
 After changing to byte-orientation: 1  

3. standard input/output/error
When a process is created, three streams are created automatically: standard input, standard output and standard error. These streams are associated with 3 file descriptors: STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO

definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/unistd.h  
 ......  
 /* Standard file descriptors. */  
 #define STDIN_FILENO  0    /* Standard input. */  
 #define STDOUT_FILENO  1    /* Standard output. */  
 #define STDERR_FILENO  2    /* Standard error output. */  
 ......  
 ubuntu@ip-172-31-23-227:~$ less /usr/include/stdio.h  
 ......  
 /* Standard streams. */  
 extern struct _IO_FILE *stdin;     /* Standard input stream. */  
 extern struct _IO_FILE *stdout;     /* Standard output stream. */  
 extern struct _IO_FILE *stderr;     /* Standard error output stream. */  
 /* C89/C99 say they're macros. Make them happy. */  
 #define stdin stdin  
 #define stdout stdout  
 #define stderr stderr  
 ......  

Tuesday, August 26, 2014

Unix Prog: Files -- Directories(4)

1. chdir, fchdir, getcwd

chdir, fchdir can be used to change the current directory of current process.
Note: current directory is the property of process, each process has its own current directory.

Definition:
 ubuntu@ip-172-31-23-227:~$ less /usr/include/unistd.h  
 ......  
 /* Change the process's working directory to PATH. */  
 extern int chdir (const char *__path) __THROW __nonnull ((1)) __wur;  
   
 #if defined __USE_BSD || defined __USE_XOPEN_EXTENDED || defined __USE_XOPEN2K8  
 /* Change the process's working directory to the one FD is open on. */  
 extern int fchdir (int __fd) __THROW __wur;  
 ......  
 /* Get the pathname of the current working directory,
    and put it in SIZE bytes of BUF.  Returns NULL if the
    directory couldn't be determined or SIZE was too small.
    If successful, returns BUF.  In GNU, if BUF is NULL,
    an array is allocated with `malloc'; the array is SIZE
    bytes long, unless SIZE == 0, in which case it is as
    big as necessary.  */
 extern char *getcwd (char *__buf, size_t __size) __THROW __wur;
......

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
   
 int main(int argc, char* argv[])
 {
  char cwd[256];

  // Report the working directory the process starts in
  if(getcwd(cwd, sizeof cwd) == NULL) {
   printf("getcwd error!\n");
   exit(1);
  }
  printf("Cwd=%s\n", cwd);

  // Move this process to another directory (a relative path)
  if(chdir("testdir/slnk") < 0) {
   printf("chdir error!\n");
   exit(2);
  }

  // Report the working directory again to show the effect of chdir
  if(getcwd(cwd, sizeof cwd) == NULL) {
   printf("getcwd error!\n");
   exit(3);
  }
  printf("Cwd=%s\n", cwd);

  exit(0);
 }

shell:
1) Run "pwd" command to list the current working directory
2) Run io.out to change the current working directory to "/home/ubuntu/testdir/slnk"; it ends up in /home/ubuntu/testdir instead. The reason is: chdir follows symbolic links, and "testdir/slnk" is a symbolic link that points to its parent directory. Here is how getcwd works: starting from the current directory, it follows the ".." entries upwards and outputs the entire chain. Since it starts from /home/ubuntu/testdir, the string it gets is "/home/ubuntu/testdir".
3) Run the pwd again to list the current working directory. Since "current working directory" is the property of process, it only changed the working directory of the process where program was running, but not changed the working directory of the "shell" process.

 ubuntu@ip-172-31-23-227:~$ pwd  
 /home/ubuntu  
 ubuntu@ip-172-31-23-227:~$ ./io.out  
 Cwd=/home/ubuntu  
 Cwd=/home/ubuntu/testdir  
 ubuntu@ip-172-31-23-227:~$ pwd  
 /home/ubuntu  

2. Device Special Files

For each file system mounted on disk, it has both major device number and minor device number. We can use macros "major" and "minor" to get its major device number and minor device number by applying the macro on st_dev and st_rdev(for character special file and block special file) fields at "struct stat".

fileio.c:
 #include<stdio.h>  
 #include<stdlib.h>  
 #include<unistd.h>  
 #include<sys/sysmacros.h>  
 #include<sys/stat.h>  
   
 int main(int argc, char* argv[])
 {
  struct stat buf;

  int i;
  // Every command-line argument is treated as a path to inspect
  for(i=1; i < argc; i++) {
   printf("%s: ", argv[i]);

   // Get the i-node information
   if(stat(argv[i], &buf) < 0) {
    printf("stat error!\n");
    exit(1);
   }

   // Print out the device number of the file system holding the file.
   // major/minor yield unsigned values, so print them with %u.
   printf("dev = %u/%u ", major(buf.st_dev), minor(buf.st_dev));

   // Print out device number for character special and block special files
   if(S_ISCHR(buf.st_mode) || S_ISBLK(buf.st_mode)) {
    printf("(%s) rdev = %u/%u", (S_ISCHR(buf.st_mode))? "character" : "block",
        major(buf.st_rdev), minor(buf.st_rdev));
   }

   printf("\n");
  }

  exit(0);
 }

shell:
1) Run the program against one block special file
2) Run the program against one character special file
3) Run the program against one regular directory.
 ubuntu@ip-172-31-23-227:~$ ./io.out /dev/xvda  
 /dev/xvda: dev = 0/5 (block) rdev = 202/0  
 ubuntu@ip-172-31-23-227:~$ ./io.out /dev/tty1  
 /dev/tty1: dev = 0/5 (character) rdev = 4/1  
 ubuntu@ip-172-31-23-227:~$ ./io.out /home/ubuntu  
 /home/ubuntu: dev = 202/1