Click here to get back home

extract all hotmail email addresses in a file and store in separate file

 HomeNewsGroups | Search | About
 comp.lang.perl.misc    Post an article   get this group's latest topics as an RSS feed add this group's latest topics to your My MSN content add this group's latest topics to your My Yahoo content
Subject Author Date
extract all hotmail email addresses in a file and store in separate file Dennis 06-18-2008
Posted by pete on June 19, 2008, 12:17 am
Please log in for more thread options
Dennis wrote:
> Hi, I have a text file that contains a list of email addresses like
> this:
>
> "foo@yahoo.com"
> "tom@hotmail.com"
> "jerry@gmail.com"
> "tommy@apple.com"
>
> I would like to
>
> 1. Strip out the " characters and just leave the email addresses on
> each line.
> 2. Extract the hotmail addresses and store them in another file.
> The hotmail addresses in the original file would be deleted.
>
> Thanks for any help

/* BEGIN new.c output */

Original original file contents:
"foo@yahoo.com"
"tom@hotmail.com"
"jerry@gmail.com"
"tommy@apple.com"

Final original file contents:
foo@yahoo.com
jerry@gmail.com
tommy@apple.com

Final other file contents:
tom@hotmail.com

/* END new.c output */

/* BEGIN new.c */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>

/*
** Sample input data: a brace-enclosed initializer holding four
** e-mail addresses, each wrapped in literal double-quote characters.
** main() uses it to initialize the array of lines it writes
** to the input file.
*/
#define STRINGS \
{ "\"foo@yahoo.com\"", "\"tom@hotmail.com\"", \
"\"jerry@gmail.com\"", "\"tommy@apple.com\""}

/*
** Node of a singly linked list.
** next : following node, or NULL at the end of the list
** data : pointer to the payload carried by this node
** list_type is an alias for struct list_node.
*/
typedef struct list_node {
    struct list_node *next;
    void *data;
} list_type;

/*
** Remove every occurrence of one character from a string, in place.
*/
void squeeze(char *s1, const int s2);
/*
** Read one line from stream into the realloc-grown buffer *lineptr,
** whose capacity is tracked in *n.
*/
int get_line(char **lineptr, size_t *n, FILE *stream);
/*
** Write the data of each node, followed by a newline, to stream.
*/
int list_fputs(const list_type *node, FILE *stream);
/*
** Allocate a node holding a copy of size bytes of data
** and link it into the list; returns the new node or NULL.
*/
list_type *list_append
(list_type **head, list_type *tail, void *data, size_t size);
/*
** Release every node from node onward,
** passing each node's data to free_data first.
*/
void list_free(list_type *node, void (*free_data)(void *));

/*
** Demonstration driver.
**
** 1. Writes the quoted sample addresses (STRINGS) to a temporary
**    "original" file.
** 2. Reads that file back into a linked list and prints it.
** 3. Strips the double quotes from every line in memory.
** 4. Rewrites the original file with the lines that do not contain
**    "hotmail" and writes the lines that do to a second temporary file.
** 5. Prints the final contents of both files and removes them.
**
** Returns EXIT_SUCCESS when every step worked, EXIT_FAILURE otherwise.
** Every exit path after the file names have been chosen goes through
** the cleanup label, so streams, the list, the line buffer and both
** temporary files are always released.
*/
int main (void)
{
    int rc;
    int status = EXIT_FAILURE;
    size_t n;
    char fn[2][L_tmpnam];
    FILE *fp[2] = { NULL, NULL };
    char *string[] = STRINGS;
    size_t size = 0;
    char *buff = NULL;
    list_type *head = NULL;
    list_type *tail = NULL;

    puts("/* BEGIN new.c output */\n");
    /*
    ** Choose names for the two work files.
    ** tmpnam returns NULL when no name could be generated,
    ** in which case fn[] would be left uninitialized.
    */
    if (tmpnam(fn[0]) == NULL || tmpnam(fn[1]) == NULL) {
        fputs("tmpnam() == NULL\n", stderr);
        return EXIT_FAILURE;
    }
    /*
    ** Create input file
    */
    fp[0] = fopen(fn[0], "w");
    if (fp[0] == NULL) {
        fputs("fopen(fn[0], \"w\") == NULL\n", stderr);
        goto cleanup;
    }
    for (n = 0; n != sizeof string / sizeof *string; ++n) {
        fprintf(fp[0], "%s\n", string[n]);
    }
    fclose(fp[0]);
    fp[0] = NULL;
    /*
    ** Read input file into list
    */
    fp[0] = fopen(fn[0], "r");
    if (fp[0] == NULL) {
        fputs("fopen(fn[0], \"r\") == NULL\n", stderr);
        goto cleanup;
    }
    while ((rc = get_line(&buff, &size, fp[0])) > 0) {
        tail = list_append(&head, tail, buff, rc);
        if (tail == NULL) {
            /*
            ** Stop here: carrying on with a partial list would
            ** rewrite the original file with lines missing.
            */
            fputs("tail == NULL\n", stderr);
            goto cleanup;
        }
    }
    fclose(fp[0]);
    fp[0] = NULL;
    /*
    ** Display input file contents
    */
    puts("Original original file contents:");
    list_fputs(head, stdout);
    putchar('\n');
    /*
    ** Strip out quotes from strings in memory
    */
    for (tail = head; tail != NULL; tail = tail -> next) {
        squeeze(tail -> data, '"');
    }
    /*
    ** Create output files
    */
    fp[0] = fopen(fn[0], "w");
    if (fp[0] == NULL) {
        fputs("fopen(fn[0], \"w\") == NULL\n", stderr);
        goto cleanup;
    }
    fp[1] = fopen(fn[1], "w");
    if (fp[1] == NULL) {
        fputs("fopen(fn[1], \"w\") == NULL\n", stderr);
        goto cleanup;
    }
    for (tail = head; tail != NULL; tail = tail -> next) {
        if (strstr(tail -> data, "hotmail") == NULL) {
            fprintf(fp[0], "%s\n", (const char *)tail -> data);
        } else {
            fprintf(fp[1], "%s\n", (const char *)tail -> data);
        }
    }
    list_free(head, free);
    tail = head = NULL;
    fclose(fp[0]);
    fp[0] = NULL;
    fclose(fp[1]);
    fp[1] = NULL;
    /*
    ** Read original file
    ** Display original file contents
    */
    fp[0] = fopen(fn[0], "r");
    if (fp[0] == NULL) {
        fputs("fopen(fn[0], \"r\") == NULL\n", stderr);
        goto cleanup;
    }
    puts("Final original file contents:");
    while ((rc = get_line(&buff, &size, fp[0])) > 0) {
        puts(buff);
    }
    putchar('\n');
    fclose(fp[0]);
    fp[0] = NULL;
    /*
    ** Read other file
    ** Display other file contents
    */
    fp[1] = fopen(fn[1], "r");
    if (fp[1] == NULL) {
        fputs("fopen(fn[1], \"r\") == NULL\n", stderr);
        goto cleanup;
    }
    puts("Final other file contents:");
    while ((rc = get_line(&buff, &size, fp[1])) > 0) {
        puts(buff);
    }
    putchar('\n');
    puts("/* END new.c output */");
    status = EXIT_SUCCESS;
cleanup:
    /*
    ** Release whatever is still held; each item is NULL
    ** (or already removed) when there is nothing to do.
    */
    for (n = 0; n != sizeof fp / sizeof *fp; ++n) {
        if (fp[n] != NULL) {
            fclose(fp[n]);
        }
    }
    list_free(head, free);
    free(buff);
    remove(fn[0]);
    remove(fn[1]);
    return status;
}

/*
** Delete every occurrence of the character c from the string s1.
**
** The string is compacted in place: characters that are kept are
** copied down over the removed ones and the result is re-terminated,
** so the string never grows.
**
** s1 : modifiable, null-terminated string
** c  : character value to remove
*/
void squeeze(char *s1, const int c)
{
    char *p;

    /* p is the write position, s1 the read position */
    for (p = s1; *s1 != '\0'; ++s1) {
        if (c != *s1) {
            *p++ = *s1;
        }
    }
    *p = '\0';
}

/*
** Read one line from stream into a dynamically grown buffer.
**
** lineptr : address of the buffer pointer; *lineptr must be NULL or
**           a pointer obtained from malloc/realloc. It is updated
**           whenever the buffer is reallocated. The caller frees it.
** n       : address of the buffer capacity in bytes; updated together
**           with *lineptr. Must be 0 when *lineptr is NULL.
** stream  : input stream to read from
**
** The newline that ends the line is consumed and replaced by a
** null terminator in the buffer.
**
** Returns
**   > 0 : number of characters consumed, newline included
**         (capped at INT_MAX)
**   0   : the buffer could not be enlarged
**   EOF : end of file or a read error was met before a newline;
**         any characters read so far are left null-terminated
**         in the buffer when it has room for the terminator
*/
int get_line(char **lineptr, size_t *n, FILE *stream)
{
    int rc;
    void *p;
    size_t count;
    /*
    ** The (char) casts in this function are not required
    ** by the rules of the C programming language.
    */
    count = 0;
    /*
    ** The feof/ferror test keeps the loop going when getc returns
    ** a value equal to EOF without end of file or an error being set.
    */
    while ((rc = getc(stream)) != EOF
        || (!feof(stream) && !ferror(stream)))
    {
        ++count;
        /* Stop before count + 2 could wrap around in size_t */
        if (count == (size_t)-2) {
            if (rc != '\n') {
                (*lineptr)[count] = '\0';
                (*lineptr)[count - 1] = (char)rc;
            } else {
                (*lineptr)[count - 1] = '\0';
            }
            break;
        }
        /* Keep room for this character plus a terminator after it */
        if (count + 2 > *n) {
            p = realloc(*lineptr, count + 2);
            if (p == NULL) {
                if (*n > count) {
                    /* Old buffer can still hold this character */
                    if (rc != '\n') {
                        (*lineptr)[count] = '\0';
                        (*lineptr)[count - 1] = (char)rc;
                    } else {
                        (*lineptr)[count - 1] = '\0';
                    }
                } else {
                    /* No room: leave an empty string, push back */
                    if (*n != 0) {
                        **lineptr = '\0';
                    }
                    ungetc(rc, stream);
                }
                count = 0;
                break;
            }
            *lineptr = p;
            *n = count + 2;
        }
        if (rc != '\n') {
            (*lineptr)[count - 1] = (char)rc;
        } else {
            (*lineptr)[count - 1] = '\0';
            break;
        }
    }
    if (rc != EOF || (!feof(stream) && !ferror(stream))) {
        rc = INT_MAX > count ? count : INT_MAX;
    } else {
        /* End of input: terminate whatever partial line was read */
        if (*n > count) {
            (*lineptr)[count] = '\0';
        }
    }
    return rc;
}

/*
** Write the data of every node, from node to the end of the list,
** to stream as a string followed by a newline.
**
** Returns 0 for an empty list, EOF as soon as a write fails,
** otherwise the value returned by the last putc call.
*/
int list_fputs(const list_type *node, FILE *stream)
{
    const list_type *cur;
    int result = 0;

    for (cur = node; cur != NULL; cur = cur -> next) {
        result = fputs(cur -> data, stream);
        if (result == EOF) {
            break;
        }
        result = putc('\n', stream);
        if (result == EOF) {
            break;
        }
    }
    return result;
}

/*
** Append a new node carrying a private copy of size bytes of data.
**
** head : address of the list head pointer; *head is set to the new
**        node when the list is empty
** tail : a node of the list to start looking for the end from,
**        normally the current last node; may be NULL, in which
**        case the search starts at *head
** data : bytes to copy into the new node
** size : number of bytes to copy
**
** Returns the new node (the new last node), or NULL when head is
** NULL or an allocation fails. The list is left unchanged on failure.
*/
list_type *list_append
(list_type **head, list_type *tail, void *data, size_t size)
{
    list_type *node;

    if (head == NULL) {
        return NULL;
    }
    node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node -> next = NULL;
    node -> data = malloc(size);
    if (node -> data == NULL) {
        free(node);
        return NULL;
    }
    /* memcpy must not be handed a null pointer, even for 0 bytes */
    if (size != 0) {
        memcpy(node -> data, data, size);
    }
    if (*head == NULL) {
        *head = node;
    } else {
        /*
        ** Tolerate a missing or stale tail: walk to the real end
        ** so no existing node is dereferenced as NULL or cut off.
        */
        if (tail == NULL) {
            tail = *head;
        }
        while (tail -> next != NULL) {
            tail = tail -> next;
        }
        tail -> next = node;
    }
    return node;
}

/*
** Destroy the list starting at node.
**
** For each node, free_data is called on the node's data pointer
** and then the node itself is freed. A NULL node is a no-op.
*/
void list_free(list_type *node, void (*free_data)(void *))
{
    while (node != NULL) {
        list_type *current = node;

        /* Step forward before the current node is released */
        node = current -> next;
        free_data(current -> data);
        free(current);
    }
}

/* END new.c */

--
pete

Posted by Marc Bissonnette on June 19, 2008, 12:58 am
Please log in for more thread options

> Dennis wrote:
>> Hi, I have a text file that contents a list of email addresses like
>> this:
>>
>> "foo@yahoo.com"
>> "tom@hotmail.com"
>> "jerry@gmail.com"
>> "tommy@apple.com"
>>
>> I like to
>>
>> 1. Strip out the " characters and just leave the email addresses on
>> each line.
>> 2. extract out the hotmail addresses and store it into another file.
>> The hotmail addresses in the original file would be deleted.
>>
>> Thanks for any help
>
> /* BEGIN new.c output */
>
> Original original file contents:
> "foo@yahoo.com"
> "tom@hotmail.com"
> "jerry@gmail.com"
> "tommy@apple.com"
>
> Final original file contents:
> foo@yahoo.com
> jerry@gmail.com
> tommy@apple.com
>
> Final other file contents:
> tom@hotmail.com
>
> /* END new.c output */
>
> /* BEGIN new.c */
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <limits.h>
>
> #define STRINGS \
> { "\"foo@yahoo.com\"", "\"tom@hotmail.com\"", \
> "\"jerry@gmail.com\"", "\"tommy@apple.com\""}
>
> struct list_node {
> struct list_node *next;
> void *data;
> };
>
> typedef struct list_node list_type;
>
> void squeeze(char *s1, const int s2);
> int get_line(char **lineptr, size_t *n, FILE *stream);
> int list_fputs(const list_type *node, FILE *stream);
> list_type *list_append
> (list_type **head, list_type *tail, void *data, size_t size);
> void list_free(list_type *node, void (*free_data)(void *));
>
> int main (void)
> {
> int rc;
> size_t n;
> char fn[2][L_tmpnam];
> FILE *fp[2];
> char *string[] = STRINGS;
> size_t size = 0;
> char *buff = NULL;
> list_type *head = NULL;
> list_type *tail = NULL;
>
> puts("/* BEGIN new.c output */\n");
> /*
> ** Create input file
> */
> tmpnam(fn[0]);
> tmpnam(fn[1]);
> fp[0] = fopen(fn[0], "w");
> if (fp[0] == NULL) {
> fputs("fopen(fn[0]), \"w\") == NULL\n", stderr);
> exit(EXIT_FAILURE);
> }
> for (n = 0; n != sizeof string / sizeof *string; ++n) {
> fprintf(fp[0], "%s\n", string[n]);
> }
> fclose(fp[0]);
> /*
> ** Read input file into list
> */
> fp[0] = fopen(fn[0], "r");
> if (fp[0] == NULL) {
> fputs("fopen(fn[0], \"r\") == NULL\n", stderr);
> exit(EXIT_FAILURE);
> }
> while ((rc = get_line(&buff, &size, fp[0])) > 0) {
> tail = list_append(&head, tail, buff, rc);
> if (tail == NULL) {
> fputs("tail == NULL\n", stderr);
> break;
> }
> }
> fclose(fp[0]);
> /*
> ** Display input file contents
> */
> puts("Original original file contents:");
> list_fputs(head, stdout);
> putchar('\n');
> /*
> ** Strip out quotes from strings in memory
> */
> for (tail = head; tail != NULL; tail = tail -> next) {
> squeeze(tail -> data, '"');
> }
> /*
> ** Create output files
> */
> fp[0] = fopen(fn[0], "w");
> if (fp[0] == NULL) {
> fputs("fopen(fn[0]), \"w\") == NULL\n", stderr);
> exit(EXIT_FAILURE);
> }
> fp[1] = fopen(fn[1], "w");
> if (fp[1] == NULL) {
> remove(fn[0]);
> fputs("fopen(fn[1]), \"w\") == NULL\n", stderr);
> exit(EXIT_FAILURE);
> }
> for (tail = head; tail != NULL; tail = tail -> next) {
> if (strstr(tail -> data, "hotmail") == NULL) {
> fprintf(fp[0], "%s\n", tail -> data);
> } else {
> fprintf(fp[1], "%s\n", tail -> data);
> }
> }
> list_free(head, free);
> tail = head = NULL;
> fclose(fp[0]);
> fclose(fp[1]);
> /*
> ** Read original file
> ** Display original file contents
> */
> fp[0] = fopen(fn[0], "r");
> if (fp[0] == NULL) {
> fputs("fopen(fn[0], \"r\") == NULL\n", stderr);
> exit(EXIT_FAILURE);
> }
> puts("Final original file contents:");
> while ((rc = get_line(&buff, &size, fp[0])) > 0) {
> puts(buff);
> }
> putchar('\n');
> fclose(fp[0]);
> /*
> ** Read other file
> ** Display other file contents
> */
> fp[1] = fopen(fn[1], "r");
> if (fp[1] == NULL) {
> fputs("fopen(fn[1], \"r\") == NULL\n", stderr);
> exit(EXIT_FAILURE);
> }
> puts("Final other file contents:");
> while ((rc = get_line(&buff, &size, fp[1])) > 0) {
> puts(buff);
> }
> putchar('\n');
> free(buff);
> buff = NULL;
> size = 0;
> fclose(fp[1]);
> remove(fn[0]);
> remove(fn[1]);
> puts("/* END new.c output */");
> return 0;
> }
>
> void squeeze(char *s1, const int c)
> {
> char *p;
>
> for (p = s1; *s1 != ''; ++s1) {
> if (c != *s1) {
> *p++ = *s1;
> }
> }
> *p = '';
> }
>
> int get_line(char **lineptr, size_t *n, FILE *stream)
> {
> int rc;
> void *p;
> size_t count;
> /*
> ** The (char) casts in this function are not required
> ** by the rules of the C programming language.
> */
> count = 0;
> while ((rc = getc(stream)) != EOF
> || !feof(stream) && !ferror(stream))
> {
> ++count;
> if (count == (size_t)-2) {
> if (rc != '\n') {
> (*lineptr)[count] = '';
> (*lineptr)[count - 1] = (char)rc;
> } else {
> (*lineptr)[count - 1] = '';
> }
> break;
> }
> if (count + 2 > *n) {
> p = realloc(*lineptr, count + 2);
> if (p == NULL) {
> if (*n > count) {
> if (rc != '\n') {
> (*lineptr)[count] = '';
> (*lineptr)[count - 1] = (char)rc;
> } else {
> (*lineptr)[count - 1] = '';
> }
> } else {
> if (*n != 0) {
> **lineptr = '';
> }
> ungetc(rc, stream);
> }
> count = 0;
> break;
> }
> *lineptr = p;
> *n = count + 2;
> }
> if (rc != '\n') {
> (*lineptr)[count - 1] = (char)rc;
> } else {
> (*lineptr)[count - 1] = '';
> break;
> }
> }
> if (rc != EOF || !feof(stream) && !ferror(stream)) {
> rc = INT_MAX > count ? count : INT_MAX;
> } else {
> if (*n > count) {
> (*lineptr)[count] = '';
> }
> }
> return rc;
> }
>
> int list_fputs(const list_type *node, FILE *stream)
> {
> int rc = 0;
>
> while (node != NULL
> && (rc = fputs(node -> data, stream)) != EOF
> && (rc = putc('\n', stream)) != EOF)
> {
> node = node -> next;
> }
> return rc;
> }
>
> list_type *list_append
> (list_type **head, list_type *tail, void *data, size_t size)
> {
> list_type *node;
>
> node = malloc(sizeof *node);
> if (node != NULL) {
> node -> next = NULL;
> node -> data = malloc(size);
> if (node -> data != NULL) {
> memcpy(node -> data, data, size);
> if (*head != NULL) {
> tail -> next = node;
> } else {
> *head = node;
> }
> } else {
> free(node);
> node = NULL;
> }
> }
> return node;
> }
>
> void list_free(list_type *node, void (*free_data)(void *))
> {
> list_type *next_node;
>
> while (node != NULL) {
> next_node = node -> next;
> free_data(node -> data);
> free(node);
> node = next_node;
> }
> }
>
> /* END new.c */
>

Wow - All that just to separate @hotmail.com from anything else ? I'm
glad I stuck with perl :)

--
Marc Bissonnette
Looking for a new ISP? http://www.canadianisp.com
Largest ISP comparison site across Canada.

Posted by Bartc on June 19, 2008, 5:13 am
Please log in for more thread options

>
>> Dennis wrote:
>>> Hi, I have a text file that contents a list of email addresses like
>>> this:

>> /* BEGIN new.c output */

>><snip 250+ lines of C >

> Wow - All that just to separate @hotmail.com from anything else ? I'm
> glad I stuck with perl :)

I think pete just enjoys writing huge amounts of C code. Or showing off..

I thought my 50-line answer (posted to comp.lang.c only) might have been a
bit long because it didn't make clever use of scanf(), but at least it could
deal with /any number/ of email addresses from a file.

This code I /think/ only deals with the 4 email addresses in the OP's
example..

--
Bartc



Posted by vippstar on June 19, 2008, 5:18 am
Please log in for more thread options
>
>
>
> >> Dennis wrote:
> >>> Hi, I have a text file that contents a list of email addresses like
> >>> this:
> >> /* BEGIN new.c output */
> >><snip 250+ lines of C >
> > Wow - All that just to separate @hotmail.com from anything else ? I'm
> > glad I stuck with perl :)
>
> I think pete just enjoys writing huge amounts of C code. Or showing off..
Or using concrete functions he has written in the past to write
concrete programs.
<snip>


Posted by Bartc on June 19, 2008, 5:55 am
Please log in for more thread options

>>
>>
>>
>> >> Dennis wrote:
>> >>> Hi, I have a text file that contents a list of email addresses like
>> >>> this:
>> >> /* BEGIN new.c output */
>> >><snip 250+ lines of C >
>> > Wow - All that just to separate @hotmail.com from anything else ? I'm
>> > glad I stuck with perl :)
>>
>> I think pete just enjoys writing huge amounts of C code. Or showing off..

> Or using concrete functions he has written in the past to write
> concrete programs.

I thought it was some sort of unwritten rule here that when posting code
solutions you tend not to import large elements of your own library.
Otherwise everyone would post their own different version of getline() and
so on.

And also there's the possibility, as seems to have happened here, of using
something inappropriate just because it's there. There's no reason at all to
use a linked list to read all the input into memory (and risking
out-of-memory or thrashing for large input).

(Although I suspect pete may have created this over-the-top solution on
purpose..)

> concrete programs.

Which is more concrete, this code which has a memory requirement of N or
code using fixed memory?

--
Bartc



Similar ThreadsPosted
Regex for finding email addresses inside text file January 27, 2005, 1:44 am
how to separate all but www addresses? September 26, 2004, 1:41 pm
Validating email addresses August 10, 2004, 9:26 pm
parsing email addresses July 18, 2005, 10:32 pm
Regexp for email addresses. February 14, 2007, 4:55 pm
FAQ 4.65 How can I store a multidimensional array in a DBM file? May 12, 2005, 11:03 am
FAQ: How can I store a multidimensional array in a DBM file? October 10, 2004, 5:10 pm
FAQ 4.65 How can I store a multidimensional array in a DBM file? February 13, 2005, 12:03 am
FAQ 4.65 How can I store a multidimensional array in a DBM file? July 28, 2005, 10:03 am
FAQ 4.65 How can I store a multidimensional array in a DBM file? September 17, 2005, 10:03 pm

Our other projects:

Art Dolls, Fairies and Mermaids - Sunnyfaces.net

Roy's Linux, Programming and Search Engines messages

1-Script XML SitemapXML Sitemap