I need help editing my C code for this question.
4) Write a C program named a4q4.c which reads the standard input and extracts HTML-style tags from it. We consider HTML-style tags to be all strings that start with the “less-than” character (<) and end with the “greater-than” character (>). If a tag starts with ‘<’ but never finishes with ‘>’, print it all the way to the end of the input. Each tag should start on a new line. Input: The input is in free textual form. Output: The output consists of a list of tags in the order in which they appear in the text. Each tag should start on a new line. All content of a tag should be printed exactly as it appears in the input. If a tag is not finished, it should still be printed. The sample input and output below illustrate the behaviour of the program. Hint: The program should read the input character by character and immediately decide, based on the context, whether or not to produce output.
The output of this code mixes the plain text with the HTML tags. Please help me as soon as possible.
Sample Input: This is sample intermediate text tag> and another example character may appear outside of a tag. and also
#include <stdio.h>
#include <tidy.h>       /* older libtidy installs expose this as <tidy/tidy.h> */
#include <tidybuffio.h> /* older libtidy installs expose this as <tidy/buffio.h> */
#include <curl/curl.h>
#include #include #include /* curl write callback, to fill tidy\'s input buffer... */ uint write_cb(char *in, uint size, uint nmemb, TidyBuffer *out) { uint r; r = size * nmemb; tidyBufAppend(out, in, r); return r; } /* Traverse the document tree */ void dumpNode(TidyDoc doc, TidyNode tnod, int indent) { TidyNode child; for(child = tidyGetChild(tnod); child; child = tidyGetNext(child) ) { ctmbstr name = tidyNodeGetName(child); if(name) { /* if it has a name, then it\'s an HTML tag ... */ TidyAttr attr; printf(\"%*.*s%s \", indent, indent, \"<\", name); /* walk the attribute list */ for(attr=tidyAttrFirst(child); attr; attr=tidyAttrNext(attr) ) { printf(tidyAttrName(attr)); tidyAttrValue(attr)?printf(\"=\\\"%s\\\" \", tidyAttrValue(attr)):printf(\" \"); } printf(\">\ \"); } else { /* if it doesn\'t have a name, then it\'s probably text, cdata, etc... */ TidyBuffer buf; tidyBufInit(&buf); tidyNodeGetText(doc, child, &buf); printf(\"%*.*s\ \", indent, indent, buf.bp?(char *)buf.bp:\"\"); tidyBufFree(&buf); } dumpNode(doc, child, indent + 4); /* recursive */ } } int main(int argc, char **argv) { CURL *curl; char curl_errbuf[CURL_ERROR_SIZE]; TidyDoc tdoc; TidyBuffer docbuf = {0}; TidyBuffer tidy_errbuf = {0}; int err; if(argc == 2) { curl = curl_easy_init(); curl_easy_setopt(curl, CURLOPT_URL, argv[1]); curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L); curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_cb); tdoc = tidyCreate(); tidyOptSetBool(tdoc, TidyForceOutput, yes); /* try harder */ tidyOptSetInt(tdoc, TidyWrapLen, 4096); tidySetErrorBuffer(tdoc, &tidy_errbuf); tidyBufInit(&docbuf); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &docbuf); err=curl_easy_perform(curl); if(!err) { err = tidyParseBuffer(tdoc, &docbuf); /* parse the input */ if(err >= 0) { err = tidyCleanAndRepair(tdoc); /* fix any problems */ if(err >= 0) { err = tidyRunDiagnostics(tdoc); /* load tidy 
error buffer */ if(err >= 0) { dumpNode(tdoc, tidyGetRoot(tdoc), 0); /* walk the tree */ fprintf(stderr, \"%s\ \", tidy_errbuf.bp); /* show errors */ } } } } else fprintf(stderr, \"%s\ \", curl_errbuf); /* clean-up */ curl_easy_cleanup(curl); tidyBufFree(&docbuf); tidyBufFree(&tidy_errbuf); tidyRelease(tdoc); return err; } else printf(\"usage: %s \ \", argv[0]); return 0; }