Fri Apr 18 21:13:10 2008 UTC ()
libfetch-2.5:
Add basic index parsing support for HTTP, based on the ftpio.c code in
pkg_install. Thomas Klausner granted permission in private mail to use
the 3-clause BSD license for that code.


(joerg)
diff -r1.7 -r1.8 pkgsrc/net/libfetch/Makefile
diff -r1.13 -r1.14 pkgsrc/net/libfetch/files/http.c

cvs diff -r1.7 -r1.8 pkgsrc/net/libfetch/Makefile (expand / switch to unified diff)

--- pkgsrc/net/libfetch/Makefile 2008/04/16 01:01:50 1.7
+++ pkgsrc/net/libfetch/Makefile 2008/04/18 21:13:10 1.8
@@ -1,17 +1,17 @@ @@ -1,17 +1,17 @@
1# $NetBSD: Makefile,v 1.7 2008/04/16 01:01:50 joerg Exp $ 1# $NetBSD: Makefile,v 1.8 2008/04/18 21:13:10 joerg Exp $
2# 2#
3 3
4DISTNAME= libfetch-2.4 4DISTNAME= libfetch-2.5
5CATEGORIES= net 5CATEGORIES= net
6MASTER_SITES= # empty 6MASTER_SITES= # empty
7DISTFILES= # empty 7DISTFILES= # empty
8 8
9NO_CHECKSUM= yes 9NO_CHECKSUM= yes
10 10
11MAINTAINER= joerg@NetBSD.org 11MAINTAINER= joerg@NetBSD.org
12HOMEPAGE= http://www.FreeBSD.org/ 12HOMEPAGE= http://www.FreeBSD.org/
13COMMENT= Library to access HTTP/FTP server 13COMMENT= Library to access HTTP/FTP server
14#LICENSE= modified-bsd 14#LICENSE= modified-bsd
15 15
16PKG_DESTDIR_SUPPORT= user-destdir 16PKG_DESTDIR_SUPPORT= user-destdir
17 17

cvs diff -r1.13 -r1.14 pkgsrc/net/libfetch/files/http.c (expand / switch to unified diff)

--- pkgsrc/net/libfetch/files/http.c 2008/04/16 15:10:18 1.13
+++ pkgsrc/net/libfetch/files/http.c 2008/04/18 21:13:10 1.14
@@ -1,16 +1,18 @@ @@ -1,16 +1,18 @@
1/* $NetBSD: http.c,v 1.13 2008/04/16 15:10:18 joerg Exp $ */ 1/* $NetBSD: http.c,v 1.14 2008/04/18 21:13:10 joerg Exp $ */
2/*- 2/*-
3 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav 3 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
 4 * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
 5 * Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
4 * All rights reserved. 6 * All rights reserved.
5 * 7 *
6 * Redistribution and use in source and binary forms, with or without 8 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 9 * modification, are permitted provided that the following conditions
8 * are met: 10 * are met:
9 * 1. Redistributions of source code must retain the above copyright 11 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer 12 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged. 13 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright 14 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 15 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 16 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products 17 * 3. The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission. 18 * derived from this software without specific prior written permission.
@@ -1140,32 +1142,219 @@ fetchPutHTTP(struct url *URL, const char @@ -1140,32 +1142,219 @@ fetchPutHTTP(struct url *URL, const char
1140 */ 1142 */
1141int 1143int
1142fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags) 1144fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1143{ 1145{
1144 fetchIO *f; 1146 fetchIO *f;
1145 1147
1146 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags); 1148 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1147 if (f == NULL) 1149 if (f == NULL)
1148 return (-1); 1150 return (-1);
1149 fetchIO_close(f); 1151 fetchIO_close(f);
1150 return (0); 1152 return (0);
1151} 1153}
1152 1154
 1155enum http_states {
 1156 ST_NONE,
 1157 ST_LT,
 1158 ST_LTA,
 1159 ST_TAGA,
 1160 ST_H,
 1161 ST_R,
 1162 ST_E,
 1163 ST_F,
 1164 ST_HREF,
 1165 ST_HREFQ,
 1166 ST_TAG,
 1167 ST_TAGAX,
 1168 ST_TAGAQ
 1169};
 1170
 1171struct index_parser {
 1172 enum http_states state;
 1173 struct url_ent *ue;
 1174 int list_size, list_len;
 1175};
 1176
 1177static size_t
 1178parse_index(struct index_parser *parser, const char *buf, size_t len)
 1179{
 1180 char *end_attr, p = *buf;
 1181
 1182 switch (parser->state) {
 1183 case ST_NONE:
 1184 /* Plain text, not in markup */
 1185 if (p == '<')
 1186 parser->state = ST_LT;
 1187 return 1;
 1188 case ST_LT:
 1189 /* In tag -- "<" already found */
 1190 if (p == '>')
 1191 parser->state = ST_NONE;
 1192 else if (p == 'a' || p == 'A')
 1193 parser->state = ST_LTA;
 1194 else if (!isspace((unsigned char)p))
 1195 parser->state = ST_TAG;
 1196 return 1;
 1197 case ST_LTA:
 1198 /* In tag -- "<a" already found */
 1199 if (p == '>')
 1200 parser->state = ST_NONE;
 1201 else if (p == '"')
 1202 parser->state = ST_TAGAQ;
 1203 else if (isspace((unsigned char)p))
 1204 parser->state = ST_TAGA;
 1205 else
 1206 parser->state = ST_TAG;
 1207 return 1;
 1208 case ST_TAG:
 1209 /* In tag, but not "<a" -- disregard */
 1210 if (p == '>')
 1211 parser->state = ST_NONE;
 1212 return 1;
 1213 case ST_TAGA:
 1214 /* In a-tag -- "<a " already found */
 1215 if (p == '>')
 1216 parser->state = ST_NONE;
 1217 else if (p == '"')
 1218 parser->state = ST_TAGAQ;
 1219 else if (p == 'h' || p == 'H')
 1220 parser->state = ST_H;
 1221 else if (!isspace((unsigned char)p))
 1222 parser->state = ST_TAGAX;
 1223 return 1;
 1224 case ST_TAGAX:
 1225 /* In unknown keyword in a-tag */
 1226 if (p == '>')
 1227 parser->state = ST_NONE;
 1228 else if (p == '"')
 1229 parser->state = ST_TAGAQ;
 1230 else if (isspace((unsigned char)p))
 1231 parser->state = ST_TAGA;
 1232 return 1;
 1233 case ST_TAGAQ:
 1234 /* In a-tag, unknown argument for keys. */
 1235 if (p == '>')
 1236 parser->state = ST_NONE;
 1237 else if (p == '"')
 1238 parser->state = ST_TAGA;
 1239 return 1;
 1240 case ST_H:
 1241 /* In a-tag -- "<a h" already found */
 1242 if (p == '>')
 1243 parser->state = ST_NONE;
 1244 else if (p == '"')
 1245 parser->state = ST_TAGAQ;
 1246 else if (p == 'r' || p == 'R')
 1247 parser->state = ST_R;
 1248 else if (isspace((unsigned char)p))
 1249 parser->state = ST_TAGA;
 1250 else
 1251 parser->state = ST_TAGAX;
 1252 return 1;
 1253 case ST_R:
 1254 /* In a-tag -- "<a hr" already found */
 1255 if (p == '>')
 1256 parser->state = ST_NONE;
 1257 else if (p == '"')
 1258 parser->state = ST_TAGAQ;
 1259 else if (p == 'e' || p == 'E')
 1260 parser->state = ST_E;
 1261 else if (isspace((unsigned char)p))
 1262 parser->state = ST_TAGA;
 1263 else
 1264 parser->state = ST_TAGAX;
 1265 return 1;
 1266 case ST_E:
 1267 /* In a-tag -- "<a hre" already found */
 1268 if (p == '>')
 1269 parser->state = ST_NONE;
 1270 else if (p == '"')
 1271 parser->state = ST_TAGAQ;
 1272 else if (p == 'f' || p == 'F')
 1273 parser->state = ST_F;
 1274 else if (isspace((unsigned char)p))
 1275 parser->state = ST_TAGA;
 1276 else
 1277 parser->state = ST_TAGAX;
 1278 return 1;
 1279 case ST_F:
 1280 /* In a-tag -- "<a href" already found */
 1281 if (p == '>')
 1282 parser->state = ST_NONE;
 1283 else if (p == '"')
 1284 parser->state = ST_TAGAQ;
 1285 else if (p == '=')
 1286 parser->state = ST_HREF;
 1287 else if (!isspace((unsigned char)p))
 1288 parser->state = ST_TAGAX;
 1289 return 1;
 1290 case ST_HREF:
 1291 /* In a-tag -- "<a href=" already found */
 1292 if (p == '>')
 1293 parser->state = ST_NONE;
 1294 else if (p == '"')
 1295 parser->state = ST_HREFQ;
 1296 else if (!isspace((unsigned char)p))
 1297 parser->state = ST_TAGA;
 1298 return 1;
 1299 case ST_HREFQ:
 1300 /* In href of the a-tag */
 1301 end_attr = memchr(buf, '"', len);
 1302 if (end_attr == NULL)
 1303 return 0;
 1304 *end_attr = '\0';
 1305 parser->state = ST_TAGA;
 1306 fetch_add_entry(&parser->ue, &parser->list_size, &parser->list_len, buf, NULL);
 1307 return end_attr + 1 - buf;
 1308 }
 1309 abort();
 1310}
 1311
1153/* 1312/*
1154 * List a directory 1313 * List a directory
1155 */ 1314 */
1156struct url_ent * 1315struct url_ent *
1157fetchFilteredListHTTP(struct url *url, const char *pattern, const char *flags) 1316fetchFilteredListHTTP(struct url *url, const char *pattern, const char *flags)
1158{ 1317{
1159 fprintf(stderr, "fetchFilteredListHTTP(): not implemented\n"); 1318 fetchIO *f;
1160 return (NULL); 1319 char buf[2 * PATH_MAX];
 1320 size_t buf_len, processed, sum_processed;
 1321 ssize_t read_len;
 1322 struct index_parser state;
 1323
 1324 state.state = ST_NONE;
 1325 state.ue = NULL;
 1326 state.list_size = state.list_len = 0;
 1327
 1328 f = fetchGetHTTP(url, flags);
 1329 if (f == NULL)
 1330 return NULL;
 1331
 1332 buf_len = 0;
 1333
 1334 while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
 1335 buf_len += read_len;
 1336 sum_processed = 0;
 1337 do {
 1338 processed = parse_index(&state, buf + sum_processed, buf_len);
 1339 buf_len -= processed;
 1340 sum_processed += processed;
 1341 } while (processed != 0 && buf_len > 0);
 1342 memmove(buf, buf + sum_processed, buf_len);
 1343 }
 1344
 1345 fetchIO_close(f);
 1346 if (read_len < 0) {
 1347 free(state.ue);
 1348 state.ue = NULL;
 1349 }
 1350 return state.ue;
1161} 1351}
1162 1352
/*
 * List a directory
 *
 * Unfiltered variant: forwards to fetchFilteredList() with the pattern "*"
 * and returns its result unchanged.
 */
struct url_ent *
fetchListHTTP(struct url *url, const char *flags)
{
	struct url_ent *entries;

	entries = fetchFilteredList(url, "*", flags);
	return entries;
}